diff --git a/.gitignore b/.gitignore index be75938ec401b1d72fa54773c85191aaac7d7f35..828bbe9bd3363853ae3f58f54a8d5f60cefad837 100644 --- a/.gitignore +++ b/.gitignore @@ -27,6 +27,7 @@ Podfile.lock /tensorflow/contrib/lite/examples/ios/simple/data/*.txt /tensorflow/contrib/lite/examples/ios/simple/data/*.tflite xcuserdata/** +/api_init_files_list.txt # Android .gradle diff --git a/CODEOWNERS b/CODEOWNERS index 007a304c3e706ce968576ec8979c08f1a3bcc552..b9f0313cc6d59d3fbdcd014e1a528126d863075a 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -45,7 +45,7 @@ # /tensorflow/contrib/session_bundle/ @nfiedel @sukritiramesh # /tensorflow/contrib/slim/ @sguada @thenbasilmanran # /tensorflow/contrib/stateless/ @girving -# /tensorflow/contrib/tensor_forest/ @gilberthendry @thomascolthurst +# /tensorflow/contrib/tensor_forest/ @gilberthendry @thomascolthurst @yupbank # /tensorflow/contrib/testing/ @dandelionmane # /tensorflow/contrib/timeseries/ @allenlavoie # /tensorflow/contrib/tpu/ @frankchn @saeta @jhseu diff --git a/README.md b/README.md index c66f7e3f3f49ed90e4e75475185585a932049f37..e1a50c87e26d493ba3ac760f357905d89aa40dab 100644 --- a/README.md +++ b/README.md @@ -7,14 +7,14 @@ | **`Documentation`** | **`Linux CPU`** | **`Linux GPU`** | **`Mac OS CPU`** | **`Windows CPU`** | **`Android`** | |-----------------|---------------------|------------------|-------------------|---------------|---------------| -| [![Documentation](https://img.shields.io/badge/api-reference-blue.svg)](https://www.tensorflow.org/api_docs/) | [![Build Status](https://ci.tensorflow.org/buildStatus/icon?job=tensorflow-master-cpu)](https://ci.tensorflow.org/job/tensorflow-master-cpu) | [![Build Status](https://ci.tensorflow.org/buildStatus/icon?job=tensorflow-master-linux-gpu)](https://ci.tensorflow.org/job/tensorflow-master-linux-gpu) | [![Build Status](https://ci.tensorflow.org/buildStatus/icon?job=tensorflow-master-mac)](https://ci.tensorflow.org/job/tensorflow-master-mac) | [![Build 
Status](https://ci.tensorflow.org/buildStatus/icon?job=tensorflow-master-win-cmake-py)](https://ci.tensorflow.org/job/tensorflow-master-win-cmake-py) | [![Build Status](https://ci.tensorflow.org/buildStatus/icon?job=tensorflow-master-android)](https://ci.tensorflow.org/job/tensorflow-master-android) [ ![Download](https://api.bintray.com/packages/google/tensorflow/tensorflow/images/download.svg) ](https://bintray.com/google/tensorflow/tensorflow/_latestVersion) +| [![Documentation](https://img.shields.io/badge/api-reference-blue.svg)](https://www.tensorflow.org/api_docs/) | ![Build Status](https://storage.googleapis.com/tensorflow-kokoro-build-badges/ubuntu-cc.png) | ![Build Status](https://storage.googleapis.com/tensorflow-kokoro-build-badges/ubuntu-gpu-cc.png) | ![Build Status](https://storage.googleapis.com/tensorflow-kokoro-build-badges/macos-py2-cc.png) | [![Build Status](https://ci.tensorflow.org/buildStatus/icon?job=tensorflow-master-win-cmake-py)](https://ci.tensorflow.org/job/tensorflow-master-win-cmake-py) | [![Build Status](https://ci.tensorflow.org/buildStatus/icon?job=tensorflow-master-android)](https://ci.tensorflow.org/job/tensorflow-master-android) [ ![Download](https://api.bintray.com/packages/google/tensorflow/tensorflow/images/download.svg) ](https://bintray.com/google/tensorflow/tensorflow/_latestVersion) **TensorFlow** is an open source software library for numerical computation using data flow graphs. The graph nodes represent mathematical operations, while the graph edges represent the multidimensional data arrays (tensors) that flow between them. This flexible architecture enables you to deploy computation to one or more CPUs or GPUs in a desktop, server, or mobile device without rewriting -code. TensorFlow also includes TensorBoard, a data visualization toolkit. +code. TensorFlow also includes [TensorBoard](https://www.tensorflow.org/programmers_guide/summaries_and_tensorboard), a data visualization toolkit. 
TensorFlow was originally developed by researchers and engineers working on the Google Brain team within Google's Machine Intelligence Research diff --git a/RELEASE.md b/RELEASE.md index c63d9f20c9a842ceed97afc25690073d082c42cb..2717c75740aeea7821fb6c57dfc85908e86e9d51 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -1,3 +1,61 @@ +# Release 1.8.0 + +## Major Features And Improvements +* Can now pass `tf.contrib.distribute.MirroredStrategy()` to `tf.estimator.RunConfig()` to run an Estimator model on multiple GPUs on one machine. +* Add `tf.contrib.data.prefetch_to_device()`, which supports prefetching to GPU memory. +* Added Gradient Boosted Trees as pre-made Estimators: BoostedTreesClassifier, BoostedTreesRegressor. +* Add 3rd generation pipeline config for Cloud TPUs which improves performance and usability. +* `tf.contrib.bayesflow` is moving out to its own repo. +* Added `tf.contrib.{proto,rpc}` to allow generic proto parsing and RPC communication. + +## Bug Fixes and Other Changes +* `tf.data`: + * Add `tf.contrib.data.prefetch_to_device`, which enables prefetching dataset elements to GPU memory. + * Add `tf.contrib.data.AUTOTUNE`, which allows the tf.data runtime to automatically tune the prefetch buffer sizes based on your system and environment. + * Add `tf.contrib.data.make_csv_dataset` for building datasets of CSV files. +* Eager Execution: + * With eager execution Datasets can now be used as standard python iterators (`for batch in dataset:`). Both `Dataset.__iter__()` and `Dataset.make_one_shot_iterator()` can now be used to create iterators when eager execution is enabled. + * Automatic device placement has been enabled (i.e., use a GPU if available automatically, without requiring an explicit `with tf.device("/gpu:0")`) (Fixes #14133) + * `tf.GradientTape` has moved out of contrib. +* `tf.keras`: + * Added the fashion mnist dataset. + * New data preprocessing functions: `image/random_brightness`, `sequence/TimeseriesGenerator`, and `text/hashing_trick`. 
+* Accelerated Linear Algebra (XLA): + * Select and scatter in reference util and evaluator now use lexicographical order to break ties. +* TensorFlow Debugger (tfdbg) CLI: + * During tensor-filter operations, allow exclusion of nodes by regular expressions. + * Fix spurious background colors in some text terminals. +* `tf.contrib`: + * Add meta-distribution BatchReshape which reshapes batch dimensions. + * `tf.contrib.layers.recompute_grad` works for explicit gradient checkpointing on TPU. + * Add `tf.contrib.framework.argsort`. + * Allow `DNNBoostedTreeCombinedEstimator` to work with core versions of feature columns and losses. + * Add non-linear image warping ops: `tf.contrib.image.sparse_image_warp`, `tf.contrib.image.dense_image_warp`, and `tf.contrib.image.interpolate_spline`. + * Fix bug in `tf.contrib.opt.MultitaskOptimizerWrapper` where types of tensors were mismatched. +* Other: + * Low-level graph construction now calls the TensorFlow C API. This change should be invisible to most users, but can be disabled by setting the environment variable `TF_C_API_GRAPH_CONSTRUCTION=0` in this release. Future releases will remove the ability to disable this change. Please [file a bug](https://github.com/tensorflow/tensorflow/issues/new) if you find yourself using this escape hatch. + * Add description of shapes and a pointer to tutorial notebook in `tf.distributions.Distribution`. + * Update scatter operations: + * Add `tf.scatter_min` and `tf.scatter_max` + * Extend scatter operations to work with a scalar update parameter. + * Move cuDNN RNN ops to core for use in TensorFlow codebase only. + * Add `float64` support for `Conv2d`, `Conv2dBackpropInput`, and `Conv2dBackpropFilter`. + * Add `float64` support for `AvgPool`/`AvgPoolGrad`. + * Make graph name scope thread local so that they work correctly in multi-threaded environments. + * Update nsync synchronization library to avoid slow primitives on Linux. 
+ * Removed need to put nsync/public on C include path when building custom ops. + * Add `tf.image.psnr`, `tf.image.ssim`, `tf.image.ssim_multiscale`, `tf.image.image_gradients`, `tf.image.sobel_edges`. + * Add links to https://js.tensorflow.org. + * Fix non-uniformity of orthogonal matrices. + * Fix bug where multi-image Estimator eval summaries were not displayed correctly. + +## Thanks to our Contributors + +This release contains contributions from many people at Google, as well as: + +4d55397500, Aghasy, Alan Du, Alan Lee, Alan Yee, Alex Wiltschko, Animesh Karnewar, Ankit Gupta, Anton Matosov, Aris L, Ben Barsdell, Brent Yi, Brett Koonce, Carl Thomé, cbockman, Chikanaga Tomoyuki, Chris Tava, CéDric Deltheil, Dahan Gong, Dalmo Cirne, Daniel Erenrich, David Norman, DavidNorman, Edd Wilder-James, Fanjin Zeng, Felix Abecassis, fo40225, George Sterpu, Giovanni Terlingen, Gor Baghdasaryan, Guillaume Klein, Hanchen Li, Ilya Polenov, Jakub Kolodziejczyk, Jason Sadler, Jayaram Bobba, Jerry Liu, jinghuangintel, Jiongyan Zhang (张炯衍), Joel Shor, Jong Wook Kim, Julian Eisenschlos, Karl Lessard, Krish Ravindranath, Loo Rong Jie, Lukas Geiger, Luke Iwanski, Mahmoud Abuzaina, ManHyuk, Marvin Richter, Maximilian Mitchell, Mohammad Ashraf Bhuiyan, msofka, Mustafa Kasap, Nathan Burnham, Nathan Luehr, Naveen Marri, ngc92, nio1814, Oleg Zabluda, Ou Changkun, Panos Ipeirotis, Paul Van Eck, Peter Lee, Piotr Czapla, qjivy, Rholais Lii, Rodrigo Formigone, Russell Klopfer, ryantimjohn, Sang Han, SebastiáN RamíRez, shengfuintel, Siby Jose Plathottam, Silver Chan, Stanislaw Antol, Taehoon Lee, Tarang Chugh, Ted Chang, Thomas Bastiani, Xian Xu, Xiaoming (Jason) Cui, Yan Facai (颜发才), yaox12, Yashal Shakti Kanungo, Yong Tang, Yuan (Terry) Tang, Yuxin Wu, Ziyue(Louis) Lu + + # Release 1.7.0 ## Major Features And Improvements @@ -9,6 +67,8 @@ * Distributed Mutex / CriticalSection added to `tf.contrib.framework.CriticalSection`. * Better text processing with `tf.regex_replace`. 
* Easy, efficient sequence input with `tf.contrib.data.bucket_by_sequence_length` +* Initial support for `tf.contrib.tensorrt` that enables native TensorRT in + TensorFlow. ## Bug Fixes and Other Changes * Accelerated Linear Algebra (XLA): @@ -50,6 +110,15 @@ * Support `float16` `dtype` in `tf.linalg.*`. * Add `tf.estimator.export.TensorServingInputReceiver` that allows `tf.estimator.Estimator.export_savedmodel` to pass raw tensors to model functions. +## Deprecations + +* TensorFlow 1.7 may be the last time we support Cuda versions below 8.0. + Starting with TensorFlow 1.8 release, 8.0 will be the minimum supported + version. +* TensorFlow 1.7 may be the last time we support cuDNN versions below 6.0. + Starting with TensorFlow 1.8 release, 6.0 will be the minimum supported + version. + ## Thanks to our Contributors This release contains contributions from many people at Google, as well as: diff --git a/WORKSPACE b/WORKSPACE index 11c5cdb2070e79b16540a39f13cab28608962340..4ddfb9a3832ea1ea639ace887e1d601bdd857086 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -2,11 +2,11 @@ workspace(name = "org_tensorflow") http_archive( name = "io_bazel_rules_closure", - sha256 = "6691c58a2cd30a86776dd9bb34898b041e37136f2dc7e24cadaeaf599c95c657", - strip_prefix = "rules_closure-08039ba8ca59f64248bb3b6ae016460fe9c9914f", + sha256 = "a38539c5b5c358548e75b44141b4ab637bba7c4dc02b46b1f62a96d6433f56ae", + strip_prefix = "rules_closure-dbb96841cc0a5fb2664c37822803b06dab20c7d1", urls = [ - "https://mirror.bazel.build/github.com/bazelbuild/rules_closure/archive/08039ba8ca59f64248bb3b6ae016460fe9c9914f.tar.gz", - "https://github.com/bazelbuild/rules_closure/archive/08039ba8ca59f64248bb3b6ae016460fe9c9914f.tar.gz", # 2018-01-16 + "https://mirror.bazel.build/github.com/bazelbuild/rules_closure/archive/dbb96841cc0a5fb2664c37822803b06dab20c7d1.tar.gz", + "https://github.com/bazelbuild/rules_closure/archive/dbb96841cc0a5fb2664c37822803b06dab20c7d1.tar.gz", # 2018-04-13 ], ) diff --git a/configure.py 
b/configure.py index 6744082d5d55c3a039b7a4efa7a539e77185cabd..b745e374a2baaffec73f9f9382e1bab322e7f0fd 100644 --- a/configure.py +++ b/configure.py @@ -35,6 +35,7 @@ except ImportError: _DEFAULT_CUDA_VERSION = '9.0' _DEFAULT_CUDNN_VERSION = '7' +_DEFAULT_NCCL_VERSION = '1.3' _DEFAULT_CUDA_COMPUTE_CAPABILITIES = '3.5,5.2' _DEFAULT_CUDA_PATH = '/usr/local/cuda' _DEFAULT_CUDA_PATH_LINUX = '/opt/cuda' @@ -225,8 +226,6 @@ def setup_python(environ_cp): # Set-up env variables used by python_configure.bzl write_action_env_to_bazelrc('PYTHON_BIN_PATH', python_bin_path) write_action_env_to_bazelrc('PYTHON_LIB_PATH', python_lib_path) - write_to_bazelrc('build --force_python=py%s' % python_major_version) - write_to_bazelrc('build --host_force_python=py%s' % python_major_version) write_to_bazelrc('build --python_path=\"%s"' % python_bin_path) environ_cp['PYTHON_BIN_PATH'] = python_bin_path @@ -484,6 +483,8 @@ def set_cc_opt_flags(environ_cp): if is_ppc64le(): # gcc on ppc64le does not support -march, use mcpu instead default_cc_opt_flags = '-mcpu=native' + elif is_windows(): + default_cc_opt_flags = '/arch:AVX' else: default_cc_opt_flags = '-march=native' question = ('Please specify optimization flags to use during compilation when' @@ -494,7 +495,7 @@ def set_cc_opt_flags(environ_cp): for opt in cc_opt_flags.split(): write_to_bazelrc('build:opt --copt=%s' % opt) # It should be safe on the same build host. - if not is_ppc64le(): + if not is_ppc64le() and not is_windows(): write_to_bazelrc('build:opt --host_copt=-march=native') write_to_bazelrc('build:opt --define with_default_optimizations=true') # TODO(mikecase): Remove these default defines once we are able to get @@ -1102,6 +1103,81 @@ def set_tf_tensorrt_install_path(environ_cp): write_action_env_to_bazelrc('TF_TENSORRT_VERSION', tf_tensorrt_version) +def set_tf_nccl_install_path(environ_cp): + """Set NCCL_INSTALL_PATH and TF_NCCL_VERSION. + + Args: + environ_cp: copy of the os.environ. 
+ + Raises: + ValueError: if this method was called on a non-Linux platform. + UserInputError: if user has provided invalid input multiple times. + """ + if not is_linux(): + raise ValueError('Currently NCCL is only supported on Linux platforms.') + + ask_nccl_version = ( + 'Please specify the NCCL version you want to use. ' + '[Leave empty to default to NCCL %s]: ') % _DEFAULT_NCCL_VERSION + + for _ in range(_DEFAULT_PROMPT_ASK_ATTEMPTS): + tf_nccl_version = get_from_env_or_user_or_default( + environ_cp, 'TF_NCCL_VERSION', ask_nccl_version, _DEFAULT_NCCL_VERSION) + tf_nccl_version = reformat_version_sequence(str(tf_nccl_version), 1) + + if tf_nccl_version == '1': + break # No need to get install path, NCCL 1 is a GitHub repo. + + # TODO(csigg): Look with ldconfig first if we can find the library in paths + # like /usr/lib/x86_64-linux-gnu and the header file in the corresponding + # include directory. This is where the NCCL .deb packages install them. + # Then ask the user if we should use that. Instead of a single + # NCCL_INSTALL_PATH, pass separate NCCL_LIB_PATH and NCCL_HDR_PATH to + # nccl_configure.bzl + default_nccl_path = environ_cp.get('CUDA_TOOLKIT_PATH') + ask_nccl_path = (r'Please specify the location where NCCL %s library is ' + 'installed. Refer to README.md for more details. [Default ' + 'is %s]:') % (tf_nccl_version, default_nccl_path) + nccl_install_path = get_from_env_or_user_or_default( + environ_cp, 'NCCL_INSTALL_PATH', ask_nccl_path, default_nccl_path) + + # Result returned from "read" will be used unexpanded. That makes "~" + # unusable. Going through one more level of expansion to handle that. 
+ nccl_install_path = os.path.realpath(os.path.expanduser(nccl_install_path)) + if is_windows() or is_cygwin(): + nccl_install_path = cygpath(nccl_install_path) + + if is_windows(): + nccl_lib_path = 'lib/x64/nccl.lib' + elif is_linux(): + nccl_lib_path = 'lib/libnccl.so.%s' % tf_nccl_version + elif is_macos(): + nccl_lib_path = 'lib/libnccl.%s.dylib' % tf_nccl_version + + nccl_lib_path = os.path.join(nccl_install_path, nccl_lib_path) + nccl_hdr_path = os.path.join(nccl_install_path, 'include/nccl.h') + if os.path.exists(nccl_lib_path) and os.path.exists(nccl_hdr_path): + # Set NCCL_INSTALL_PATH + environ_cp['NCCL_INSTALL_PATH'] = nccl_install_path + write_action_env_to_bazelrc('NCCL_INSTALL_PATH', nccl_install_path) + break + + # Reset and Retry + print('Invalid path to NCCL %s toolkit, %s or %s not found. Please use the ' + 'O/S agnostic package of NCCL 2' % (tf_nccl_version, nccl_lib_path, + nccl_hdr_path)) + + environ_cp['TF_NCCL_VERSION'] = '' + else: + raise UserInputError('Invalid TF_NCCL setting was provided %d ' + 'times in a row. Assuming to be a scripting mistake.' % + _DEFAULT_PROMPT_ASK_ATTEMPTS) + + # Set TF_NCCL_VERSION + environ_cp['TF_NCCL_VERSION'] = tf_nccl_version + write_action_env_to_bazelrc('TF_NCCL_VERSION', tf_nccl_version) + + def get_native_cuda_compute_capabilities(environ_cp): """Get native cuda compute capabilities. 
@@ -1438,6 +1514,8 @@ def main(): set_tf_cudnn_version(environ_cp) if is_linux(): set_tf_tensorrt_install_path(environ_cp) + set_tf_nccl_install_path(environ_cp) + set_tf_cuda_compute_capabilities(environ_cp) if 'LD_LIBRARY_PATH' in environ_cp and environ_cp.get( 'LD_LIBRARY_PATH') != '1': diff --git a/tensorflow/BUILD b/tensorflow/BUILD index 823393ebdf1f4b658361f31963a275a683e61002..f2ad16fa04f5beb6616c58c28d0f0c460c3e3a17 100644 --- a/tensorflow/BUILD +++ b/tensorflow/BUILD @@ -401,25 +401,6 @@ package_group( ], ) -py_library( - name = "tensorflow_py", - srcs = ["__init__.py"], - srcs_version = "PY2AND3", - visibility = ["//visibility:public"], - deps = ["//tensorflow/python"], -) - -py_library( - name = "experimental_tensorflow_py", - srcs = ["experimental_api.py"], - srcs_version = "PY2AND3", - visibility = ["//tensorflow/tools/api/tests:__subpackages__"], - deps = [ - "//tensorflow/python", - "//tensorflow/tools/api/generator:python_api", - ], -) - load( "//third_party/mkl:build_defs.bzl", "if_mkl", @@ -469,11 +450,12 @@ tf_cc_shared_object( linkstatic = 1, visibility = ["//visibility:public"], deps = [ + "//tensorflow/core:core_cpu_impl", "//tensorflow/core:framework_internal_impl", + "//tensorflow/core:gpu_runtime_impl", + "//tensorflow/core/grappler/optimizers:custom_graph_optimizer_registry_impl", "//tensorflow/core:lib_internal_impl", - "//tensorflow/core:core_cpu_impl", "//tensorflow/stream_executor:stream_executor_impl", - "//tensorflow/core:gpu_runtime_impl", ] + tf_additional_binary_deps(), ) @@ -553,3 +535,14 @@ exports_files( "tf_exported_symbols.lds", ], ) + +py_library( + name = "tensorflow_py", + srcs = ["__init__.py"], + srcs_version = "PY2AND3", + visibility = ["//visibility:public"], + deps = [ + "//tensorflow/python", + "//tensorflow/tools/api/generator:python_api", + ], +) diff --git a/tensorflow/__init__.py b/tensorflow/__init__.py index 78ad6aec19f3bbbfcb389012ac1577573b3e4901..c8683e3976c90add3f1f54d8e575c798327e9273 100644 --- 
a/tensorflow/__init__.py +++ b/tensorflow/__init__.py @@ -20,14 +20,19 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +# pylint: disable=g-bad-import-order +from tensorflow.python import pywrap_tensorflow # pylint: disable=unused-import # pylint: disable=wildcard-import -from tensorflow.python import * # pylint: disable=redefined-builtin +from tensorflow.tools.api.generator.api import * # pylint: disable=redefined-builtin # pylint: enable=wildcard-import from tensorflow.python.util.lazy_loader import LazyLoader contrib = LazyLoader('contrib', globals(), 'tensorflow.contrib') del LazyLoader +from tensorflow.python.platform import flags # pylint: disable=g-import-not-at-top +app.flags = flags # pylint: disable=undefined-variable + del absolute_import del division del print_function diff --git a/tensorflow/c/BUILD b/tensorflow/c/BUILD index 2367014cd02c721ea96581919c3efc96e772d9a6..8a9301d584775cff3ae315e6fd856b00d1734248 100644 --- a/tensorflow/c/BUILD +++ b/tensorflow/c/BUILD @@ -122,6 +122,7 @@ tf_cuda_library( "//tensorflow/core:core_cpu", "//tensorflow/core:framework", "//tensorflow/core:lib", + "//tensorflow/core:lib_platform", "//tensorflow/core:protos_all_cc", ], ) diff --git a/tensorflow/c/c_api.h b/tensorflow/c/c_api.h index b32f574628c4d1dc5c3bb3f1265a1b12adee28bc..c8594347451dffd465d7fa926cc53818dc9e38d4 100644 --- a/tensorflow/c/c_api.h +++ b/tensorflow/c/c_api.h @@ -72,7 +72,7 @@ limitations under the License. #ifdef SWIG #define TF_CAPI_EXPORT #else -#if defined(COMPILER_MSVC) +#if defined(_WIN32) #ifdef TF_COMPILE_LIBRARY #define TF_CAPI_EXPORT __declspec(dllexport) #else @@ -80,7 +80,7 @@ limitations under the License. 
#endif // TF_COMPILE_LIBRARY #else #define TF_CAPI_EXPORT __attribute__((visibility("default"))) -#endif // COMPILER_MSVC +#endif // _WIN32 #endif // SWIG #ifdef __cplusplus @@ -1496,7 +1496,8 @@ TF_CAPI_EXPORT extern int TF_DeviceListCount(const TF_DeviceList* list); // If index is out of bounds, an error code will be set in the status object, // and a null pointer will be returned. TF_CAPI_EXPORT extern const char* TF_DeviceListName(const TF_DeviceList* list, - int index, TF_Status*); + int index, + TF_Status* status); // Retrieves the type of the device at the given index. // @@ -1506,14 +1507,15 @@ TF_CAPI_EXPORT extern const char* TF_DeviceListName(const TF_DeviceList* list, // If index is out of bounds, an error code will be set in the status object, // and a null pointer will be returned. TF_CAPI_EXPORT extern const char* TF_DeviceListType(const TF_DeviceList* list, - int index, TF_Status*); + int index, + TF_Status* status); // Retrieve the amount of memory associated with a given device. // // If index is out of bounds, an error code will be set in the status object, // and -1 will be returned. TF_CAPI_EXPORT extern int64_t TF_DeviceListMemoryBytes( - const TF_DeviceList* list, int index, TF_Status*); + const TF_DeviceList* list, int index, TF_Status* status); // -------------------------------------------------------------------------- // Load plugins containing custom ops and kernels diff --git a/tensorflow/c/c_api_experimental.cc b/tensorflow/c/c_api_experimental.cc index bea93785717e2161fcec941485ac3c3f7f3e3ed5..d3916bc16778a942b7eab4df93bbc19955b19e31 100644 --- a/tensorflow/c/c_api_experimental.cc +++ b/tensorflow/c/c_api_experimental.cc @@ -20,6 +20,7 @@ limitations under the License. 
#include "tensorflow/core/graph/graph.h" #include "tensorflow/core/graph/node_builder.h" #include "tensorflow/core/lib/strings/strcat.h" +#include "tensorflow/core/platform/platform.h" #include "tensorflow/core/protobuf/config.pb.h" using tensorflow::FunctionDef; @@ -56,57 +57,6 @@ void TF_EnableXLACompilation(TF_SessionOptions* options, unsigned char enable) { } } -void TF_InitializeTPU(TF_Session* session, TF_Status* status) { - VLOG(1) << "Initializing TPU"; - TF_Operation* config_op = - TF_GraphOperationByName(session->graph, "ConfigureDistributedTPU"); - if (config_op == nullptr) { - status->status = tensorflow::errors::Internal( - "Unable to find node ConfigureDistributedTPU in the TF graph."); - return; - } - - TF_Output config_node{config_op, 0}; - - TF_Tensor* dummy_output; - TF_SessionRun(session, /*run_options*/ nullptr, - // input related parameters - /*inputs*/ nullptr, /*input_values*/ nullptr, /*ninputs*/ 0, - // output related parameters - /*outputs*/ &config_node, /*output_values*/ &dummy_output, - /*noutputs*/ 1, - /*targets*/ nullptr, /*ntargets*/ 0, - /*run_metadata*/ nullptr, status); - if (status->status.ok()) { - TF_DeleteTensor(dummy_output); - } -} - -void TF_ShutdownTPU(TF_Session* session, TF_Status* status) { - { - tensorflow::mutex_lock c(session->graph->mu); - VLOG(1) << "Shutting down TPU, with input graph: " - << session->graph->graph.ToGraphDefDebug().DebugString(); - } - - TF_Operation* shutdown_op = - TF_GraphOperationByName(session->graph, "ShutdownDistributedTPU"); - if (shutdown_op == nullptr) { - status->status = tensorflow::errors::Internal( - "Unable to find node ShutdownDistributedTPU in the TF graph."); - return; - } - - TF_SessionRun(session, /*run_options*/ nullptr, - // input related parameters - /*inputs*/ nullptr, /*input_values*/ nullptr, /*ninputs*/ 0, - // output related parameters - /*outputs*/ nullptr, /*output_values*/ nullptr, - /*noutputs*/ 0, - /*targets*/ &shutdown_op, /*ntargets*/ 1, - /*run_metadata*/ 
nullptr, status); -} - const char* TF_GraphDebugString(TF_Graph* graph, size_t* len) { tensorflow::mutex_lock c(graph->mu); const auto& debug_str = graph->graph.ToGraphDefDebug().DebugString(); @@ -234,12 +184,19 @@ library { return std::move(functions[0]); } +#if not defined(PLATFORM_WINDOWS) // On success, returns a set of TF_Function instances encoding a dataset // node stack that reads a Imagenet TFRecordFile dataset from `file_path`, and // sets `dataset_name` to the created dataset name. The returned functions must // be deleted by calling TF_DeleteFunction. static std::vector CreateImagenetDatasetFunctions( const char* file_path, std::string* dataset_name, TF_Status* status) { +#if defined(PLATFORM_WINDOWS) + status->status = tensorflow::errors::Unimplemented( + "TF_MakeFileBasedIteratorGetNextWithDatasets in the experimental C API " + "is not implemented for Windows"); + return std::vector(); +#else const char* func_def = R"PREFIX( library { function { @@ -7118,8 +7075,11 @@ library { DCHECK(found); }; return CreateFunctionsFromTextProto(func_def, &mutate_proto_func, status); +#endif } +#endif +#if not defined(PLATFORM_WINDOWS) // On success, returns a set of TF_Function instances encoding a dataset // node stack that reads an MNIST file dataset from `file_path`, and // sets `dataset_name` to the created dataset name. 
The returned functions must @@ -7127,6 +7087,12 @@ library { static std::vector CreateMNISTDatasetFunctions( const char* file_path, int batch_size, std::string* dataset_name, TF_Status* status) { +#if defined(PLATFORM_WINDOWS) + status->status = tensorflow::errors::Unimplemented( + "TF_MakeFileBasedIteratorGetNextWithDatasets in the experimental C API " + "is not implemented for Windows"); + return nullptr; +#else const char* func_def = R"PREFIX( library { function { @@ -8256,7 +8222,9 @@ library { DCHECK(found_batch_size); }; return CreateFunctionsFromTextProto(func_def, &mutate_proto_func, status); +#endif } +#endif // Adds the input functions to `graph`. On success, returns the created // IteratorGetNext node. @@ -8350,6 +8318,13 @@ TF_Operation* TF_MakeFakeIteratorGetNextWithDatasets(TF_Graph* graph, TF_Operation* TF_MakeFileBasedIteratorGetNextWithDatasets( TF_Graph* graph, const char* file_path, int batch_size, unsigned char is_mnist, TF_Status* status) { +#if defined(PLATFORM_WINDOWS) + // TODO(ashankar): get these functions working on Windows. + status->status = tensorflow::errors::Unimplemented( + "TF_MakeFileBasedIteratorGetNextWithDatasets in the experimental C API " + "is not implemented for Windows"); + return nullptr; +#else tensorflow::Status s; std::string dataset_name; @@ -8391,4 +8366,5 @@ TF_Operation* TF_MakeFileBasedIteratorGetNextWithDatasets( << graph->graph.ToGraphDefDebug().DebugString(); return getnext_node; +#endif } diff --git a/tensorflow/c/c_api_experimental.h b/tensorflow/c/c_api_experimental.h index ebcec8176b63f9a91c847ebe96fba3ff023fc599..88cb173cd25f4219e32392f6722a6ea7d358a553 100644 --- a/tensorflow/c/c_api_experimental.h +++ b/tensorflow/c/c_api_experimental.h @@ -35,7 +35,7 @@ limitations under the License. #ifdef SWIG #define TF_CAPI_EXPORT #else -#if defined(COMPILER_MSVC) +#if defined(_WIN32) #ifdef TF_COMPILE_LIBRARY #define TF_CAPI_EXPORT __declspec(dllexport) #else @@ -43,7 +43,7 @@ limitations under the License. 
#endif // TF_COMPILE_LIBRARY #else #define TF_CAPI_EXPORT __attribute__((visibility("default"))) -#endif // COMPILER_MSVC +#endif // _WIN32 #endif // SWIG #ifdef __cplusplus @@ -60,27 +60,6 @@ extern "C" { TF_CAPI_EXPORT extern void TF_EnableXLACompilation(TF_SessionOptions* options, unsigned char enable); -// Initializes TPU system. Must be called exactly once before TF_SessionRun() is -// called on a TPU graph. -// -// The session graph must contain a node named ConfigureDistributedTPU. -// TODO(b/74774824): Improve the API on initializing TPU system. -TF_CAPI_EXPORT extern void TF_InitializeTPU(TF_Session* session, - TF_Status* status); - -// Shuts down TPU system. For any `session` where TF_InitializeTPU() has -// been successfully called, this call must be made exactly once before the -// session is closed. -// The session graph must contain a node named ShutdownDistributedTPU. -TF_CAPI_EXPORT extern void TF_ShutdownTPU(TF_Session* session, - TF_Status* status); - -// Returns the graph content in a human-readable format, with length set in -// `len`. The format is subject to change in the future. -// The returned string is heap-allocated, and caller should call free() on it. -TF_CAPI_EXPORT extern const char* TF_GraphDebugString(TF_Graph* graph, - size_t* len); - // Returns the graph content in a human-readable format, with length set in // `len`. The format is subject to change in the future. // The returned string is heap-allocated, and caller should call free() on it. 
diff --git a/tensorflow/c/c_api_test.cc b/tensorflow/c/c_api_test.cc index 028f146be31790b211e546978302e81afe26b231..9b86425aa5fbc2be2872b3f5d2809eaa844f9d68 100644 --- a/tensorflow/c/c_api_test.cc +++ b/tensorflow/c/c_api_test.cc @@ -53,7 +53,7 @@ Status TF_TensorToTensor(const TF_Tensor* src, Tensor* dst); namespace { static void ExpectHasSubstr(StringPiece s, StringPiece expected) { - EXPECT_TRUE(StringPiece(s).contains(expected)) + EXPECT_TRUE(str_util::StrContains(s, expected)) << "'" << s << "' does not contain '" << expected << "'"; } @@ -1700,7 +1700,7 @@ TEST_F(CApiGradientsTest, OpWithNoGradientRegistered_NoGradInputs) { TestGradientsError(false); } -// REGISTER_OP for CApiTestAttributesTest test cases. +// REGISTER_OP for CApiAttributesTest test cases. // Registers two ops, each with a single attribute called 'v'. // The attribute in one op will have a type 'type', the other // will have list(type). diff --git a/tensorflow/c/eager/BUILD b/tensorflow/c/eager/BUILD index a2d96357ac8a55be7fe03bf58e33ff1733967dd1..fae922ea3b484f668c3abaf0a1ae30235c5a2d2b 100644 --- a/tensorflow/c/eager/BUILD +++ b/tensorflow/c/eager/BUILD @@ -31,7 +31,6 @@ tf_cuda_library( "//tensorflow/core/common_runtime/eager:context", "//tensorflow/core/common_runtime/eager:eager_executor", "//tensorflow/core/common_runtime/eager:execute", - "//tensorflow/core/common_runtime/eager:execute_node", "//tensorflow/core/common_runtime/eager:kernel_and_device", "//tensorflow/core/common_runtime/eager:tensor_handle", "//tensorflow/core/common_runtime/eager:copy_to_device_node", @@ -41,6 +40,8 @@ tf_cuda_library( "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//tensorflow/core:protos_all_cc", + # TODO(b/74620627): move this here + "//tensorflow/python:cpp_shape_inference_proto_cc", ], }) + select({ "//tensorflow:with_xla_support": [ @@ -49,6 +50,7 @@ tf_cuda_library( ], "//conditions:default": [], }) + [ + "//tensorflow/core/common_runtime/eager:eager_operation", 
"//tensorflow/core:gpu_runtime", ], ) @@ -71,6 +73,7 @@ tf_cuda_library( "//tensorflow/core:lib_internal", "//tensorflow/core/common_runtime/eager:context", "//tensorflow/core/common_runtime/eager:eager_executor", + "//tensorflow/core/common_runtime/eager:eager_operation", "//tensorflow/core/common_runtime/eager:kernel_and_device", "//tensorflow/core/common_runtime/eager:tensor_handle", ], diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc index bb1492fca2d568e0965481d5e95a50254971f8f3..975bde7c7f30697ecdacfafdf9f1cd610b1f68d0 100644 --- a/tensorflow/c/eager/c_api.cc +++ b/tensorflow/c/eager/c_api.cc @@ -34,7 +34,6 @@ limitations under the License. #include "tensorflow/core/common_runtime/device_set.h" #include "tensorflow/core/common_runtime/eager/copy_to_device_node.h" #include "tensorflow/core/common_runtime/eager/execute.h" -#include "tensorflow/core/common_runtime/eager/execute_node.h" #include "tensorflow/core/common_runtime/function.h" #include "tensorflow/core/common_runtime/rendezvous_mgr.h" #include "tensorflow/core/framework/node_def_util.h" @@ -49,6 +48,7 @@ limitations under the License. 
#include "tensorflow/core/platform/mutex.h" #include "tensorflow/core/platform/thread_annotations.h" #include "tensorflow/core/public/version.h" +#include "tensorflow/python/framework/cpp_shape_inference.pb.h" using tensorflow::int64; using tensorflow::string; @@ -116,9 +116,7 @@ TFE_Context* TFE_NewContext(const TFE_ContextOptions* opts, TF_Status* status) { opts->async, std::move(device_mgr), r); } -void TFE_DeleteContext(TFE_Context* ctx, TF_Status* status) { - delete ctx; -} +void TFE_DeleteContext(TFE_Context* ctx, TF_Status* status) { delete ctx; } TF_DeviceList* TFE_ContextListDevices(TFE_Context* ctx, TF_Status* status) { TF_DeviceList* list = new TF_DeviceList; @@ -220,9 +218,6 @@ TF_Tensor* TFE_TensorHandleResolve(TFE_TensorHandle* h, TF_Status* status) { } return retval; } -} // extern "C" - -extern "C" { TFE_Op* TFE_NewOp(TFE_Context* ctx, const char* op_or_function_name, TF_Status* status) { @@ -242,21 +237,18 @@ TFE_Op* TFE_NewOp(TFE_Context* ctx, const char* op_or_function_name, void TFE_DeleteOp(TFE_Op* op) { delete op; } void TFE_OpSetDevice(TFE_Op* op, const char* device_name, TF_Status* status) { - tensorflow::Device* d = nullptr; - if (device_name != nullptr && strlen(device_name) > 0) { - status->status = op->ctx->context.FindDeviceByName(device_name, &d); - } - op->device = d; + status->status = op->operation.SetDevice(device_name); } const char* TFE_OpGetDevice(TFE_Op* op, TF_Status* status) { - tensorflow::Device* device = - (op->device == nullptr) ? op->ctx->context.HostCPU() : op->device; + tensorflow::Device* device = (op->operation.Device() == nullptr) + ? 
op->operation.EagerContext()->HostCPU() + : op->operation.Device(); return device->name().c_str(); } void TFE_OpSetXLACompilation(TFE_Op* op, unsigned char enable) { - op->use_xla = enable; + op->operation.SetUseXla(enable); #ifndef TENSORFLOW_EAGER_USE_XLA LOG(WARNING) << "This call is a no-op, as the TensorFlow library is not " "built with XLA support."; @@ -264,22 +256,20 @@ void TFE_OpSetXLACompilation(TFE_Op* op, unsigned char enable) { } void TFE_OpAddInput(TFE_Op* op, TFE_TensorHandle* h, TF_Status* status) { - h->handle->Ref(); - op->inputs.push_back(h->handle); - op->attrs.NumInputs(op->inputs.size()); + op->operation.AddInput(h->handle); } TF_AttrType TFE_OpGetAttrType(TFE_Op* op, const char* attr_name, unsigned char* is_list, TF_Status* status) { TF_AttrType ret; - if (op->is_function()) { + if (op->operation.is_function()) { status->status = tensorflow::errors::Unimplemented( "TODO(apassos): Support for attributes for TensorFlow functions is not " "ready yet."); return TF_ATTR_INT; // The compiler requires that we return something. 
} - status->status = - tensorflow::AttrTypeByName(*op->attr_types, attr_name, &ret, is_list); + status->status = tensorflow::AttrTypeByName(*op->operation.AttrTypes(), + attr_name, &ret, is_list); return ret; } @@ -298,23 +288,24 @@ TF_AttrType TFE_OpNameGetAttrType(TFE_Context* ctx, } void TFE_OpSetAttrString(TFE_Op* op, const char* attr_name, const char* value) { - op->attrs.Set(attr_name, value); + op->operation.MutableAttrs()->Set(attr_name, value); } void TFE_OpSetAttrInt(TFE_Op* op, const char* attr_name, int64_t value) { - op->attrs.Set(attr_name, static_cast(value)); + op->operation.MutableAttrs()->Set(attr_name, static_cast(value)); } void TFE_OpSetAttrFloat(TFE_Op* op, const char* attr_name, float value) { - op->attrs.Set(attr_name, value); + op->operation.MutableAttrs()->Set(attr_name, value); } void TFE_OpSetAttrBool(TFE_Op* op, const char* attr_name, unsigned char value) { - op->attrs.Set(attr_name, (value == 0) ? false : true); + op->operation.MutableAttrs()->Set(attr_name, (value == 0) ? 
false : true); } void TFE_OpSetAttrType(TFE_Op* op, const char* attr_name, TF_DataType value) { - op->attrs.Set(attr_name, static_cast(value)); + op->operation.MutableAttrs()->Set(attr_name, + static_cast(value)); } void TFE_OpSetAttrShape(TFE_Op* op, const char* attr_name, const int64_t* dims, @@ -336,23 +327,24 @@ void TFE_OpSetAttrShape(TFE_Op* op, const char* attr_name, const int64_t* dims, proto.add_dim()->set_size(dims[d]); } } - op->attrs.Set(attr_name, proto); + op->operation.MutableAttrs()->Set(attr_name, proto); } void TFE_OpSetAttrFunction(TFE_Op* op, const char* attr_name, const TFE_Op* value) { tensorflow::AttrValue attr_value; tensorflow::NameAttrList* func = attr_value.mutable_func(); - func->set_name(value->name); - value->attrs.FillAttrValueMap(func->mutable_attr()); - op->attrs.Set(attr_name, attr_value); + func->set_name(value->operation.Name()); + value->operation.Attrs().FillAttrValueMap(func->mutable_attr()); + op->operation.MutableAttrs()->Set(attr_name, attr_value); } #define TFE_OP_SET_ATTR_LIST(fn, type) \ void fn(TFE_Op* op, const char* attr_name, const type* values, \ int num_values) { \ - op->attrs.Set(attr_name, tensorflow::gtl::ArraySlice( \ - values, num_values)); \ + op->operation.MutableAttrs()->Set( \ + attr_name, \ + tensorflow::gtl::ArraySlice(values, num_values)); \ } TFE_OP_SET_ATTR_LIST(TFE_OpSetAttrStringList, char*) TFE_OP_SET_ATTR_LIST(TFE_OpSetAttrFloatList, float) @@ -360,14 +352,14 @@ TFE_OP_SET_ATTR_LIST(TFE_OpSetAttrFloatList, float) void TFE_OpSetAttrIntList(TFE_Op* op, const char* attr_name, const int64_t* values, int num_values) { - op->attrs.Set(attr_name, - tensorflow::gtl::ArraySlice( - reinterpret_cast(values), num_values)); + op->operation.MutableAttrs()->Set( + attr_name, tensorflow::gtl::ArraySlice( + reinterpret_cast(values), num_values)); } void TFE_OpSetAttrTypeList(TFE_Op* op, const char* attr_name, const TF_DataType* values, int num_values) { - op->attrs.Set( + op->operation.MutableAttrs()->Set( 
attr_name, tensorflow::gtl::ArraySlice( reinterpret_cast(values), num_values)); @@ -379,8 +371,8 @@ void TFE_OpSetAttrBoolList(TFE_Op* op, const char* attr_name, for (int i = 0; i < num_values; ++i) { b[i] = values[i]; } - op->attrs.Set(attr_name, - tensorflow::gtl::ArraySlice(b.get(), num_values)); + op->operation.MutableAttrs()->Set( + attr_name, tensorflow::gtl::ArraySlice(b.get(), num_values)); } void TFE_OpSetAttrShapeList(TFE_Op* op, const char* attr_name, @@ -410,9 +402,9 @@ void TFE_OpSetAttrShapeList(TFE_Op* op, const char* attr_name, } } } - op->attrs.Set(attr_name, - tensorflow::gtl::ArraySlice( - proto.get(), num_values)); + op->operation.MutableAttrs()->Set( + attr_name, tensorflow::gtl::ArraySlice( + proto.get(), num_values)); } void TFE_OpSetAttrFunctionList(TFE_Op* op, const char* attr_name, @@ -420,532 +412,25 @@ void TFE_OpSetAttrFunctionList(TFE_Op* op, const char* attr_name, std::unique_ptr funcs( new tensorflow::NameAttrList[num_values]); for (int i = 0; i < num_values; i++) { - funcs[i].set_name(value[i]->name); - value[i]->attrs.FillAttrValueMap(funcs[i].mutable_attr()); - } - op->attrs.Set(attr_name, - tensorflow::gtl::ArraySlice( - funcs.get(), num_values)); -} -} // extern "C" - -namespace { - -// Initializes the step stats if needed. -void MaybeInitializeStepStats(tensorflow::StepStats* step_stats, - tensorflow::EagerContext* ctx) { - // Lazily initialize the RunMetadata with information about all devices if - // this is the first call. - while (step_stats->dev_stats_size() < ctx->devices()->size()) { - int device_idx = step_stats->dev_stats_size(); - auto* dev_stats = step_stats->add_dev_stats(); - dev_stats->set_device(ctx->devices()->at(device_idx)->name()); - } -} - -int StepStatsDeviceIndex(tensorflow::StepStats* step_stats, - tensorflow::EagerContext* ctx, - tensorflow::Device* device) { - // Find the current device's index. 
- if (device == nullptr) { - device = ctx->HostCPU(); - } - for (int i = 0; i < ctx->devices()->size(); ++i) { - if (ctx->devices()->at(i) == device || - ctx->devices()->at(i)->name() == device->name()) { - return i; - } - } - // TODO(apassos) do not fall back to host CPU if device is unknown. - return 0; -} - -tensorflow::Status ValidateInputTypeAndPlacement( - tensorflow::EagerContext* ctx, tensorflow::Device* op_device, TFE_Op* op, - const tensorflow::OpKernel* kernel, tensorflow::RunMetadata* run_metadata) { - tensorflow::Device* host_device = ctx->HostCPU(); - const tensorflow::MemoryTypeVector& memtypes = kernel->input_memory_types(); - if (memtypes.size() != op->inputs.size()) { - return tensorflow::errors::InvalidArgument( - "expected ", memtypes.size(), " inputs, got ", op->inputs.size()); - } - for (int i = 0; i < op->inputs.size(); ++i) { - const tensorflow::Device* expected_device = - memtypes[i] == tensorflow::HOST_MEMORY ? host_device : op_device; - tensorflow::TensorHandle* handle = op->inputs[i]; - tensorflow::Device* handle_device = nullptr; - TF_RETURN_IF_ERROR(handle->Device(&handle_device)); - const tensorflow::Device* actual_device = - handle_device == nullptr ? host_device : handle_device; - if (expected_device != actual_device) { - switch (ctx->GetDevicePlacementPolicy()) { - case tensorflow::DEVICE_PLACEMENT_SILENT_FOR_INT32: - // TODO(xpan): See if we could bubble python related error up - // to python level. - if (handle->dtype == tensorflow::DT_INT32) { - // Note: enabling silent copies of int32 tensors to match behavior - // of graph mode. 
- break; - } - TF_FALLTHROUGH_INTENDED; - case tensorflow::DEVICE_PLACEMENT_EXPLICIT: - return tensorflow::errors::InvalidArgument( - "Tensors on conflicting devices:" - " cannot compute ", - op->name, " as input #", i, " was expected to be on ", - expected_device->name(), " but is actually on ", - actual_device->name(), " (operation running on ", - op_device->name(), ")", - " Tensors can be copied explicitly using .gpu() or .cpu()," - " or transparently copied by using tfe.enable_eager_execution(" - "tfe.DEVICE_PLACEMENT_SILENT). Copying tensors between devices" - " may slow down your model"); - case tensorflow::DEVICE_PLACEMENT_WARN: - LOG(WARNING) << "before computing " << op->name << " input #" << i - << " was expected to be on " << expected_device->name() - << " but is actually on " << actual_device->name() - << " (operation running on " << op_device->name() - << "). This triggers a copy which can be a performance " - "bottleneck."; - break; - case tensorflow::DEVICE_PLACEMENT_SILENT: // Do nothing. - break; - } - // We are only here if the policy is warn or silent copies, so we should - // trigger a copy. - auto pre_time = tensorflow::Env::Default()->NowMicros(); - tensorflow::TensorHandle* copied_tensor = nullptr; - tensorflow::Status status = tensorflow::EagerCopyToDevice( - handle, ctx, expected_device->name().c_str(), &copied_tensor); - if (run_metadata != nullptr) { - auto* step_stats = run_metadata->mutable_step_stats(); - MaybeInitializeStepStats(step_stats, ctx); - // Record the sending on the source device for now. 
- int device_idx = StepStatsDeviceIndex(step_stats, ctx, handle_device); - auto* dev_stats = step_stats->mutable_dev_stats(device_idx); - auto* node_stats = dev_stats->add_node_stats(); - node_stats->set_node_name("_Send"); - node_stats->set_all_start_micros(pre_time); - node_stats->set_op_end_rel_micros( - tensorflow::Env::Default()->NowMicros() - pre_time); - } - if (!status.ok()) { - if (copied_tensor != nullptr) copied_tensor->Unref(); - return tensorflow::errors::Internal( - "Failed copying input tensor from ", actual_device->name(), " to ", - expected_device->name(), " in order to run ", op->name, ": ", - status.error_message()); - } - handle->Unref(); - handle = copied_tensor; - op->inputs[i] = copied_tensor; - } - if (handle->dtype != kernel->input_type(i)) { - return tensorflow::errors::InvalidArgument( - "cannot compute ", op->name, " as input #", i, - " was expected to be a ", - tensorflow::DataTypeString(kernel->input_type(i)), - " tensor but is a ", tensorflow::DataTypeString(handle->dtype), - " tensor"); - } - } - return tensorflow::Status::OK(); -} - -tensorflow::Device* SelectDevice(const tensorflow::NodeDef& ndef, - TFE_Context* ctx, TF_Status* status) { - tensorflow::DeviceSet ds; - for (tensorflow::Device* d : *ctx->context.devices()) { - ds.AddDevice(d); - } - tensorflow::DeviceTypeVector final_devices; - status->status = tensorflow::SupportedDeviceTypesForNode( - ds.PrioritizedDeviceTypeList(), ndef, &final_devices); - if (!status->status.ok()) { - return nullptr; - } - if (final_devices.empty()) { - status->status = tensorflow::errors::Internal( - "Could not find valid device for node ", ndef.DebugString()); - return nullptr; - } - for (tensorflow::Device* d : *ctx->context.devices()) { - if (d->device_type() == final_devices[0].type_string()) { - return d; - } - } - status->status = tensorflow::errors::Unknown( - "Could not find a device for node ", ndef.DebugString()); - return nullptr; -} - - -#ifdef TENSORFLOW_EAGER_USE_XLA -// Synthesizes 
and returns a wrapper function over `op`, which must be a -// primitive op (e.g. matmul). -// -// The wrapper function conforms to the function signature expected by -// _XlaLaunchOp, with input params ordered by . For example, if the op has input params , they will be reordered to as the input params to the synthesized function. -// -// It populates `const_input_types`, `arg_input_types` and -// `op_input_to_func_input` based on the reordering results, that the caller can -// use them to build an _XlaLaunchOp. On error, it returns NULL, and sets -// `status` accordingly. -const tensorflow::FunctionDef* OpToFunction( - TFE_Op* op, std::vector* const_input_types, - std::vector* arg_input_types, - tensorflow::gtl::FlatMap* op_input_to_func_input, - TF_Status* status) { - DCHECK(!op->is_function()); - - tensorflow::FunctionDef fdef; - - // Get the OpDef of the op we are trying to encapsulate. - TFE_Context* ctx = op->ctx; - const tensorflow::OpRegistrationData* op_data; - { - status->status = ctx->context.FindFunctionOpData(op->name, &op_data); - if (!status->status.ok()) { - return nullptr; - } - } - const tensorflow::OpDef& op_def = op_data->op_def; - - tensorflow::OpDef* signature = fdef.mutable_signature(); - - // Handle constant inputs. - const std::unordered_set const_inputs( - *tensorflow::XlaOpRegistry::CompileTimeConstantInputs(op->name)); - - // First add place holders for the input args, so that we can refer to them by - // position in the next loop. Also tally up the resource inputs. - int num_resource_inputs = 0; - for (int i = 0; i < op_def.input_arg_size(); ++i) { - if (op_def.input_arg(i).type() == tensorflow::DT_RESOURCE) { - ++num_resource_inputs; - } - signature->add_input_arg(); - } - - // Now we map the input params from `op_def` to `signature`, where the param - // ordering for `signature` is: . 
- int const_index = 0; - int arg_index = const_inputs.size(); - int resource_index = op_def.input_arg_size() - num_resource_inputs; - for (int i = 0; i < op_def.input_arg_size(); ++i) { - const tensorflow::OpDef::ArgDef& op_input_arg = op_def.input_arg(i); - tensorflow::OpDef::ArgDef* func_input_arg = nullptr; - if (const_inputs.find(op_input_arg.name()) != const_inputs.end()) { - VLOG(1) << "For const input, mapping op input " << i << " to func input " - << const_index; - (*op_input_to_func_input)[i] = const_index; - func_input_arg = signature->mutable_input_arg(const_index++); - const_input_types->push_back( - static_cast(op->inputs[i]->dtype)); - } else if (op_input_arg.type() == tensorflow::DT_RESOURCE) { - VLOG(1) << "For resource input, mapping op input " << i - << " to func input " << resource_index; - (*op_input_to_func_input)[i] = resource_index; - func_input_arg = signature->mutable_input_arg(resource_index++); - } else { - VLOG(1) << "For arg input, mapping op input " << i << " to func input " - << arg_index; - (*op_input_to_func_input)[i] = arg_index; - func_input_arg = signature->mutable_input_arg(arg_index++); - arg_input_types->push_back( - static_cast(op->inputs[i]->dtype)); - } - - func_input_arg->set_name(op_input_arg.name()); - func_input_arg->set_type(op->inputs[i]->dtype); - } - VLOG(1) << "Added OpDef Inputs: " << fdef.DebugString(); - - // Resources args are at the end of the function input params, and we should - // have iterated over all of them. - DCHECK_EQ(signature->input_arg_size(), resource_index); - - // Make the synthesized function's name unique. - signature->set_name(tensorflow::strings::StrCat( - op_def.name(), func_id_generator.fetch_add(1))); - - // Add the node def and set its input names to match op_def's names. 
- const tensorflow::NodeDef& ndef = op->attrs.BuildNodeDef(); - DCHECK_EQ(signature->input_arg_size(), ndef.input_size()); - *fdef.add_node_def() = ndef; - for (int i = 0; i < op_def.input_arg_size(); ++i) { - fdef.mutable_node_def(0)->set_input(i, op_def.input_arg(i).name()); - } - VLOG(1) << "Added NodeDef: " << fdef.DebugString(); - - // Fix the output names and set output types. - for (int i = 0; i < op_def.output_arg_size(); ++i) { - tensorflow::OpDef::ArgDef* arg = signature->add_output_arg(); - const tensorflow::OpDef::ArgDef& op_def_arg = op_def.output_arg(i); - const string& out_tensor_name = tensorflow::strings::StrCat( - ndef.name(), ":", op_def_arg.name(), ":", 0); - arg->set_name(op_def_arg.name()); - (*fdef.mutable_ret())[op_def_arg.name()] = out_tensor_name; - const string& type_attr = op_def_arg.type_attr(); - if (!type_attr.empty()) { - auto i = ndef.attr().find(type_attr); - if (i == ndef.attr().end()) { - status->status = tensorflow::errors::InvalidArgument( - tensorflow::strings::StrCat("Could not find attr ", type_attr, - " in NodeDef ", ndef.DebugString())); - return nullptr; - } - arg->set_type(i->second.type()); - } - } - VLOG(1) << "Fixed Output names and all types: " << fdef.DebugString(); - - status->status = ctx->context.AddFunctionDef(fdef); - if (!status->status.ok()) return nullptr; - const auto ret = ctx->context.FindFunctionDef(signature->name()); - DCHECK(ret != nullptr); - return ret; -} - -// Builds an _XLALaunchOp as a wrapper over 'op', so that 'op' can be executed -// via XLA. 
-std::unique_ptr BuildXlaLaunch(TFE_Op* op, TF_Status* status) { - VLOG(1) << "Creating _XlaLaunchOp for TFE_Op " << op->name; - auto launch_op = - std::unique_ptr(TFE_NewOp(op->ctx, "_XlaLaunch", status)); - if (TF_GetCode(status) != TF_OK) return nullptr; - if (op->device) { - TFE_OpSetDevice(launch_op.get(), op->device->name().c_str(), status); - if (TF_GetCode(status) != TF_OK) return nullptr; - } - - const tensorflow::FunctionDef* fdef; - { - fdef = op->ctx->context.FindFunctionDef(op->name); - } - std::vector const_input_types; - std::vector arg_input_types; - tensorflow::gtl::FlatMap op_input_to_func_input; - if (fdef == nullptr) { - // See if this is a primitive op, and if so create a function for it, so - // that _XlaLaunchOp can access it. - fdef = OpToFunction(op, &const_input_types, &arg_input_types, - &op_input_to_func_input, status); - if (!status->status.ok()) return nullptr; - } else { - // TODO(hongm): XlaOpRegistry::CompileTimeConstantInputs() does not work for - // functions, so we need to find another way to handle constant inputs. - for (int i = const_input_types.size(); - i < fdef->signature().input_arg_size(); ++i) { - VLOG(1) << "Adding Targs from input arg " << i; - const tensorflow::OpDef::ArgDef& arg = fdef->signature().input_arg(i); - arg_input_types.push_back(static_cast(arg.type())); - } - } - DCHECK(fdef != nullptr); - - // Copy inputs and their devices. - // Since input param reordering may have occurred between `op` and `launch_op` - // via `op_input_to_func_input`, adjust the actual inputs accordingly. 
- launch_op->inputs = op->inputs; - for (tensorflow::TensorHandle* h : launch_op->inputs) { - h->Ref(); - } - if (!op_input_to_func_input.empty()) { - DCHECK_EQ(op->inputs.size(), op_input_to_func_input.size()); - for (int i = 0; i < op_input_to_func_input.size(); ++i) { - VLOG(1) << "mapping op input " << i << " to func input " - << op_input_to_func_input[i]; - - launch_op->inputs[op_input_to_func_input[i]] = op->inputs[i]; - } - } - launch_op->attrs.NumInputs(op->inputs.size()); - - TFE_OpSetAttrTypeList(launch_op.get(), "Tconstants", const_input_types.data(), - const_input_types.size()); - - // Set Targs and Nresources attrs. - TFE_OpSetAttrTypeList(launch_op.get(), "Targs", arg_input_types.data(), - arg_input_types.size()); - const int num_resource_inputs = fdef->signature().input_arg_size() - - const_input_types.size() - - arg_input_types.size(); - TFE_OpSetAttrInt(launch_op.get(), "Nresources", num_resource_inputs); - - // Set Tresults attr. - std::vector tresults; - for (const tensorflow::OpDef::ArgDef& arg : fdef->signature().output_arg()) { - tresults.push_back(static_cast(arg.type())); + funcs[i].set_name(value[i]->operation.Name()); + value[i]->operation.Attrs().FillAttrValueMap(funcs[i].mutable_attr()); } - TFE_OpSetAttrTypeList(launch_op.get(), "Tresults", tresults.data(), - tresults.size()); - - // Set function attr. 
- tensorflow::AttrValue attr_value; - tensorflow::NameAttrList* func = attr_value.mutable_func(); - func->set_name(fdef->signature().name()); - launch_op->attrs.Set("function", attr_value); - - return launch_op; + op->operation.MutableAttrs()->Set( + attr_name, tensorflow::gtl::ArraySlice( + funcs.get(), num_values)); } -#endif // TENSORFLOW_EAGER_USE_XLA - -} // namespace - -extern "C" { void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals, TF_Status* status) { - TFE_Context* ctx = op->ctx; - status->status = ctx->context.GetStatus(); + tensorflow::gtl::InlinedVector handle_retvals( + *num_retvals); + status->status = + tensorflow::EagerExecute(&op->operation, &handle_retvals, num_retvals); if (!status->status.ok()) { return; } -#ifdef TENSORFLOW_EAGER_USE_XLA - std::unique_ptr xla_launch_op; - if (op->use_xla && op->name != "_XlaLaunch") { - xla_launch_op = BuildXlaLaunch(op, status); - if (!status->status.ok()) { - return; - } - op = xla_launch_op.get(); - } -#endif // TENSORFLOW_EAGER_USE_XLA - // Ensure all resource-touching ops run in the device the resource is, - // regardless of anything else that has been specified. This is identical to - // the graph mode behavior. - for (int i = 0; i < op->inputs.size(); ++i) { - tensorflow::Device* input_op_device = nullptr; - status->status = op->inputs[i]->OpDevice(&input_op_device); - if (!status->status.ok()) return; - VLOG(2) << "for op " << op->name << " input " << i << " " - << tensorflow::DataTypeString(op->inputs[i]->dtype) << " " - << (input_op_device == nullptr ? "cpu" : input_op_device->name()) - << " " << (op->device == nullptr ? "cpu" : op->device->name()); - if (op->inputs[i]->dtype == tensorflow::DT_RESOURCE && - (input_op_device != op->device || input_op_device == nullptr)) { - tensorflow::Device* d = - input_op_device == nullptr ? 
ctx->context.HostCPU() : input_op_device; - VLOG(1) << "Changing device of operation " << op->name << " to " - << d->name() << " because input #" << i - << " is a resource in this device."; - op->device = d; - } - } - tensorflow::Device* device = op->device; - - tensorflow::Fprint128 cache_key = - op->attrs.CacheKey(device == nullptr ? "unspecified" : device->name()); - tensorflow::KernelAndDevice* kernel = ctx->context.GetCachedKernel(cache_key); - if (kernel == nullptr) { - const tensorflow::NodeDef& ndef = op->attrs.BuildNodeDef(); - if (device == nullptr) { - device = SelectDevice(ndef, ctx, status); - if (!status->status.ok()) { - return; - } - } - CHECK(device != nullptr); - if (ctx->context.LogDevicePlacement()) { - LOG(INFO) << "Executing op " << ndef.op() << " in device " - << device->name(); - } - kernel = new tensorflow::KernelAndDevice(ctx->context.GetRendezvous()); - // Knowledge of the implementation of Init (and in-turn - // FunctionLibraryRuntime::CreateKernel) tells us that ctx->func_lib_def - // will be accessed, so grab on to the lock. - // See WARNING comment in Execute (before kernel->Run) - would be nice to - // rework to avoid this subtlety. - tensorflow::tf_shared_lock l(*ctx->context.FunctionsMu()); - status->status = tensorflow::KernelAndDevice::Init( - ndef, ctx->context.func_lib(device), kernel); - if (!status->status.ok()) { - delete kernel; - return; - } - // Update output_dtypes inside `kernel`. 
- const tensorflow::OpDef* op_def = nullptr; - const tensorflow::FunctionDef* function_def = - ctx->context.FuncLibDef()->Find(ndef.op()); - if (function_def != nullptr) { - op_def = &(function_def->signature()); - } - if (op_def == nullptr) { - status->status = OpDefForOp(ndef.op().c_str(), &op_def); - if (!status->status.ok()) { - return; - } - } - tensorflow::DataTypeVector input_dtypes; - status->status = InOutTypesForNode(ndef, *op_def, &input_dtypes, - kernel->mutable_output_dtypes()); - if (!status->status.ok()) { - return; - } - ctx->context.AddKernelToCache(cache_key, kernel); - } - const tensorflow::DataTypeVector& output_dtypes = kernel->output_dtypes(); - const int output_dtypes_size = output_dtypes.size(); - if (output_dtypes_size > *num_retvals) { - TF_SetStatus(status, TF_INVALID_ARGUMENT, - tensorflow::strings::StrCat("Expecting ", output_dtypes.size(), - " outputs, but *num_retvals is ", - *num_retvals) - .c_str()); - return; - } - *num_retvals = output_dtypes_size; - if (device == nullptr) { - // TODO(apassos) debug how the assignment below might return a different - // device from the one requested above. - device = kernel->device(); - } - status->status = ValidateInputTypeAndPlacement( - &ctx->context, device, op, kernel->kernel(), - ctx->context.ShouldStoreMetadata() ? ctx->context.RunMetadataProto() - : nullptr); - if (!status->status.ok()) return; - std::unique_ptr maybe_stats; - if (ctx->context.ShouldStoreMetadata()) { - maybe_stats.reset(new tensorflow::NodeExecStats); - maybe_stats->set_node_name(op->name); - maybe_stats->set_all_start_micros(tensorflow::Env::Default()->NowMicros()); - maybe_stats->set_op_start_rel_micros(0); - maybe_stats->set_scheduled_micros(tensorflow::Env::Default()->NowMicros()); - // TODO(apassos) track referenced tensors - } - if (ctx->context.Async()) { - // Note that for async mode, execution order will make sure that all - // input handles are ready before executing them. 
- // TODO(agarwal): Consider executing "cheap" kernels inline for performance. - tensorflow::gtl::InlinedVector handle_retvals( - *num_retvals); - tensorflow::uint64 id = op->ctx->context.NextId(); - for (int i = 0; i < *num_retvals; ++i) { - tensorflow::TensorHandle* h = - new tensorflow::TensorHandle(id, output_dtypes[i], &op->ctx->context); - retvals[i] = new TFE_TensorHandle(h); - handle_retvals[i] = h; - } - tensorflow::EagerNode* node = new tensorflow::ExecuteNode( - id, &op->ctx->context, op->device, op->inputs, kernel, - maybe_stats.release(), output_dtypes, handle_retvals); - ctx->context.ExecutorAdd(node); - } else { - // Execute checks if retvals[i] is nullptr or not to figure if it needs to - // allocate it. - std::vector handle_retvals(*num_retvals, - nullptr); - status->status = tensorflow::EagerExecute( - &op->ctx->context, op->device, op->inputs, kernel, maybe_stats.get(), - handle_retvals.data(), *num_retvals); - for (int i = 0; i < *num_retvals; ++i) { - retvals[i] = new TFE_TensorHandle(handle_retvals[i]); - } + for (int i = 0; i < *num_retvals; ++i) { + retvals[i] = new TFE_TensorHandle(handle_retvals[i]); } } @@ -1018,6 +503,62 @@ void TFE_ContextExportRunMetadata(TFE_Context* ctx, TF_Buffer* buf, ctx->context.RunMetadataProto()->Clear(); } +void TFE_GetResourceHandleShapeAndType(TF_Graph* graph, TF_Output output, + TF_Buffer* output_proto, + TF_Status* status) { + tensorflow::Node* node = &output.oper->node; + tensorflow::CppShapeInferenceResult::HandleData handle_data; + handle_data.set_is_set(true); + { + tensorflow::mutex_lock l(graph->mu); + tensorflow::shape_inference::InferenceContext* ic = + graph->refiner.GetContext(node); + CHECK(ic != nullptr); + CHECK_LT(output.index, ic->num_outputs()); + const auto* shapes_and_types = + ic->output_handle_shapes_and_types(output.index); + if (shapes_and_types == nullptr) { + output_proto->data = nullptr; + output_proto->length = 0; + output_proto->data_deallocator = nullptr; + return; + } + + for 
(const auto& p : *shapes_and_types) { + auto* out_shape_and_type = handle_data.add_shape_and_type(); + ic->ShapeHandleToProto(p.shape, out_shape_and_type->mutable_shape()); + out_shape_and_type->set_dtype(p.dtype); + } + } + status->status = MessageToBuffer(handle_data, output_proto); +} + +void TFE_SetResourceHandleShapeAndType(TF_Graph* graph, TF_Output output, + const void* proto, size_t proto_len, + TF_Status* status) { + tensorflow::CppShapeInferenceResult::HandleData handle_data; + if (!handle_data.ParseFromArray(proto, proto_len)) { + status->status = tensorflow::errors::InvalidArgument( + "Couldn't deserialize HandleData proto"); + return; + } + DCHECK(handle_data.is_set()); + + tensorflow::mutex_lock l(graph->mu); + tensorflow::shape_inference::InferenceContext* ic = + graph->refiner.GetContext(&output.oper->node); + + std::vector shapes_and_types; + for (const auto& shape_and_type_proto : handle_data.shape_and_type()) { + tensorflow::shape_inference::ShapeHandle shape; + status->status = + ic->MakeShapeFromShapeProto(shape_and_type_proto.shape(), &shape); + if (!status->status.ok()) return; + shapes_and_types.emplace_back(shape, shape_and_type_proto.dtype()); + } + ic->set_output_handle_shapes_and_types(output.index, shapes_and_types); +} + namespace { TFE_Op* GetFunc(TFE_Context* ctx, const tensorflow::NameAttrList& func, TF_Status* status) { @@ -1088,10 +629,3 @@ void SetOpAttrValueScalar(TFE_Context* ctx, TFE_Op* op, } } } // namespace tensorflow - - -TFE_Op::~TFE_Op() { - for (tensorflow::TensorHandle* h : inputs) { - h->Unref(); - } -} diff --git a/tensorflow/c/eager/c_api.h b/tensorflow/c/eager/c_api.h index 3926c22ce1f9e194b1452c796c83944d10cfdc64..ba77f3cd07ff686782ad7b5c5751811aa3336555 100644 --- a/tensorflow/c/eager/c_api.h +++ b/tensorflow/c/eager/c_api.h @@ -30,7 +30,7 @@ limitations under the License. 
#ifdef SWIG #define TF_CAPI_EXPORT #else -#if defined(COMPILER_MSVC) +#if defined(_WIN32) #ifdef TF_COMPILE_LIBRARY #define TF_CAPI_EXPORT __declspec(dllexport) #else @@ -38,7 +38,7 @@ limitations under the License. #endif // TF_COMPILE_LIBRARY #else #define TF_CAPI_EXPORT __attribute__((visibility("default"))) -#endif // COMPILER_MSVC +#endif // _WIN32 #endif // SWIG #ifdef __cplusplus @@ -329,6 +329,20 @@ TF_CAPI_EXPORT extern void TFE_ContextExportRunMetadata(TFE_Context* ctx, TF_Buffer* buf, TF_Status* status); +// Returns the serialized CppShapeInferenceResult::HandleData proto for +// `output` if it's a resource tensor, or otherwise returns an empty buffer. +TF_CAPI_EXPORT extern void TFE_GetResourceHandleShapeAndType( + TF_Graph* graph, TF_Output output, TF_Buffer* output_proto, + TF_Status* status); + +// Sets `output` based on `proto`, which should be a serialized +// CppShapeInferenceResult::HandleData proto. +TF_CAPI_EXPORT extern void TFE_SetResourceHandleShapeAndType(TF_Graph* graph, + TF_Output output, + const void* proto, + size_t proto_len, + TF_Status* status); + #ifdef __cplusplus } /* end extern "C" */ #endif diff --git a/tensorflow/c/eager/c_api_internal.h b/tensorflow/c/eager/c_api_internal.h index 05dc64f521735f944559392f470a37590e93f17c..49e1aab1cef9577256d9b081858cf094c788caf8 100644 --- a/tensorflow/c/eager/c_api_internal.h +++ b/tensorflow/c/eager/c_api_internal.h @@ -32,6 +32,7 @@ limitations under the License. #include "tensorflow/core/common_runtime/device_factory.h" #include "tensorflow/core/common_runtime/eager/context.h" #include "tensorflow/core/common_runtime/eager/eager_executor.h" +#include "tensorflow/core/common_runtime/eager/eager_operation.h" #include "tensorflow/core/common_runtime/eager/kernel_and_device.h" #include "tensorflow/core/common_runtime/eager/tensor_handle.h" #include "tensorflow/core/common_runtime/function.h" @@ -45,7 +46,6 @@ limitations under the License. 
#include "tensorflow/core/platform/thread_annotations.h" #include "tensorflow/core/public/version.h" - struct TFE_ContextOptions { TF_SessionOptions session_options; // true if async execution is enabled. @@ -85,19 +85,9 @@ struct TFE_Op { // t is NULL iff the TFE_Op corresponds to a TensorFlow function instead of a // primitive operation. TFE_Op(TFE_Context* ctx, const char* op, const tensorflow::AttrTypeMap* t) - : ctx(ctx), name(op), attrs(op), attr_types(t), device(nullptr) {} - - ~TFE_Op(); - - bool const is_function() const { return attr_types == nullptr; } + : operation(&ctx->context, op, t) {} - TFE_Context* ctx; // Must outlive the TFE_Op. - const tensorflow::string name; - tensorflow::AttrBuilder attrs; - const tensorflow::AttrTypeMap* attr_types; - tensorflow::gtl::InlinedVector inputs; - tensorflow::Device* device; - bool use_xla = false; + tensorflow::EagerOperation operation; }; namespace tensorflow { diff --git a/tensorflow/c/eager/runtime.cc b/tensorflow/c/eager/runtime.cc index abe2793ce894ad07c252575c5d55d98342916eac..e6c51ab17a867a0697f15d7683d8ca52c062035d 100644 --- a/tensorflow/c/eager/runtime.cc +++ b/tensorflow/c/eager/runtime.cc @@ -184,8 +184,7 @@ void CombineUnordered(const tensorflow::Fprint128& a, inline tensorflow::Fprint128 CacheKeyHelper(StringPiece s, const tensorflow::Fprint128& b) { - // TODO(agarwal): avoid ToString(). - tensorflow::Fprint128 a = tensorflow::Fingerprint128(s.ToString()); + tensorflow::Fprint128 a = tensorflow::Fingerprint128(s); return FingerprintCat128(a, b); } @@ -213,10 +212,8 @@ tensorflow::Fprint128 AttrBuilder::CacheKey(const string& device) const { if (node_def_finalized_) return f; } for (const auto& p : string_attrs_) { - // TODO(agarwal): avoid ToString(). 
- CombineUnordered(CacheKeyHelper(p.first, tensorflow::Fingerprint128( - p.second.ToString())), - &f); + CombineUnordered( + CacheKeyHelper(p.first, tensorflow::Fingerprint128(p.second)), &f); } for (const auto& p : int_attrs_) { CombineUnordered(CacheKeyHelper(p.first, static_cast(p.second)), diff --git a/tensorflow/cc/gradients/array_grad.cc b/tensorflow/cc/gradients/array_grad.cc index 6545e4ee3eb406436937a43ddac66d017af8e108..ff348fadb24e29a83bd6c8853aa67931f6df4182 100644 --- a/tensorflow/cc/gradients/array_grad.cc +++ b/tensorflow/cc/gradients/array_grad.cc @@ -385,6 +385,42 @@ Status MirrorPadGradGrad(const Scope& scope, const Operation& op, } REGISTER_GRADIENT_OP("MirrorPadGrad", MirrorPadGradGrad); +Status StridedSliceGradHelper(const Scope& scope, const Operation& op, + const std::vector& grad_inputs, + std::vector* grad_outputs) { + Input x = Shape(scope, op.input(0)); + Input begin = op.input(1); + Input end = op.input(2); + Input strides = op.input(3); + int64 begin_mask; + int64 end_mask; + int64 ellipsis_mask; + int64 new_axis_mask; + int64 shrink_axis_mask; + TF_RETURN_IF_ERROR( + GetNodeAttr(op.node()->attrs(), "begin_mask", &begin_mask)); + TF_RETURN_IF_ERROR(GetNodeAttr(op.node()->attrs(), "end_mask", &end_mask)); + TF_RETURN_IF_ERROR( + GetNodeAttr(op.node()->attrs(), "ellipsis_mask", &ellipsis_mask)); + TF_RETURN_IF_ERROR( + GetNodeAttr(op.node()->attrs(), "new_axis_mask", &new_axis_mask)); + TF_RETURN_IF_ERROR( + GetNodeAttr(op.node()->attrs(), "shrink_axis_mask", &shrink_axis_mask)); + grad_outputs->push_back( + StridedSliceGrad(scope, x, begin, end, strides, grad_inputs[0], + StridedSliceGrad::BeginMask(begin_mask) + .EndMask(end_mask) + .EllipsisMask(ellipsis_mask) + .NewAxisMask(new_axis_mask) + .ShrinkAxisMask(shrink_axis_mask))); + // No gradients returned for begin, end and strides + grad_outputs->push_back(NoGradient()); + grad_outputs->push_back(NoGradient()); + grad_outputs->push_back(NoGradient()); + return scope.status(); +} 
+REGISTER_GRADIENT_OP("StridedSlice", StridedSliceGradHelper); + } // anonymous namespace } // namespace ops } // namespace tensorflow diff --git a/tensorflow/cc/gradients/array_grad_test.cc b/tensorflow/cc/gradients/array_grad_test.cc index 4a215fcc9299cf8b8da04cbf151640631ed0d449..de3bd0fc9e2493f8ff76163f5be6bd4327c58c5a 100644 --- a/tensorflow/cc/gradients/array_grad_test.cc +++ b/tensorflow/cc/gradients/array_grad_test.cc @@ -354,5 +354,29 @@ TEST_F(ArrayGradTest, MirrorPadGradGrad_Symmetric) { RunTest(x, x_shape, y, y_shape); } +TEST_F(ArrayGradTest, StridedSliceGrad) { + TensorShape x_shape({6, 4, 4}); + auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(x_shape)); + + // y = x[2:6:2, 1:3, 1:3] + auto y = StridedSlice(scope_, x, {2, 1, 1}, {6, 3, 3}, {2, 1, 1}); + // y.shape = [2, 2, 2]; + RunTest(x, x_shape, y, {2, 2, 2}); + + // y = x[2:6:2, 1:3, 1:3] + // begin_mask = 1<<1 (ignore begin_index = 1) + // end_mask = 1<<2 (ignore end_index = 2) + y = StridedSlice(scope_, x, {2, 1, 1}, {6, 3, 3}, {2, 1, 1}, + StridedSlice::BeginMask(1 << 1).EndMask(1 << 2)); + // y.shape = [2, 3, 3]; + RunTest(x, x_shape, y, {2, 3, 3}); + + // y = [tf.newaxis, 2:6:2, 1:3, 1:3] + y = StridedSlice(scope_, x, {0, 2, 1, 1}, {0, 6, 3, 3}, {1, 2, 1, 1}, + StridedSlice::NewAxisMask(1 << 0)); + // y.shape = [1, 2, 2, 2]; + RunTest(x, x_shape, y, {1, 2, 2, 2}); +} + } // namespace } // namespace tensorflow diff --git a/tensorflow/cc/profiler/BUILD b/tensorflow/cc/profiler/BUILD index 00799526fce572e7bb80199ccb8ce1cc89874031..cf65fe1ab99b49207a64e86310178141b30d07d7 100644 --- a/tensorflow/cc/profiler/BUILD +++ b/tensorflow/cc/profiler/BUILD @@ -9,6 +9,9 @@ load("//tensorflow:tensorflow.bzl", "tf_cuda_cc_test") tf_cuda_cc_test( name = "profiler_test", srcs = ["profiler_test.cc"], + tags = [ + "noguitar", # b/77649654 + ], deps = [ ":profiler", "//tensorflow/cc:cc_ops", diff --git a/tensorflow/cc/saved_model/loader_test.cc b/tensorflow/cc/saved_model/loader_test.cc index 
4c64d2cfe3c10e6c7ed82a2d72460a0b34283bb2..72b8bc18710b0ee77cb01ed3ad0c2abb5183efb2 100644 --- a/tensorflow/cc/saved_model/loader_test.cc +++ b/tensorflow/cc/saved_model/loader_test.cc @@ -24,6 +24,7 @@ limitations under the License. #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/lib/core/status_test_util.h" #include "tensorflow/core/lib/io/path.h" +#include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/platform/test.h" namespace tensorflow { @@ -133,9 +134,9 @@ TEST_F(LoaderTest, NoTagMatch) { Status st = LoadSavedModel(session_options, run_options, export_dir, {"missing-tag"}, &bundle); EXPECT_FALSE(st.ok()); - EXPECT_TRUE(StringPiece(st.error_message()) - .contains("Could not find meta graph def matching supplied " - "tags: { missing-tag }")) + EXPECT_TRUE(str_util::StrContains( + st.error_message(), + "Could not find meta graph def matching supplied tags: { missing-tag }")) << st.error_message(); } @@ -149,9 +150,9 @@ TEST_F(LoaderTest, NoTagMatchMultiple) { Status st = LoadSavedModel(session_options, run_options, export_dir, {kSavedModelTagServe, "missing-tag"}, &bundle); EXPECT_FALSE(st.ok()); - EXPECT_TRUE( - StringPiece(st.error_message()) - .contains("Could not find meta graph def matching supplied tags: ")) + EXPECT_TRUE(str_util::StrContains( + st.error_message(), + "Could not find meta graph def matching supplied tags: ")) << st.error_message(); } @@ -169,7 +170,7 @@ TEST_F(LoaderTest, SessionCreationFailure) { Status st = LoadSavedModel(session_options, run_options, export_dir, {kSavedModelTagServe}, &bundle); EXPECT_FALSE(st.ok()); - EXPECT_TRUE(StringPiece(st.error_message()).contains(kInvalidTarget)) + EXPECT_TRUE(str_util::StrContains(st.error_message(), kInvalidTarget)) << st.error_message(); } diff --git a/tensorflow/cc/tutorials/example_trainer.cc b/tensorflow/cc/tutorials/example_trainer.cc index 3675d72ee354533a7d84b5e8783cde452d8d60c9..5dbc4f5f6aa389978e55ca2656c17ff97202203d 100644 --- 
a/tensorflow/cc/tutorials/example_trainer.cc +++ b/tensorflow/cc/tutorials/example_trainer.cc @@ -24,6 +24,7 @@ limitations under the License. #include "tensorflow/core/graph/default_device.h" #include "tensorflow/core/graph/graph_def_builder.h" #include "tensorflow/core/lib/core/threadpool.h" +#include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/lib/strings/stringprintf.h" #include "tensorflow/core/platform/init_main.h" #include "tensorflow/core/platform/logging.h" @@ -166,7 +167,8 @@ namespace { bool ParseInt32Flag(tensorflow::StringPiece arg, tensorflow::StringPiece flag, int32* dst) { - if (arg.Consume(flag) && arg.Consume("=")) { + if (tensorflow::str_util::ConsumePrefix(&arg, flag) && + tensorflow::str_util::ConsumePrefix(&arg, "=")) { char extra; return (sscanf(arg.data(), "%d%c", dst, &extra) == 1); } @@ -176,7 +178,7 @@ bool ParseInt32Flag(tensorflow::StringPiece arg, tensorflow::StringPiece flag, bool ParseBoolFlag(tensorflow::StringPiece arg, tensorflow::StringPiece flag, bool* dst) { - if (arg.Consume(flag)) { + if (tensorflow::str_util::ConsumePrefix(&arg, flag)) { if (arg.empty()) { *dst = true; return true; diff --git a/tensorflow/compiler/aot/BUILD b/tensorflow/compiler/aot/BUILD index fa03b1f3c2dfc334d4a3871e6a1bf5503fa8d5f8..19e6bf68e77725bb3cae4e1d338c52dff472cb18 100644 --- a/tensorflow/compiler/aot/BUILD +++ b/tensorflow/compiler/aot/BUILD @@ -60,6 +60,7 @@ cc_library( "//tensorflow/compiler/tf2xla:tf2xla_util", "//tensorflow/compiler/tf2xla:xla_compiler", "//tensorflow/compiler/tf2xla/kernels:xla_cpu_only_ops", + "//tensorflow/compiler/tf2xla/kernels:xla_dummy_ops", "//tensorflow/compiler/tf2xla/kernels:xla_ops", "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla:statusor", diff --git a/tensorflow/compiler/aot/embedded_protocol_buffers.cc b/tensorflow/compiler/aot/embedded_protocol_buffers.cc index 6489929a576d6469c4ff1358ca5ee9d27fb578bb..0048eec93bbe10271d9aa535203f19473a38b342 100644 --- 
a/tensorflow/compiler/aot/embedded_protocol_buffers.cc +++ b/tensorflow/compiler/aot/embedded_protocol_buffers.cc @@ -19,7 +19,6 @@ limitations under the License. #include #include "llvm/ADT/Triple.h" -#include "llvm/ExecutionEngine/ObjectMemoryBuffer.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/LegacyPassManager.h" diff --git a/tensorflow/compiler/aot/runtime.cc b/tensorflow/compiler/aot/runtime.cc index 5772776666129ed55a479c8917e69df3f3ce2fc0..5e74079fc158379b8977ada6412141e39142c3d3 100644 --- a/tensorflow/compiler/aot/runtime.cc +++ b/tensorflow/compiler/aot/runtime.cc @@ -31,7 +31,7 @@ namespace { inline void* aligned_malloc(size_t size, int minimum_alignment) { #if defined(__ANDROID__) || defined(OS_ANDROID) || defined(OS_CYGWIN) return memalign(minimum_alignment, size); -#elif defined(COMPILER_MSVC) +#elif defined(_WIN32) return _aligned_malloc(size, minimum_alignment); #else // !__ANDROID__ && !OS_ANDROID && !OS_CYGWIN void* ptr = nullptr; @@ -48,7 +48,7 @@ inline void* aligned_malloc(size_t size, int minimum_alignment) { } inline void aligned_free(void* aligned_memory) { -#if defined(COMPILER_MSVC) +#if defined(_WIN32) _aligned_free(aligned_memory); #else free(aligned_memory); diff --git a/tensorflow/compiler/aot/tests/BUILD b/tensorflow/compiler/aot/tests/BUILD index b053dad1b57c258b7cb0d6831923e6a0f30f5e7e..bb73cb19c57a654058af5bbb4535c76b0aca8e8c 100644 --- a/tensorflow/compiler/aot/tests/BUILD +++ b/tensorflow/compiler/aot/tests/BUILD @@ -14,6 +14,7 @@ test_suite( ":test_graph_tfadd_test", ":test_graph_tfadd_with_ckpt_saver_test", ":test_graph_tfadd_with_ckpt_test", + ":test_graph_tfassert_eq_test", ":test_graph_tffunction_test", ":test_graph_tfgather_test", ":test_graph_tfmatmul_test", @@ -33,6 +34,7 @@ py_binary( "//tensorflow/python", # TODO(b/34059704): remove when fixed "//tensorflow/python:array_ops", "//tensorflow/python:client", + "//tensorflow/python:control_flow_ops", 
"//tensorflow/python:framework_for_generated_wrappers", "//tensorflow/python:math_ops", "//tensorflow/python:platform", @@ -52,6 +54,7 @@ genrule( "test_graph_tfadd_with_ckpt_saver.ckpt", "test_graph_tfadd_with_ckpt_saver.pb", "test_graph_tfadd_with_ckpt_saver.saver", + "test_graph_tfassert_eq.pb", "test_graph_tffunction.pb", "test_graph_tfgather.pb", "test_graph_tfmatmul.pb", @@ -104,6 +107,17 @@ tf_library( ], ) +tf_library( + name = "test_graph_tfassert_eq", + testonly = 1, + config = "test_graph_tfassert_eq.config.pbtxt", + cpp_class = "AssertComp", + graph = "test_graph_tfassert_eq.pb", + tags = [ + "manual", + ], +) + tf_library( name = "test_graph_tffunction", testonly = 1, @@ -170,6 +184,7 @@ tf_cc_test( ":test_graph_tfadd", ":test_graph_tfadd_with_ckpt", ":test_graph_tfadd_with_ckpt_saver", + ":test_graph_tfassert_eq", ":test_graph_tffunction", ":test_graph_tfgather", ":test_graph_tfmatmul", diff --git a/tensorflow/compiler/aot/tests/make_test_graphs.py b/tensorflow/compiler/aot/tests/make_test_graphs.py index 89c7cd4507cbd476104a039d6083d8f89de11278..67767f55dae9b15aafbd8b129328bde2c59a9ef3 100644 --- a/tensorflow/compiler/aot/tests/make_test_graphs.py +++ b/tensorflow/compiler/aot/tests/make_test_graphs.py @@ -29,6 +29,7 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import function from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops +from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import variables from tensorflow.python.platform import app @@ -125,6 +126,14 @@ def tfsplits(_): array_ops.identity(y, name='result') +def tfassert_eq(_): + x = array_ops.placeholder(dtypes.int32, name='x_hold') + y = array_ops.placeholder(dtypes.int32, name='y_hold') + control_flow_ops.Assert( + math_ops.equal(x, y), ['Expected x == y.'], name='assert_eq') + math_ops.add(x, math_ops.negative(y), name='x_y_diff') + + def 
write_graph(build_graph, out_dir): """Build a graph using build_graph and write it out.""" g = ops.Graph() @@ -144,6 +153,7 @@ def main(_): write_graph(tfmatmulandadd, FLAGS.out_dir) write_graph(tffunction, FLAGS.out_dir) write_graph(tfsplits, FLAGS.out_dir) + write_graph(tfassert_eq, FLAGS.out_dir) if __name__ == '__main__': diff --git a/tensorflow/compiler/aot/tests/test_graph_tfassert_eq.config.pbtxt b/tensorflow/compiler/aot/tests/test_graph_tfassert_eq.config.pbtxt new file mode 100644 index 0000000000000000000000000000000000000000..8732d1709e809bb47d3769c483483c2c4f350e1c --- /dev/null +++ b/tensorflow/compiler/aot/tests/test_graph_tfassert_eq.config.pbtxt @@ -0,0 +1,16 @@ +# Text form of tensorflow.tf2xla.Config proto. +feed { + id { node_name: "x_hold" } + shape { + dim { size: 1 } + } +} +feed { + id { node_name: "y_hold" } + shape { + dim { size: 1 } + } +} +fetch { + id { node_name: "x_y_diff" } +} diff --git a/tensorflow/compiler/aot/tests/tfcompile_test.cc b/tensorflow/compiler/aot/tests/tfcompile_test.cc index 413efd9cea3b6f71574615ad9ca92471ff925781..67dbd643bfc7bf2c214e7eb5ae8bd2cc7d6e164b 100644 --- a/tensorflow/compiler/aot/tests/tfcompile_test.cc +++ b/tensorflow/compiler/aot/tests/tfcompile_test.cc @@ -20,6 +20,7 @@ limitations under the License. 
#include "tensorflow/compiler/aot/tests/test_graph_tfadd.h" #include "tensorflow/compiler/aot/tests/test_graph_tfadd_with_ckpt.h" #include "tensorflow/compiler/aot/tests/test_graph_tfadd_with_ckpt_saver.h" +#include "tensorflow/compiler/aot/tests/test_graph_tfassert_eq.h" #include "tensorflow/compiler/aot/tests/test_graph_tffunction.h" #include "tensorflow/compiler/aot/tests/test_graph_tfgather.h" #include "tensorflow/compiler/aot/tests/test_graph_tfmatmul.h" @@ -413,6 +414,23 @@ TEST(TFCompileTest, Splits) { EXPECT_NEAR(expected[3], fn.result0(1, 1), 1e4); } +TEST(TFCompileTest, AssertEqAndReturnDiff) { + // Assert is converted into a no-op in XLA, so there is no failure even if the + // two args are different. + AssertComp assert; + EXPECT_EQ(assert.arg0_data(), assert.args()[0]); + EXPECT_EQ(assert.arg1_data(), assert.args()[1]); + + assert.arg0() = 2; + assert.arg1() = 1; + const int32 expected_result = assert.arg0() - assert.arg1(); + EXPECT_TRUE(assert.Run()); + EXPECT_EQ(assert.error_msg(), ""); + EXPECT_EQ(assert.result0(), expected_result); + EXPECT_EQ(assert.result0_data()[0], expected_result); + EXPECT_EQ(assert.result0_data(), assert.results()[0]); +} + TEST(TFCompileTest, LookupNameIndex) { // add doesn't have any names defined in its config. AddComp add; diff --git a/tensorflow/compiler/jit/BUILD b/tensorflow/compiler/jit/BUILD index 24aa203c00b3a011ae11007e308f8bbb6998204e..50fa95c4f322e85c22f7be2d63f2bcd194ee419e 100644 --- a/tensorflow/compiler/jit/BUILD +++ b/tensorflow/compiler/jit/BUILD @@ -183,6 +183,13 @@ cc_library( ], ) +cc_library( + name = "shape_inference_helpers", + srcs = ["shape_inference_helpers.cc"], + hdrs = ["shape_inference_helpers.h"], + deps = ["//tensorflow/core:graph"], +) + # Internal targets below this point. 
cc_library( @@ -204,14 +211,14 @@ cc_library( ":common", ":xla_compilation_cache", ":xla_tensor", + "//tensorflow/compiler/tf2xla:common", "//tensorflow/compiler/tf2xla:xla_compiler", - "//tensorflow/compiler/xla:status_macros", "//tensorflow/compiler/xla:statusor", "//tensorflow/compiler/xla/client:client_library", "//tensorflow/compiler/xla/client:local_client", - "//tensorflow/core:core_cpu", "//tensorflow/core:core_cpu_internal", "//tensorflow/core:framework", + "//tensorflow/core:gpu_runtime", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//tensorflow/core:protos_all_cc", @@ -293,6 +300,7 @@ cc_library( deps = [ ":common", ":graph_to_functiondef", + ":shape_inference_helpers", ":union_find", "//tensorflow/compiler/jit/graphcycles", "//tensorflow/compiler/jit/kernels:parallel_check_op", @@ -310,6 +318,7 @@ cc_library( "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//tensorflow/core:protos_all_cc", + "//tensorflow/core/kernels:bounds_check", ], ) @@ -318,6 +327,25 @@ cc_library( hdrs = ["union_find.h"], ) +cc_library( + name = "producer_consumer_queue", + hdrs = ["producer_consumer_queue.h"], + deps = ["//tensorflow/core:lib"], +) + +tf_cc_test( + name = "producer_consumer_queue_test", + size = "small", + srcs = ["producer_consumer_queue_test.cc"], + deps = [ + ":producer_consumer_queue", + "//tensorflow/core:lib", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + ], +) + tf_cc_test( name = "graph_to_functiondef_test", size = "small", diff --git a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc index 53ec6c1e6027644e999d10b2eb7d29c550c26e0e..7507e193b56d7200876fb938001d06666784b905 100644 --- a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc +++ b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc @@ -23,8 +23,10 @@ limitations under the License. 
#include #include "tensorflow/compiler/jit/graph_to_functiondef.h" +#include "tensorflow/compiler/jit/graphcycles/graphcycles.h" #include "tensorflow/compiler/jit/legacy_flags/encapsulate_subgraphs_pass_flags.h" #include "tensorflow/compiler/jit/mark_for_compilation_pass.h" +#include "tensorflow/compiler/jit/shape_inference_helpers.h" #include "tensorflow/compiler/tf2xla/const_analysis.h" #include "tensorflow/compiler/tf2xla/dump_graph.h" #include "tensorflow/compiler/xla/status_macros.h" @@ -36,6 +38,7 @@ limitations under the License. #include "tensorflow/core/framework/node_def_builder.h" #include "tensorflow/core/framework/node_def_util.h" #include "tensorflow/core/graph/algorithm.h" +#include "tensorflow/core/graph/control_flow.h" #include "tensorflow/core/graph/graph.h" #include "tensorflow/core/graph/graph_def_builder.h" #include "tensorflow/core/graph/tensor_id.h" @@ -158,6 +161,11 @@ class Encapsulator { std::move(outside_compilation_attribute)), graph_in_(graph_in) {} + // Find dependencies between subgraphs and outside_compilation clusters that + // only manifest via edges between outside_compilation clusters in the outer + // (non-compiled) graph. + Status FindClusterDependencies(); + // Find subgraphs marked with 'group_attribute', and build a new // subgraph, one for each value of 'group_attribute'. Status SplitIntoSubgraphs(); @@ -228,6 +236,19 @@ class Encapsulator { // the shapes of any ancestor RAH outputs. If it can be determined that the // shape of the SFH inputs will not be inferrable even once the shapes of the // RAH outputs are known, an error is returned by the rewriter. + // + // Once edges between compiled and outside_compilation clusters have been + // replaced by send/recv ops, some dependencies may no longer be apparent. + // A clustering pass finds all the dependencies between HC nodes that are only + // present as a result of edges between nodes in outside_compilation clusters. 
+ // Suppose there is a path from outside_compilation cluster C in subgraph S + // to outside_compilation cluster D in subgraph T. If S != T then a control + // edge is added from the call node for S to the call node for T, which + // ensures that C will execute before D because S executes before T. If S==T + // then a control dependency is added between the HC nodes for C and D in S, + // and the HC node for C is added to an 'ancestors' attr in the HC node for D + // so that during compilation of the HC node for D, an XLA control dependency + // can be added to ensure C's SendToHost executes before D's RecvFromHost. class Subgraph { public: // Creates a graph to build the subgraph in, if it doesn't already exist, @@ -322,6 +343,18 @@ class Encapsulator { void RecordOutsideCompilationOutputOrControl( const string& outside_compilation_id, const Edge* edge); + // Records the fact that there is a path from a node in outside_compilation + // cluster ancestor to node in cluster successor that does not go through + // the subgraph. + void RecordOutsideCompilationDependency(const string& successor, + const string& ancestor); + + // Returns the mapping from outside_compilation cluster C to the set of + // outside_compilation clusters that have a path to C entirely outside + // compiled subgraphs. + const std::unordered_map> + OutsideCompilationAncestorMap() const; + // Adds the HostCompute nodes for each outside_compilation subgraph. Status AddHostComputes( const string& subgraph_name, @@ -404,6 +437,13 @@ class Encapsulator { Status AddHostComputeKeyPlaceholder(OutsideCompilationSubgraph* oc_subgraph, Graph* graph_out); + // Get the set of outside_compilation clusters and the dependency edges + // between them. 
+ void GetActiveClusterDependencyGraph( + std::unordered_set* clusters, + std::unordered_set* has_successor, + std::unordered_map>* ancestors_map); + // Builds a _RecvAtHost node producing all the inputs of an // outside_compilation subgraph and stores it in oc_subgraph.recv_at_host. Status AddRecvAtHostNode(const string& group_attribute, @@ -466,6 +506,14 @@ class Encapsulator { // The outside_compilation clusters in this subgraph. std::unordered_map outside_compilation_subgraphs_; + // For each outside_compilation cluster C, the outside_compilation clusters + // that have a path to C outside the compiled graph. + std::unordered_map> + outside_compilation_ancestors_; + // For each outside_compilation cluster C, the outside_compilation clusters + // that have a path from C outside the compiled graph. + std::unordered_map> + outside_compilation_successors_; // NoOp node in the output graph that is sequenced after the call node and // used to prevent host-side outside_compilation sends and recvs from being @@ -554,6 +602,10 @@ class Encapsulator { std::unordered_set, NodeSlot::PairHasher>* edges_added); + // Adds control dependencies between subgraph call nodes that have + // dependencies via outside_compilation edges. + Status AddCallNodeDependencies(Graph* graph_out); + // Adds all edges to the output graph. Status AddEdgesToOutputGraph( const std::unordered_map& node_images, @@ -576,7 +628,8 @@ class Encapsulator { // satisfied, e.g., because send_node depends on a node that doesn't have a // registered shape inference function. Status DoStaticShapeInferenceForOutsideCompilationSend( - const Graph& graph_in, const ShapeRefiner& shape_refiner, + const Graph& graph_in, const BackEdgeHelper& back_edge_helper, + const ShapeRefiner& shape_refiner, const std::unordered_set& recv_at_host_nodes, Node* send_node, FunctionLibraryDefinition* library, std::vector* static_shape_out, @@ -599,7 +652,7 @@ class Encapsulator { // to nodes in pruned_graph. 
Status MakeGraphForOutsideCompilationSends( const Graph& graph, std::unique_ptr* pruned_graph, - ShapeRefiner* shape_refiner, + BackEdgeHelper* back_edge_helper, ShapeRefiner* shape_refiner, std::unordered_map* node_images, FunctionLibraryDefinition* library); @@ -617,10 +670,65 @@ class Encapsulator { const Graph* graph_in_; std::unordered_map subgraphs_; + // For each subgraph S the subgraphs S' such that there is a path in some + // outside_compilation cluster C in S to some outside_compilation cluster C' + // in S', that goes only through the uncompiled graph. + std::unordered_map> subgraph_ancestors_; TF_DISALLOW_COPY_AND_ASSIGN(Encapsulator); }; +namespace { + +// Return in 'sorted' a topological sort of clusters according to the +// dependencies encoded in ancestors. clusters is the list of all clusters +// including clusters that are not present in the ancestors map. has_successors +// is the set of clusters that are ancestors of some other cluster. +void TopologicalClusterSort( + const std::unordered_set& clusters, + const std::unordered_set& has_successors, + const std::unordered_map>& ancestors, + std::vector* sorted) { + // The nodes are placed in 'sorted' in topological order. + sorted->clear(); + // We don't use the standard DFS because we are not operating on Node* + // objects. + struct Work { + string cluster; + bool leave; + }; + std::set visited; + std::vector stack; + // Seed the processing list with clusters that have no successors. 
+ for (const auto& cluster : clusters) { + if (has_successors.find(cluster) == has_successors.end()) { + stack.push_back({cluster, false}); + } + } + while (!stack.empty()) { + const Work item = stack.back(); + stack.pop_back(); + if (item.leave) { + sorted->push_back(item.cluster); + continue; + } + + if (visited.find(item.cluster) != visited.end()) continue; + visited.insert(item.cluster); + + stack.push_back({item.cluster, true}); + const auto& iter = ancestors.find(item.cluster); + if (iter != ancestors.end()) { + for (const auto& ancestor : iter->second) { + stack.push_back({ancestor, false}); + } + } + } + CHECK(sorted->size() == clusters.size()); +} + +} // namespace + Node* Encapsulator::Subgraph::GetCallNodeForInputs() const { return call_node_inputs_; } @@ -783,12 +891,71 @@ void Encapsulator::Subgraph::RecordOutsideCompilationOutputOrControl( } } +void Encapsulator::Subgraph::RecordOutsideCompilationDependency( + const string& successor, const string& ancestor) { + outside_compilation_ancestors_[successor].insert(ancestor); + outside_compilation_successors_[ancestor].insert(successor); +} + +const std::unordered_map> +Encapsulator::Subgraph::OutsideCompilationAncestorMap() const { + return outside_compilation_ancestors_; +} + +void Encapsulator::Subgraph::GetActiveClusterDependencyGraph( + std::unordered_set* clusters, + std::unordered_set* has_successor, + std::unordered_map>* ancestors_map) { + // During initial clustering the ancestor and successor datastructures may + // have been built including oc_cluster names that never turned into subgraphs + // because they had no edges into or out of the compiled cluster. Remove them + // before proceeding to simplify the logic. Get the set of clusters that was + // actually added, then remove references to the others. 
+ for (const auto& oc_subgraph : outside_compilation_subgraphs_) { + clusters->insert(oc_subgraph.first); + } + for (const auto& cluster : outside_compilation_successors_) { + if (clusters->find(cluster.first) != clusters->end()) { + for (const auto& successor : cluster.second) { + if (clusters->find(successor) != clusters->end()) { + has_successor->insert(cluster.first); + break; + } + } + } + } + for (const auto& cluster : outside_compilation_ancestors_) { + if (clusters->find(cluster.first) != clusters->end()) { + std::unordered_set& ancestors = (*ancestors_map)[cluster.first]; + for (const auto& ancestor : cluster.second) { + if (clusters->find(ancestor) != clusters->end()) { + ancestors.insert(ancestor); + } + } + } + } +} + Status Encapsulator::Subgraph::AddHostComputes( const string& subgraph_name, const std::unordered_map& node_images) { - for (auto& oc_subgraph_iter : outside_compilation_subgraphs_) { - const string& oc_subgraph_name = oc_subgraph_iter.first; - OutsideCompilationSubgraph& oc_subgraph = oc_subgraph_iter.second; + // Get the set of outside_compilation clusters and the dependency edges + // between them. + std::unordered_set clusters; + std::unordered_set has_successor; + std::unordered_map> ancestors_map; + GetActiveClusterDependencyGraph(&clusters, &has_successor, &ancestors_map); + // Topologically sort the outside_compilation clusters according to their + // dependency relation. 
+ std::vector sorted_clusters; + TopologicalClusterSort(clusters, has_successor, ancestors_map, + &sorted_clusters); + + // The host compute nodes added for each outside_compilation_cluster; + std::unordered_map host_compute_node; + for (const string& oc_subgraph_name : sorted_clusters) { + OutsideCompilationSubgraph& oc_subgraph = + outside_compilation_subgraphs_[oc_subgraph_name]; if (!oc_subgraph.inputs.empty() || !oc_subgraph.control_inputs.empty() || !oc_subgraph.outputs_by_src.empty() || !oc_subgraph.control_outputs.empty()) { @@ -808,13 +975,22 @@ Status Encapsulator::Subgraph::AddHostComputes( inputs[input_index].Reset(src_image->name(), src_slot, dtype); input_dtypes[input_index] = dtype; } - for (const auto& output : oc_subgraph.outputs_by_src) { DataType dtype = output.first.dtype; int output_index = output.second; output_dtypes[output_index] = dtype; } + std::vector host_compute_ancestors; + const auto iter = ancestors_map.find(oc_subgraph_name); + if (iter != ancestors_map.end()) { + for (const string& ancestor_cluster : iter->second) { + host_compute_ancestors.push_back( + outside_compilation_subgraphs_[ancestor_cluster] + .host_compute_name); + } + } + NodeDef host_compute_def; NodeDefBuilder builder(strings::StrCat("outside_compilation_", oc_subgraph_name, "_host_compute"), @@ -822,14 +998,17 @@ Status Encapsulator::Subgraph::AddHostComputes( builder.Input(inputs); builder.Attr("Tinputs", input_dtypes); builder.Attr("Toutputs", output_dtypes); + builder.Attr("ancestors", host_compute_ancestors); builder.Attr("key", strings::StrCat("host_compute_channel_", subgraph_name, "_", oc_subgraph_name)); + builder.Attr("_outside_compilation_subgraph", oc_subgraph_name); Status s = builder.Finalize(&host_compute_def); if (!s.ok()) return s; Node* host_compute = graph_->AddNode(host_compute_def, &s); if (!s.ok()) return s; + host_compute_node[host_compute->name()] = host_compute; oc_subgraph.host_compute_name = host_compute->name(); // Connect the _HostCompute 
node to its producers in the subgraph. @@ -848,6 +1027,12 @@ Status Encapsulator::Subgraph::AddHostComputes( graph_->AddControlEdge(src_image, host_compute); } + // Connect the _HostCompute node to its ancestor host compute nodes. + for (const auto& ancestor_name : host_compute_ancestors) { + Node* ancestor = host_compute_node[ancestor_name]; + graph_->AddControlEdge(ancestor, host_compute); + } + // Connect the consumers in the subgraph to the _HostCompute node. for (const auto& output : oc_subgraph.outputs_by_dst) { const Node* dst_node = output.first.node; @@ -1650,6 +1835,17 @@ Status Encapsulator::CopyEdgeToOutputGraph( return Status::OK(); } +Status Encapsulator::AddCallNodeDependencies(Graph* graph_out) { + for (const auto& ancestors : subgraph_ancestors_) { + const string& subgraph = ancestors.first; + for (const string& ancestor : ancestors.second) { + graph_out->AddControlEdge(subgraphs_[ancestor].GetCallNodeForOutputs(), + subgraphs_[subgraph].GetCallNodeForInputs()); + } + } + return Status::OK(); +} + Status Encapsulator::AddEdgesToOutputGraph( const std::unordered_map& node_images, bool parallel_checking, Graph* graph_out) { @@ -1699,6 +1895,7 @@ Status Encapsulator::AddEdgesToOutputGraph( Subgraph& subgraph = subgraph_entry.second; subgraph.ConnectSequencerToCallNode(graph_out); } + TF_RETURN_IF_ERROR(AddCallNodeDependencies(graph_out)); return Status::OK(); } @@ -1711,9 +1908,13 @@ namespace { // matter because it will only be used subsequently for shape inference. (It // would be possible to add a switch statement over data_type to create a value // for the constant, but that would entail maintaining the logic as new types -// are added, and is not necessary.) -Node* AddDummyShapedNode(DataType data_type, const TensorShapeProto& shape, - Graph* graph_out) { +// are added, and is not necessary.) If the node being replaced was within a +// control flow frame, adds appropriate Enter nodes so that the use of the Const +// is well-formed. 
+Node* AddDummyShapedNode(const Node* src_node, int src_port, + const std::vector& control_flow_info, + const TensorShapeProto& shape, Graph* graph_out) { + DataType data_type = src_node->output_type(src_port); TensorProto dummy_proto; dummy_proto.set_dtype(data_type); *dummy_proto.mutable_tensor_shape() = shape; @@ -1724,7 +1925,23 @@ Node* AddDummyShapedNode(DataType data_type, const TensorShapeProto& shape, NodeBuilder node_builder(options.GetNameForOp("KnownShape"), "Const", options.op_registry()); node_builder.Attr("dtype", data_type).Attr("value", dummy_proto); - return options.FinalizeBuilder(&node_builder); + Node* node = options.FinalizeBuilder(&node_builder); + // Add any Enter nodes required to bring the constant to the correct control + // flow frame. + while (!control_flow_info[src_node->id()].frame_name.empty()) { + NodeBuilder enter_builder(options.GetNameForOp("Enter"), "Enter", + options.op_registry()); + enter_builder.Attr("frame_name", + control_flow_info[src_node->id()].frame_name); + enter_builder.Attr("is_constant", true); + enter_builder.Input(node, 0); + Node* enter_node = options.FinalizeBuilder(&enter_builder); + // Adopt the new Enter node as the value in the current frame. + node = enter_node; + // Recurse to the parent frame to see if more Enter nodes need to be added. + src_node = control_flow_info[src_node->id()].parent_frame; + } + return node; } // Adds a copy of node_in to graph_out and adds the mapping to @@ -1766,17 +1983,30 @@ Status CopyShapeInferenceNodeToGraph( } } } + // Work around the fact that Enter nodes refuse to propagate shape information + // unless they are marked loop invariant. Since we are never going to execute + // this graph, marking them all loop invariant is fine. 
+ if (node_out->type_string() == "Enter") { + node_out->ClearAttr("is_constant"); + node_out->AddAttr("is_constant", true); + } return Status::OK(); } } // namespace Status Encapsulator::DoStaticShapeInferenceForOutsideCompilationSend( - const Graph& graph_in, const ShapeRefiner& shape_refiner, + const Graph& graph_in, const BackEdgeHelper& back_edge_helper, + const ShapeRefiner& shape_refiner, const std::unordered_set& recv_at_host_nodes, Node* send_node, FunctionLibraryDefinition* library, std::vector* static_shape_out, std::unique_ptr* graph_out) { + // Get the control flow structure of the input graph so we can build + // well-formed output graphs. + std::vector control_flow_info; + TF_RETURN_IF_ERROR(BuildControlFlowInfo(&graph_in, &control_flow_info)); + // Maps from nodes in graph_in to nodes in graph_out. // // When an edge has fully defined shape the source node in graph_in is @@ -1801,7 +2031,6 @@ Status Encapsulator::DoStaticShapeInferenceForOutsideCompilationSend( // We don't use the standard ReverseDFS because we want to cut off traversal // whenever we find an output with fully defined shape. - // TODO(misard) make this work properly in the presence of control flow. struct Work { Node* node; bool leave; // Are we entering or leaving node? @@ -1839,8 +2068,9 @@ Status Encapsulator::DoStaticShapeInferenceForOutsideCompilationSend( TensorShapeProto proto; context->ShapeHandleToProto(shape, &proto); if (dummy_node_images.find(src_node) == dummy_node_images.end()) { - dummy_node_images[src_node] = AddDummyShapedNode( - src_node->output_type(src_port), proto, graph_out->get()); + dummy_node_images[src_node] = + AddDummyShapedNode(src_node, src_port, control_flow_info, + proto, graph_out->get()); } // The final input to the send node is the dynamic key, which we // don't include in the static shapes. 
@@ -1888,6 +2118,214 @@ Status Encapsulator::DoStaticShapeInferenceForOutsideCompilationSend( } } + for (const auto edge : back_edge_helper.RemovedEdges()) { + if (copied_node_images.find(edge.dst) != copied_node_images.end()) { + // The destination of this back edge was added to the inference graph, so + // fix it up. + Node* dst = copied_node_images[edge.dst]; + if (dst->type_string() != "Merge") { + return errors::InvalidArgument( + "outside_compilation cluster contains a back-edge to node ", + dst->name(), " of type ", dst->type_string(), + ". The analysis pass only supports back-edges to Merge nodes."); + } + const Edge* existing_input_edge; + if (edge.dst_input != 1 || dst->num_inputs() != 2 || + !dst->input_edge(0, &existing_input_edge).ok()) { + // TODO(misard) if we see graphs built with a different structure, relax + // this constraint. Leaving it here for now to avoid writing unnecessary + // complex code since we believe graphs generated by front ends all have + // the back edge as the second input to the merge node. + return errors::Internal( + "Internal assumption failed while rewriting an outside_compilation " + "cluster that contains a while loop. Logic assumes back-edge is to " + "port 1 of a 2-input " + "Merge node."); + } + // Connect the existing edge to both inputs of the Merge node so that the + // graph will be well-formed. + (*graph_out) + ->AddEdge(existing_input_edge->src(), + existing_input_edge->src_output(), dst, edge.dst_input); + } + } + + return Status::OK(); +} + +namespace { + +// Helper struct for building cluster dependencies and also debugging cycles in +// the dependencies. While computing dependencies we construct a mapping from +// Node* to PathDetails. 
+struct PathDetails { + struct SubgraphAndCluster { + string subgraph; + string outside_compilation_cluster; + bool operator==(const SubgraphAndCluster& other) const { + return subgraph == other.subgraph && + outside_compilation_cluster == other.outside_compilation_cluster; + } + }; + + struct SubgraphAndClusterHash { + inline std::size_t operator()(const SubgraphAndCluster& v) const { + return hash()( + strings::StrCat(v.subgraph, v.outside_compilation_cluster)); + } + }; + + typedef std::unordered_set + SubgraphAndClusterSet; + + // Returns the set of (subgraph, oc_cluster) pairs that should be recorded as + // ancestors for any successor of this node. If the node is in the outer + // graph, it returns the transitive union of the ancestors of the node's + // inputs. If the node is in an outside_compilation cluster, it returns just + // that cluster. If the node is compiled, it returns the empty set. + SubgraphAndClusterSet AncestorsForSuccessor() { + if (subgraph.empty()) { + return ancestor_clusters; + } else if (outside_compilation_cluster.empty()) { + return SubgraphAndClusterSet(); + } else { + SubgraphAndCluster entry; + entry.subgraph = subgraph; + entry.outside_compilation_cluster = outside_compilation_cluster; + return SubgraphAndClusterSet({entry}); + } + } + + // The transitive union of the ancestor's of this node's inputs. This is only + // saved for debugging in order to print out enough information to debug a + // discovered cycle. + SubgraphAndClusterSet ancestor_clusters; + // The subgraph attr on this node. + string subgraph; + // The outside_compilation attr on this node. + string outside_compilation_cluster; +}; + +// Adds an edge from ancestor to successor to the cycle detector, and returns an +// error if that edge causes the formation of a cycle. In the error case, logs +// the contents of the node_ancestors_map to facilitate debugging. 
+Status CheckClusterDependencyForCycles( + const string& ancestor, const string& successor, + const std::unordered_map>& ancestors, + const std::unordered_map& node_ancestors_map, + GraphCycles* cycle_detector, std::map* cycle_detector_map) { + if (cycle_detector_map->find(ancestor) == cycle_detector_map->end()) { + (*cycle_detector_map)[ancestor] = cycle_detector->NewNode(); + } + if (cycle_detector_map->find(successor) == cycle_detector_map->end()) { + (*cycle_detector_map)[successor] = cycle_detector->NewNode(); + } + + if (!cycle_detector->InsertEdge((*cycle_detector_map)[ancestor], + (*cycle_detector_map)[successor])) { + LOG(ERROR) << "Cycle in outside_compilation clusters"; + for (const auto& cluster : ancestors) { + LOG(ERROR) << "Cluster " << cluster.first << " depends on:"; + for (const auto& ancestor : cluster.second) { + LOG(ERROR) << " " << ancestor; + } + } + for (const auto& node_ancestors : node_ancestors_map) { + LOG(ERROR) << "Node " << node_ancestors.first->name() << " (" + << node_ancestors.second.subgraph << ";" + << node_ancestors.second.outside_compilation_cluster + << ") has ancestor clusters:"; + for (const auto& ancestor : node_ancestors.second.ancestor_clusters) { + LOG(ERROR) << " " << ancestor.subgraph << ";" + << ancestor.outside_compilation_cluster; + } + } + return errors::InvalidArgument( + "Can't compile outside_compilation clusters because there is a " + "dependency cycle: see error log for details."); + } + return Status::OK(); +} + +} // namespace + +Status Encapsulator::FindClusterDependencies() { + // Map from nodes to ancestor details. A node is entered into the map if it is + // in a compilation subgraph, and outside_compilation cluster, or appears on a + // path in the outer graph leading from an outside_compilation subgraph. + std::unordered_map node_ancestors_map; + // We check that clusters are acyclic using this cycle detector. + GraphCycles cycle_detector; + // Map from cluster name to cycle detector node id. 
+ std::map cycle_detector_map; + // Process the nodes in topologically-sorted order. + std::vector nodes; + GetReversePostOrder(*graph_in_, &nodes); + for (Node* node : nodes) { + string subgraph_name; + string oc_cluster; + TF_RETURN_IF_ERROR(GetFunctionNameAttr(node, &subgraph_name, &oc_cluster)); + // First create an entry in the ancestors map if the node is in a compiled + // subgraph or outside_compilation cluster, or if any incoming edge is from + // a node with an ancestor map entry; and find the union of all the + // ancestors. + if (!subgraph_name.empty()) { + node_ancestors_map[node].subgraph = subgraph_name; + node_ancestors_map[node].outside_compilation_cluster = oc_cluster; + } + for (Node* src : node->in_nodes()) { + const auto iter = node_ancestors_map.find(src); + if (iter != node_ancestors_map.end()) { + const auto& ancestors_to_follow = iter->second.AncestorsForSuccessor(); + for (const auto& ancestor : ancestors_to_follow) { + if (ancestor.subgraph != subgraph_name || + ancestor.outside_compilation_cluster != oc_cluster) { + node_ancestors_map[node].ancestor_clusters.insert(ancestor); + } + } + } + } + if (!subgraph_name.empty()) { + // The node is in a compiled subgraph or an outside_compilation cluster. + if (oc_cluster.empty()) { + // The node is not in an outside_compilation cluster. Record the + // subgraph's ancestor dependencies. + for (const auto& cluster : node_ancestors_map[node].ancestor_clusters) { + if (cluster.subgraph != subgraph_name) { + subgraph_ancestors_[subgraph_name].insert(cluster.subgraph); + TF_RETURN_IF_ERROR(CheckClusterDependencyForCycles( + cluster.subgraph, subgraph_name, subgraph_ancestors_, + node_ancestors_map, &cycle_detector, &cycle_detector_map)); + } + } + } else { + Subgraph& subgraph = subgraphs_[subgraph_name]; + // The node is in an outside_compilation cluster. Record the cluster + // and/or subgraph ancestor dependencies. 
+ for (const auto& cluster : node_ancestors_map[node].ancestor_clusters) { + if (cluster.subgraph == subgraph_name) { + // The ancestor is in the same subgraph. + if (cluster.outside_compilation_cluster != oc_cluster) { + // But not in the same oc_cluster, so record the dependency. + subgraph.RecordOutsideCompilationDependency( + oc_cluster, cluster.outside_compilation_cluster); + TF_RETURN_IF_ERROR(CheckClusterDependencyForCycles( + cluster.outside_compilation_cluster, oc_cluster, + subgraph.OutsideCompilationAncestorMap(), node_ancestors_map, + &cycle_detector, &cycle_detector_map)); + } + } else { + // The ancestor is in a different subgraph, so record the + // dependency. + subgraph_ancestors_[subgraph_name].insert(cluster.subgraph); + TF_RETURN_IF_ERROR(CheckClusterDependencyForCycles( + cluster.subgraph, subgraph_name, subgraph_ancestors_, + node_ancestors_map, &cycle_detector, &cycle_detector_map)); + } + } + } + } + } return Status::OK(); } @@ -1955,7 +2393,7 @@ Status Encapsulator::MakePrunedGraphCopyAndInline( Status Encapsulator::MakeGraphForOutsideCompilationSends( const Graph& graph, std::unique_ptr* pruned_graph, - ShapeRefiner* shape_refiner, + BackEdgeHelper* back_edge_helper, ShapeRefiner* shape_refiner, std::unordered_map* node_images, FunctionLibraryDefinition* library) { // Find all the send_from_host nodes in all subgraphs, to use as roots for the @@ -1977,10 +2415,15 @@ Status Encapsulator::MakeGraphForOutsideCompilationSends( // nodes, inlining any functions as needed. TF_RETURN_IF_ERROR(MakePrunedGraphCopyAndInline( graph, send_from_host_nodes, pruned_graph, node_images, library)); + FixupSourceAndSinkEdges(pruned_graph->get()); + + // Remove back edges from any cycles in the pruned graph to simplify shape + // inference traversal. They will be fixed up in the per-subgraph shape + // inference graphs stored in the function library. 
+ TF_RETURN_IF_ERROR(back_edge_helper->Remove(pruned_graph->get())); // Perform shape inference on the pruned graph. shape_refiner->set_require_shape_inference_fns(false); - FixupSourceAndSinkEdges(pruned_graph->get()); std::vector post_order; GetReversePostOrder(*(*pruned_graph), &post_order); for (auto node : post_order) { @@ -1998,11 +2441,13 @@ Status Encapsulator::MakeGraphForOutsideCompilationSends( Status Encapsulator::GetShapeInfoForOutsideCompilationSends( Graph* graph_out, FunctionLibraryDefinition* library) { + BackEdgeHelper back_edge_helper; std::unique_ptr pruned_graph; ShapeRefiner shape_refiner(graph_out->versions(), graph_out->op_registry()); std::unordered_map node_images; TF_RETURN_IF_ERROR(MakeGraphForOutsideCompilationSends( - *graph_out, &pruned_graph, &shape_refiner, &node_images, library)); + *graph_out, &pruned_graph, &back_edge_helper, &shape_refiner, + &node_images, library)); if (VLOG_IS_ON(1)) { dump_graph::DumpGraphToFile("pruned_graph_for_shape_inference", @@ -2032,7 +2477,7 @@ Status Encapsulator::GetShapeInfoForOutsideCompilationSends( std::unique_ptr graph; if (send_node != nullptr) { TF_RETURN_IF_ERROR(DoStaticShapeInferenceForOutsideCompilationSend( - *pruned_graph, shape_refiner, recv_at_host_names, + *pruned_graph, back_edge_helper, shape_refiner, recv_at_host_names, node_images[send_node], library, &static_shape, &graph)); if (graph == nullptr) { VLOG(2) << "Send node " << send_node->name() << " shapes"; @@ -2090,6 +2535,7 @@ Status EncapsulateSubgraphsInFunctions( Encapsulator encapsulator(std::move(group_attribute), std::move(outside_compilation_attribute), &graph_in); + TF_RETURN_IF_ERROR(encapsulator.FindClusterDependencies()); TF_RETURN_IF_ERROR(encapsulator.SplitIntoSubgraphs()); TF_RETURN_IF_ERROR(encapsulator.BuildFunctionDefs( diff --git a/tensorflow/compiler/jit/encapsulate_subgraphs_pass_test.cc b/tensorflow/compiler/jit/encapsulate_subgraphs_pass_test.cc index 
56efe98fdbfaae888832344946e6782a3a0be18b..3502d1bb4596bc8e8a57523d0d77dd3f12b9bec6 100644 --- a/tensorflow/compiler/jit/encapsulate_subgraphs_pass_test.cc +++ b/tensorflow/compiler/jit/encapsulate_subgraphs_pass_test.cc @@ -74,7 +74,7 @@ bool EqualProtoMap(const ::tensorflow::protobuf::Map& a, if (!compare(elt_a.first, elt_a.second, iter->second)) { if (diff) { *diff = strings::StrCat(map_name, " expected: element with key '", - key_to_string(elt_a.first), " has value '", + key_to_string(elt_a.first), "' has value '", value_to_string(elt_a.second), "' got: '", value_to_string(iter->second), "'"); } @@ -121,8 +121,22 @@ bool EqualFunctionNodeDef(const NodeDef& a, const NodeDef& b, } return false; } + std::unordered_set control_input_a; + std::unordered_set control_input_b; for (int i = 0; i < a.input_size(); ++i) { - if (a.input(i) != b.input(i)) { + if (str_util::StartsWith(a.input(i), "^")) { + if (!str_util::StartsWith(b.input(i), "^")) { + if (diff) { + *diff = strings::StrCat( + diff_preamble, " mismatch for node ", a.name(), " input ", i, + ", expected control input ", a.input(i), " got ", b.input(i), + " expected:\n", a.DebugString(), "\ngot:\n", b.DebugString()); + } + return false; + } + control_input_a.insert(a.input(i)); + control_input_b.insert(b.input(i)); + } else if (a.input(i) != b.input(i)) { if (diff) { *diff = strings::StrCat(diff_preamble, " mismatch for node ", a.name(), " input ", i, ", expected ", a.input(i), @@ -132,11 +146,29 @@ bool EqualFunctionNodeDef(const NodeDef& a, const NodeDef& b, return false; } } + if (control_input_a != control_input_b) { + if (diff) { + *diff = strings::StrCat(diff_preamble, " mismatch for node ", a.name(), + " control inputs differ expected:\n", + a.DebugString(), "\ngot:\n", b.DebugString()); + } + return false; + } return EqualProtoMap( a.attr(), b.attr(), [](const string& s) { return s; }, [](const AttrValue& v) { return v.DebugString(); }, [](const string& key, const AttrValue& av, const AttrValue& bv) { - 
return av.DebugString() == bv.DebugString(); + if (key == "ancestors") { + // The ancestors are added from a set so the order is unpredictable; + // just compare set equality not list equality. + std::unordered_set a_set(av.list().s().begin(), + av.list().s().end()); + std::unordered_set b_set(bv.list().s().begin(), + bv.list().s().end()); + return a_set == b_set; + } else { + return av.DebugString() == bv.DebugString(); + } }, strings::StrCat(diff_preamble, " attr mismatch for node ", a.name()), diff); @@ -261,6 +293,7 @@ REGISTER_OP("XlaHostCompute") .Output("outputs: Toutputs") .Attr("Tinputs: list(type) >= 0") .Attr("Toutputs: list(type) >= 0") + .Attr("ancestors: list(string) >= 0") .Attr("key: string") .Attr("shape_inference_graph: string = ''") .Attr("shapes: list(shape) >= 0") @@ -899,10 +932,12 @@ TEST(EncapsulateSubgraphsTest, OneFunctionOneOutside) { {"C:o:0", "c:o:0"}, {{"Tinputs", gtl::ArraySlice({DT_FLOAT, DT_FLOAT})}, {"Toutputs", gtl::ArraySlice({DT_FLOAT})}, + {"ancestors", gtl::ArraySlice({})}, {"key", "host_compute_channel_F1_O1"}, {"shape_inference_graph", "_outside_compilation_shape_inference_F1_O1"}, - {"shapes", gtl::ArraySlice({})}}, + {"shapes", gtl::ArraySlice({})}, + {"_outside_compilation_subgraph", "O1"}}, {"c"}}, }, {{"f_0_retval", "F:o:0"}}); @@ -1043,20 +1078,25 @@ TEST(EncapsulateSubgraphsTest, OneFunctionTwoOutside) { {"D:o:0", "F:o:0"}, {{"Tinputs", gtl::ArraySlice({DT_FLOAT, DT_FLOAT})}, {"Toutputs", gtl::ArraySlice({DT_FLOAT})}, + {"ancestors", + gtl::ArraySlice({"outside_compilation_O1_host_compute"})}, {"key", "host_compute_channel_F1_O2"}, {"shape_inference_graph", "_outside_compilation_shape_inference_F1_O2"}, - {"shapes", gtl::ArraySlice({})}}, - {"F"}}, + {"shapes", gtl::ArraySlice({})}, + {"_outside_compilation_subgraph", "O2"}}, + {"F", "outside_compilation_O1_host_compute"}}, {{"outside_compilation_O1_host_compute"}, "XlaHostCompute", {"C:o:0", "D:o:0"}, {{"Tinputs", gtl::ArraySlice({DT_FLOAT, DT_FLOAT})}, {"Toutputs", 
gtl::ArraySlice({DT_FLOAT})}, + {"ancestors", gtl::ArraySlice({})}, {"key", "host_compute_channel_F1_O1"}, {"shape_inference_graph", "_outside_compilation_shape_inference_F1_O1"}, - {"shapes", gtl::ArraySlice({})}}, + {"shapes", gtl::ArraySlice({})}, + {"_outside_compilation_subgraph", "O1"}}, {"D"}}, }, {{"i_0_retval", "I:o:0"}}); @@ -1190,10 +1230,12 @@ TEST(EncapsulateSubgraphsTest, TwoFunctionsTwoOutside) { {"C:o:0", "D:o:0"}, {{"Tinputs", gtl::ArraySlice({DT_FLOAT, DT_FLOAT})}, {"Toutputs", gtl::ArraySlice({DT_FLOAT})}, + {"ancestors", gtl::ArraySlice({})}, {"key", "host_compute_channel_F1_O1"}, {"shape_inference_graph", "_outside_compilation_shape_inference_F1_O1"}, - {"shapes", gtl::ArraySlice({})}}, + {"shapes", gtl::ArraySlice({})}, + {"_outside_compilation_subgraph", "O1"}}, {"D"}}, }, {{"d_0_retval", "D:o:0"}, {"f_0_retval", "F:o:0"}}); @@ -1211,10 +1253,12 @@ TEST(EncapsulateSubgraphsTest, TwoFunctionsTwoOutside) { {"G:o:0"}, {{"Tinputs", gtl::ArraySlice({DT_FLOAT})}, {"Toutputs", gtl::ArraySlice({DT_FLOAT})}, + {"ancestors", gtl::ArraySlice({})}, {"key", "host_compute_channel_F2_O1"}, {"shape_inference_graph", ""}, {"shapes", - gtl::ArraySlice({shape_proto_expected})}}}, + gtl::ArraySlice({shape_proto_expected})}, + {"_outside_compilation_subgraph", "O1"}}}, }, {{"g_0_retval", "G:o:0"}, {"i_0_retval", "I:o:0"}}); @@ -1274,6 +1318,179 @@ TEST(EncapsulateSubgraphsTest, TwoFunctionsTwoOutside) { TF_EXPECT_FUNCTIONDEFLIBRARY_EQ(library_expected, library); } +// Test with two functions to transform, each with one outside_compilation +// cluster, with the dependency between them purely from an outside_compilation +// edge. 
+TEST(EncapsulateSubgraphsTest, TwoFunctionsTwoOutsideDependencyFromOutside) { + FunctionDefLibrary library; + GraphDef graphdef; + + { + GraphDefBuilder b1(GraphDefBuilder::kFailImmediately); + Node* a = InputShaped(b1.opts().WithName("A")); + Node* b = InputShaped(b1.opts().WithName("B")); + Node* c = Unary(a, b1.opts().WithName("C").WithAttr("_encapsulate", "F1")); + Node* d = + Binary(b, c, b1.opts().WithName("D").WithAttr("_encapsulate", "F1")); + Node* e = Binary(c, d, + b1.opts() + .WithName("E") + .WithControlInputs({b, d}) + .WithAttr("_encapsulate", "F1") + .WithAttr("_outside", "O1")); + Node* f = Binary(c, e, + b1.opts().WithName("F").WithControlInput(e).WithAttr( + "_encapsulate", "F1")); + Node* g = + Binary(a, b, b1.opts().WithName("G").WithAttr("_encapsulate", "F2")); + Node* h = Unary(g, b1.opts() + .WithName("H") + .WithAttr("_encapsulate", "F2") + .WithAttr("_outside", "O1") + .WithControlInput(e)); + Node* i = Unary(h, b1.opts().WithName("I").WithAttr("_encapsulate", "F2")); + Binary(f, i, b1.opts().WithName("J")); + TF_EXPECT_OK(b1.ToGraphDef(&graphdef)); + } + + TF_EXPECT_OK(Encapsulate(&graphdef, &library)); + + FunctionDefLibrary library_expected; + GraphDef graphdef_expected; + + { + GraphDefBuilder shape(GraphDefBuilder::kFailImmediately); + Node* key_constant = + KeyPlaceholderShape(shape.opts().WithName("KnownShape/_0")); + Node* recv = RecvAtHost(ops::NodeOut(key_constant, 0), "F1", "O1", + {DT_FLOAT, DT_FLOAT}, shape.opts()); + Node* e = Binary(ops::NodeOut(recv, 0), ops::NodeOut(recv, 1), + shape.opts() + .WithName("E") + .WithAttr("_encapsulate", "F1") + .WithAttr("_outside", "O1")); + SendFromHost(ops::NodeOut(key_constant, 0), "F1", "O1", {e}, shape.opts()); + TF_EXPECT_OK( + AddGraphDefToFunctionLibrary(shape, "F1_O1", &library_expected)); + } + + { + GraphDefBuilder shape(GraphDefBuilder::kFailImmediately); + Node* key_constant = + KeyPlaceholderShape(shape.opts().WithName("KnownShape/_0")); + Node* recv = 
RecvAtHost(ops::NodeOut(key_constant, 0), "F2", "O1", + {DT_FLOAT}, shape.opts()); + Node* h = Unary(recv, shape.opts() + .WithName("H") + .WithAttr("_encapsulate", "F2") + .WithAttr("_outside", "O1")); + SendFromHost(ops::NodeOut(key_constant, 0), "F2", "O1", {h}, shape.opts()); + TF_EXPECT_OK( + AddGraphDefToFunctionLibrary(shape, "F2_O1", &library_expected)); + } + + *library_expected.add_function() = FunctionDefHelper::Create( + "F1", {"a_0_arg:float", "b_0_arg:float"}, {"f_0_retval:float"}, {}, + { + {{"C"}, "UnaryTest", {"a_0_arg"}}, + {{"D"}, "BinaryTest", {"b_0_arg", "C:o:0"}}, + {{"F"}, + "BinaryTest", + {"C:o:0", "outside_compilation_O1_host_compute:outputs:0"}, + {}, + {"outside_compilation_O1_host_compute"}}, + {{"outside_compilation_O1_host_compute"}, + "XlaHostCompute", + {"C:o:0", "D:o:0"}, + {{"Tinputs", gtl::ArraySlice({DT_FLOAT, DT_FLOAT})}, + {"Toutputs", gtl::ArraySlice({DT_FLOAT})}, + {"ancestors", gtl::ArraySlice({})}, + {"key", "host_compute_channel_F1_O1"}, + {"shape_inference_graph", + "_outside_compilation_shape_inference_F1_O1"}, + {"shapes", gtl::ArraySlice({})}, + {"_outside_compilation_subgraph", "O1"}}, + {"D"}}, + }, + {{"f_0_retval", "F:o:0"}}); + + *library_expected.add_function() = FunctionDefHelper::Create( + "F2", {"a_0_arg:float", "b_0_arg:float"}, {"i_0_retval:float"}, {}, + { + {{"G"}, "BinaryTest", {"a_0_arg", "b_0_arg"}}, + {{"I"}, + "UnaryTest", + {"outside_compilation_O1_host_compute:outputs:0"}}, + {{"outside_compilation_O1_host_compute"}, + "XlaHostCompute", + {"G:o:0"}, + {{"Tinputs", gtl::ArraySlice({DT_FLOAT})}, + {"Toutputs", gtl::ArraySlice({DT_FLOAT})}, + {"ancestors", gtl::ArraySlice({})}, + {"key", "host_compute_channel_F2_O1"}, + {"shape_inference_graph", + "_outside_compilation_shape_inference_F2_O1"}, + {"shapes", gtl::ArraySlice({})}, + {"_outside_compilation_subgraph", "O1"}}}, + }, + {{"i_0_retval", "I:o:0"}}); + + { + std::unique_ptr lib_def( + new FunctionLibraryDefinition(OpRegistry::Global(), 
library_expected)); + GraphDefBuilder b2(GraphDefBuilder::kFailImmediately, lib_def.get()); + Node* a = InputShaped(b2.opts().WithName("A")); + Node* b = InputShaped(b2.opts().WithName("B")); + + Node* key_constant1 = + KeyPlaceholder("F1", b2.opts().WithName("F1_key_placeholder")); + Node* recv1 = RecvAtHost(ops::NodeOut(key_constant1, 0), "F1", "O1", + {DT_FLOAT, DT_FLOAT}, b2.opts()); + Node* e = Binary(ops::NodeOut(recv1, 0), ops::NodeOut(recv1, 1), + b2.opts() + .WithName("E") + .WithControlInputs({recv1, b}) + .WithAttr("_encapsulate", "F1") + .WithAttr("_outside", "O1")); + Node* send1 = SendFromHost(ops::NodeOut(key_constant1, 0), "F1", "O1", {e}, + b2.opts().WithControlInput(e)); + Node* s1 = Sequencer( + b2.opts().WithName("F1_sequencer").WithControlInputs({recv1, send1}), + "F1"); + + NodeBuilder node_builder1("F1", "F1", lib_def.get()); + node_builder1.Input(a).Input(b); + Node* call1 = + b2.opts().WithControlInput(s1).FinalizeBuilder(&node_builder1); + + Node* key_constant2 = + KeyPlaceholder("F2", b2.opts().WithName("F2_key_placeholder")); + Node* recv2 = RecvAtHost(ops::NodeOut(key_constant2, 0), "F2", "O1", + {DT_FLOAT}, b2.opts()); + Node* h = Unary(recv2, b2.opts() + .WithName("H") + .WithAttr("_encapsulate", "F2") + .WithAttr("_outside", "O1") + .WithControlInput(e)); + Node* send2 = SendFromHost(ops::NodeOut(key_constant2, 0), "F2", "O1", {h}, + b2.opts()); + + Node* s2 = Sequencer( + b2.opts().WithName("F2_sequencer").WithControlInputs({recv2, send2}), + "F2"); + NodeBuilder node_builder2("F2", "F2", lib_def.get()); + node_builder2.Input(a).Input(b); + Node* call2 = b2.opts() + .WithControlInputs({s2, call1}) + .FinalizeBuilder(&node_builder2); + Binary(call1, call2, b2.opts().WithName("J")); + TF_EXPECT_OK(b2.ToGraphDef(&graphdef_expected)); + } + + TF_EXPECT_GRAPH_EQ(graphdef_expected, graphdef); + TF_EXPECT_FUNCTIONDEFLIBRARY_EQ(library_expected, library); +} + // Test with one outside_compilation cluster that has no inputs from the // 
compiled subgraph. TEST(EncapsulateSubgraphsTest, OutsideCompilationNoInputs) { @@ -1318,10 +1535,12 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationNoInputs) { {}, {{"Tinputs", gtl::ArraySlice({})}, {"Toutputs", gtl::ArraySlice({DT_FLOAT})}, + {"ancestors", gtl::ArraySlice({})}, {"key", "host_compute_channel_F1_O1"}, {"shape_inference_graph", ""}, {"shapes", - gtl::ArraySlice({shape_proto_expected})}}}, + gtl::ArraySlice({shape_proto_expected})}, + {"_outside_compilation_subgraph", "O1"}}}, }, {{"f_0_retval", "F:o:0"}}); @@ -1400,10 +1619,12 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationControlInput) { {}, {{"Tinputs", gtl::ArraySlice({})}, {"Toutputs", gtl::ArraySlice({DT_FLOAT})}, + {"ancestors", gtl::ArraySlice({})}, {"key", "host_compute_channel_F1_O1"}, {"shape_inference_graph", ""}, {"shapes", - gtl::ArraySlice({shape_proto_expected})}}, + gtl::ArraySlice({shape_proto_expected})}, + {"_outside_compilation_subgraph", "O1"}}, {"D"}}, }, {{"f_0_retval", "F:o:0"}}); @@ -1480,9 +1701,11 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationNoOutputs) { {"D:o:0"}, {{"Tinputs", gtl::ArraySlice({DT_FLOAT})}, {"Toutputs", gtl::ArraySlice({})}, + {"ancestors", gtl::ArraySlice({})}, {"key", "host_compute_channel_F1_O1"}, {"shape_inference_graph", ""}, - {"shapes", gtl::ArraySlice({})}}}, + {"shapes", gtl::ArraySlice({})}, + {"_outside_compilation_subgraph", "O1"}}}, }, {{"f_0_retval", "F:o:0"}}); @@ -1559,9 +1782,11 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationControlOutput) { {"D:o:0"}, {{"Tinputs", gtl::ArraySlice({DT_FLOAT})}, {"Toutputs", gtl::ArraySlice({})}, + {"ancestors", gtl::ArraySlice({})}, {"key", "host_compute_channel_F1_O1"}, {"shape_inference_graph", ""}, - {"shapes", gtl::ArraySlice({})}}}, + {"shapes", gtl::ArraySlice({})}, + {"_outside_compilation_subgraph", "O1"}}}, }, {{"f_0_retval", "F:o:0"}}); @@ -1598,6 +1823,371 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationControlOutput) { TF_EXPECT_FUNCTIONDEFLIBRARY_EQ(library_expected, 
library); } +// Test with two outside_compilation clusters that interact outside the compiled +// subgraph, where the ancestor has no HostCompute Op. +TEST(EncapsulateSubgraphsTest, + OutsideCompilationClusterDependencyNoSrcCluster) { + FunctionDefLibrary library; + GraphDef graphdef; + + { + GraphDefBuilder b1(GraphDefBuilder::kFailImmediately); + Node* a = Input(b1.opts().WithName("A")); + Node* b = Input(b1.opts().WithName("B")); + Node* c = Unary(a, b1.opts().WithName("C").WithAttr("_encapsulate", "F1")); + Node* d = + Binary(b, c, b1.opts().WithName("D").WithAttr("_encapsulate", "F1")); + Node* e = Unary(a, b1.opts() + .WithName("E") + .WithAttr("_encapsulate", "F1") + .WithAttr("_outside", "O1")); + Node* f = Unary(d, b1.opts().WithName("F").WithAttr("_encapsulate", "F1")); + Node* g = Unary(f, b1.opts() + .WithName("G") + .WithAttr("_encapsulate", "F1") + .WithAttr("_outside", "O2") + .WithControlInput(e)); + Node* h = Unary(g, b1.opts().WithName("H").WithAttr("_encapsulate", "F1")); + Binary(e, h, b1.opts().WithName("I")); + TF_EXPECT_OK(b1.ToGraphDef(&graphdef)); + } + + TF_EXPECT_OK(Encapsulate(&graphdef, &library)); + + FunctionDefLibrary library_expected; + GraphDef graphdef_expected; + + { + GraphDefBuilder shape2(GraphDefBuilder::kFailImmediately); + Node* key_constant = + KeyPlaceholderShape(shape2.opts().WithName("KnownShape/_0")); + Node* recv2 = RecvAtHost(ops::NodeOut(key_constant, 0), "F1", "O2", + {DT_FLOAT}, shape2.opts()); + Node* g = Unary(ops::NodeOut(recv2, 0), shape2.opts() + .WithName("G") + .WithAttr("_encapsulate", "F1") + .WithAttr("_outside", "O2")); + SendFromHost(ops::NodeOut(key_constant, 0), "F1", "O2", {g}, shape2.opts()); + TF_EXPECT_OK( + AddGraphDefToFunctionLibrary(shape2, "F1_O2", &library_expected)); + } + + *library_expected.add_function() = FunctionDefHelper::Create( + "F1", {"a_0_arg:float", "b_0_arg:float"}, {"h_0_retval:float"}, {}, + { + {{"C"}, "UnaryTest", {"a_0_arg"}}, + {{"D"}, "BinaryTest", {"b_0_arg", 
"C:o:0"}}, + {{"F"}, "UnaryTest", {"D:o:0"}}, + {{"H"}, + "UnaryTest", + {"outside_compilation_O2_host_compute:outputs:0"}}, + {{"outside_compilation_O2_host_compute"}, + "XlaHostCompute", + {"F:o:0"}, + {{"Tinputs", gtl::ArraySlice({DT_FLOAT})}, + {"Toutputs", gtl::ArraySlice({DT_FLOAT})}, + {"ancestors", gtl::ArraySlice({})}, + {"key", "host_compute_channel_F1_O2"}, + {"shape_inference_graph", + "_outside_compilation_shape_inference_F1_O2"}, + {"shapes", gtl::ArraySlice({})}, + {"_outside_compilation_subgraph", "O2"}}}, + }, + {{"h_0_retval", "H:o:0"}}); + + { + std::unique_ptr lib_def( + new FunctionLibraryDefinition(OpRegistry::Global(), library_expected)); + GraphDefBuilder b2(GraphDefBuilder::kFailImmediately, lib_def.get()); + Node* a = Input(b2.opts().WithName("A")); + Node* b = Input(b2.opts().WithName("B")); + + Node* e = Unary(a, b2.opts() + .WithName("E") + .WithAttr("_encapsulate", "F1") + .WithAttr("_outside", "O1")); + Node* key_constant = + KeyPlaceholder("F1", b2.opts().WithName("F1_key_placeholder")); + Node* recv = RecvAtHost(ops::NodeOut(key_constant, 0), "F1", "O2", + {DT_FLOAT}, b2.opts()); + Node* g = Unary(recv, b2.opts() + .WithName("G") + .WithAttr("_encapsulate", "F1") + .WithAttr("_outside", "O2") + .WithControlInput(e)); + Node* send = + SendFromHost(ops::NodeOut(key_constant, 0), "F1", "O2", {g}, b2.opts()); + Node* s1 = Sequencer( + b2.opts().WithName("F1_sequencer").WithControlInputs({recv, send}), + "F1"); + NodeBuilder node_builder1("F1", "F1", lib_def.get()); + node_builder1.Input(a).Input(b).ControlInput(s1); + Node* call1 = b2.opts().FinalizeBuilder(&node_builder1); + + Binary(e, call1, b2.opts().WithName("I")); + TF_EXPECT_OK(b2.ToGraphDef(&graphdef_expected)); + } + + TF_EXPECT_GRAPH_EQ(graphdef_expected, graphdef); + TF_EXPECT_FUNCTIONDEFLIBRARY_EQ(library_expected, library); +} + +// Test with two outside_compilation clusters that interact outside the compiled +// subgraph, where the successor has no HostCompute Op. 
+TEST(EncapsulateSubgraphsTest, + OutsideCompilationClusterDependencyNoDstCluster) { + FunctionDefLibrary library; + GraphDef graphdef; + + { + GraphDefBuilder b1(GraphDefBuilder::kFailImmediately); + Node* a = Input(b1.opts().WithName("A")); + Node* b = Input(b1.opts().WithName("B")); + Node* c = Unary(a, b1.opts().WithName("C").WithAttr("_encapsulate", "F1")); + Node* d = + Binary(b, c, b1.opts().WithName("D").WithAttr("_encapsulate", "F1")); + Node* e = Unary(d, b1.opts() + .WithName("E") + .WithAttr("_encapsulate", "F1") + .WithAttr("_outside", "O1")); + Node* f = Unary(e, b1.opts().WithName("F").WithAttr("_encapsulate", "F1")); + /*Node* g =*/Unary(a, b1.opts() + .WithName("G") + .WithAttr("_encapsulate", "F1") + .WithAttr("_outside", "O2") + .WithControlInput(e)); + Node* h = Unary(f, b1.opts().WithName("H").WithAttr("_encapsulate", "F1")); + Binary(e, h, b1.opts().WithName("I")); + TF_EXPECT_OK(b1.ToGraphDef(&graphdef)); + } + + TF_EXPECT_OK(Encapsulate(&graphdef, &library)); + + FunctionDefLibrary library_expected; + GraphDef graphdef_expected; + + { + GraphDefBuilder shape1(GraphDefBuilder::kFailImmediately); + Node* key_constant = + KeyPlaceholderShape(shape1.opts().WithName("KnownShape/_0")); + Node* recv2 = RecvAtHost(ops::NodeOut(key_constant, 0), "F1", "O1", + {DT_FLOAT}, shape1.opts()); + Node* e = Unary(ops::NodeOut(recv2, 0), shape1.opts() + .WithName("E") + .WithAttr("_encapsulate", "F1") + .WithAttr("_outside", "O1")); + SendFromHost(ops::NodeOut(key_constant, 0), "F1", "O1", {e}, shape1.opts()); + TF_EXPECT_OK( + AddGraphDefToFunctionLibrary(shape1, "F1_O1", &library_expected)); + } + + *library_expected.add_function() = FunctionDefHelper::Create( + "F1", {"a_0_arg:float", "b_0_arg:float"}, {"h_0_retval:float"}, {}, + { + {{"C"}, "UnaryTest", {"a_0_arg"}}, + {{"D"}, "BinaryTest", {"b_0_arg", "C:o:0"}}, + {{"F"}, + "UnaryTest", + {"outside_compilation_O1_host_compute:outputs:0"}}, + {{"H"}, "UnaryTest", {"F:o:0"}}, + 
{{"outside_compilation_O1_host_compute"}, + "XlaHostCompute", + {"D:o:0"}, + {{"Tinputs", gtl::ArraySlice({DT_FLOAT})}, + {"Toutputs", gtl::ArraySlice({DT_FLOAT})}, + {"ancestors", gtl::ArraySlice({})}, + {"key", "host_compute_channel_F1_O1"}, + {"shape_inference_graph", + "_outside_compilation_shape_inference_F1_O1"}, + {"shapes", gtl::ArraySlice({})}, + {"_outside_compilation_subgraph", "O1"}}}, + }, + {{"h_0_retval", "H:o:0"}}); + + { + std::unique_ptr lib_def( + new FunctionLibraryDefinition(OpRegistry::Global(), library_expected)); + GraphDefBuilder b2(GraphDefBuilder::kFailImmediately, lib_def.get()); + Node* a = Input(b2.opts().WithName("A")); + Node* b = Input(b2.opts().WithName("B")); + + Node* key_constant = + KeyPlaceholder("F1", b2.opts().WithName("F1_key_placeholder")); + Node* recv = RecvAtHost(ops::NodeOut(key_constant, 0), "F1", "O1", + {DT_FLOAT}, b2.opts()); + Node* e = Unary(recv, b2.opts() + .WithName("E") + .WithAttr("_encapsulate", "F1") + .WithAttr("_outside", "O1")); + Node* send = + SendFromHost(ops::NodeOut(key_constant, 0), "F1", "O1", {e}, b2.opts()); + /*Node* g =*/Unary(a, b2.opts() + .WithName("G") + .WithAttr("_encapsulate", "F1") + .WithAttr("_outside", "O2") + .WithControlInput(e)); + Node* s1 = Sequencer( + b2.opts().WithName("F1_sequencer").WithControlInputs({recv, send}), + "F1"); + NodeBuilder node_builder1("F1", "F1", lib_def.get()); + node_builder1.Input(a).Input(b).ControlInput(s1); + Node* call1 = b2.opts().FinalizeBuilder(&node_builder1); + + Binary(e, call1, b2.opts().WithName("I")); + TF_EXPECT_OK(b2.ToGraphDef(&graphdef_expected)); + } + + TF_EXPECT_GRAPH_EQ(graphdef_expected, graphdef); + TF_EXPECT_FUNCTIONDEFLIBRARY_EQ(library_expected, library); +} + +// Test with two outside_compilation clusters that interact outside the compiled +// subgraph. 
+TEST(EncapsulateSubgraphsTest, OutsideCompilationClusterDependency) { + FunctionDefLibrary library; + GraphDef graphdef; + + { + GraphDefBuilder b1(GraphDefBuilder::kFailImmediately); + Node* a = Input(b1.opts().WithName("A")); + Node* b = Input(b1.opts().WithName("B")); + Node* c = Unary(a, b1.opts().WithName("C").WithAttr("_encapsulate", "F1")); + Node* d = + Binary(b, c, b1.opts().WithName("D").WithAttr("_encapsulate", "F1")); + Node* e = Unary(d, b1.opts() + .WithName("E") + .WithAttr("_encapsulate", "F1") + .WithAttr("_outside", "O1")); + Node* f = Unary(e, b1.opts().WithName("F").WithAttr("_encapsulate", "F1")); + Node* g = Unary(d, b1.opts() + .WithName("G") + .WithAttr("_encapsulate", "F1") + .WithAttr("_outside", "O2") + .WithControlInput(e)); + Node* h = Unary(f, b1.opts().WithName("H").WithAttr("_encapsulate", "F1")); + /*Node* i =*/Binary(d, e, + b1.opts() + .WithName("I") + .WithAttr("_encapsulate", "F1") + .WithAttr("_outside", "O3") + .WithControlInput(g)); + Binary(e, h, b1.opts().WithName("J")); + TF_EXPECT_OK(b1.ToGraphDef(&graphdef)); + } + + TF_EXPECT_OK(Encapsulate(&graphdef, &library)); + + FunctionDefLibrary library_expected; + GraphDef graphdef_expected; + + { + GraphDefBuilder shape1(GraphDefBuilder::kFailImmediately); + Node* key_constant = + KeyPlaceholderShape(shape1.opts().WithName("KnownShape/_0")); + Node* recv2 = RecvAtHost(ops::NodeOut(key_constant, 0), "F1", "O1", + {DT_FLOAT}, shape1.opts()); + Node* e = Unary(ops::NodeOut(recv2, 0), shape1.opts() + .WithName("E") + .WithAttr("_encapsulate", "F1") + .WithAttr("_outside", "O1")); + SendFromHost(ops::NodeOut(key_constant, 0), "F1", "O1", {e}, shape1.opts()); + TF_EXPECT_OK( + AddGraphDefToFunctionLibrary(shape1, "F1_O1", &library_expected)); + } + + *library_expected.add_function() = FunctionDefHelper::Create( + "F1", {"a_0_arg:float", "b_0_arg:float"}, {"h_0_retval:float"}, {}, + {{{"C"}, "UnaryTest", {"a_0_arg"}}, + {{"D"}, "BinaryTest", {"b_0_arg", "C:o:0"}}, + {{"F"}, 
"UnaryTest", {"outside_compilation_O1_host_compute:outputs:0"}}, + {{"H"}, "UnaryTest", {"F:o:0"}}, + {{"outside_compilation_O1_host_compute"}, + "XlaHostCompute", + {"D:o:0"}, + {{"Tinputs", gtl::ArraySlice({DT_FLOAT})}, + {"Toutputs", gtl::ArraySlice({DT_FLOAT})}, + {"ancestors", gtl::ArraySlice({})}, + {"key", "host_compute_channel_F1_O1"}, + {"shape_inference_graph", + "_outside_compilation_shape_inference_F1_O1"}, + {"shapes", gtl::ArraySlice({})}, + {"_outside_compilation_subgraph", "O1"}}}, + {{"outside_compilation_O2_host_compute"}, + "XlaHostCompute", + {"D:o:0"}, + {{"Tinputs", gtl::ArraySlice({DT_FLOAT})}, + {"Toutputs", gtl::ArraySlice({})}, + {"ancestors", + gtl::ArraySlice({"outside_compilation_O1_host_compute"})}, + {"key", "host_compute_channel_F1_O2"}, + {"shape_inference_graph", ""}, + {"shapes", gtl::ArraySlice({})}, + {"_outside_compilation_subgraph", "O2"}}, + {"outside_compilation_O1_host_compute"}}, + {{"outside_compilation_O3_host_compute"}, + "XlaHostCompute", + {"D:o:0"}, + {{"Tinputs", gtl::ArraySlice({DT_FLOAT})}, + {"Toutputs", gtl::ArraySlice({})}, + {"ancestors", + gtl::ArraySlice({"outside_compilation_O1_host_compute", + "outside_compilation_O2_host_compute"})}, + {"key", "host_compute_channel_F1_O3"}, + {"shape_inference_graph", ""}, + {"shapes", gtl::ArraySlice({})}, + {"_outside_compilation_subgraph", "O3"}}, + {"outside_compilation_O1_host_compute", + "outside_compilation_O2_host_compute"}}}, + {{"h_0_retval", "H:o:0"}}); + + { + std::unique_ptr lib_def( + new FunctionLibraryDefinition(OpRegistry::Global(), library_expected)); + GraphDefBuilder b2(GraphDefBuilder::kFailImmediately, lib_def.get()); + Node* a = Input(b2.opts().WithName("A")); + Node* b = Input(b2.opts().WithName("B")); + + Node* key_constant = + KeyPlaceholder("F1", b2.opts().WithName("F1_key_placeholder")); + Node* recv1 = RecvAtHost(ops::NodeOut(key_constant, 0), "F1", "O1", + {DT_FLOAT}, b2.opts()); + Node* e = Unary(recv1, b2.opts() + .WithName("E") + 
.WithAttr("_encapsulate", "F1") + .WithAttr("_outside", "O1")); + Node* send = + SendFromHost(ops::NodeOut(key_constant, 0), "F1", "O1", {e}, b2.opts()); + Node* recv2 = RecvAtHost(ops::NodeOut(key_constant, 0), "F1", "O2", + {DT_FLOAT}, b2.opts()); + Node* g = Unary(recv2, b2.opts() + .WithName("G") + .WithAttr("_encapsulate", "F1") + .WithAttr("_outside", "O2") + .WithControlInput(e)); + Node* recv3 = RecvAtHost(ops::NodeOut(key_constant, 0), "F1", "O3", + {DT_FLOAT}, b2.opts()); + /*Node* i =*/Binary(recv3, e, + b2.opts() + .WithName("I") + .WithAttr("_encapsulate", "F1") + .WithAttr("_outside", "O3") + .WithControlInput(g)); + Node* s1 = Sequencer(b2.opts() + .WithName("F1_sequencer") + .WithControlInputs({recv1, send, recv2, recv3}), + "F1"); + NodeBuilder node_builder1("F1", "F1", lib_def.get()); + node_builder1.Input(a).Input(b).ControlInput(s1); + Node* call1 = b2.opts().FinalizeBuilder(&node_builder1); + + Binary(e, call1, b2.opts().WithName("J")); + TF_EXPECT_OK(b2.ToGraphDef(&graphdef_expected)); + } + + TF_EXPECT_GRAPH_EQ(graphdef_expected, graphdef); + TF_EXPECT_FUNCTIONDEFLIBRARY_EQ(library_expected, library); +} + // Test with one outside_compilation cluster that has no outputs from the // compiled subgraph. 
TEST(EncapsulateSubgraphsTest, OutsideCompilationNoInputsOrOutputs) { @@ -1722,10 +2312,12 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationShapeInference) { {"c:o:0"}, {{"Tinputs", gtl::ArraySlice({DT_FLOAT})}, {"Toutputs", gtl::ArraySlice({DT_FLOAT})}, + {"ancestors", gtl::ArraySlice({})}, {"key", "host_compute_channel_F1_O1"}, {"shape_inference_graph", "_outside_compilation_shape_inference_F1_O1"}, - {"shapes", gtl::ArraySlice({})}}, + {"shapes", gtl::ArraySlice({})}, + {"_outside_compilation_subgraph", "O1"}}, {"c"}}, }, {{"f_0_retval", "F:o:0"}}); diff --git a/tensorflow/compiler/jit/kernels/xla_launch_op.cc b/tensorflow/compiler/jit/kernels/xla_launch_op.cc index 2d6511a45b9b37df8405d34dd2aec5ba31254c16..f48941fce329313e4484b3c2dd900eeac884ed34 100644 --- a/tensorflow/compiler/jit/kernels/xla_launch_op.cc +++ b/tensorflow/compiler/jit/kernels/xla_launch_op.cc @@ -155,6 +155,9 @@ void XlaLocalLaunchOp::Compute(OpKernelContext* ctx) { options.graph_def_version = ctx->function_library()->graph_def_version(); options.allow_cpu_custom_calls = (platform_id_ == gpu::host::kHostPlatformId); options.device_allocator = xla_allocator; + // TODO(b/77671268): We don't set variable_representation_shape_fn here. This + // is restricted to Variables, but we need something like this to apply to + // normal Tensors too. 
const XlaCompiler::CompilationResult* kernel; xla::LocalExecutable* executable; @@ -179,8 +182,10 @@ void XlaLocalLaunchOp::Compute(OpKernelContext* ctx) { run_options.set_stream(stream); run_options.set_allocator(xla_allocator); run_options.set_intra_op_thread_pool(&ctx->eigen_cpu_device()); + run_options.set_rng_seed(ctx->step_id()); Env* env = Env::Default(); auto start_time = env->NowMicros(); + auto run_result = executable->Run(launch_context.arguments(), run_options); OP_REQUIRES(ctx, run_result.ok(), run_result.status()); diff --git a/tensorflow/compiler/jit/mark_for_compilation_pass.cc b/tensorflow/compiler/jit/mark_for_compilation_pass.cc index f651768a67278628e40445291d7fb271bb1ae611..8e2ee0f1d71bc17b4c12c792c38002af4f9eb5eb 100644 --- a/tensorflow/compiler/jit/mark_for_compilation_pass.cc +++ b/tensorflow/compiler/jit/mark_for_compilation_pass.cc @@ -35,6 +35,7 @@ limitations under the License. #include "tensorflow/core/framework/types.h" #include "tensorflow/core/graph/algorithm.h" #include "tensorflow/core/graph/control_flow.h" +#include "tensorflow/core/kernels/bounds_check.h" #include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/public/version.h" @@ -50,6 +51,15 @@ bool HasXLAKernel(const Node& node, const DeviceType& jit_device_type) { // is really a kind of function call and will be handled by // IsCompilableCall(). if (node.type_string() == "SymbolicGradient") return false; + if (node.type_string() == "Const") { + // Skip Const op with type DT_STRING, since XLA doesn't support it, but the + // registered Const KernelDef says that it does, to support no-op Assert for + // tfcompile. 
+ const AttrValue* attr = node.attrs().Find("dtype"); + if (attr != nullptr && attr->type() == DT_STRING) { + return false; + } + } return FindKernelDef(jit_device_type, node.def(), nullptr, nullptr).ok(); } @@ -432,6 +442,9 @@ string DescribeCycle(const GraphCycles& cycles, const Graph& graph, int src, } auto node_name = [&cycles, &graph](int node_id) { + if (!FastBoundsCheck(node_id, graph.num_node_ids())) { + return string("(null)"); + } auto* node = graph.FindNodeId(node_id); if (node == nullptr) { return string("(null)"); @@ -728,11 +741,15 @@ Status MarkForCompilationPass::RunImpl( } } - // Count the number of elements in each cluster. - std::vector cluster_sizes(graph->num_node_ids()); + // Count the number of non-trivial elements in each cluster. + std::vector effective_cluster_sizes(graph->num_node_ids()); for (const Node* n : compilation_candidates) { int cluster = clusters[n->id()].Get().representative; - cluster_sizes[cluster]++; + // Identity nodes will be removed if the node gets marked for compilation. + // Therefore we don't want to count them towards the effective cluster size. + if (n->def().op() != "Identity") { + effective_cluster_sizes[cluster]++; + } } // Names for each cluster. @@ -765,9 +782,12 @@ Status MarkForCompilationPass::RunImpl( const XlaOpRegistry::DeviceRegistration* registration; XlaOpRegistry::GetCompilationDevice(device_type.type(), &registration); - // Or compile if this is a cluster of >= min_cluster_size compilable - // operators. - if (cluster_sizes[cluster] >= min_cluster_size || marked_for_compilation || + // Compile if this is a cluster of >= min_cluster_size compilable operators. + // Also, always compile if the operator is placed on a device that requires + // compilation, or if it contains at least one op that is marked for + // compilation that is not an Identity op. 
+ if (effective_cluster_sizes[cluster] >= min_cluster_size || + (effective_cluster_sizes[cluster] > 0 && marked_for_compilation) || registration->requires_compilation) { string& name = cluster_names[cluster]; diff --git a/tensorflow/compiler/jit/mark_for_compilation_pass_test.cc b/tensorflow/compiler/jit/mark_for_compilation_pass_test.cc index 381c0205fdf10fcde592658dbdb54edb905954f6..703d8825d74ced8d4d69c31ccd730adc89a8bffe 100644 --- a/tensorflow/compiler/jit/mark_for_compilation_pass_test.cc +++ b/tensorflow/compiler/jit/mark_for_compilation_pass_test.cc @@ -16,7 +16,9 @@ limitations under the License. #include "tensorflow/compiler/jit/mark_for_compilation_pass.h" #include "tensorflow/cc/framework/ops.h" +#include "tensorflow/cc/ops/array_ops.h" #include "tensorflow/cc/ops/control_flow_ops_internal.h" +#include "tensorflow/cc/ops/function_ops.h" #include "tensorflow/cc/ops/standard_ops.h" #include "tensorflow/compiler/jit/defs.h" #include "tensorflow/compiler/tf2xla/xla_op_kernel.h" @@ -138,7 +140,7 @@ TEST(XlaCompilationTest, CompilableCycles) { EXPECT_EQ(clusters["A"], clusters["C"]); } -TEST(XlaCompilationTest, UnsupportedTypes) { +TEST(XlaCompilationTest, Complex128Unsupported) { std::unique_ptr graph(new Graph(OpRegistry::Global())); GraphDef graphdef; { @@ -158,6 +160,27 @@ TEST(XlaCompilationTest, UnsupportedTypes) { EXPECT_TRUE(clusters.empty()); } +TEST(XlaCompilationTest, HalfSupported) { + std::unique_ptr graph(new Graph(OpRegistry::Global())); + GraphDef graphdef; + { + GraphDefBuilder builder(GraphDefBuilder::kFailImmediately); + Tensor t(DT_HALF, TensorShape()); + t.scalar()() = static_cast(0.0f); + Node* a = ops::SourceOp("Const", builder.opts() + .WithName("A") + .WithAttr("dtype", DT_HALF) + .WithAttr("value", t)); + Node* b = ops::UnaryOp("Neg", a, builder.opts().WithName("B")); + ops::BinaryOp("MatMul", a, b, builder.opts().WithName("C")); + TF_EXPECT_OK(GraphDefBuilderToGraph(builder, graph.get())); + } + + 
TF_ASSERT_OK(MarkForCompilation(&graph)); + auto clusters = GetClusters(*graph); + EXPECT_FALSE(clusters.empty()); +} + TEST(XlaCompilationTest, ConcatWithConstArg) { std::unique_ptr graph(new Graph(OpRegistry::Global())); GraphDef graphdef; @@ -554,5 +577,61 @@ TEST(XlaCompilationTest, Retval) { EXPECT_EQ(clusters["A"], clusters["B"]); } +TEST(XlaCompilationTest, DontCountIdentityOps) { + std::unique_ptr graph(new Graph(OpRegistry::Global())); + Scope root = Scope::NewRootScope().ExitOnError(); + { + auto a = ops::_Arg(root.WithOpName("A"), DT_INT32, 0); + auto b = ops::Identity(root.WithOpName("B"), a); + auto c = ops::Identity(root.WithOpName("C"), b); + auto r = ops::_Retval(root.WithOpName("R"), c, 0); + } + TF_ASSERT_OK(root.ToGraph(graph.get())); + TF_ASSERT_OK(MarkForCompilation(&graph)); + auto clusters = GetClusters(*graph); + + EXPECT_TRUE(clusters.empty()); +} + +TEST(XlaCompilationTest, DontCountIdentityOpsWithLocalJit) { + std::unique_ptr graph(new Graph(OpRegistry::Global())); + Scope root = Scope::NewRootScope().ExitOnError(); + { + auto a = ops::_Arg(root.WithOpName("A"), DT_INT32, 0); + auto b = ops::Identity(root.WithOpName("B"), a); + b.node()->AddAttr(kXlaCompileAttr, true); + auto r = ops::_Retval(root.WithOpName("R"), b, 0); + } + TF_ASSERT_OK(root.ToGraph(graph.get())); + TF_ASSERT_OK(MarkForCompilation(&graph)); + auto clusters = GetClusters(*graph); + + EXPECT_TRUE(clusters.empty()); +} + +TEST(XlaCompilationTest, ConstOp) { + // valid data type + { + std::unique_ptr graph(new Graph(OpRegistry::Global())); + Scope root = Scope::NewRootScope().ExitOnError(); + auto c = ops::Const(root.WithOpName("const"), 0.5f); + c.node()->AddAttr(kXlaCompileAttr, true); + TF_ASSERT_OK(root.ToGraph(graph.get())); + TF_ASSERT_OK(MarkForCompilation(&graph)); + EXPECT_EQ(1, GetClusters(*graph).size()); + } + + // invalid data type + { + std::unique_ptr graph(new Graph(OpRegistry::Global())); + Scope root = Scope::NewRootScope().ExitOnError(); + auto c = 
ops::Const(root.WithOpName("const"), string("string")); + c.node()->AddAttr(kXlaCompileAttr, true); + TF_ASSERT_OK(root.ToGraph(graph.get())); + TF_ASSERT_OK(MarkForCompilation(&graph)); + EXPECT_TRUE(GetClusters(*graph).empty()); + } +} + } // namespace } // namespace tensorflow diff --git a/tensorflow/compiler/jit/producer_consumer_queue.h b/tensorflow/compiler/jit/producer_consumer_queue.h new file mode 100644 index 0000000000000000000000000000000000000000..7c8c04152d2f3a0fd46711df24756b7e68b967ea --- /dev/null +++ b/tensorflow/compiler/jit/producer_consumer_queue.h @@ -0,0 +1,132 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_JIT_PRODUCER_CONSUMER_QUEUE_H_ +#define TENSORFLOW_COMPILER_JIT_PRODUCER_CONSUMER_QUEUE_H_ + +#include +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/mutex.h" + +namespace tensorflow { + +// A thread-safe, first-in-first-out queue. +template +class ProducerConsumerQueue { + public: + ProducerConsumerQueue() + : capacity_(std::numeric_limits::max()) {} + ~ProducerConsumerQueue() = default; + + // Wait until the queue is non-full, then append a copy of v. + void Put(const T &v); + + // Wait until the queue is non-empty, then remove and return the head value. 
+ T Get(); + + // If the queue is non-empty, remove the head value, placing it in *pv, and + // return true; otherwise return false. + bool TryGet(T *pv); + + // Set the capacity of the queue; the queue is full whenever count() >= + // capacity(). The initial value is the maximum size_t. Requires size > 0. + void set_capacity(std::size_t size); + + // Return the capacity of the queue. + std::size_t capacity() const; + + // Return the number of elements in the queue. + std::size_t count() const; + + // Implementation details follow. Clients should ignore. + private: + mutable tensorflow::mutex mu_; // protects all fields below + tensorflow::condition_variable non_empty_ GUARDED_BY(mu_); + tensorflow::condition_variable non_full_ GUARDED_BY(mu_); + std::size_t capacity_ GUARDED_BY(mu_); + std::deque queue_ GUARDED_BY(mu_); + + TF_DISALLOW_COPY_AND_ASSIGN(ProducerConsumerQueue); +}; + +// ------------------------------------------------------ +// Implementation details follow. Clients should ignore. + +// Wait until the queue is non-full, then append a copy of v. +template +void ProducerConsumerQueue::Put(const T &v) { + mutex_lock lock(mu_); + while (queue_.size() >= capacity_) { + non_full_.wait(lock); + } + queue_.push_back(v); + non_empty_.notify_one(); +} + +// Wait until the queue is non-empty, then remove and return the head value. +template +T ProducerConsumerQueue::Get() { + mutex_lock lock(mu_); + while (queue_.empty()) { + non_empty_.wait(lock); + } + non_full_.notify_one(); + T result_value = queue_.front(); + queue_.pop_front(); + return result_value; +} + +// If the queue is non-empty, remove the head value, placing it in *pv, and +// return true; otherwise return false. 
+template +bool ProducerConsumerQueue::TryGet(T *pv) { + mutex_lock lock(mu_); + bool got_element = !queue_.empty(); + if (got_element) { + non_full_.notify_one(); + *pv = queue_.front(); + queue_.pop_front(); + } + return got_element; +} + +// Set the capacity of the queue; the queue is full whenever count() >= +// capacity(). The initial value is the maximum size_t. Requires size > 0. +template +void ProducerConsumerQueue::set_capacity(std::size_t size) { + mutex_lock lock(mu_); + CHECK_NE(size, 0); + capacity_ = size; + non_full_.notify_all(); +} + +// Return the capacity of the queue. +template +std::size_t ProducerConsumerQueue::capacity() const { + mutex_lock lock(mu_); + std::size_t max_elements = capacity_; + return max_elements; +} + +// Return the number of elements in the queue. +template +std::size_t ProducerConsumerQueue::count() const { + mutex_lock lock(mu_); + std::size_t num_elements = queue_.size(); + return num_elements; +} +} // namespace tensorflow + +#endif // TENSORFLOW_COMPILER_JIT_PRODUCER_CONSUMER_QUEUE_H_ diff --git a/tensorflow/compiler/jit/producer_consumer_queue_test.cc b/tensorflow/compiler/jit/producer_consumer_queue_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..f61260c6e52756ee039829afdc7452f5f760c221 --- /dev/null +++ b/tensorflow/compiler/jit/producer_consumer_queue_test.cc @@ -0,0 +1,139 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/compiler/jit/producer_consumer_queue.h" + +#include "tensorflow/core/lib/core/threadpool.h" +#include "tensorflow/core/platform/env.h" +#include "tensorflow/core/platform/mutex.h" +#include "tensorflow/core/platform/test.h" + +namespace tensorflow { +namespace { + +typedef ProducerConsumerQueue IntQueue; + +// Insert integers between low inclusive and high exclusive into q. +void PushRange(IntQueue *q, int low, int high) { + while (low != high) { + q->Put(low); + VLOG(2) << "Pushing " << low; + ++low; + } +} + +// Push the numbers between 0 and 999 inclusive from several threads in the +// pool. +void PushRanges(IntQueue *queue, thread::ThreadPool *pool) { + VLOG(1) << "Adding 20-36"; + pool->Schedule([queue] { PushRange(queue, 20, 36); }); + VLOG(1) << "Adding 7-20"; + pool->Schedule([queue] { PushRange(queue, 7, 20); }); + VLOG(1) << "Adding 36-501"; + pool->Schedule([queue] { PushRange(queue, 36, 501); }); + VLOG(1) << "Adding 501-1000"; + pool->Schedule([queue] { PushRange(queue, 501, 1000); }); + VLOG(1) << "Adding 0-5"; + pool->Schedule([queue] { PushRange(queue, 0, 5); }); + VLOG(1) << "Adding 5-7"; + pool->Schedule([queue] { PushRange(queue, 5, 7); }); +} + +// Pop elements from queue using Get(). Make sure that exactly <high> elements +// were present and their values are all integers between 0 and high-1 +// inclusive. +void GetRange(IntQueue *queue, int high) { + VLOG(1) << "Testing Wait"; + std::vector results; + for (int i = 0; i != high; ++i) { + int r = queue->Get(); + VLOG(2) << "Waited and got " << r; + results.push_back(r); + } + CHECK_EQ(queue->count(), 0); + std::sort(results.begin(), results.end()); + for (int i = 0; i != high; ++i) { + CHECK(results[i] == i); + } +} + +// Pop elements from queue using TryGet(). Make sure that exactly <high> +// elements were present and their values are all integers between 0 and high-1 +// inclusive. 
+void TryGetRange(IntQueue *queue, int high) { + std::vector results; + // Give up if we don't get all the elements back from the queue + // in 10 seconds. + int timeout = 10; + int r; + for (int i = 0; i != high; ++i) { + while (!queue->TryGet(&r)) { + if (!timeout--) { + LOG(FATAL) << "Can't find all elements in the queue"; + } + VLOG(1) << "Sleeping for a second..."; + sleep(1); + } + VLOG(2) << "Popped " << r; + results.push_back(r); + } + CHECK_EQ(queue->count(), 0); + CHECK(!queue->TryGet(&r)); + std::sort(results.begin(), results.end()); + for (int i = 0; i != high; ++i) { + CHECK_EQ(i, results[i]); + } +} + +const int kNumThreads = 15; + +TEST(ProducerConsumerQueue, GetRange) { + IntQueue queue; + { + thread::ThreadPool pool(Env::Default(), "test", kNumThreads); + PushRanges(&queue, &pool); + } + GetRange(&queue, 1000); +} + +TEST(ProducerConsumerQueue, TryGetRange) { + IntQueue queue; + { + thread::ThreadPool pool(Env::Default(), "test", kNumThreads); + PushRanges(&queue, &pool); + } + TryGetRange(&queue, 1000); +} + +TEST(ProducerConsumerQueue, ParallelGetRange) { + IntQueue queue; + { + thread::ThreadPool pool(Env::Default(), "test", kNumThreads); + pool.Schedule([&queue] { GetRange(&queue, 1000); }); + PushRanges(&queue, &pool); + } +} + +TEST(ProducerConsumerQueue, ParallelTryGetRange) { + IntQueue queue; + { + thread::ThreadPool pool(Env::Default(), "test", kNumThreads); + pool.Schedule([&queue] { TryGetRange(&queue, 1000); }); + PushRanges(&queue, &pool); + } +} + +} // namespace +} // namespace tensorflow diff --git a/tensorflow/compiler/jit/shape_inference_helpers.cc b/tensorflow/compiler/jit/shape_inference_helpers.cc new file mode 100644 index 0000000000000000000000000000000000000000..d9cfa16526bc5d809942a35e86075b4ec6e88a59 --- /dev/null +++ b/tensorflow/compiler/jit/shape_inference_helpers.cc @@ -0,0 +1,66 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Contains helpers for use in shape inference. + +#include "tensorflow/compiler/jit/shape_inference_helpers.h" + +#include + +#include "tensorflow/core/graph/graph.h" + +namespace tensorflow { + +Status BackEdgeHelper::Remove(Graph* graph) { + if (graph_ != nullptr) { + return errors::Internal("BackEdgeHelper duplicate call to Remove."); + } + graph_ = graph; + for (Node* n : graph_->nodes()) { + if (n->IsMerge()) { + for (const Edge* e : n->in_edges()) { + if (e->src()->IsNextIteration()) { + back_edges_.push_back( + BackEdge{e, e->src(), e->src_output(), e->dst(), e->dst_input()}); + } + } + } + } + for (const BackEdge& be : back_edges_) { + graph_->RemoveEdge(be.edge); + } + return Status::OK(); +} + +const std::vector& BackEdgeHelper::RemovedEdges() + const { + return back_edges_; +} + +Status BackEdgeHelper::Replace() { + if (graph_ == nullptr) { + return errors::Internal("BackEdgeHelper Replace called before Remove."); + } + if (replaced_) { + return errors::Internal("BackEdgeHelper Replace called more than once."); + } + replaced_ = true; + for (const BackEdge& be : back_edges_) { + graph_->AddEdge(be.src, be.src_output, be.dst, be.dst_input); + } + return Status::OK(); +} + +} // namespace tensorflow diff --git a/tensorflow/compiler/jit/shape_inference_helpers.h b/tensorflow/compiler/jit/shape_inference_helpers.h new file mode 100644 index 
0000000000000000000000000000000000000000..2f053c9a45dd47ca1b056634d2248d6181e77d68 --- /dev/null +++ b/tensorflow/compiler/jit/shape_inference_helpers.h @@ -0,0 +1,65 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_JIT_SHAPE_INFERENCE_HELPERS_H_ +#define TENSORFLOW_COMPILER_JIT_SHAPE_INFERENCE_HELPERS_H_ + +#include + +#include "tensorflow/core/graph/graph.h" + +namespace tensorflow { + +// Helper class to temporarily remove, then replace, the back edges in a +// graph. Simple algorithms for shape inference don't work with cycles, and this +// class can be used to remove cycles before running inference and replace them +// after. Correct usage requires exactly one call to Remove(), followed by any +// number of calls to RemovedEdges() and at most one call to Replace(). The call +// to Replace() is optional if the graph will be discarded without being +// executed, e.g., if it is being used purely for a shape inference pass. +class BackEdgeHelper { + public: + struct BackEdge { + const Edge* edge; + Node* src; + int src_output; + Node* dst; + int dst_input; + }; + + BackEdgeHelper() = default; + // Disallows copy and assign. + BackEdgeHelper(const BackEdgeHelper& other) = delete; + BackEdgeHelper& operator=(const BackEdgeHelper& other) = delete; + + // Temporarily removes all the back edges in graph. 
+ Status Remove(Graph* graph); + + // Gets the list of removed edges. + const std::vector& RemovedEdges() const; + + // Replaces the back edges removed by a prior call to Remove. + Status Replace(); + + private: + Graph* graph_ = nullptr; // not owned + std::vector back_edges_; + // Set once Replace has been called. + bool replaced_ = false; +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_COMPILER_JIT_SHAPE_INFERENCE_HELPERS_H_ diff --git a/tensorflow/compiler/jit/xla_compilation_cache.h b/tensorflow/compiler/jit/xla_compilation_cache.h index 5c0c79b880c474969464f23b4485734c404cef07..be1043d8c3fc0573922837e541615114a6d7a1a5 100644 --- a/tensorflow/compiler/jit/xla_compilation_cache.h +++ b/tensorflow/compiler/jit/xla_compilation_cache.h @@ -52,13 +52,14 @@ class XlaCompilationCache : public ResourceBase { // Compiles a function into a XlaCompiler::CompilationResult that can be used // to execute an XLA Computation. Compilation results are cached. // `function` is the name of a Tensorflow function to compile. - // `constant_args` is a maps of tensorflow argument number to constant value. + // `constant_args` is a map of tensorflow argument number to its constant + // value. // `variable_args` is a snapshot of the current values of the // resource variable arguments to `function`; uninitialized variables are // represented by an absent OptionalTensor. // The result of compilation is written to `*compilation_result`, which must // be non-null. If `executable` is non-null, also builds an - // xla::LocalExecutable and sets `executable to point to it. The resulting + // xla::LocalExecutable and sets `executable` to point to it. The resulting // executable pointer may be null if the computation has no non-constant // outputs. 
Status Compile(const XlaCompiler::Options& options, @@ -96,6 +97,7 @@ class XlaCompilationCache : public ResourceBase { xla::LocalExecutable** executable, const XlaCompiler::CompileOptions* compile_options, bool compile_single_op); + // Takes `result` which has been compiled from a Tensorflow subgraph to a // XLA computation already, and generates an XLA LocalExecutable `executable`. Status BuildExecutable(const XlaCompiler::Options& options, diff --git a/tensorflow/compiler/jit/xla_compile_on_demand_op.cc b/tensorflow/compiler/jit/xla_compile_on_demand_op.cc index 682d6ea8ccc4a54912ccad4666cf0a7a03a7a698..6c2782e28e99df0801ab8d938fa3f961e4e1574c 100644 --- a/tensorflow/compiler/jit/xla_compile_on_demand_op.cc +++ b/tensorflow/compiler/jit/xla_compile_on_demand_op.cc @@ -67,6 +67,7 @@ Status XlaCompileOnDemandOp::Run(OpKernelContext* ctx, run_options.set_stream(stream); run_options.set_allocator(client->backend().memory_allocator()); run_options.set_intra_op_thread_pool(&ctx->eigen_cpu_device()); + run_options.set_rng_seed(ctx->step_id()); auto run_result = executable->Run(launch_context.arguments(), run_options); TF_RETURN_IF_ERROR(run_result.status()); diff --git a/tensorflow/compiler/jit/xla_cpu_device.cc b/tensorflow/compiler/jit/xla_cpu_device.cc index d2dfdeea68129b536477aa75f66c9d267f5a9434..bc07dbd7bdf005fde781f7a1e6775080e363abfb 100644 --- a/tensorflow/compiler/jit/xla_cpu_device.cc +++ b/tensorflow/compiler/jit/xla_cpu_device.cc @@ -62,8 +62,8 @@ REGISTER_LOCAL_DEVICE_FACTORY(DEVICE_XLA_CPU, XlaCpuDeviceFactory); // Kernel registrations -constexpr std::array kAllXlaCpuTypes = { - {DT_INT32, DT_INT64, DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_BOOL}}; +constexpr std::array kAllXlaCpuTypes = { + {DT_INT32, DT_INT64, DT_HALF, DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_BOOL}}; REGISTER_XLA_LAUNCH_KERNEL(DEVICE_XLA_CPU, XlaLocalLaunchOp, kAllXlaCpuTypes); REGISTER_XLA_DEVICE_KERNELS(DEVICE_XLA_CPU, kAllXlaCpuTypes); diff --git 
a/tensorflow/compiler/jit/xla_device_context.cc b/tensorflow/compiler/jit/xla_device_context.cc index 6a57831cde1212671c253ef944e3379770db4a8d..43eb164012610723214cf39360698010c9dbdbd4 100644 --- a/tensorflow/compiler/jit/xla_device_context.cc +++ b/tensorflow/compiler/jit/xla_device_context.cc @@ -19,6 +19,7 @@ limitations under the License. #include "tensorflow/compiler/tf2xla/literal_util.h" #include "tensorflow/compiler/tf2xla/shape_util.h" #include "tensorflow/compiler/xla/util.h" +#include "tensorflow/core/common_runtime/device.h" #include "tensorflow/core/common_runtime/dma_helper.h" #include "tensorflow/core/platform/mem.h" @@ -53,8 +54,33 @@ XlaTransferManager::XlaTransferManager(se::Stream* stream, bool transfer_as_literal) : stream_(stream), client_(client), + transfer_manager_(client->backend().transfer_manager()), transfer_as_literal_(transfer_as_literal) {} +Status XlaTransferManager::TransferLiteralToDevice( + const Tensor& host_tensor, Tensor* device_tensor) const { + xla::Literal literal; + TF_RETURN_IF_ERROR(HostTensorToLiteral(host_tensor, &literal)); + VLOG(1) << "Transfer to device as literal: " << literal.ToString(); + + const xla::ShapedBuffer& shaped_buffer = + XlaTensor::FromTensor(device_tensor)->shaped_buffer(); + return transfer_manager_->TransferLiteralToDevice(stream_->parent(), literal, + shaped_buffer); +} + +Status XlaTransferManager::TransferLiteralFromDevice( + Tensor* host_tensor, const Tensor& device_tensor) const { + const xla::ShapedBuffer& shaped_buffer = + XlaTensor::FromTensor(&device_tensor)->shaped_buffer(); + + TF_ASSIGN_OR_RETURN(std::unique_ptr literal, + transfer_manager_->TransferLiteralFromDevice( + stream_->parent(), shaped_buffer)); + VLOG(1) << "Transfer from device as literal: " << literal->ToString(); + return LiteralToHostTensor(*literal, host_tensor->dtype(), host_tensor); +} + void XlaTransferManager::CopyCPUTensorToDevice(const Tensor* cpu_tensor, Device* device, Tensor* device_tensor, @@ -86,9 +112,7 @@ 
void XlaTransferManager::CopyCPUTensorToDevice(const Tensor* cpu_tensor, XlaTensor::DeviceMemoryFromTensor(*device_tensor); Status status; if (transfer_as_literal_) { - status = xla::Unimplemented( - "XlaTransferManager::CopyCPUTensorToDevice not implemented for " - "literals"); + status = TransferLiteralToDevice(*cpu_tensor, device_tensor); } else { stream_->ThenMemcpy(&dev_dst_ptr, src_ptr, total_bytes); // TODO(hpucha): Make this asynchronous. @@ -129,9 +153,7 @@ void XlaTransferManager::CopyDeviceTensorToCPU(const Tensor* device_tensor, Status status; if (transfer_as_literal_) { - status = xla::Unimplemented( - "XlaTransferManager::CopyDeviceTensorToCPU not implemented for " - "literals"); + status = TransferLiteralFromDevice(cpu_tensor, *device_tensor); } else { stream_->ThenMemcpy(dst_ptr, dev_src_ptr, total_bytes); // TODO(hpucha): Make this asynchronous. diff --git a/tensorflow/compiler/jit/xla_device_context.h b/tensorflow/compiler/jit/xla_device_context.h index a8ad511fbd2d7f06601608101b8346ff30f8fc20..ad914a1c23b5f2ea7063722f85e027a99fdb68f9 100644 --- a/tensorflow/compiler/jit/xla_device_context.h +++ b/tensorflow/compiler/jit/xla_device_context.h @@ -57,11 +57,18 @@ class XlaTransferManager { perftools::gputools::Stream* stream() const { return stream_; } private: + Status TransferLiteralToDevice(const Tensor& host_tensor, + Tensor* device_tensor) const; + Status TransferLiteralFromDevice(Tensor* host_tensor, + const Tensor& device_tensor) const; + // Stream obtained from a Device, used to transfer tensors between // CPU and device. perftools::gputools::Stream* stream_; // For the underlying memory allocator and XLA's TransferManager. xla::LocalClient* client_; + // Transfer manager, for marshalling data to and from the device. + xla::TransferManager* transfer_manager_; // True if we must use XLA's TransferManager for correct device transfers. 
bool transfer_as_literal_; }; diff --git a/tensorflow/compiler/jit/xla_gpu_device.cc b/tensorflow/compiler/jit/xla_gpu_device.cc index 5a1db817745f56d6bcc26ff6fc441b7c902ee2b5..ac60423d959ca44e7d92e2d965cf731287b1f83f 100644 --- a/tensorflow/compiler/jit/xla_gpu_device.cc +++ b/tensorflow/compiler/jit/xla_gpu_device.cc @@ -62,8 +62,9 @@ REGISTER_LOCAL_DEVICE_FACTORY(DEVICE_XLA_GPU, XlaGpuDeviceFactory); // Kernel registrations -constexpr std::array kAllXlaGpuTypes = { - {DT_INT32, DT_INT64, DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_BOOL}}; +constexpr std::array kAllXlaGpuTypes = { + {DT_INT32, DT_INT64, DT_HALF, DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_BOOL, + DT_BFLOAT16}}; REGISTER_XLA_LAUNCH_KERNEL(DEVICE_XLA_GPU, XlaLocalLaunchOp, kAllXlaGpuTypes); REGISTER_XLA_DEVICE_KERNELS(DEVICE_XLA_GPU, kAllXlaGpuTypes); diff --git a/tensorflow/compiler/jit/xla_launch_util.cc b/tensorflow/compiler/jit/xla_launch_util.cc index 354be1e1b54b2f2e808b2216cfc1fe110dbb3857..3520501c1a39d243cc84583c47cbb0663bb201a8 100644 --- a/tensorflow/compiler/jit/xla_launch_util.cc +++ b/tensorflow/compiler/jit/xla_launch_util.cc @@ -16,12 +16,14 @@ limitations under the License. #include "tensorflow/compiler/jit/xla_launch_util.h" #include "tensorflow/compiler/jit/defs.h" +#include "tensorflow/compiler/tf2xla/shape_util.h" #include "tensorflow/compiler/tf2xla/xla_compiler.h" #include "tensorflow/compiler/xla/client/client_library.h" #include "tensorflow/compiler/xla/client/local_client.h" #include "tensorflow/compiler/xla/statusor.h" #include "tensorflow/core/common_runtime/dma_helper.h" #include "tensorflow/core/common_runtime/function.h" +#include "tensorflow/core/common_runtime/gpu_device_context.h" #include "tensorflow/core/framework/allocator.h" #include "tensorflow/core/framework/node_def_util.h" #include "tensorflow/core/framework/op.h" @@ -30,10 +32,13 @@ limitations under the License. 
#include "tensorflow/core/framework/types.h" #include "tensorflow/core/util/stream_executor_util.h" +namespace { namespace gpu = perftools::gputools; +using xla::ScopedShapedBuffer; +using xla::ShapedBuffer; +} // anonymous namespace namespace tensorflow { - std::map SnapshotResourceVariables(OpKernelContext* ctx, int num_variables) { std::map snapshot; @@ -78,17 +83,17 @@ namespace { // Return the 'index''th subtree of the given ShapedBuffer as a // ScopedShapedBuffer. The returned ScopedShapedBuffer takes ownership of the // subtree, and sets the input's buffer pointers to nullptr for the subtree. -std::unique_ptr ExtractSubShapedBuffer( - xla::ShapedBuffer* shaped_buffer, int index, +ScopedShapedBuffer ExtractSubShapedBuffer( + ShapedBuffer* shaped_buffer, int index, xla::DeviceMemoryAllocator* allocator) { xla::Shape on_host_shape = xla::ShapeUtil::GetTupleElementShape( shaped_buffer->on_host_shape(), index); xla::Shape on_device_shape = xla::ShapeUtil::GetTupleElementShape( shaped_buffer->on_device_shape(), index); - xla::ShapedBuffer sub_shaped_buffer(on_host_shape, on_device_shape, - shaped_buffer->platform(), - shaped_buffer->device_ordinal()); + ShapedBuffer sub_shaped_buffer(on_host_shape, on_device_shape, + shaped_buffer->platform(), + shaped_buffer->device_ordinal()); auto& shape_tree = shaped_buffer->buffers(); auto& sub_shape_tree = sub_shaped_buffer.buffers(); @@ -100,8 +105,7 @@ std::unique_ptr ExtractSubShapedBuffer( index_to_buffer.second = gpu::DeviceMemoryBase(nullptr, 0); } } - return xla::ScopedShapedBuffer::MakeScoped(&sub_shaped_buffer, allocator) - .ValueOrDie(); + return ScopedShapedBuffer(std::move(sub_shaped_buffer), allocator); } } // namespace @@ -116,10 +120,10 @@ XlaComputationLaunchContext::XlaComputationLaunchContext( void XlaComputationLaunchContext::PopulateInputs( OpKernelContext* ctx, const XlaCompiler::CompilationResult* kernel, const std::map& variables) { - // Build xla::ShapedBuffers that point directly to the Tensor 
buffers. + // Build ShapedBuffers that point directly to the Tensor buffers. arg_buffers_.reserve(kernel->xla_input_shapes.size() + 1); arg_buffers_.resize(kernel->xla_input_shapes.size()); - arg_ptrs_ = std::vector(arg_buffers_.size()); + arg_ptrs_ = std::vector(arg_buffers_.size()); // Pass remaining parameters. const Tensor* t; @@ -138,8 +142,7 @@ void XlaComputationLaunchContext::PopulateInputs( if (xla::ShapeUtil::IsTuple(on_device_shape)) { const XlaTensor* xla_tensor = XlaTensor::FromTensor(t); CHECK(xla_tensor && xla_tensor->has_shaped_buffer()); - arg_ptrs_[i] = - const_cast(&xla_tensor->shaped_buffer()); + arg_ptrs_[i] = const_cast(&xla_tensor->shaped_buffer()); } else { CHECK(xla::ShapeUtil::Equal(shape, on_device_shape)) << "On-device shape " @@ -147,7 +150,7 @@ void XlaComputationLaunchContext::PopulateInputs( << " not the same as on-host shape " << xla::ShapeUtil::HumanStringWithLayout(shape); gpu::DeviceMemoryBase dmem = XlaTensor::DeviceMemoryFromTensor(*t); - arg_buffers_[i] = xla::MakeUnique( + arg_buffers_[i] = xla::MakeUnique( /*on_host_shape=*/shape, /*on_device_shape=*/shape, client_->platform(), client_->default_device_ordinal()); arg_buffers_[i]->set_buffer(dmem, /*index=*/{}); @@ -158,13 +161,15 @@ void XlaComputationLaunchContext::PopulateInputs( void XlaComputationLaunchContext::PopulateOutputs( OpKernelContext* ctx, const XlaCompiler::CompilationResult* kernel, - std::unique_ptr output) { + ScopedShapedBuffer output) { gpu::Stream* stream = ctx->op_device_context() ? ctx->op_device_context()->stream() : nullptr; // Computation output should always be a tuple. 
if (VLOG_IS_ON(2)) { - VLOG(2) << "Result tuple shape: " << output->on_host_shape().DebugString(); + VLOG(2) << "Result tuple shape: " << output.on_host_shape().DebugString(); + VLOG(2) << "Result tuple shape (on device): " + << output.on_device_shape().DebugString(); } CHECK_EQ(ctx->num_outputs(), kernel->outputs.size()); @@ -179,6 +184,10 @@ void XlaComputationLaunchContext::PopulateOutputs( const size_t total_bytes = const_tensor.TotalBytes(); if (stream && total_bytes > 0) { // Copy host -> device. (Empty tensors don't have backing buffers.) + // Manually allocate memory using an XlaTensorBuffer so we can allocate + // as much memory as the device requires (as given by + // GetByteSizeRequirement). This avoids XlaTransferManager having to + // reallocate the device buffer later. VLOG(1) << "Constant output tensor on device"; OP_REQUIRES_OK( @@ -189,15 +198,23 @@ void XlaComputationLaunchContext::PopulateOutputs( client_, stream->parent()->device_ordinal())); } - const void* src_ptr = DMAHelper::base(&const_tensor); - gpu::DeviceMemoryBase dst_ptr = - XlaTensor::DeviceMemoryFromTensor(*output_tensor); - // Memcpying asynchronously is safe for the GPU, but the CPU uses a - // shared allocator so hold a reference to the copied-to buffer until - // complete. - TensorReference ref(*output_tensor); - stream->ThenMemcpy(&dst_ptr, src_ptr, total_bytes); - stream->ThenDoHostCallback([ref] { ref.Unref(); }); + Device* device = dynamic_cast(ctx->device()); + OP_REQUIRES(ctx, device != nullptr, + errors::Internal("DeviceBase was not a Device.")); + ctx->op_device_context()->CopyCPUTensorToDevice( + &const_tensor, device, output_tensor, + [&](Status status) { TF_CHECK_OK(status); }); + + if (device->device_type() == DEVICE_GPU) { + // The GPUDeviceContext enqueues the host->device transfer in a + // separate stream from the main compute stream. 
We must ensure the + // compute stream is synchronized with the host->device transfer + // stream now otherwise we will create a race condition. + auto* gpu_device_context = + static_cast(ctx->op_device_context()); + gpu_device_context->stream()->ThenWaitFor( + gpu_device_context->host_to_device_stream()); + } } else { // No copy required. ctx->set_output(i, const_tensor); @@ -210,18 +227,18 @@ void XlaComputationLaunchContext::PopulateOutputs( const TensorShape& shape = kernel->outputs[i].shape; VLOG(2) << "Retval " << i << " shape " << shape.DebugString(); - gpu::DeviceMemoryBase buffer = output->buffer({output_num}); + gpu::DeviceMemoryBase buffer = output.buffer({output_num}); if (allocate_xla_tensors_) { Tensor* output_tensor; OP_REQUIRES_OK(ctx, ctx->allocate_output(i, shape, &output_tensor)); XlaTensor* xla_tensor = XlaTensor::FromTensor(output_tensor); CHECK(xla_tensor); - xla_tensor->set_shaped_buffer( - ExtractSubShapedBuffer(output.get(), output_num, xla_allocator_)); + xla_tensor->set_shaped_buffer(ScopedShapedBuffer( + ExtractSubShapedBuffer(&output, output_num, xla_allocator_))); } else { Tensor output_tensor = XlaTensorBuffer::MakeTensor( ctx->expected_output_dtype(i), shape, buffer, allocator); - output->set_buffer(gpu::DeviceMemoryBase(nullptr, 0), {output_num}); + output.set_buffer(gpu::DeviceMemoryBase(nullptr, 0), {output_num}); ctx->set_output(i, output_tensor); } ++output_num; @@ -241,7 +258,7 @@ void XlaComputationLaunchContext::PopulateOutputs( write.input_index >= 0 && write.input_index < ctx->num_inputs(), errors::Internal("Invalid input index for variable write.")); - gpu::DeviceMemoryBase buffer = output->buffer({output_num}); + gpu::DeviceMemoryBase buffer = output.buffer({output_num}); Var* variable = nullptr; // TODO(b/35625933): tensorflow::Var should contain a PersistentTensor, @@ -266,12 +283,12 @@ void XlaComputationLaunchContext::PopulateOutputs( XlaTensor* xla_tensor = XlaTensor::FromTensor(&output_tensor); CHECK(xla_tensor); 
xla_tensor->set_shaped_buffer( - ExtractSubShapedBuffer(output.get(), output_num, xla_allocator_)); + ExtractSubShapedBuffer(&output, output_num, xla_allocator_)); *variable->tensor() = output_tensor; } else { Tensor output_tensor = XlaTensorBuffer::MakeTensor( write.type, write.shape, buffer, allocator); - output->set_buffer(gpu::DeviceMemoryBase(nullptr, 0), {output_num}); + output.set_buffer(gpu::DeviceMemoryBase(nullptr, 0), {output_num}); *variable->tensor() = output_tensor; } ++output_num; diff --git a/tensorflow/compiler/jit/xla_launch_util.h b/tensorflow/compiler/jit/xla_launch_util.h index 14f70fe35891040ff3460567adb223be0f1c910f..26dcaa8a51d90f6706106ab8a4d762ddfec6a88d 100644 --- a/tensorflow/compiler/jit/xla_launch_util.h +++ b/tensorflow/compiler/jit/xla_launch_util.h @@ -87,7 +87,7 @@ class XlaComputationLaunchContext { // Given the XLA output in `output`, populate all outputs of `ctx`. void PopulateOutputs(OpKernelContext* ctx, const XlaCompiler::CompilationResult* kernel, - std::unique_ptr output); + xla::ScopedShapedBuffer output); // Return the argument list. Only valid after PopulateInputs() has been // called. 
diff --git a/tensorflow/compiler/jit/xla_tensor.cc b/tensorflow/compiler/jit/xla_tensor.cc index 956328e6757f4c903e3995a54635682d19052794..84b2835c4066005c194f73becb841fbcc48399b3 100644 --- a/tensorflow/compiler/jit/xla_tensor.cc +++ b/tensorflow/compiler/jit/xla_tensor.cc @@ -65,10 +65,8 @@ Status XlaTensor::AllocateShapedBuffer(DataType dtype, const TensorShape& shape, device_ordinal, size, /*retry_on_failure=*/false)); } - TF_ASSIGN_OR_RETURN(auto scoped_buffer, - xla::ScopedShapedBuffer::MakeScoped( - &buffer, client->backend().memory_allocator())); - set_shaped_buffer(std::move(scoped_buffer)); + set_shaped_buffer(xla::ScopedShapedBuffer( + std::move(buffer), client->backend().memory_allocator())); return Status::OK(); } diff --git a/tensorflow/compiler/jit/xla_tensor.h b/tensorflow/compiler/jit/xla_tensor.h index 5ff2fb08f03548260215c6aeded2c124f8d28f43..2334fd272beb9e8185de10048421369179a0e475 100644 --- a/tensorflow/compiler/jit/xla_tensor.h +++ b/tensorflow/compiler/jit/xla_tensor.h @@ -64,9 +64,9 @@ class XlaTensor { return *shaped_buffer_; } // Mutates the TensorInfo to set the ShapedBuffer. - void set_shaped_buffer( - std::unique_ptr shaped_buffer) { - shaped_buffer_ = std::move(shaped_buffer); + void set_shaped_buffer(xla::ScopedShapedBuffer shaped_buffer) { + shaped_buffer_ = + xla::MakeUnique(std::move(shaped_buffer)); } // Some tensors on the device may have known values on the host. 
We use these diff --git a/tensorflow/compiler/tests/BUILD b/tensorflow/compiler/tests/BUILD index 204a2a2f906b763eaf07311b6c85fcd1eac4457f..46b86c53aa6870c9290e25da27f82a06302f6865 100644 --- a/tensorflow/compiler/tests/BUILD +++ b/tensorflow/compiler/tests/BUILD @@ -124,6 +124,7 @@ tf_xla_py_test( name = "categorical_op_test", size = "small", srcs = ["categorical_op_test.py"], + tags = ["optonly"], deps = [ ":xla_test", "//tensorflow/python:framework_for_generated_wrappers", @@ -191,6 +192,31 @@ tf_xla_py_test( ], ) +tf_xla_py_test( + name = "oom_test", + size = "medium", + srcs = ["oom_test.py"], + disabled_backends = [ + "cpu", + "cpu_ondemand", + ], + tags = [ + # Allocates very large amounts of memory and does not work under TSAN. + "notsan", + "optonly", # Times out frequently in fastbuild. + ], + deps = [ + ":xla_test", + "//tensorflow/python:array_ops", + "//tensorflow/python:array_ops_gen", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:gradient_checker", + "//tensorflow/python:gradients", + "//tensorflow/python:math_ops", + "//tensorflow/python:platform_test", + ], +) + tf_xla_py_test( name = "conv2d_test", size = "medium", @@ -245,6 +271,18 @@ tf_xla_py_test( ], ) +tf_xla_py_test( + name = "dynamic_slice_ops_test", + size = "small", + srcs = ["dynamic_slice_ops_test.py"], + deps = [ + "//tensorflow/compiler/tests:xla_test", + "//tensorflow/compiler/tf2xla/python:xla", + "//tensorflow/python:array_ops", + "//tensorflow/python:dtypes", + ], +) + tf_xla_py_test( name = "dynamic_stitch_test", size = "small", @@ -302,7 +340,7 @@ tf_xla_py_test( tf_xla_py_test( name = "ftrl_test", - size = "small", + size = "medium", srcs = ["ftrl_test.py"], deps = [ ":xla_test", @@ -375,7 +413,6 @@ tf_xla_py_test( name = "momentum_test", size = "small", srcs = ["momentum_test.py"], - tags = ["no_oss"], deps = [ ":xla_test", "//tensorflow/python:array_ops", @@ -472,6 +509,22 @@ tf_xla_py_test( ], ) +tf_xla_py_test( + name = 
"reduce_window_test", + size = "small", + srcs = ["reduce_window_test.py"], + disabled_backends = ["cpu_ondemand"], + deps = [ + ":xla_test", + "//tensorflow/compiler/tf2xla/python:xla", + "//tensorflow/python:array_ops", + "//tensorflow/python:errors", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:math_ops", + "//tensorflow/python:platform_test", + ], +) + tf_xla_py_test( name = "reverse_ops_test", size = "medium", @@ -664,6 +717,21 @@ tf_xla_py_test( ], ) +tf_xla_py_test( + name = "while_test", + size = "small", + srcs = ["while_test.py"], + disabled_backends = ["cpu_ondemand"], + deps = [ + ":xla_test", + "//tensorflow/compiler/tf2xla/python:xla", + "//tensorflow/python:array_ops", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:platform_test", + "//tensorflow/python:training", + ], +) + tf_xla_py_test( name = "gather_test", size = "medium", diff --git a/tensorflow/compiler/tests/binary_ops_test.py b/tensorflow/compiler/tests/binary_ops_test.py index ba7b9bacd2b794c74409d517a9c05bfbb14a845f..1e4dd32916c3a40282735fb8f75670b0e9ef0dc9 100644 --- a/tensorflow/compiler/tests/binary_ops_test.py +++ b/tensorflow/compiler/tests/binary_ops_test.py @@ -190,19 +190,24 @@ class BinaryOpsTest(XLATestCase): ], equality_test=self.ListsAreClose) - self._testBinary( - gen_nn_ops.sparse_softmax_cross_entropy_with_logits, - np.array([[0.1, 0.2, 0.3, 0.4], [0.5, 0.6, 0.7, 0.8], - [0.9, 1.0, 1.1, 1.2]], dtype=dtype), - np.array([2, 1, 7], dtype=np.int32), - expected=[ - np.array([1.342536, 1.442536, np.nan], dtype=dtype), - np.array([[0.213838, 0.236328, -0.738817, 0.288651], - [0.213838, -0.763672, 0.261183, 0.288651], - [np.nan, np.nan, np.nan, np.nan]], - dtype=dtype), - ], - equality_test=self.ListsAreClose) + # TODO(b/68813416): Fails with bfloat16. 
+ if dtype != dtypes.bfloat16.as_numpy_dtype: + self._testBinary( + gen_nn_ops.sparse_softmax_cross_entropy_with_logits, + np.array( + [[0.1, 0.2, 0.3, 0.4], [0.5, 0.6, 0.7, 0.8], + [0.9, 1.0, 1.1, 1.2]], + dtype=dtype), + np.array([2, 1, 7], dtype=np.int32), + expected=[ + np.array([1.342536, 1.442536, np.nan], dtype=dtype), + np.array( + [[0.213838, 0.236328, -0.738817, 0.288651], [ + 0.213838, -0.763672, 0.261183, 0.288651 + ], [np.nan, np.nan, np.nan, np.nan]], + dtype=dtype), + ], + equality_test=self.ListsAreClose) def testIntOps(self): for dtype in self.int_types: @@ -260,12 +265,6 @@ class BinaryOpsTest(XLATestCase): np.array([[1], [2]], dtype=dtype), dtype(7), expected=np.array([[8], [9]], dtype=dtype)) - self._testBinary( - math_ops.add, - np.array([0xffffffff, 0xfffffffff, 1, 1], dtype=np.int64), - np.array([1, 1, 0xffffffff, 0xfffffffff], dtype=np.int64), - expected=np.array( - [1 << 32, 1 << 36, 1 << 32, 1 << 36], dtype=np.int64)) self._testBinary( math_ops.subtract, @@ -361,6 +360,14 @@ class BinaryOpsTest(XLATestCase): np.array([2, -1], dtype=dtype), expected=np.array([[[[3, 1], [5, 3]]]], dtype=dtype)) + if np.int64 in self.numeric_types: + self._testBinary( + math_ops.add, + np.array([0xffffffff, 0xfffffffff, 1, 1], dtype=np.int64), + np.array([1, 1, 0xffffffff, 0xfffffffff], dtype=np.int64), + expected=np.array([1 << 32, 1 << 36, 1 << 32, 1 << 36], + dtype=np.int64)) + def testComplexOps(self): for dtype in self.complex_types: ctypes = {np.complex64: np.float32} diff --git a/tensorflow/compiler/tests/build_defs.bzl b/tensorflow/compiler/tests/build_defs.bzl index 0528a5415d579a844e68403ace1bb8982a10a841..7b114d4f85d3a5cadc6af25b55c5a21f90d2a768 100644 --- a/tensorflow/compiler/tests/build_defs.bzl +++ b/tensorflow/compiler/tests/build_defs.bzl @@ -51,12 +51,12 @@ def tf_xla_py_test(name, srcs=[], deps=[], tags=[], data=[], main=None, if backend == "cpu": backend_args += [ "--test_device=XLA_CPU", - 
"--types=DT_FLOAT,DT_DOUBLE,DT_INT32,DT_INT64,DT_BOOL,DT_COMPLEX64" + "--types=DT_HALF,DT_FLOAT,DT_DOUBLE,DT_INT32,DT_INT64,DT_BOOL,DT_COMPLEX64" ] elif backend == "gpu": backend_args += [ "--test_device=XLA_GPU", - "--types=DT_FLOAT,DT_DOUBLE,DT_INT32,DT_INT64,DT_BOOL,DT_COMPLEX64" + "--types=DT_HALF,DT_FLOAT,DT_DOUBLE,DT_INT32,DT_INT64,DT_BOOL,DT_COMPLEX64,DT_BFLOAT16" ] backend_tags += ["requires-gpu-sm35"] elif backend in plugins: @@ -89,4 +89,3 @@ def generate_backend_suites(backends=[]): backends = all_backends() for backend in backends: native.test_suite(name="%s_tests" % backend, tags=["tf_xla_%s" % backend]) - diff --git a/tensorflow/compiler/tests/cholesky_op_test.py b/tensorflow/compiler/tests/cholesky_op_test.py index 5010fe5e21d0782e68d4e6d5bf6b4df1b44793a3..1a8989d7c2f617525c301f30fd899a01362310bf 100644 --- a/tensorflow/compiler/tests/cholesky_op_test.py +++ b/tensorflow/compiler/tests/cholesky_op_test.py @@ -34,6 +34,13 @@ from tensorflow.python.platform import test class CholeskyOpTest(XLATestCase): + # Cholesky defined for float64, float32, complex64, complex128 + # (https://www.tensorflow.org/api_docs/python/tf/cholesky) + @property + def float_types(self): + return set(super(CholeskyOpTest, self).float_types).intersection( + (np.float64, np.float32, np.complex64, np.complex128)) + def _verifyCholeskyBase(self, sess, placeholder, x, chol, verification, atol): chol_np, verification_np = sess.run([chol, verification], {placeholder: x}) self.assertAllClose(x, verification_np, atol=atol) diff --git a/tensorflow/compiler/tests/dynamic_slice_ops_test.py b/tensorflow/compiler/tests/dynamic_slice_ops_test.py new file mode 100644 index 0000000000000000000000000000000000000000..6a46d2ec3e7aee3a4ecfbf1ab9f622d8eb659e3c --- /dev/null +++ b/tensorflow/compiler/tests/dynamic_slice_ops_test.py @@ -0,0 +1,93 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for XLA dynamic slicing ops.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.compiler.tests.xla_test import XLATestCase +from tensorflow.compiler.tf2xla.python import xla +from tensorflow.python.framework import dtypes +from tensorflow.python.ops import array_ops +from tensorflow.python.platform import test + + +class DynamicUpdateSliceOpsTest(XLATestCase): + + def _assertOpOutputMatchesExpected(self, op, args, expected): + with self.test_session() as session: + with self.test_scope(): + placeholders = [ + array_ops.placeholder(dtypes.as_dtype(arg.dtype), arg.shape) + for arg in args + ] + feeds = {placeholders[i]: args[i] for i in range(0, len(args))} + output = op(*placeholders) + result = session.run(output, feeds) + self.assertAllClose(result, expected, rtol=1e-3) + + def testUpdateSlice(self): + for dtype in self.numeric_types: + self._assertOpOutputMatchesExpected( + xla.dynamic_update_slice, [ + np.array([], dtype=dtype), + np.array([], dtype=dtype), + np.array([0], dtype=np.int32) + ], + expected=np.array([], dtype=dtype)) + + self._assertOpOutputMatchesExpected( + xla.dynamic_update_slice, [ + np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], dtype=dtype), + np.array([-1, -2, -3], dtype=dtype), + np.array([6], 
dtype=np.int32) + ], + expected=np.array([1, 2, 3, 4, 5, 6, -1, -2, -3, 10], dtype=dtype)) + + self._assertOpOutputMatchesExpected( + xla.dynamic_update_slice, [ + np.array( + [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]], dtype=dtype), + np.array([[42, 43], [44, 45]], dtype=dtype), + np.array([1, 2], dtype=np.int32) + ], + expected=np.array( + [[1, 2, 3, 4], [5, 6, 42, 43], [9, 10, 44, 45]], dtype=dtype)) + + self._assertOpOutputMatchesExpected( + xla.dynamic_update_slice, [ + np.array( + [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]], dtype=dtype), + np.array([[], []], dtype=dtype), + np.array([1, 2], dtype=np.int32) + ], + expected=np.array( + [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]], dtype=dtype)) + + self._assertOpOutputMatchesExpected( + xla.dynamic_update_slice, [ + np.array( + [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]], dtype=dtype), + np.ones([3, 4], dtype=dtype), + np.array([0, 0], dtype=np.int32) + ], + expected=np.ones([3, 4], dtype=dtype)) + + +if __name__ == '__main__': + test.main() diff --git a/tensorflow/compiler/tests/ftrl_test.py b/tensorflow/compiler/tests/ftrl_test.py index f9db4cf2017c0b4b6dc0cfeeda6dca7bb9d14f19..8e6407dffdac3adbcda8cbca2109ef9196defa8c 100644 --- a/tensorflow/compiler/tests/ftrl_test.py +++ b/tensorflow/compiler/tests/ftrl_test.py @@ -134,9 +134,15 @@ class FtrlOptimizerTest(XLATestCase): # Validate updated params self.assertAllCloseAccordingToType( - np.array([-2.60260963, -4.29698515]), var0.eval(), float_rtol=1e-5) + np.array([-2.60260963, -4.29698515]), + var0.eval(), + float_rtol=1e-5, + half_rtol=1e-2) self.assertAllCloseAccordingToType( - np.array([-0.28432083, -0.56694895]), var1.eval(), float_rtol=1e-5) + np.array([-0.28432083, -0.56694895]), + var1.eval(), + float_rtol=1e-5, + half_rtol=1e-2) def testFtrlwithoutRegularization2(self): for dtype in self.float_types: @@ -272,8 +278,8 @@ class FtrlOptimizerTest(XLATestCase): with self.test_session(), self.test_scope(): val2, val3 = 
self.equivAdagradTest_AdagradPart(steps, dtype) - self.assertAllCloseAccordingToType(val0, val2, rtol=1e-4) - self.assertAllCloseAccordingToType(val1, val3, rtol=1e-4) + self.assertAllCloseAccordingToType(val0, val2, rtol=1e-4, half_rtol=1e-2) + self.assertAllCloseAccordingToType(val1, val3, rtol=1e-4, half_rtol=1e-2) def testEquivGradientDescentwithoutRegularization(self): steps = 5 diff --git a/tensorflow/compiler/tests/function_test.py b/tensorflow/compiler/tests/function_test.py index 11d8a99ffe1a136a54b16e20f1792062203f7969..fbc3c994d163a504351fcccd1ba71a0997e6516f 100644 --- a/tensorflow/compiler/tests/function_test.py +++ b/tensorflow/compiler/tests/function_test.py @@ -105,6 +105,28 @@ class FunctionTest(XLATestCase): result = sess.run(call_f) self.assertAllClose(result, expected, rtol=1e-3) + def testCompileTimeConstantsInDefun(self): + """Tests that XLA handles compile-time constants in defuns.""" + with self.test_session() as sess: + + @function.Defun(dtypes.float32, dtypes.int32, dtypes.int32) + def Foo(a, c, d): + # c and d must be known at compile time + x = array_ops.slice(a, c, d) + return x + + a = array_ops.placeholder(dtypes.float32) + c = array_ops.placeholder(dtypes.int32, shape=[4]) + d = array_ops.placeholder(dtypes.int32, shape=[4]) + with self.test_scope(): + call_f = Foo(a, c, d) + result = sess.run(call_f, feed_dict={ + a: np.ones([1, 4, 4, 1]), + c: [0, 0, 0, 0], + d: [1, 2, 2, 1]}) + + self.assertAllEqual(np.ones([1, 2, 2, 1]), result) + # TODO(b/36139787): Re-enable this test when noinline works again. 
def DISABLED_testFunctionsNoInline(self): diff --git a/tensorflow/compiler/tests/image_ops_test.py b/tensorflow/compiler/tests/image_ops_test.py index 3bc41b7cfd72bec7572097f8c53eef314a4369f6..42e637734c578fcc70473060cb156e172a0a1995 100644 --- a/tensorflow/compiler/tests/image_ops_test.py +++ b/tensorflow/compiler/tests/image_ops_test.py @@ -34,6 +34,13 @@ from tensorflow.python.ops import image_ops from tensorflow.python.platform import test +def GenerateNumpyRandomRGB(shape): + # Only generate floating points that are fractions like n / 256, since they + # are RGB pixels. Some low-precision floating point types in this test can't + # handle arbitrary precision floating points well. + return np.random.randint(0, 256, shape) / 256. + + class RGBToHSVTest(XLATestCase): def testBatch(self): @@ -43,7 +50,7 @@ class RGBToHSVTest(XLATestCase): shape = (batch_size, 2, 7, 3) for nptype in self.float_types: - inp = np.random.rand(*shape).astype(nptype) + inp = GenerateNumpyRandomRGB(shape).astype(nptype) # Convert to HSV and back, as a batch and individually with self.test_session() as sess: @@ -65,7 +72,8 @@ class RGBToHSVTest(XLATestCase): # Verify that processing batch elements together is the same as separate self.assertAllClose(batch1, join1) self.assertAllClose(batch2, join2) - self.assertAllCloseAccordingToType(batch2, inp, bfloat16_atol=0.03) + self.assertAllCloseAccordingToType( + batch2, inp, bfloat16_atol=0.03, half_rtol=0.02) def testRGBToHSVRoundTrip(self): data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1] @@ -82,7 +90,7 @@ class RGBToHSVTest(XLATestCase): def testRGBToHSVNumpy(self): """Tests the RGB to HSV conversion matches a reference implementation.""" for nptype in self.float_types: - rgb_flat = np.random.random(64 * 3).reshape((64, 3)).astype(nptype) + rgb_flat = GenerateNumpyRandomRGB((64, 3)).astype(nptype) rgb_np = rgb_flat.reshape(4, 4, 4, 3) hsv_np = np.array([ colorsys.rgb_to_hsv( diff --git a/tensorflow/compiler/tests/jit_test.py 
b/tensorflow/compiler/tests/jit_test.py index f9d87c2d1cfe5c1a7487e124c971a54ffcfede15..1f7da659e5590b86c96964bbd14a4175341783c8 100644 --- a/tensorflow/compiler/tests/jit_test.py +++ b/tensorflow/compiler/tests/jit_test.py @@ -23,6 +23,7 @@ import numpy as np from tensorflow.contrib.compiler import jit from tensorflow.core.protobuf import config_pb2 +from tensorflow.core.protobuf import rewriter_config_pb2 from tensorflow.python.client import session as session_lib from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes @@ -38,6 +39,18 @@ from tensorflow.python.platform import test jit_scope = jit.experimental_jit_scope +# Disable rewrites to make sure we don't end up having to update this test +# whenever we implement new ones. +def NoRewriteSessionConfig(): + rewriter_config = rewriter_config_pb2.RewriterConfig( + disable_model_pruning=True, + arithmetic_optimization=rewriter_config_pb2.RewriterConfig.OFF, + dependency_optimization=rewriter_config_pb2.RewriterConfig.OFF, + function_optimization=rewriter_config_pb2.RewriterConfig.OFF) + graph_options = config_pb2.GraphOptions(rewrite_options=rewriter_config) + return config_pb2.ConfigProto(graph_options=graph_options) + + def CompiledKernel(fn, *inputs, **kwargs): """Execute 'fn' as a compiled XLA kernel, with 'inputs'.""" name = kwargs.pop("name", None) @@ -81,7 +94,7 @@ class JitLaunchTest(test.TestCase): # actually ran. However, it is sometimes possible for _XlaLaunch ops to be # constant-folded away, so the check is optional. 
def _compare(self, fn, args, require_kernel_launch=True, noinline=None): - with session_lib.Session() as sess: + with session_lib.Session(config=NoRewriteSessionConfig()) as sess: placeholders = [] feeds = {} for arg in args: @@ -258,7 +271,7 @@ class XlaCompilationTest(test.TestCase): def testReshape(self): """Tests an operator with compile-time constant and non-constant inputs.""" - with self.test_session() as sess: + with self.test_session(config=NoRewriteSessionConfig()) as sess: x = array_ops.placeholder(dtypes.float32) y = array_ops.placeholder(dtypes.int32) with jit_scope(): @@ -282,7 +295,7 @@ class XlaCompilationTest(test.TestCase): def testIgnoredArguments(self): """Tests that JIT computations can ignore formal parameters.""" - with self.test_session() as sess: + with self.test_session(config=NoRewriteSessionConfig()) as sess: x = array_ops.placeholder(dtypes.int32) y = array_ops.placeholder(dtypes.int32) with jit_scope(): @@ -306,7 +319,7 @@ class XlaCompilationTest(test.TestCase): def testLoops(self): """Tests that compilation accepts computations containing loops.""" - with self.test_session() as session: + with self.test_session(config=NoRewriteSessionConfig()) as session: x = array_ops.placeholder(dtypes.float32) with jit_scope(): c = lambda i, _: math_ops.less(i, 5) @@ -324,7 +337,7 @@ class XlaCompilationTest(test.TestCase): def testCond(self): """Tests that compilation handles switch operators.""" - with self.test_session() as session: + with self.test_session(config=NoRewriteSessionConfig()) as session: x = array_ops.placeholder(dtypes.float32) y = array_ops.placeholder(dtypes.float32) c = array_ops.placeholder(dtypes.bool) @@ -365,7 +378,8 @@ class XlaCompilationTest(test.TestCase): inp = array_ops.placeholder(dtypes.float32) out = Entry(inp) - with self.test_session(graph=g, use_gpu=True) as sess: + with self.test_session( + config=NoRewriteSessionConfig(), graph=g, use_gpu=True) as sess: run_metadata = config_pb2.RunMetadata() val = 
sess.run(out, feed_dict={inp: [2., 10.]}, @@ -377,7 +391,7 @@ class XlaCompilationTest(test.TestCase): def testLoopDeadlock(self): """Regression test for bug that caused deadlocks in graphs with loops.""" - with self.test_session() as session: + with self.test_session(config=NoRewriteSessionConfig()) as session: x = array_ops.placeholder(dtypes.float32) with jit_scope(): y = x + 1.0 @@ -404,10 +418,10 @@ class XlaCompilationTest(test.TestCase): y = Forward(x) dx, = gradients_impl.gradients(y, [x], 1.0) - cfg = config_pb2.ConfigProto(graph_options=config_pb2.GraphOptions( - optimizer_options=config_pb2.OptimizerOptions( - opt_level=config_pb2.OptimizerOptions.L1, - do_function_inlining=True))) + cfg = NoRewriteSessionConfig() + cfg.graph_options.optimizer_options.opt_level = ( + config_pb2.OptimizerOptions.L1) + cfg.graph_options.optimizer_options.do_function_inlining = True with session_lib.Session(graph=g, config=cfg) as sess: run_metadata = config_pb2.RunMetadata() dx_val = sess.run(dx, diff --git a/tensorflow/compiler/tests/matrix_triangular_solve_op_test.py b/tensorflow/compiler/tests/matrix_triangular_solve_op_test.py index cccb7f5789dce39ef8c3d4b3a7573aaa983b3fbd..5819b2bf2b55b9213a039c0ba82dd0bf1c738b00 100644 --- a/tensorflow/compiler/tests/matrix_triangular_solve_op_test.py +++ b/tensorflow/compiler/tests/matrix_triangular_solve_op_test.py @@ -37,6 +37,14 @@ def MakePlaceholder(x): class MatrixTriangularSolveOpTest(XLATestCase): + # MatrixTriangularSolve defined for float64, float32, complex64, complex128 + # (https://www.tensorflow.org/api_docs/python/tf/matrix_triangular_solve) + @property + def float_types(self): + return set(super(MatrixTriangularSolveOpTest, + self).float_types).intersection( + (np.float64, np.float32, np.complex64, np.complex128)) + def _VerifyTriangularSolveBase(self, sess, placeholder_a, placeholder_ca, placeholder_b, a, clean_a, b, verification, atol): diff --git a/tensorflow/compiler/tests/oom_test.py 
b/tensorflow/compiler/tests/oom_test.py new file mode 100644 index 0000000000000000000000000000000000000000..1434e965e3d7eaeca94ad0fa97498f884e30e115 --- /dev/null +++ b/tensorflow/compiler/tests/oom_test.py @@ -0,0 +1,61 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Functional tests for out-of-memory conditions.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.compiler.tests import xla_test +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors +from tensorflow.python.ops import array_ops +from tensorflow.python.platform import googletest + + +class OutOfMemoryTest(xla_test.XLATestCase): + + def testOutputOutOfMemory(self): + """Allocates tensors until out of memory. + + Generates a large rank-1 tensor. The tensor is an output of an XLA + computation, not constant. + + Check that a ResourceExhaustedError is raised and can be caught. + + We spin in a loop generating larger and larger tensors until an OOM event + happens. We may be running sandboxed, so have a small host memory limit, so + any hardcoded value is unlikely to land in the sweet spot between device + memory size and host memory size with stability. 
+ """ + + def test_loop(): + size = 2e8 + while True: + with self.test_session(): + # Force the compiled code to not be constant by feeding in an addend. + p = array_ops.placeholder(dtypes.float32, shape=[]) + with self.test_scope(): + # Create a large R1 tensor. + c = array_ops.zeros([size, 1]) + p + + c.eval(feed_dict={p: 1.0}) + size *= 2 + + self.assertRaises(errors.ResourceExhaustedError, test_loop) + + +if __name__ == "__main__": + googletest.main() diff --git a/tensorflow/compiler/tests/reduce_window_test.py b/tensorflow/compiler/tests/reduce_window_test.py new file mode 100644 index 0000000000000000000000000000000000000000..e78a63465b80644d8810d9fa7433653bc4639fed --- /dev/null +++ b/tensorflow/compiler/tests/reduce_window_test.py @@ -0,0 +1,102 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for xla.reduce_window.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.compiler.tests.xla_test import XLATestCase +from tensorflow.compiler.tf2xla.python import xla +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import function +from tensorflow.python.ops import array_ops +from tensorflow.python.platform import googletest + + +class ReduceWindowTest(XLATestCase): + """Test cases for xla.reduce_window.""" + + def _reduce_window(self, operand, init, reducer, **kwargs): + with self.test_session(): + placeholder = array_ops.placeholder(operand.dtype) + with self.test_scope(): + output = xla.reduce_window(placeholder, init, reducer, **kwargs) + return output.eval(feed_dict={placeholder: operand}) + + def testReduceWindow(self): + + # TODO(b/77644762): float16 and float64 ReduceWindow are unimplemented. 
+ for dtype in set(self.numeric_types).intersection( + set([dtypes.bfloat16.as_numpy_dtype, np.float32])): + + @function.Defun(dtype, dtype) + def sum_reducer(x, y): + return x + y + + @function.Defun(dtype, dtype) + def mul_reducer(x, y): + return x * y + + self.assertAllClose( + np.array([3, 5, 7, 9, 11, 13], dtype=dtype), + self._reduce_window( + np.array([1, 2, 3, 4, 5, 6, 7], dtype=dtype), + 0.0, + sum_reducer, + window_dimensions=[2])) + + self.assertAllClose( + np.array([3, 7, 11], dtype=dtype), + self._reduce_window( + np.array([1, 2, 3, 4, 5, 6, 7], dtype=dtype), + 0.0, + sum_reducer, + window_dimensions=[2], + window_strides=[2])) + + self.assertAllClose( + np.array([1, 4, 7], dtype=dtype), + self._reduce_window( + np.array([1, 2, 3, 4, 5, 6, 7], dtype=dtype), + 0.0, + sum_reducer, + window_dimensions=[1], + window_strides=[3])) + + self.assertAllClose( + np.array([[24, 36, 24], [96, 0, 0]], dtype=dtype), + self._reduce_window( + np.array([[1, 2, 3, 4], [4, 3, 2, 1], [2, 4, 0, 1]], dtype=dtype), + 1.0, + mul_reducer, + window_dimensions=[2, 2], + window_strides=[1, 1])) + + self.assertAllClose( + np.array([[0, 0, 0], [5, 10, 5], [2, 4, 1], [0, 0, 0]], dtype=dtype), + self._reduce_window( + np.array([[1, 2, 3, 4], [4, 3, 2, 1], [2, 4, 0, 1]], dtype=dtype), + 0.0, + sum_reducer, + window_dimensions=[2, 2], + window_strides=[2, 2], + padding=[[2, 3], [1, 2]])) + + +if __name__ == '__main__': + googletest.main() diff --git a/tensorflow/compiler/tests/spacetobatch_op_test.py b/tensorflow/compiler/tests/spacetobatch_op_test.py index 92518aadc4bf5c601cfb4192c093799784b6aa72..f37c34156f96761632247be4bc1b62fca54f666e 100644 --- a/tensorflow/compiler/tests/spacetobatch_op_test.py +++ b/tensorflow/compiler/tests/spacetobatch_op_test.py @@ -21,6 +21,7 @@ from __future__ import print_function import numpy as np from tensorflow.compiler.tests.xla_test import XLATestCase +from tensorflow.python.framework import dtypes from tensorflow.python.ops import array_ops from 
tensorflow.python.ops import gen_array_ops from tensorflow.python.platform import test @@ -156,14 +157,32 @@ class SpaceToBatchNDTest(XLATestCase): paddings = np.array(paddings).reshape((len(block_shape), 2)) with self.test_session() as sess, self.test_scope(): for dtype in self.float_types: + # TODO(b/68813416): Skip bfloat16's as the input type for direct is + # float32 and results in a mismatch, while making testDirect provide the + # correctly typed input results in 'no fill-function for data-type' + # error. + if dtype == dtypes.bfloat16.as_numpy_dtype: + continue + if dtype == np.float16: + actual_inputs = np.array(inputs).astype(dtype) + actual_paddings = np.array(paddings).astype(dtype) + expected_outputs = np.array(outputs).astype(dtype) + else: + actual_inputs = inputs + actual_paddings = paddings + expected_outputs = outputs placeholder = array_ops.placeholder(dtype) # outputs = space_to_batch(inputs) - x_tf = array_ops.space_to_batch_nd(placeholder, block_shape, paddings) - self.assertAllEqual(sess.run(x_tf, {placeholder: inputs}), outputs) + x_tf = array_ops.space_to_batch_nd(placeholder, block_shape, + actual_paddings) + self.assertAllEqual( + sess.run(x_tf, {placeholder: actual_inputs}), expected_outputs) # inputs = batch_to_space(outputs) placeholder = array_ops.placeholder(dtype) - x_tf = array_ops.batch_to_space_nd(placeholder, block_shape, paddings) - self.assertAllEqual(sess.run(x_tf, {placeholder: outputs}), inputs) + x_tf = array_ops.batch_to_space_nd(placeholder, block_shape, + actual_paddings) + self.assertAllEqual( + sess.run(x_tf, {placeholder: expected_outputs}), actual_inputs) def _testDirect(self, input_shape, block_shape, paddings): inputs = np.arange(np.prod(input_shape), dtype=np.float32) diff --git a/tensorflow/compiler/tests/ternary_ops_test.py b/tensorflow/compiler/tests/ternary_ops_test.py index ba5f829936fd82ca0cc53eda34aefbca6d80482b..75a2cf07c5adc262cdbd9c10d317a5d00f370e3d 100644 --- 
a/tensorflow/compiler/tests/ternary_ops_test.py +++ b/tensorflow/compiler/tests/ternary_ops_test.py @@ -23,6 +23,7 @@ import numpy as np from tensorflow.compiler.tests.xla_test import XLATestCase from tensorflow.python.framework import dtypes from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gen_math_ops from tensorflow.python.ops import math_ops from tensorflow.python.platform import googletest @@ -119,6 +120,23 @@ class TernaryOpsTest(XLATestCase): np.array([2, 1], dtype=np.int32), expected=np.array([[2], [5]], dtype=dtype)) + def testClipByValue(self): + # TODO(b/78258593): enable integer types here too. + for dtype in self.float_types: + test_cases = [ + (np.array([2, 4, 5], dtype=dtype), dtype(7)), # + (dtype(1), np.array([2, 4, 5], dtype=dtype)), # + (np.array([-2, 7, 7], dtype=dtype), np.array([-2, 9, 8], dtype=dtype)) + ] + x = np.array([-2, 10, 6], dtype=dtype) + for lower, upper in test_cases: + self._testTernary( + gen_math_ops._clip_by_value, + x, + lower, + upper, + expected=np.minimum(np.maximum(x, lower), upper)) + if __name__ == "__main__": googletest.main() diff --git a/tensorflow/compiler/tests/unary_ops_test.py b/tensorflow/compiler/tests/unary_ops_test.py index a8ab23537806cfd566f0fd4f47ecff50e8af2a8d..ba79f393a8f9b24ac506d2130957c38ecd442509 100644 --- a/tensorflow/compiler/tests/unary_ops_test.py +++ b/tensorflow/compiler/tests/unary_ops_test.py @@ -154,6 +154,9 @@ class UnaryOpsTest(XLATestCase): def testFloatOps(self): for dtype in self.float_types: + # TODO(b/77694432): Half test failed on CPU, last ran on 04-06-2018. 
+ if dtype == np.float16 and self.device == "XLA_CPU": + continue x = np.arange(-0.90, 0.90, 0.25) self._assertOpOutputMatchesExpected( math_ops.acos, @@ -793,7 +796,10 @@ class UnaryOpsTest(XLATestCase): self._assertSoftplusMatchesExpected([[-2, 0, 8]], dtype) self._assertSoftplusMatchesExpected( [[-9, 7, -5, 3, -1], [1, -3, 5, -7, 9]], dtype) - log_eps = np.log(np.finfo(dtype).eps) + if dtype == dtypes.bfloat16.as_numpy_dtype: + log_eps = np.log(np.finfo(np.float32).eps) + else: + log_eps = np.log(np.finfo(dtype).eps) one = dtype(1) ten = dtype(10) self._assertSoftplusMatchesExpected([ diff --git a/tensorflow/compiler/tests/variable_ops_test.py b/tensorflow/compiler/tests/variable_ops_test.py index b08d6ab21e0746558cb3d4818d4c822c45d2e9ee..8ecad00f6e23b3a7746bbb473102ac847bf4cbfd 100644 --- a/tensorflow/compiler/tests/variable_ops_test.py +++ b/tensorflow/compiler/tests/variable_ops_test.py @@ -230,7 +230,10 @@ class SliceAssignTest(XLATestCase): # shrink shape changes checker[1:2, 1] = [66] checker[1, 1:2] = [66] - checker[1, 1] = 66 + if dtype != dtypes.bfloat16.as_numpy_dtype: + # TODO(b/68813416): valnp call above results in an ndarray and not a + # number for bfloat16s. + checker[1, 1] = 66 # newaxis shape changes checker[:, None, :] = [[[10, 20, 30]], [[40, 50, 50]]] # shrink and newaxis @@ -243,8 +246,11 @@ class SliceAssignTest(XLATestCase): # Assign vector to scalar (rank-0) using newaxis checker2 = StridedSliceAssignChecker(self, 222, dtype=dtype) - checker2[()] = 6 # no indices - checker2[...] = 6 # ellipsis + if dtype != dtypes.bfloat16.as_numpy_dtype: + # TODO(b/68813416): valnp call above results in an ndarray and not a + # number for bfloat16s. + checker2[()] = 6 # no indices + checker2[...] 
= 6 # ellipsis checker2[None] = [6] # new axis def testUninitialized(self): diff --git a/tensorflow/compiler/tests/while_test.py b/tensorflow/compiler/tests/while_test.py new file mode 100644 index 0000000000000000000000000000000000000000..f79eb27435cc954cebde4357c1d946a320f4ed75 --- /dev/null +++ b/tensorflow/compiler/tests/while_test.py @@ -0,0 +1,130 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for while loops in XLA.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.compiler.tests.xla_test import XLATestCase +from tensorflow.compiler.tf2xla.python import xla +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import function +from tensorflow.python.ops import array_ops +from tensorflow.python.platform import test + + +class WhileTest(XLATestCase): + + def testSingletonLoopHandrolled(self): + # Define a function for the loop body + @function.Defun(dtypes.int32) + def loop_body(step): + step_out = step + constant_op.constant(1, dtype=dtypes.int32) + return step_out + + # Define a function for the loop condition + @function.Defun(dtypes.int32) + def loop_cond(step): + return step < 10 + + with self.test_session() as sess: 
+ init_index = array_ops.placeholder(dtypes.int32, []) + with self.test_scope(): + loop_outputs = xla.while_loop([init_index], loop_cond, loop_body) + + result = sess.run(loop_outputs, {init_index: 0}) + self.assertAllClose(result, [10], rtol=1e-3) + + def testCountingLoopHandrolled(self): + # Define a function for the loop body + @function.Defun(dtypes.int32, dtypes.float32) + def loop_body(step, rsum): + step_out = step + constant_op.constant(1, dtype=dtypes.int32) + sum_out = rsum + constant_op.constant(1.5, dtype=dtypes.float32) + return step_out, sum_out + + # Define a function for the loop condition + @function.Defun(dtypes.int32, dtypes.float32) + def loop_cond(step, rsum): + del rsum + return step < 10 + + with self.test_session() as sess: + init_index = array_ops.placeholder(dtypes.int32, []) + init_sum = array_ops.placeholder(dtypes.float32, []) + with self.test_scope(): + loop_outputs = xla.while_loop([init_index, init_sum], loop_cond, + loop_body) + + result = sess.run(loop_outputs, {init_index: 0, init_sum: 0.0}) + self.assertAllClose(result, [10, 15.0], rtol=1e-3) + no_iters_result = sess.run(loop_outputs, {init_index: 10, init_sum: 0.0}) + self.assertAllClose(no_iters_result, [10, 0.0], rtol=1e-3) + + def testCountingLoopHandrolledC64(self): + # Define a function for the loop body + @function.Defun(dtypes.int32, dtypes.complex64) + def loop_body(step, rsum): + step_out = step + constant_op.constant(1, dtype=dtypes.int32) + sum_out = rsum + constant_op.constant(1.5 + 2j, dtype=dtypes.complex64) + return step_out, sum_out + + # Define a function for the loop condition + @function.Defun(dtypes.int32, dtypes.complex64) + def loop_cond(step, rsum): + del rsum + return step < 10 + + with self.test_session() as sess: + init_index = array_ops.placeholder(dtypes.int32, []) + init_sum = array_ops.placeholder(dtypes.complex64, []) + with self.test_scope(): + loop_outputs = xla.while_loop([init_index, init_sum], loop_cond, + loop_body) + + result = 
sess.run(loop_outputs, {init_index: 0, init_sum: 0.0}) + self.assertAllClose(result[1], np.complex64(15 + 20j), rtol=1e-3) + no_iters_result = sess.run(loop_outputs, {init_index: 10, init_sum: 0.0}) + self.assertAllClose(no_iters_result[1], np.complex64(0), rtol=1e-3) + + def testLoopWithConstantOutput(self): + # Define a function for the loop body + @function.Defun(dtypes.int32, dtypes.int32) + def loop_body(step, x): + del x + step_out = step + constant_op.constant(1, dtype=dtypes.int32) + return (step_out, 7) + + # Define a function for the loop condition + @function.Defun(dtypes.int32, dtypes.int32) + def loop_cond(step, x): + del x + return step < 10 + + with self.test_session() as sess: + init_index = array_ops.placeholder(dtypes.int32, []) + with self.test_scope(): + loop_outputs = xla.while_loop([init_index, 42], loop_cond, loop_body) + + result = sess.run(loop_outputs, {init_index: 0}) + self.assertAllClose(result, [10, 7], rtol=1e-3) + + +if __name__ == '__main__': + test.main() diff --git a/tensorflow/compiler/tf2xla/BUILD b/tensorflow/compiler/tf2xla/BUILD index e7daf4e01c45c3705216fce7dd3db5baa0c261fc..ba5c3a14849cefcb680b03425232724ff32375a8 100644 --- a/tensorflow/compiler/tf2xla/BUILD +++ b/tensorflow/compiler/tf2xla/BUILD @@ -415,7 +415,7 @@ cc_library( "//tensorflow/compiler/jit:graph_to_functiondef", "//tensorflow/compiler/jit:union_find", "//tensorflow/compiler/tf2xla:dump_graph", - "//tensorflow/compiler/tf2xla/ops:functional_ops", + "//tensorflow/compiler/tf2xla/ops:xla_ops", "//tensorflow/compiler/xla:status_macros", "//tensorflow/compiler/xla:util", "//tensorflow/core:core_cpu", @@ -437,7 +437,7 @@ tf_cc_test( "//tensorflow/cc:function_ops", "//tensorflow/cc:ops", "//tensorflow/cc:resource_variable_ops", - "//tensorflow/compiler/tf2xla/cc:functional_ops", + "//tensorflow/compiler/tf2xla/cc:xla_ops", "//tensorflow/compiler/xla:status_macros", "//tensorflow/core:core_cpu", "//tensorflow/core:core_cpu_internal", diff --git 
a/tensorflow/compiler/tf2xla/cc/BUILD b/tensorflow/compiler/tf2xla/cc/BUILD index c30bb9cacd48fb93ac359a6a25699ba6a74183c5..4f8bb8ad743afe69a6544c2ae0dc7309891b2df3 100644 --- a/tensorflow/compiler/tf2xla/cc/BUILD +++ b/tensorflow/compiler/tf2xla/cc/BUILD @@ -7,44 +7,20 @@ licenses(["notice"]) # Apache 2.0 load("//tensorflow:tensorflow.bzl", "tf_gen_op_wrapper_cc") tf_gen_op_wrapper_cc( - name = "functional_ops_gen", - include_internal_ops = 1, - out_ops_file = "ops/functional_ops", - deps = ["//tensorflow/compiler/tf2xla/ops:functional_ops"], + name = "xla_ops_gen", + out_ops_file = "ops/xla_ops", + deps = ["//tensorflow/compiler/tf2xla/ops:xla_ops"], ) cc_library( - name = "functional_ops", - srcs = ["ops/functional_ops.cc"], - hdrs = ["ops/functional_ops.h"], + name = "xla_ops", + srcs = ["ops/xla_ops.cc"], + hdrs = ["ops/xla_ops.h"], deps = [ "//tensorflow/cc:const_op", "//tensorflow/cc:ops", "//tensorflow/cc:scope", - "//tensorflow/compiler/tf2xla/ops:functional_ops", - "//tensorflow/core:core_cpu", - "//tensorflow/core:framework", - "//tensorflow/core:lib", - "//tensorflow/core:protos_all_cc", - ], -) - -tf_gen_op_wrapper_cc( - name = "sendrecv_ops_gen", - include_internal_ops = 1, - out_ops_file = "ops/sendrecv_ops", - deps = ["//tensorflow/compiler/tf2xla/ops:sendrecv_ops"], -) - -cc_library( - name = "sendrecv_ops", - srcs = ["ops/sendrecv_ops.cc"], - hdrs = ["ops/sendrecv_ops.h"], - deps = [ - "//tensorflow/cc:const_op", - "//tensorflow/cc:ops", - "//tensorflow/cc:scope", - "//tensorflow/compiler/tf2xla/ops:sendrecv_ops", + "//tensorflow/compiler/tf2xla/ops:xla_ops", "//tensorflow/core:core_cpu", "//tensorflow/core:framework", "//tensorflow/core:lib", diff --git a/tensorflow/compiler/tf2xla/functionalize_control_flow.cc b/tensorflow/compiler/tf2xla/functionalize_control_flow.cc index 8b7beef83ec2ed0df780d6a9cb2a4bcf737d008b..23629d85aed316764b8ddd775c51c3890e5c16e3 100644 --- a/tensorflow/compiler/tf2xla/functionalize_control_flow.cc +++ 
b/tensorflow/compiler/tf2xla/functionalize_control_flow.cc @@ -870,6 +870,9 @@ FunctionalizeCond::DeterminePredicateSwitchOrder() { // Merge the inputs of the switch node with one another. This results in // predicates and control input residing in the same cluster. for (const Edge* e : n->in_edges()) { + // Only consider the data inputs to the Switch node. + if (e->IsControlEdge()) continue; + Node* src = e->src(); UnionFind* src_cluster = find_output_cluster(src); int src_cluster_depth = switch_depth[src_cluster->Get().representative]; @@ -901,6 +904,14 @@ FunctionalizeCond::DeterminePredicateSwitchOrder() { int src_depth = switch_depth[src_id]; if (!e->IsControlEdge() || new_switch_depth == src_depth) { if (src_depth != new_switch_depth) { + // TODO(b/77601805) remove this when outside_compilation supports + // control flow. + if (str_util::StrContains(src->name(), "outside_compilation") || + str_util::StrContains(n->name(), "outside_compilation")) { + return errors::InvalidArgument( + "outside_compilation is not yet supported within TensorFlow " + "control flow constructs b/77601805"); + } return errors::InvalidArgument( "Unable to functionalize control flow in graph: Operand ('", src->name(), "') and operator ('", n->name(), diff --git a/tensorflow/compiler/tf2xla/functionalize_control_flow_test.cc b/tensorflow/compiler/tf2xla/functionalize_control_flow_test.cc index bc7276c3afd5060d6faeceb4d479416299ecc5da..e494f42e8ed254ac0c7c7a23a13728d3f015e9d3 100644 --- a/tensorflow/compiler/tf2xla/functionalize_control_flow_test.cc +++ b/tensorflow/compiler/tf2xla/functionalize_control_flow_test.cc @@ -20,7 +20,7 @@ limitations under the License. 
#include "tensorflow/cc/ops/function_ops.h" #include "tensorflow/cc/ops/resource_variable_ops.h" #include "tensorflow/cc/ops/standard_ops.h" -#include "tensorflow/compiler/tf2xla/cc/ops/functional_ops.h" +#include "tensorflow/compiler/tf2xla/cc/ops/xla_ops.h" #include "tensorflow/compiler/tf2xla/test_util.h" #include "tensorflow/compiler/xla/status_macros.h" #include "tensorflow/core/common_runtime/function.h" diff --git a/tensorflow/compiler/tf2xla/kernels/BUILD b/tensorflow/compiler/tf2xla/kernels/BUILD index f1bc7d6af49a09f84ef251eaa1c3d684792d0c1e..00fd08b1a0750739445a124adc7ccf436a4a9b71 100644 --- a/tensorflow/compiler/tf2xla/kernels/BUILD +++ b/tensorflow/compiler/tf2xla/kernels/BUILD @@ -21,6 +21,7 @@ tf_kernel_library( "cast_op.cc", "categorical_op.cc", "cholesky_op.cc", + "clip_by_value_op.cc", "concat_op.cc", "const_op.cc", "conv_ops.cc", @@ -29,6 +30,7 @@ tf_kernel_library( "cwise_ops.h", "depthtospace_op.cc", "diag_op.cc", + "dynamic_slice_ops.cc", "dynamic_stitch_op.cc", "elu_op.cc", "extract_image_patches_op.cc", @@ -56,6 +58,7 @@ tf_kernel_library( "pooling_ops.cc", "quantize_and_dequantize_op.cc", "random_ops.cc", + "reduce_window_op.cc", "reduction_ops.cc", "reduction_ops.h", "reduction_ops_common.cc", @@ -103,7 +106,7 @@ tf_kernel_library( "//tensorflow/compiler/tf2xla/lib:triangular_solve", "//tensorflow/compiler/tf2xla/lib:util", "//tensorflow/compiler/tf2xla/lib:while_loop", - "//tensorflow/compiler/tf2xla/ops:sendrecv_ops", + "//tensorflow/compiler/tf2xla/ops:xla_ops", "//tensorflow/compiler/xla:array4d", "//tensorflow/compiler/xla:literal_util", "//tensorflow/compiler/xla:shape_util", @@ -146,7 +149,7 @@ tf_kernel_library( deps = [ "//tensorflow/compiler/tf2xla:common", "//tensorflow/compiler/tf2xla:xla_compiler", - "//tensorflow/compiler/tf2xla/ops:functional_ops", + "//tensorflow/compiler/tf2xla/ops:xla_ops", "//tensorflow/compiler/xla:literal_util", "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/core:framework", @@ 
-162,7 +165,7 @@ tf_kernel_library( deps = [ "//tensorflow/compiler/tf2xla:common", "//tensorflow/compiler/tf2xla:xla_compiler", - "//tensorflow/compiler/tf2xla/ops:functional_ops", + "//tensorflow/compiler/tf2xla/ops:xla_ops", "//tensorflow/compiler/xla:literal_util", "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/core:framework", @@ -171,6 +174,23 @@ tf_kernel_library( ], ) +# Kernels that have a dummy (no-op) implementation. +tf_kernel_library( + name = "xla_dummy_ops", + srcs = [ + "assert_op.cc", + "check_numerics_op.cc", + ], + deps = [ + "//tensorflow/compiler/tf2xla:xla_compiler", + "//tensorflow/core:array_ops_op_lib", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:logging_ops_op_lib", + ], + alwayslink = 1, +) + # Kernels that only work on CPU, because they use XLA custom calls. # Only link this when using the CPU backend for XLA. tf_kernel_library( diff --git a/tensorflow/compiler/tf2xla/kernels/assert_op.cc b/tensorflow/compiler/tf2xla/kernels/assert_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..af4ab5e8ef6e268226edc90515706405ac36858c --- /dev/null +++ b/tensorflow/compiler/tf2xla/kernels/assert_op.cc @@ -0,0 +1,49 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/compiler/tf2xla/xla_op_kernel.h" +#include "tensorflow/compiler/tf2xla/xla_op_registry.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/mutex.h" + +namespace tensorflow { + +namespace { + +// XLA kernel for Assert: a no-op that logs a warning (at most 20 times). +class AssertOp : public XlaOpKernel { + public: + explicit AssertOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {} + ~AssertOp() override {} + + void Compile(XlaOpKernelContext* ctx) override { + static mutex mu(tensorflow::LINKER_INITIALIZED); + static int log_counter = 0; + + mutex_lock l(mu); + if (log_counter < 20) { + ++log_counter; + LOG(WARNING) << "Ignoring Assert operator " << name(); + } + } + + private: + TF_DISALLOW_COPY_AND_ASSIGN(AssertOp); +}; + +REGISTER_XLA_OP(Name("Assert"), AssertOp); + +} // anonymous namespace +} // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/kernels/check_numerics_op.cc b/tensorflow/compiler/tf2xla/kernels/check_numerics_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..6061e822d8d9c6c807a63aad4e9e9526a49e456c --- /dev/null +++ b/tensorflow/compiler/tf2xla/kernels/check_numerics_op.cc @@ -0,0 +1,50 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License.
+==============================================================================*/ + +#include "tensorflow/compiler/tf2xla/xla_op_kernel.h" +#include "tensorflow/compiler/tf2xla/xla_op_registry.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/mutex.h" + +namespace tensorflow { +namespace { + +class CheckNumericsOp : public XlaOpKernel { + public: + explicit CheckNumericsOp(OpKernelConstruction* context) + : XlaOpKernel(context) {} + + void Compile(XlaOpKernelContext* ctx) override { + // TODO(b/32223192): add a real implementation of CheckNumerics + { + static mutex mu(tensorflow::LINKER_INITIALIZED); + static int log_counter = 0; + mutex_lock l(mu); + if (log_counter < 20) { + ++log_counter; + LOG(WARNING) << "Ignoring CheckNumerics operator " << name(); + } + } + ctx->SetOutput(0, ctx->Input(0)); + } + + private: + TF_DISALLOW_COPY_AND_ASSIGN(CheckNumericsOp); +}; + +REGISTER_XLA_OP(Name("CheckNumerics"), CheckNumericsOp); + +} // anonymous namespace +} // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/kernels/clip_by_value_op.cc b/tensorflow/compiler/tf2xla/kernels/clip_by_value_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..fdf75be7b1156540d762e3bc04a51f2478f00f46 --- /dev/null +++ b/tensorflow/compiler/tf2xla/kernels/clip_by_value_op.cc @@ -0,0 +1,61 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/compiler/tf2xla/xla_op_kernel.h" +#include "tensorflow/compiler/tf2xla/xla_op_registry.h" +#include "tensorflow/core/framework/tensor_shape.h" + +namespace tensorflow { +namespace { + +class ClipByValueOp : public XlaOpKernel { + public: + explicit ClipByValueOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {} + + void Compile(XlaOpKernelContext* ctx) override { + const TensorShape shape = ctx->InputShape(0); + const TensorShape min_shape = ctx->InputShape(1); + const TensorShape max_shape = ctx->InputShape(2); + + xla::ComputationBuilder* builder = ctx->builder(); + auto input = ctx->Input(0); + auto min = ctx->Input(1); + auto max = ctx->Input(2); + + auto shape_error = [&]() -> tensorflow::Status { + return errors::InvalidArgument( + "clip_value_min and clip_value_max must be either of " + "the same shape as input, or a scalar. ", + "Input shape: ", shape.DebugString(), + " clip_value_min shape: ", min_shape.DebugString(), + " clip_value_max shape: ", max_shape.DebugString()); + }; + + if (shape != min_shape) { + OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(min_shape), shape_error()); + min = builder->Broadcast(min, shape.dim_sizes()); + } + if (shape != max_shape) { + OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(max_shape), shape_error()); + max = builder->Broadcast(max, shape.dim_sizes()); + } + ctx->SetOutput(0, builder->Clamp(min, input, max)); + } +}; + +REGISTER_XLA_OP(Name("ClipByValue"), ClipByValueOp); + +} // namespace +} // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/kernels/dynamic_slice_ops.cc b/tensorflow/compiler/tf2xla/kernels/dynamic_slice_ops.cc new file mode 100644 index 0000000000000000000000000000000000000000..800ef5ab98d70ad822c6efffb33db28b46ae50fe --- /dev/null +++ b/tensorflow/compiler/tf2xla/kernels/dynamic_slice_ops.cc @@ -0,0 +1,69 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include + +#include "tensorflow/compiler/tf2xla/shape_util.h" +#include "tensorflow/compiler/tf2xla/xla_op_kernel.h" +#include "tensorflow/compiler/tf2xla/xla_op_registry.h" +#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/core/framework/op_kernel.h" + +#include "tensorflow/compiler/tf2xla/type_util.h" +#include "tensorflow/compiler/tf2xla/xla_helpers.h" +#include "tensorflow/core/framework/kernel_def_builder.h" + +namespace tensorflow { +namespace { + +class DynamicUpdateSliceOp : public XlaOpKernel { + public: + explicit DynamicUpdateSliceOp(OpKernelConstruction* context) + : XlaOpKernel(context) {} + + void Compile(XlaOpKernelContext* ctx) override { + VLOG(3) << "DynamicUpdateSliceOp::Compile"; + + DataType index_type = input_type(2); + OP_REQUIRES(ctx, index_type == DT_INT32 || index_type == DT_INT64, + errors::InvalidArgument("index must be int32 or int64")); + + const TensorShape input_shape = ctx->InputShape(0); + const TensorShape update_shape = ctx->InputShape(1); + const TensorShape index_shape = ctx->InputShape(2); + + OP_REQUIRES( + ctx, + TensorShapeUtils::IsVector(index_shape) && + index_shape.num_elements() == input_shape.dims(), + errors::InvalidArgument("index must be a vector with length equal to " + "the number of input dimensions")); + OP_REQUIRES( + ctx, input_shape.dims() == update_shape.dims(), + 
errors::InvalidArgument("input and update must have the same rank," + " input shape is ", + input_shape.DebugString(), "; update shape is ", + update_shape.DebugString())); + + xla::ComputationDataHandle result = ctx->builder()->DynamicUpdateSlice( + ctx->Input(0), ctx->Input(1), ctx->Input(2)); + ctx->SetOutput(0, result); + } +}; + +REGISTER_XLA_OP(Name("XlaDynamicUpdateSlice"), DynamicUpdateSliceOp); + +} // namespace +} // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/kernels/gather_op.cc b/tensorflow/compiler/tf2xla/kernels/gather_op.cc index 7945c05af40df21a798a2cff51fe7f8e935793f6..0b79cb0916ee8a7d0e26c5dc12557639336f8ab1 100644 --- a/tensorflow/compiler/tf2xla/kernels/gather_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/gather_op.cc @@ -29,52 +29,54 @@ namespace tensorflow { Status XlaGather(const xla::ComputationDataHandle& input, const TensorShape& input_shape, const xla::ComputationDataHandle& indices, - TensorShape indices_shape, int64 axis, bool indices_are_nd, - DataType dtype, DataType index_type, + const TensorShape& indices_shape, int64 axis, + bool indices_are_nd, DataType dtype, DataType index_type, xla::ComputationBuilder* builder, xla::ComputationDataHandle* gather_output) { + // There is no deep reason why we need this precondition, but this is the only + // combination that is used and tested today. + CHECK(!indices_are_nd || axis == 0); + + // num_index_dims is the number of components in each index in the indices + // tensor. + // + // num_indices is the total number of (n dimensional or scalar) indices in the + // indices tensor. + // // If the indices are N-dimensional, then the minor dimension of indices // should be of size N and correspond to the N indices. 
- int64 num_index_dims = 1; + int64 num_index_dims; + int64 num_indices = 1; if (indices_are_nd) { CHECK_GE(indices_shape.dims(), 1); num_index_dims = indices_shape.dim_size(indices_shape.dims() - 1); - indices_shape.RemoveLastDims(1); + for (int64 i = 0, e = indices_shape.dims() - 1; i < e; i++) { + num_indices *= indices_shape.dim_size(i); + } + } else { + num_index_dims = 1; + for (int64 i = 0, e = indices_shape.dims(); i < e; i++) { + num_indices *= indices_shape.dim_size(i); + } } - // Although the indices Tensor is flattened into rank 1 during the lookup, - // and each scalar entry is used as an index into the first dimension of the - // input, the output is returned with shape: - // input.shape[:axis] + indices.shape + input.shape[axis+1:] - - const int64 num_indices = indices_shape.num_elements(); - TensorShape input_shape_pre_axis(input_shape); - input_shape_pre_axis.RemoveDimRange(axis, input_shape.dims()); - TensorShape input_shape_post_axis(input_shape); - input_shape_post_axis.RemoveDimRange(0, axis + num_index_dims); - // Each slice of the input tensor has shape: - // [, 1, ..., 1, ] - TensorShape slice_shape(input_shape); - for (int64 i = 0; i < num_index_dims; ++i) { - slice_shape.set_dim(axis + i, 1); - } + // Degenerate case: empty indices. 
+ if (num_indices == 0) { + TensorShape input_shape_pre_axis{input_shape}; + input_shape_pre_axis.RemoveDimRange(axis, input_shape.dims()); + TensorShape input_shape_post_axis{input_shape}; + input_shape_post_axis.RemoveDimRange(0, axis + num_index_dims); - TensorShape loop_out_shape; - loop_out_shape.AppendShape(input_shape_pre_axis); - loop_out_shape.AddDim(num_indices); - loop_out_shape.AppendShape(input_shape_post_axis); - TensorShape loop_out_slice_shape; - loop_out_slice_shape.AppendShape(input_shape_pre_axis); - loop_out_slice_shape.AddDim(1); - loop_out_slice_shape.AppendShape(input_shape_post_axis); + TensorShape indices_shape_no_index_vectors{indices_shape}; + if (indices_are_nd) { + indices_shape_no_index_vectors.RemoveLastDims(1); + } - TensorShape out_shape; - out_shape.AppendShape(input_shape_pre_axis); - out_shape.AppendShape(indices_shape); - out_shape.AppendShape(input_shape_post_axis); + TensorShape out_shape; + out_shape.AppendShape(input_shape_pre_axis); + out_shape.AppendShape(indices_shape_no_index_vectors); + out_shape.AppendShape(input_shape_post_axis); - // Degenerate case: empty indices. - if (num_indices == 0) { *gather_output = builder->Broadcast(XlaHelpers::Zero(builder, dtype), out_shape.dim_sizes()); return Status::OK(); @@ -88,76 +90,61 @@ Status XlaGather(const xla::ComputationDataHandle& input, } } - // Flatten the major dimensions of indices into a single dimension for ease of - // iteration. If there is an axis dimension, we must leave it alone. - std::vector flat_indices_shape = {num_indices}; - if (indices_are_nd) { - flat_indices_shape.push_back(num_index_dims); - } - - // Specify the shape of the loop-carried Tensor tuple. - - // Construct the initial values of the loop-carried Tensors. 
- auto flat_indices = builder->Reshape(indices, flat_indices_shape); - auto init_out = builder->Broadcast(XlaHelpers::Zero(builder, dtype), - loop_out_shape.dim_sizes()); - auto init = {input, flat_indices, init_out}; - - // Construct the while loop body's function. The implementation of gather is: - // for i in range(num_indices): - // index = dynamic-slice(indices, i) - // xi = dynamic-slice(input, index) - // output = dynamic-update-slice(output, xi, i) - auto body_fn = [&](xla::ComputationDataHandle i, - gtl::ArraySlice loop_vars, - xla::ComputationBuilder* bodyb) { - auto input = loop_vars[0]; - auto indices = loop_vars[1]; - auto output = loop_vars[2]; - - auto zero_index = XlaHelpers::Zero(bodyb, index_type); - - // Slice the i-th index from the indices array. - xla::ComputationDataHandle index; - auto indices_offset = bodyb->Reshape(i, {1}); - if (indices_are_nd) { - // Slice out the entire nd index, if applicable. - indices_offset = bodyb->Pad(indices_offset, zero_index, - xla::MakeEdgePaddingConfig({{0, 1}})); - index = bodyb->DynamicSlice(indices, indices_offset, {1, num_index_dims}); - index = bodyb->Collapse(index, {0, 1}); + // Example of a 1-D gather with axis=1, pulling two [3,1] tensors out of a + // tensor of shape [3,3]. + // + // operand = s32[3,3] parameter(0) + // indices = s32[2] parameter(1) + // gather = s32[3,2] gather(operand, indices), + // output_window_dims={0}, + // elided_window_dims={1}, + // gather_dims_to_operand_dims={1}, + // index_vector_dim=1, + // window_bounds={3, 1} + // + // + // Example of an N-D gather pulling out slices of shape [1,1,2] out of a + // tensor of shape [3,3,2]. 
+ // + // operand = s32[3,3,2] parameter(0) + // indices = s32[2,2] parameter(1) + // gather = s32[2,2] gather(operand, indices), + // output_window_dims={1}, + // elided_window_dims={0,1}, + // gather_dims_to_operand_dims={0,1}, + // index_vector_dim=0, + // window_bounds={1,1,2} + + xla::GatherDimensionNumbers dim_numbers; + std::vector window_bounds; + window_bounds.reserve(input_shape.dims()); + for (int64 i = 0; i < input_shape.dims(); i++) { + int64 window_bound; + if (axis <= i && i < (axis + num_index_dims)) { + dim_numbers.add_elided_window_dims(i); + window_bound = 1; } else { - index = bodyb->DynamicSlice(indices, indices_offset, {1}); + window_bound = input_shape.dim_size(i); + } + + window_bounds.push_back(window_bound); + + if (i < axis) { + dim_numbers.add_output_window_dims(i); + } else if (i >= (axis + num_index_dims)) { + int64 indices_rank = + indices_are_nd ? (indices_shape.dims() - 1) : indices_shape.dims(); + dim_numbers.add_output_window_dims(i + indices_rank - num_index_dims); } + } + + dim_numbers.set_index_vector_dim(indices_are_nd ? (indices_shape.dims() - 1) + : indices_shape.dims()); + for (int64 i = axis; i < axis + num_index_dims; i++) { + dim_numbers.add_gather_dims_to_operand_dims(i); + } - // Slice the corresponding data from the input array. - auto start_indices = bodyb->Pad( - index, zero_index, - xla::MakeEdgePaddingConfig( - {{input_shape_pre_axis.dims(), input_shape_post_axis.dims()}})); - auto slice_i = bodyb->Reshape( - bodyb->DynamicSlice(input, start_indices, slice_shape.dim_sizes()), - loop_out_slice_shape.dim_sizes()); - - // Construct the index into the output Tensor 0, ..., , 0, ... 
- std::vector out_index_vals( - loop_out_shape.dims(), bodyb->Reshape(zero_index, {1})); - out_index_vals[input_shape_pre_axis.dims()] = bodyb->Reshape(i, {1}); - auto out_index = bodyb->ConcatInDim(out_index_vals, 0); - - // Update the output Tensor - auto updated_output = bodyb->DynamicUpdateSlice(output, slice_i, out_index); - - return std::vector{input, indices, - updated_output}; - }; - - // Construct the While loop, extract and reshape the output. - xla::PrimitiveType ptype; - TF_RETURN_IF_ERROR(DataTypeToPrimitiveType(index_type, &ptype)); - TF_ASSIGN_OR_RETURN(auto outputs, XlaForEachIndex(num_indices, ptype, body_fn, - init, "gather", builder)); - *gather_output = builder->Reshape(outputs[2], out_shape.dim_sizes()); + *gather_output = builder->Gather(input, indices, dim_numbers, window_bounds); return Status::OK(); } diff --git a/tensorflow/compiler/tf2xla/kernels/gather_op_helpers.h b/tensorflow/compiler/tf2xla/kernels/gather_op_helpers.h index bd8b92c22d71fe89ab8951ec79f411feef6505e3..f9376f0eabdc0f0c565eb4b9f86425de96b5aa22 100644 --- a/tensorflow/compiler/tf2xla/kernels/gather_op_helpers.h +++ b/tensorflow/compiler/tf2xla/kernels/gather_op_helpers.h @@ -36,8 +36,8 @@ namespace tensorflow { Status XlaGather(const xla::ComputationDataHandle& input, const TensorShape& input_shape, const xla::ComputationDataHandle& indices, - TensorShape indices_shape, int64 axis, bool indices_are_nd, - DataType dtype, DataType index_type, + const TensorShape& indices_shape, int64 axis, + bool indices_are_nd, DataType dtype, DataType index_type, xla::ComputationBuilder* builder, xla::ComputationDataHandle* gather_output); diff --git a/tensorflow/compiler/tf2xla/kernels/reduce_window_op.cc b/tensorflow/compiler/tf2xla/kernels/reduce_window_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..cb144bea9e429b7c8bcc3d07f688ed6a254c3be0 --- /dev/null +++ b/tensorflow/compiler/tf2xla/kernels/reduce_window_op.cc @@ -0,0 +1,135 @@ +/* Copyright 2018 The 
TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/tf2xla/kernels/while_op.h" + +#include "tensorflow/compiler/tf2xla/shape_util.h" +#include "tensorflow/compiler/tf2xla/xla_compiler.h" +#include "tensorflow/compiler/tf2xla/xla_op_kernel.h" +#include "tensorflow/compiler/tf2xla/xla_op_registry.h" +#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/core/framework/function.h" +#include "tensorflow/core/framework/op_kernel.h" + +namespace tensorflow { +namespace { + +class ReduceWindowOp : public XlaOpKernel { + public: + explicit ReduceWindowOp(OpKernelConstruction* context) + : XlaOpKernel(context) { + OP_REQUIRES_OK(context, context->GetAttr("computation", &computation_)); + OP_REQUIRES_OK(context, + context->GetAttr("window_dimensions", &window_dimensions_)); + OP_REQUIRES_OK(context, + context->GetAttr("window_strides", &window_strides_)); + OP_REQUIRES_OK(context, context->GetAttr("padding_low", &padding_low_)); + OP_REQUIRES_OK(context, context->GetAttr("padding_high", &padding_high_)); + } + + void Compile(XlaOpKernelContext* context) override { + const TensorShape input_shape = context->InputShape(0); + const DataType dtype = context->input_type(0); + + const int rank = input_shape.dims(); + OP_REQUIRES(context, rank == window_dimensions_.size(), + errors::InvalidArgument( + "The size of 
window_dimensions must be equal to the input " + "rank (", + window_dimensions_.size(), " vs. ", rank, ")")); + OP_REQUIRES(context, rank == window_strides_.size(), + errors::InvalidArgument( + "The size of window_strides must be equal to the input " + "rank (", + window_strides_.size(), " vs. ", rank, ")")); + OP_REQUIRES(context, rank == padding_low_.size(), + errors::InvalidArgument( + "The size of padding_low must be equal to the input " + "rank (", + padding_low_.size(), " vs. ", rank, ")")); + OP_REQUIRES(context, rank == padding_high_.size(), + errors::InvalidArgument( + "The size of padding_high must be equal to the input " + "rank (", + padding_high_.size(), " vs. ", rank, ")")); + + xla::ComputationBuilder* builder = context->builder(); + + // Build the reducer function. + XlaCompiler::Argument reducer_arg; + reducer_arg.kind = XlaCompiler::Argument::kParameter; + reducer_arg.type = dtype; + reducer_arg.shape = TensorShape(); + + XlaCompiler::CompileOptions compile_options; + compile_options.use_tuple_arg = false; + compile_options.resolve_compile_time_constants = false; + compile_options.is_entry_computation = false; + XlaCompiler::CompilationResult reducer; + OP_REQUIRES_OK(context, context->compiler()->CompileFunction( + compile_options, *computation_, + {reducer_arg, reducer_arg}, &reducer)); + + xla::Shape scalar_shape; + OP_REQUIRES_OK(context, + TensorShapeToXLAShape(dtype, TensorShape(), &scalar_shape)); + OP_REQUIRES(context, + xla::ShapeUtil::Compatible( + reducer.xla_output_shape, + xla::ShapeUtil::MakeTupleShape({scalar_shape})), + errors::InvalidArgument( + "Invalid output shape of ReduceWindow reducer. Expected ", + xla::ShapeUtil::HumanString(scalar_shape), " got ", + xla::ShapeUtil::HumanString(reducer.xla_output_shape))); + + // Wraps the reducer in a computation that unpacks the output tuple. 
+ xla::Computation wrapper; + { + std::unique_ptr cb = + builder->CreateSubBuilder("wrapper"); + auto x = cb->Parameter(0, scalar_shape, "x"); + auto y = cb->Parameter(1, scalar_shape, "y"); + auto outputs = cb->Call(*reducer.computation, {x, y}); + cb->GetTupleElement(outputs, 0); + xla::StatusOr result = cb->Build(); + OP_REQUIRES_OK(context, result.status()); + wrapper = std::move(result.ValueOrDie()); + } + + std::vector> padding(rank); + for (int i = 0; i < rank; ++i) { + padding[i] = {padding_low_[i], padding_high_[i]}; + } + + xla::ComputationDataHandle output = builder->ReduceWindowWithGeneralPadding( + context->Input(0), context->Input(1), wrapper, window_dimensions_, + window_strides_, padding); + context->SetOutput(0, output); + } + + private: + const NameAttrList* computation_; + std::vector window_dimensions_; + std::vector window_strides_; + std::vector padding_low_; + std::vector padding_high_; + + TF_DISALLOW_COPY_AND_ASSIGN(ReduceWindowOp); +}; + +REGISTER_XLA_OP(Name("XlaReduceWindow"), ReduceWindowOp); + +} // namespace +} // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/kernels/sendrecv_ops.cc b/tensorflow/compiler/tf2xla/kernels/sendrecv_ops.cc index 5172781c0d05b6682fe92086654e3b86961949ee..d079b89861817a5639ac72b5ee49d76cb4506ae8 100644 --- a/tensorflow/compiler/tf2xla/kernels/sendrecv_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/sendrecv_ops.cc @@ -48,7 +48,7 @@ void SendOp::Compile(XlaOpKernelContext* ctx) { ctx->builder()->Send(ctx->Input(0), channel); } -REGISTER_XLA_OP(Name("_XLASend"), SendOp); +REGISTER_XLA_OP(Name("XlaSend"), SendOp); class RecvOp : public XlaOpKernel { public: @@ -68,7 +68,7 @@ RecvOp::RecvOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) { TensorShape tensor_shape; DataType dtype; OP_REQUIRES_OK(ctx, ctx->GetAttr("shape", &tensor_shape)); - OP_REQUIRES_OK(ctx, ctx->GetAttr("T", &dtype)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("dtype", &dtype)); OP_REQUIRES_OK(ctx, TensorShapeToXLAShape(dtype, 
tensor_shape, &shape_)); } @@ -79,7 +79,7 @@ void RecvOp::Compile(XlaOpKernelContext* ctx) { ctx->SetOutput(0, ctx->builder()->Recv(shape_, channel)); } -REGISTER_XLA_OP(Name("_XLARecv"), RecvOp); +REGISTER_XLA_OP(Name("XlaRecv"), RecvOp); } // namespace } // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/lib/BUILD b/tensorflow/compiler/tf2xla/lib/BUILD index 344773c8c5f8e1a552d585d0317c62c56d9f9d46..fde1977c1b1834156b87b4fb3516f7bf8df435d7 100644 --- a/tensorflow/compiler/tf2xla/lib/BUILD +++ b/tensorflow/compiler/tf2xla/lib/BUILD @@ -39,6 +39,7 @@ cc_library( ":batch_dot", ":triangular_solve", ":util", + ":while_loop", "//tensorflow/compiler/xla:literal_util", "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla:status_macros", @@ -126,6 +127,30 @@ cc_library( ], ) +xla_test( + name = "util_test", + srcs = ["util_test.cc"], + deps = [ + ":batch_dot", + ":util", + "//tensorflow/compiler/xla:array2d", + "//tensorflow/compiler/xla:literal_util", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla:test", + "//tensorflow/compiler/xla:types", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/compiler/xla/client:global_data", + "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/tests:client_library_test_base", + "//tensorflow/compiler/xla/tests:literal_test_util", + "//tensorflow/compiler/xla/tests:xla_internal_test_main", + "//tensorflow/core:lib", + "//tensorflow/core:test", + ], +) + cc_library( name = "while_loop", srcs = ["while_loop.cc"], diff --git a/tensorflow/compiler/tf2xla/lib/cholesky.cc b/tensorflow/compiler/tf2xla/lib/cholesky.cc index e795701181dd80a2ff544743d513bffd52fd2399..203365e2ab07e0da1abfac5452a8ec41a4ddf406 100644 --- a/tensorflow/compiler/tf2xla/lib/cholesky.cc +++ b/tensorflow/compiler/tf2xla/lib/cholesky.cc @@ -1,4 +1,4 @@ -/* Copyright 2017 The TensorFlow 
Authors. All Rights Reserved. +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -21,6 +21,7 @@ limitations under the License. #include "tensorflow/compiler/tf2xla/lib/batch_dot.h" #include "tensorflow/compiler/tf2xla/lib/triangular_solve.h" #include "tensorflow/compiler/tf2xla/lib/util.h" +#include "tensorflow/compiler/tf2xla/lib/while_loop.h" #include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/status_macros.h" @@ -31,68 +32,122 @@ namespace tensorflow { namespace { +// The Cholesky–Banachiewicz algorithm. See +// https://en.wikipedia.org/wiki/Cholesky_decomposition#The_Cholesky–Banachiewicz_and_Cholesky–Crout_algorithms +// for a description. +// // def cholesky_unblocked(a): // assert len(a.shape) == 2 and a.shape[-2] == a.shape[-1] // n = a.shape[-2] // l = np.zeros_like(a) // for j in xrange(n): -// r = l[..., j, :j] -// l[..., j, j] = np.sqrt(a[..., j, j] - np.dot(r, r)) -// l[..., j+1:, j] = (a[..., j+1:, j] - np.dot(l[..., j+1:, :j], -// np.transpose(r))) / l[..., j, j] +// row = l[..., j, :j] +// row_t = np.swapaxes(row, -1, -2) +// l[..., j, j] = np.sqrt(a[..., j, j] - np.dot(row, row_t)) +// l[..., j+1:, j] = (a[..., j+1:, j] - np.dot(l[..., j+1:, :j], row_t)) / +// l[..., j, j] // return l xla::StatusOr CholeskyUnblocked( xla::ComputationBuilder* builder, const xla::ComputationDataHandle& a) { - TF_ASSIGN_OR_RETURN(std::unique_ptr shape, builder->GetShape(a)); - xla::ComputationDataHandle l = Zeros(builder, *shape); - const int64 n = xla::ShapeUtil::GetDimension(*shape, -2); - for (int j = 0; j < n; ++j) { - // Picture of block structure: - // ... \ - // \ - // -- r -- d - // |\ - // B c \ - // | \ - // | ... 
- // - // ^ - // column j - TF_ASSIGN_OR_RETURN(auto d, - SliceInMinorDims(builder, a, {j, j}, {j + 1, j + 1})); - TF_ASSIGN_OR_RETURN(auto c, - SliceInMinorDims(builder, a, {j + 1, j}, {n, j + 1})); - xla::ComputationDataHandle new_d_squared = d; - xla::ComputationDataHandle br; - if (j > 0) { - TF_ASSIGN_OR_RETURN(auto r, - SliceInMinorDims(builder, l, {j, 0}, {j + 1, j})); - TF_ASSIGN_OR_RETURN(auto b, - SliceInMinorDims(builder, l, {j + 1, 0}, {n, j})); - TF_ASSIGN_OR_RETURN(auto r_squared, - BatchDot(builder, r, r, /*transpose_x=*/false, - /*transpose_y=*/true, /*conjugate_x=*/false, - /*conjugate_y=*/false)); - new_d_squared = builder->Sub(new_d_squared, r_squared); + TF_ASSIGN_OR_RETURN(std::unique_ptr a_shape, + builder->GetShape(a)); + const int n_dims = xla::ShapeUtil::Rank(*a_shape); + const int64 n = xla::ShapeUtil::GetDimension(*a_shape, -1); + gtl::ArraySlice major_dims(xla::AsInt64Slice(a_shape->dimensions()), + /*pos=*/0, + /*len=*/n_dims - 2); - TF_ASSIGN_OR_RETURN(br, BatchDot(builder, b, r, /*transpose_x=*/false, - /*transpose_y=*/true, - /*conjugate_x=*/false, - /*conjugate_y=*/false)); - } - auto new_d_inv = builder->Pow( - new_d_squared, FloatLiteral(builder, shape->element_type(), -0.5)); - auto new_d = builder->Mul(new_d_inv, new_d_squared); - TF_ASSIGN_OR_RETURN(l, UpdateSliceInMinorDims(builder, l, new_d, {j, j})); + xla::ComputationDataHandle l = Zeros(builder, *a_shape); - if (j > 0) { - c = builder->Sub(c, br); + // Construct the for loop body to iterate over rows. 
+ auto body_fn = [&](xla::ComputationDataHandle i, + gtl::ArraySlice loop_vars, + xla::ComputationBuilder* body_builder) + -> xla::StatusOr> { + xla::Shape col_shape; + xla::Shape row_shape; + for (int64 d : major_dims) { + row_shape.add_dimensions(d); + col_shape.add_dimensions(d); } - auto new_c = builder->Mul(c, new_d_inv); - TF_ASSIGN_OR_RETURN(l, - UpdateSliceInMinorDims(builder, l, new_c, {j + 1, j})); - } - return l; + row_shape.add_dimensions(1); + row_shape.add_dimensions(n); + row_shape.set_element_type(a_shape->element_type()); + auto mask_zeros_row = Zeros(body_builder, row_shape); + + col_shape.add_dimensions(n); + col_shape.add_dimensions(1); + col_shape.set_element_type(a_shape->element_type()); + auto mask_zeros_col = Zeros(body_builder, col_shape); + + std::vector mask_vector(n); + std::iota(mask_vector.begin(), mask_vector.end(), 0); + auto mask_range = body_builder->ConstantR1(mask_vector); + auto mask_range_row = body_builder->Broadcast( + body_builder->Reshape(mask_range, {0}, {1, n}), major_dims); + auto mask_range_col = body_builder->Broadcast( + body_builder->Reshape(mask_range, {0}, {n, 1}), major_dims); + auto body_a = loop_vars[0]; + auto body_l = loop_vars[1]; + + // row = l[..., i, :i] + // select the whole i-th row, then mask out all columns past i-1 + auto zero = body_builder->ConstantR0(0); + TF_ASSIGN_OR_RETURN(auto l_i, DynamicSliceInMinorDims(body_builder, body_l, + {i, zero}, {1, n})); + auto row = body_builder->Select(body_builder->Ge(mask_range_row, i), + mask_zeros_row, l_i); + // a[..., i, i] + TF_ASSIGN_OR_RETURN(auto a_ii, DynamicSliceInMinorDims(body_builder, body_a, + {i, i}, {1, 1})); + // np.dot(row, np.swapaxes(row, -1, -2)) + xla::ComputationDataHandle diag_dot; + TF_ASSIGN_OR_RETURN(diag_dot, BatchDot(body_builder, row, row, + /*transpose_x=*/false, + /*transpose_y=*/true)); + // l[..., i, i] = np.sqrt(a[..., i, i] - np.dot(row, + // np.swapaxes(row, -1, -2))) + auto l_ii = body_builder->Pow( + 
body_builder->Sub(a_ii, diag_dot), + FloatLiteral(body_builder, a_shape->element_type(), 0.5)); + + // a[..., i+1:, i] + auto ip1 = body_builder->Add(i, body_builder->ConstantR0(1)); + // select the whole i-th column, then mask out all rows above i+1 + TF_ASSIGN_OR_RETURN( + auto a_0i, DynamicSliceInMinorDims(body_builder, body_a, {i}, {1})); + auto a_ip1i = body_builder->Select(body_builder->Le(mask_range_col, i), + mask_zeros_col, a_0i); + + // l[..., i+1:, i] = (a[..., i+1:, i] - np.dot(l[..., i+1:, :i], r.T)) / + // l[..., i, i] + // The columns in [i, n] are zeroed out in `row`, so we just have to + // zero out rows above i+1 after the BatchDot. np.dot(l[..., :, :i], + // r.T) + TF_ASSIGN_OR_RETURN(auto dot, BatchDot(body_builder, body_l, row, + /*transpose_x=*/false, + /*transpose_y=*/true)); + // np.dot(l[..., i+1:, :i], r.T) + auto dot_ip1 = body_builder->Select(body_builder->Le(mask_range_col, i), + mask_zeros_col, dot); + + auto col_update = + body_builder->Div(body_builder->Sub(a_ip1i, dot_ip1), l_ii); + TF_ASSIGN_OR_RETURN(body_l, DynamicUpdateSliceInMinorDims( + body_builder, body_l, col_update, {i})); + // Assign the diagonal after the rest of the column because otherwise the + // column assign will wrap around and overwrite the diagonal assign. + TF_ASSIGN_OR_RETURN(body_l, DynamicUpdateSliceInMinorDims( + body_builder, body_l, l_ii, {i, i})); + + return std::vector{body_a, body_l}; + }; + + TF_ASSIGN_OR_RETURN( + auto cholesky_while, + XlaForEachIndex(n, xla::S32, body_fn, {a, l}, "unblocked", builder)); + + return cholesky_while[1]; } } // namespace diff --git a/tensorflow/compiler/tf2xla/lib/cholesky.h b/tensorflow/compiler/tf2xla/lib/cholesky.h index e083a383be4be0d1b556b63214fe5f70323b4149..17da8d8b22d107701ce768ac945c1404df6d47e8 100644 --- a/tensorflow/compiler/tf2xla/lib/cholesky.h +++ b/tensorflow/compiler/tf2xla/lib/cholesky.h @@ -1,4 +1,4 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -29,7 +29,7 @@ namespace tensorflow { // the block size to use. // TODO(phawkins): check for negative values on the diagonal and return an // error, instead of silently yielding NaNs. -// TODO(mattjj): handle the complex Hermitian case +// TODO(znado): handle the complex Hermitian case xla::StatusOr Cholesky( xla::ComputationBuilder* builder, xla::ComputationDataHandle a, int64 block_size = 256); diff --git a/tensorflow/compiler/tf2xla/lib/util.cc b/tensorflow/compiler/tf2xla/lib/util.cc index f579669bbd852b514e021ce71d635f8ce5e4fe4d..31d823ca336039f691f2c16e37028c0de98b1ee5 100644 --- a/tensorflow/compiler/tf2xla/lib/util.cc +++ b/tensorflow/compiler/tf2xla/lib/util.cc @@ -1,4 +1,4 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -140,13 +140,47 @@ xla::StatusOr SliceInMinorDims( return builder->Slice(x, padded_start, padded_end, strides); } +std::vector PrependMajorDims(xla::ComputationBuilder* builder, + const gtl::ArraySlice& major_dims, + const gtl::ArraySlice& indices) { + std::vector output(indices.size() + major_dims.size()); + std::copy(major_dims.begin(), major_dims.end(), output.begin()); + std::copy(indices.begin(), indices.end(), output.begin() + major_dims.size()); + return output; +} + +xla::StatusOr DynamicSliceInMinorDims( + xla::ComputationBuilder* builder, const xla::ComputationDataHandle& x, + const std::vector& starts, + const gtl::ArraySlice& sizes) { + TF_ASSIGN_OR_RETURN(std::unique_ptr shape, builder->GetShape(x)); + const int64 n_dims = xla::ShapeUtil::Rank(*shape); + int64 n_minor_dims = starts.size(); + TF_RET_CHECK(n_minor_dims == sizes.size()); + TF_RET_CHECK(n_minor_dims <= n_dims); + gtl::ArraySlice major_dims(xla::AsInt64Slice(shape->dimensions()), + /*pos=*/0, + /*len=*/n_dims - sizes.size()); + TF_ASSIGN_OR_RETURN(auto padded_starts, + PrependZerosInMajorDims(builder, x, starts)); + auto padded_sizes = PrependMajorDims(builder, major_dims, sizes); + return builder->DynamicSlice(x, padded_starts, padded_sizes); +} + xla::StatusOr UpdateSlice( xla::ComputationBuilder* builder, const xla::ComputationDataHandle& x, const xla::ComputationDataHandle& update, gtl::ArraySlice start) { // TODO(phawkins): make int64 work on all backends, remove the int32 cast. 
std::vector start_as_int32(start.begin(), start.end()); - return builder->DynamicUpdateSlice( - x, update, builder->ConstantR1(start_as_int32)); + auto start_constant = builder->ConstantR1(start_as_int32); + TF_ASSIGN_OR_RETURN(std::unique_ptr shape, builder->GetShape(x)); + const int64 n_dims = xla::ShapeUtil::Rank(*shape); + TF_ASSIGN_OR_RETURN(std::unique_ptr start_constant_shape, + builder->GetShape(start_constant)); + const int64 start_length = + xla::ShapeUtil::GetDimension(*start_constant_shape, -1); + TF_RET_CHECK(start_length == n_dims); + return builder->DynamicUpdateSlice(x, update, start_constant); } xla::StatusOr UpdateSliceInMinorDims( @@ -162,6 +196,29 @@ xla::StatusOr UpdateSliceInMinorDims( return UpdateSlice(builder, x, update, padded_start); } +xla::StatusOr DynamicUpdateSliceInMinorDims( + xla::ComputationBuilder* builder, const xla::ComputationDataHandle& x, + const xla::ComputationDataHandle& update, + const std::vector& starts) { + TF_ASSIGN_OR_RETURN(auto padded_starts, + PrependZerosInMajorDims(builder, x, starts)); + return builder->DynamicUpdateSlice(x, update, padded_starts); +} + +xla::StatusOr PrependZerosInMajorDims( + xla::ComputationBuilder* builder, const xla::ComputationDataHandle& x, + const std::vector& starts) { + TF_ASSIGN_OR_RETURN(std::unique_ptr shape, builder->GetShape(x)); + const int64 n_dims = xla::ShapeUtil::Rank(*shape); + auto zero = builder->Reshape(builder->ConstantR0(0), {1}); + std::vector padded_starts(n_dims, zero); + for (int i = 0; i < starts.size(); ++i) { + padded_starts[n_dims - starts.size() + i] = + builder->Reshape(starts[i], {1}); + } + return builder->ConcatInDim(padded_starts, 0); +} + xla::StatusOr TransposeInMinorDims( xla::ComputationBuilder* builder, const xla::ComputationDataHandle& x) { TF_ASSIGN_OR_RETURN(std::unique_ptr shape, builder->GetShape(x)); diff --git a/tensorflow/compiler/tf2xla/lib/util.h b/tensorflow/compiler/tf2xla/lib/util.h index 
51f8baaf00bd8fd25baa1a87be8cb0089dfb22b5..b684123f1363cff9e6ac4314cc3a8ae7630cbdf3 100644 --- a/tensorflow/compiler/tf2xla/lib/util.h +++ b/tensorflow/compiler/tf2xla/lib/util.h @@ -1,4 +1,4 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -32,16 +32,39 @@ xla::ComputationDataHandle Zeros(xla::ComputationBuilder* builder, xla::ComputationDataHandle FloatLiteral(xla::ComputationBuilder* builder, xla::PrimitiveType type, double value); +// Makes a 1D tensor [0, ..., x, y] from two tensors x and y with zeros +// prepended until the array is length n_dims. +xla::ComputationDataHandle PrependZerosInMajorDims( + xla::ComputationBuilder* builder, + gtl::ArraySlice starts); + // Returns a integer scalar constant of 'type' with 'value'. // If 'type' is complex, returns a real value with zero imaginary component. xla::ComputationDataHandle IntegerLiteral(xla::ComputationBuilder* builder, xla::PrimitiveType type, int64 value); +// Builds a vector of zeros of length rank(x) with the last two values being +// those in `starts`. +xla::StatusOr PrependZerosInMajorDims( + xla::ComputationBuilder* builder, const xla::ComputationDataHandle& x, + const std::vector& starts); + // Performs a slice in the minor dimensions of a Tensor. xla::StatusOr SliceInMinorDims( xla::ComputationBuilder* builder, const xla::ComputationDataHandle& x, gtl::ArraySlice start, gtl::ArraySlice end); +// Builds a 1-d vector out of a concatenation of `major_dims` and `starts`. +std::vector PrependMajorDims(xla::ComputationBuilder* builder, + const gtl::ArraySlice& major_dims, + const gtl::ArraySlice& indices); + +// Performs a dynamic slice in the minor dimensions of a Tensor. 
+xla::StatusOr DynamicSliceInMinorDims( + xla::ComputationBuilder* builder, const xla::ComputationDataHandle& x, + const std::vector& starts, + const gtl::ArraySlice& sizes); + // Updates a slice of 'x', i.e., // x[start[0], ..., start[n]] = update xla::StatusOr UpdateSlice( @@ -54,6 +77,11 @@ xla::StatusOr UpdateSliceInMinorDims( xla::ComputationBuilder* builder, const xla::ComputationDataHandle& x, const xla::ComputationDataHandle& update, gtl::ArraySlice start); +xla::StatusOr DynamicUpdateSliceInMinorDims( + xla::ComputationBuilder* builder, const xla::ComputationDataHandle& x, + const xla::ComputationDataHandle& update, + const std::vector& starts); + // Transposes a stack of matrices `x` by swapping the last two dimensions. xla::StatusOr TransposeInMinorDims( xla::ComputationBuilder* builder, const xla::ComputationDataHandle& x); diff --git a/tensorflow/compiler/tf2xla/lib/util_test.cc b/tensorflow/compiler/tf2xla/lib/util_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..b6bd33af2e42a4ab93a22528fd49ef53c46bb479 --- /dev/null +++ b/tensorflow/compiler/tf2xla/lib/util_test.cc @@ -0,0 +1,145 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/compiler/tf2xla/lib/util.h" + +#include +#include +#include + +#include "tensorflow/compiler/tf2xla/lib/batch_dot.h" +#include "tensorflow/compiler/xla/array2d.h" +#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/literal_util.h" +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/compiler/xla/test.h" +#include "tensorflow/compiler/xla/tests/client_library_test_base.h" +#include "tensorflow/compiler/xla/tests/literal_test_util.h" +#include "tensorflow/compiler/xla/tests/test_macros.h" +#include "tensorflow/compiler/xla/types.h" +#include "tensorflow/core/lib/core/status_test_util.h" + +namespace tensorflow { +namespace { + +using UtilTest = xla::ClientLibraryTestBase; +using UtilLeftLookingTest = xla::ClientLibraryTestBase; + +xla::Array2D BValsRight() { + return {{1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10, 11, 12}}; +} + +xla::Array2D BValsLeft() { + return {{1, 2, 3}, {4, 5, 6}, {7, 8, 9}, {10, 11, 12}}; +} + +xla::Array2D AValsFull() { + return {{2, 0, 1, 2}, {3, 6, 0, 1}, {4, 7, 9, 0}, {5, 8, 10, 11}}; +} + +xla::Array3D BatchedAValsFull() { + return {{ + {2, 0, 1, 2}, + {3, 6, 0, 1}, + {4, 7, 9, 0}, + {5, 8, 10, 11}, + }, + { + {16, 24, 8, 12}, + {24, 61, 82, 48}, + {8, 82, 456, 106}, + {12, 48, 106, 62}, + }}; +} + +XLA_TEST_F(UtilTest, Simple2dLookup) { + xla::ComputationBuilder builder(client_, TestName()); + + xla::ComputationDataHandle a, x, y; + auto a_data = CreateR2Parameter(BValsRight(), 0, "a", &builder, &a); + auto x_data = CreateR0Parameter(2, 1, "x", &builder, &x); + auto y_data = CreateR0Parameter(1, 2, "y", &builder, &y); + auto result = DynamicSliceInMinorDims(&builder, a, {x, y}, {1, 1}); + TF_ASSERT_OK(result.status()); + + ComputeAndCompareR2(&builder, {{10}}, + {a_data.get(), x_data.get(), y_data.get()}, + xla::ErrorSpec(1e-2, 1e-2)); +} + +XLA_TEST_F(UtilTest, Simple3dLookup) { 
+ xla::ComputationBuilder builder(client_, TestName()); + + xla::ComputationDataHandle a, index; + auto a_data = + CreateR3Parameter(BatchedAValsFull(), 0, "a", &builder, &a); + auto index_data = CreateR0Parameter(1, 1, "index", &builder, &index); + + TF_ASSERT_OK_AND_ASSIGN( + auto l_index, + DynamicSliceInMinorDims(&builder, a, + {index, builder.ConstantR0(0)}, {1, 4})); + + ComputeAndCompareR3(&builder, {{{3, 6, 0, 1}}, {{24, 61, 82, 48}}}, + {a_data.get(), index_data.get()}); +} + +XLA_TEST_F(UtilTest, SimpleSliceUpdate) { + xla::ComputationBuilder builder(client_, TestName()); + + xla::ComputationDataHandle a, b, x, y; + auto a_data = CreateR2Parameter(AValsFull(), 0, "a", &builder, &a); + auto b_data = CreateR2Parameter({{9, 1, -10}}, 1, "b", &builder, &b); + auto x_data = CreateR0Parameter(2, 2, "x", &builder, &x); + auto y_data = CreateR0Parameter(1, 3, "y", &builder, &y); + + auto result = DynamicUpdateSliceInMinorDims(&builder, a, b, {x, y}); + TF_ASSERT_OK(result.status()); + + xla::Array2D expected( + {{{2, 0, 1, 2}, {3, 6, 0, 1}, {4, 9, 1, -10}, {5, 8, 10, 11}}}); + + ComputeAndCompareR2( + &builder, expected, + {a_data.get(), b_data.get(), x_data.get(), y_data.get()}); +} + +XLA_TEST_F(UtilTest, RowBatchDot) { + xla::ComputationBuilder builder(client_, TestName()); + + int n = 4; + + xla::ComputationDataHandle a, row, index; + auto a_data = + CreateR3Parameter(BatchedAValsFull(), 0, "a", &builder, &a); + auto row_data = CreateR3Parameter({{{9, 1, 0, 0}}, {{2, 4, 0, 0}}}, 1, + "row", &builder, &row); + // Select {{3, 6, 0, 1}, {24, 61, 82, 48}} out of BatchedAValsFull(). 
+ auto index_data = CreateR0Parameter(1, 2, "index", &builder, &index); + + TF_ASSERT_OK_AND_ASSIGN( + auto l_index, + DynamicSliceInMinorDims(&builder, a, + {index, builder.ConstantR0(0)}, {1, n})); + TF_ASSERT_OK_AND_ASSIGN( + auto dot, BatchDot(&builder, l_index, row, + /*transpose_x=*/false, /*transpose_y=*/true)); + + ComputeAndCompareR3(&builder, {{{33}}, {{292}}}, + {a_data.get(), row_data.get(), index_data.get()}); +} + +} // namespace +} // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/lib/while_loop.cc b/tensorflow/compiler/tf2xla/lib/while_loop.cc index 86c02ac2e65c12d3527c4022df0cc603e522ef7a..495d9c60780b0a728e8dbfb4537d33d92b4bb5b7 100644 --- a/tensorflow/compiler/tf2xla/lib/while_loop.cc +++ b/tensorflow/compiler/tf2xla/lib/while_loop.cc @@ -54,7 +54,6 @@ xla::StatusOr> XlaWhileLoop( auto result, condition_function(unpack_tuple(parameter, arity, cond_builder.get()), cond_builder.get())); - TF_RETURN_IF_ERROR(cond_builder->SetReturnValue(result)); } TF_ASSIGN_OR_RETURN(auto cond, cond_builder->Build()); diff --git a/tensorflow/compiler/tf2xla/ops/BUILD b/tensorflow/compiler/tf2xla/ops/BUILD index aeb743a6634673f2e8c4dee9ae1e5017944aae2c..bb9168fa358154f3db9dab87bacc9bf28dd16406 100644 --- a/tensorflow/compiler/tf2xla/ops/BUILD +++ b/tensorflow/compiler/tf2xla/ops/BUILD @@ -7,17 +7,13 @@ licenses(["notice"]) # Apache 2.0 load("//tensorflow:tensorflow.bzl", "tf_gen_op_wrapper_py") cc_library( - name = "functional_ops", - srcs = ["functional_ops.cc"], - deps = [ - "//tensorflow/core:framework", + name = "xla_ops", + srcs = [ + "dynamic_slice_ops.cc", + "functional_ops.cc", + "reduce_window_op.cc", + "sendrecv_ops.cc", ], - alwayslink = 1, -) - -cc_library( - name = "sendrecv_ops", - srcs = ["sendrecv_ops.cc"], deps = [ "//tensorflow/core:framework", ], @@ -25,17 +21,9 @@ cc_library( ) tf_gen_op_wrapper_py( - name = "gen_functional_ops", - out = "gen_functional_ops.py", - deps = [ - ":functional_ops", - ], -) - -tf_gen_op_wrapper_py( - name 
= "gen_sendrecv_ops", - out = "gen_sendrecv_ops.py", + name = "gen_xla_ops", + out = "gen_xla_ops.py", deps = [ - ":sendrecv_ops", + ":xla_ops", ], ) diff --git a/tensorflow/compiler/tf2xla/ops/dynamic_slice_ops.cc b/tensorflow/compiler/tf2xla/ops/dynamic_slice_ops.cc new file mode 100644 index 0000000000000000000000000000000000000000..d6c0edbb889b1751ac9d9d47d0c9534b543196ff --- /dev/null +++ b/tensorflow/compiler/tf2xla/ops/dynamic_slice_ops.cc @@ -0,0 +1,49 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/framework/common_shape_fns.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/shape_inference.h" + +namespace tensorflow { + +REGISTER_OP("XlaDynamicUpdateSlice") + .Input("input: T") + .Input("update: T") + .Input("indices: Tindices") + .Output("output: T") + .Attr("T: type") + .Attr("Tindices: {int32, int64}") + .SetShapeFn(shape_inference::UnchangedShape) + .Doc(R"doc( +Wraps the XLA DynamicUpdateSlice operator, documented at + https://www.tensorflow.org/performance/xla/operation_semantics#dynamicupdateslice +. + +XlaDynamicUpdateSlice generates a result which is the value of the `input` +operand, with a slice update overwritten at `indices`. The shape of `update` +determines the shape of the sub-array of the result which is updated. 
The shape +of indices must be rank == 1, with dimension size equal to the rank of `input`. + +Handling of out-of-bounds slice indices is implementation-defined. + +input: A `Tensor` of type T. +indices: A vector of indices into `input`. Must have length equal to the rank of + `input`. +update: A `Tensor` of type T. Same rank as `input`. +output: A `Tensor` of type T. +)doc"); + +} // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/ops/reduce_window_op.cc b/tensorflow/compiler/tf2xla/ops/reduce_window_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..d9af982adc090ea78c711fd4656ba429c53b18c9 --- /dev/null +++ b/tensorflow/compiler/tf2xla/ops/reduce_window_op.cc @@ -0,0 +1,45 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/framework/common_shape_fns.h" +#include "tensorflow/core/framework/op.h" + +namespace tensorflow { + +REGISTER_OP("XlaReduceWindow") + .Input("input: T") + .Input("init_value: T") + .Attr("T: numbertype") + .Attr("computation: func") + .Attr("window_dimensions: list(int)") + .Attr("window_strides: list(int)") + .Attr("padding_low: list(int)") + .Attr("padding_high: list(int)") + .Output("output: T") + .SetShapeFn(shape_inference::UnknownShape) + .Doc(R"doc( +Wraps the XLA ReduceWindow operator, documented at + https://www.tensorflow.org/performance/xla/operation_semantics#reducewindow . + +input: the input tensor +init_value: a scalar representing the initial value for the reduction +computation: a reducer function to apply +window_dimensions: the shape of the window +window_strides: the inter-window strides +padding_low: the padding to apply at the start of each input dimensions +padding_high: the padding to apply at the end of each input dimension. +)doc"); + +} // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/ops/sendrecv_ops.cc b/tensorflow/compiler/tf2xla/ops/sendrecv_ops.cc index 4b41c16a8b3fdc0c3412c76d29d3ec2b7bdfd0aa..7ec7b50e905a6cbdecea4543dcb87322b5a7e844 100644 --- a/tensorflow/compiler/tf2xla/ops/sendrecv_ops.cc +++ b/tensorflow/compiler/tf2xla/ops/sendrecv_ops.cc @@ -18,22 +18,24 @@ limitations under the License. namespace tensorflow { -REGISTER_OP("_XLASend") +REGISTER_OP("XlaSend") .Input("tensor: T") .Attr("T: type") .Attr("tensor_name: string") .SetIsStateful() .SetShapeFn(shape_inference::UnknownShape) .Doc(R"doc( -Sends the named tensor to another XLA computation. +Sends the named tensor to another XLA computation. Wraps the XLA Send operator +documented at + https://www.tensorflow.org/performance/xla/operation_semantics#send . tensor: The tensor to send. -tensor_name: The name of the tensor to send. 
+tensor_name: A string key that identifies the channel. )doc"); -REGISTER_OP("_XLARecv") - .Output("tensor: T") - .Attr("T: type") +REGISTER_OP("XlaRecv") + .Output("tensor: dtype") + .Attr("dtype: type") .Attr("tensor_name: string") .Attr("shape: shape") .SetIsStateful() @@ -46,11 +48,14 @@ REGISTER_OP("_XLARecv") return Status::OK(); }) .Doc(R"doc( -Receives the named tensor from another XLA computation. +Receives the named tensor from another XLA computation. Wraps the XLA Recv +operator documented at + https://www.tensorflow.org/performance/xla/operation_semantics#recv . tensor: The tensor to receive. -tensor_name: The name of the tensor to receive. -shape: The shape of the input tensor. +dtype: The type of the tensor. +tensor_name: A string key that identifies the channel. +shape: The shape of the tensor. )doc"); } // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/python/BUILD b/tensorflow/compiler/tf2xla/python/BUILD index f0a2ef0651ff6115bd201a3b1c34b3c061a22a3d..42b6292f79ffddd155c05758a1420a2a583eb0c6 100644 --- a/tensorflow/compiler/tf2xla/python/BUILD +++ b/tensorflow/compiler/tf2xla/python/BUILD @@ -22,3 +22,11 @@ tf_py_clif_cc( "//tensorflow/compiler/tf2xla:xla_compiler", ], ) + +py_library( + name = "xla", + srcs = ["xla.py"], + deps = [ + "//tensorflow/compiler/tf2xla/ops:gen_xla_ops", + ], +) diff --git a/tensorflow/compiler/tf2xla/python/xla.py b/tensorflow/compiler/tf2xla/python/xla.py new file mode 100644 index 0000000000000000000000000000000000000000..e5ce65bec950fdfd38c3ca5bc62ac745ef8ca4a7 --- /dev/null +++ b/tensorflow/compiler/tf2xla/python/xla.py @@ -0,0 +1,80 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Experimental library that exposes XLA operations directly in TensorFlow. + +It is sometimes useful to be able to build HLO programs directly from +TensorFlow. This file provides Tensorflow operators that map as closely as +possible to HLO operators. + +There is no promise of backward or forward compatibility for operators defined +in this module. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.compiler.tf2xla.ops import gen_xla_ops + +# TODO(phawkins): provide wrappers for all XLA operators. + +dynamic_update_slice = gen_xla_ops.xla_dynamic_update_slice + + +def reduce_window(operand, + init, + reducer, + window_dimensions, + window_strides=None, + padding=None, + name=None): + """Wraps the XLA ReduceWindow operator. + + ReduceWindow is documented at + https://www.tensorflow.org/performance/xla/operation_semantics#reducewindow . + + Args: + operand: the input tensor + init: a scalar tensor representing the initial value for the reduction + reducer: a reduction function that combines a pair of scalars. + window_dimensions: shape of the window, as a list of integers + window_strides: inter-window strides, as a list of integers. Optional; + if omitted, defaults to strides of 1. + padding: padding to apply to 'operand'. List of (low, high) pairs of + integers that specify the padding to apply before and after each + dimension. Optional; if omitted, defaults to no padding. 
+ name: the operator name, or None. + Returns: + A tensor that represents the output of the reduce_window operator. + """ + window_strides = window_strides or [1] * len(window_dimensions) + padding = padding or [(0, 0)] * len(window_dimensions) + padding_low = [x for (x, _) in padding] + padding_high = [y for (_, y) in padding] + return gen_xla_ops.xla_reduce_window( + operand, + init, + reducer, + window_dimensions, + window_strides, + padding_low, + padding_high, + name=name) + + +recv = gen_xla_ops.xla_recv +send = gen_xla_ops.xla_send + +while_loop = gen_xla_ops.xla_while diff --git a/tensorflow/compiler/tf2xla/tf2xla_test.cc b/tensorflow/compiler/tf2xla/tf2xla_test.cc index a9978e697b091715ce120f0d18fdddd259e08b32..b813668a9edd3a704a9dca1eaa588c1eced6ac31 100644 --- a/tensorflow/compiler/tf2xla/tf2xla_test.cc +++ b/tensorflow/compiler/tf2xla/tf2xla_test.cc @@ -90,6 +90,11 @@ TEST(ConvertGraphDefToXla, Sum) { TF_EXPECT_OK(result_or.status()); std::unique_ptr result = std::move(result_or.ValueOrDie()); EXPECT_EQ("(s32[]) (\n42\n)", result->ToString()); + + config.mutable_feed(0)->mutable_id()->set_output_index( + 123); /* invalid output_index */ + EXPECT_TRUE(errors::IsInvalidArgument( + ConvertGraphDefToXla(graph_def, config, client, &computation))); } } // namespace diff --git a/tensorflow/compiler/tf2xla/tf2xla_util.cc b/tensorflow/compiler/tf2xla/tf2xla_util.cc index f428a194328935fec1210ea96245344de859e611..7ec85aa3cdec622cae509f45c5ba7740222025f9 100644 --- a/tensorflow/compiler/tf2xla/tf2xla_util.cc +++ b/tensorflow/compiler/tf2xla/tf2xla_util.cc @@ -151,8 +151,15 @@ Status AddPlaceholdersForFeeds( Status status; Node* feed_node = g.AddNode(gd.node(0), &status); TF_RETURN_IF_ERROR(status); - info.data_type = - BaseType(feed_node->output_type(info.feed->id().output_index())); + + if (info.feed->id().output_index() < feed_node->num_outputs()) { + info.data_type = + BaseType(feed_node->output_type(info.feed->id().output_index())); + } else { + return 
errors::InvalidArgument( + "Invalid output_index ", info.feed->id().output_index(), + " for feed node ", info.feed->id().node_name()); + } } } @@ -281,4 +288,13 @@ Status SetNodeShardingFromNeighbors(Node* n, bool out_edges) { return Status::OK(); } +void AddDtypeToKernalDefConstraint(StringPiece name, DataType dtype, + KernelDef* kdef) { + for (KernelDef::AttrConstraint& constraint : *kdef->mutable_constraint()) { + if (constraint.name() == name) { + constraint.mutable_allowed_values()->mutable_list()->add_type(dtype); + } + } +} + } // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/tf2xla_util.h b/tensorflow/compiler/tf2xla/tf2xla_util.h index e5fba8ede7745febbb42c572a7b52247213afc95..745beb39c1d917cd0d1cd219536ee26a96253ec9 100644 --- a/tensorflow/compiler/tf2xla/tf2xla_util.h +++ b/tensorflow/compiler/tf2xla/tf2xla_util.h @@ -20,6 +20,7 @@ limitations under the License. #include "tensorflow/compiler/tf2xla/tf2xla.pb.h" #include "tensorflow/core/framework/graph.pb.h" +#include "tensorflow/core/framework/kernel_def.pb.h" #include "tensorflow/core/framework/op.h" #include "tensorflow/core/graph/graph.h" #include "tensorflow/core/lib/core/status.h" @@ -51,6 +52,10 @@ string TensorIdToString(const tf2xla::TensorId& id); // edges are considered. Status SetNodeShardingFromNeighbors(Node* n, bool out_edges); +// Add an allowed data type to the AttrConstraint with the given name. 
+void AddDtypeToKernalDefConstraint(StringPiece name, DataType dtype, + KernelDef* kdef); + } // namespace tensorflow #endif // TENSORFLOW_COMPILER_TF2XLA_TF2XLA_UTIL_H_ diff --git a/tensorflow/compiler/tf2xla/xla_compiler.cc b/tensorflow/compiler/tf2xla/xla_compiler.cc index 86263d847ae02d50e70dafb0129b2664c522f2a3..c0e996768491a6315c21021ce874b8a11557de6e 100644 --- a/tensorflow/compiler/tf2xla/xla_compiler.cc +++ b/tensorflow/compiler/tf2xla/xla_compiler.cc @@ -813,4 +813,29 @@ Status XlaCompiler::SetHostToDeviceMetadata( return Status::OK(); } +Status XlaCompiler::GetHostComputeControlDependency( + const string& host_compute_name, xla::ComputationDataHandle* handle) { + const auto iter = host_compute_control_output_.find(host_compute_name); + if (iter == host_compute_control_output_.end()) { + return errors::InvalidArgument( + "No registered control handle for host compute Op '", host_compute_name, + "'"); + } else { + *handle = iter->second; + } + return Status::OK(); +} + +Status XlaCompiler::SetHostComputeControlDependency( + const string& host_compute_name, const xla::ComputationDataHandle& handle) { + if (host_compute_control_output_.find(host_compute_name) != + host_compute_control_output_.end()) { + return errors::InvalidArgument( + "Duplicate control handles registered for for host compute Op ", + host_compute_name); + } + host_compute_control_output_[host_compute_name] = handle; + return Status::OK(); +} + } // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/xla_compiler.h b/tensorflow/compiler/tf2xla/xla_compiler.h index a6747bbe72e161b2ece55697825cce0e71145a5c..8f564f35ec81765e8998513dfd4805d221200c6c 100644 --- a/tensorflow/compiler/tf2xla/xla_compiler.h +++ b/tensorflow/compiler/tf2xla/xla_compiler.h @@ -325,6 +325,23 @@ class XlaCompiler { gtl::ArraySlice types, gtl::ArraySlice shapes); + // In order to avoid deadlocks from dependencies in host computations, it can + // be necessary to enforce a partial order on the execution of 
HostCompute + // Ops. In particular it may be necessary to constrain the SendToHost for one + // HostCompute to run before blocking on the RecvAtHost for another + // HostCompute. The compiler maintains a mapping from 'host_compute_name' to + // handle, where the handle is an 'output' of the HostCompute Op corresponding + // to 'host_compute_name'. Another HostCompute Op that needs to be sequenced + // later can add the handle as an 'input' to enforce the constraints. + // 'host_compute_name' can be any string the client wishes to use to identify + // a given HostCompute Op as long as the names are unique within the + // compilation. + Status GetHostComputeControlDependency(const string& host_compute_name, + xla::ComputationDataHandle* handle); + Status SetHostComputeControlDependency( + const string& host_compute_name, + const xla::ComputationDataHandle& handle); + const Options& options() const { return options_; } xla::Client* client() const { return options_.client; } FunctionLibraryRuntime* flib_runtime() const { return flib_runtime_; } @@ -391,6 +408,9 @@ class XlaCompiler { std::unordered_map host_compute_sends_; std::unordered_map host_compute_recvs_; + std::unordered_map + host_compute_control_output_; + TF_DISALLOW_COPY_AND_ASSIGN(XlaCompiler); }; diff --git a/tensorflow/compiler/tf2xla/xla_cpu_backend.cc b/tensorflow/compiler/tf2xla/xla_cpu_backend.cc index 8286480e0ea07429adbe31ec4f16d043e321df0a..ead229aaccc292d4944db0c1eaf98c82583533cd 100644 --- a/tensorflow/compiler/tf2xla/xla_cpu_backend.cc +++ b/tensorflow/compiler/tf2xla/xla_cpu_backend.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ +#include "tensorflow/compiler/tf2xla/tf2xla_util.h" #include "tensorflow/compiler/tf2xla/xla_op_registry.h" #include "tensorflow/core/framework/kernel_def.pb.h" @@ -30,6 +31,12 @@ bool CpuOpFilter(KernelDef* kdef) { DT_FLOAT); return true; } + if (kdef->op() == "Const") { + AddDtypeToKernalDefConstraint("dtype", DT_STRING, kdef); + } + if (kdef->op() == "Assert") { + AddDtypeToKernalDefConstraint("T", DT_STRING, kdef); + } return true; } diff --git a/tensorflow/compiler/tf2xla/xla_gpu_backend.cc b/tensorflow/compiler/tf2xla/xla_gpu_backend.cc index 8ca757e72355d890c13b8b448d35c327d3986696..62168b648331844bfe2db1a4d5dcad895c8726f3 100644 --- a/tensorflow/compiler/tf2xla/xla_gpu_backend.cc +++ b/tensorflow/compiler/tf2xla/xla_gpu_backend.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#include "tensorflow/compiler/tf2xla/tf2xla_util.h" #include "tensorflow/compiler/tf2xla/xla_op_registry.h" #include "tensorflow/core/framework/kernel_def.pb.h" @@ -25,6 +26,12 @@ bool GpuOpFilter(KernelDef* kdef) { kdef->op() == "RandomUniformInt" || kdef->op() == "TruncatedNormal") { return false; } + if (kdef->op() == "Const") { + AddDtypeToKernalDefConstraint("dtype", DT_STRING, kdef); + } + if (kdef->op() == "Assert") { + AddDtypeToKernalDefConstraint("T", DT_STRING, kdef); + } return true; } diff --git a/tensorflow/compiler/tf2xla/xla_helpers.cc b/tensorflow/compiler/tf2xla/xla_helpers.cc index 3b0b2f06ebae4af918cbe6fb8a384004c1858998..62a5114837e07f35134ad99e28880d6a9233a213 100644 --- a/tensorflow/compiler/tf2xla/xla_helpers.cc +++ b/tensorflow/compiler/tf2xla/xla_helpers.cc @@ -122,6 +122,9 @@ xla::ComputationDataHandle XlaHelpers::One(xla::ComputationBuilder* b, xla::ComputationDataHandle XlaHelpers::Epsilon(xla::ComputationBuilder* b, DataType 
data_type) { switch (data_type) { + case DT_HALF: + return b->ConstantR0( + static_cast(Eigen::NumTraits::epsilon())); case DT_BFLOAT16: return b->ConstantR0(bfloat16::epsilon()); case DT_FLOAT: diff --git a/tensorflow/compiler/tf2xla/xla_op_registry.h b/tensorflow/compiler/tf2xla/xla_op_registry.h index ff7453194af3a85bded86a5ce298f8779422dccb..e255b01dd7fdcb095c7992d4352d2d9bb7d36ac3 100644 --- a/tensorflow/compiler/tf2xla/xla_op_registry.h +++ b/tensorflow/compiler/tf2xla/xla_op_registry.h @@ -51,13 +51,13 @@ constexpr std::array kNumericTypes = { {DT_UINT32, DT_UINT64, DT_INT32, DT_INT64, DT_HALF, DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_BFLOAT16}}; -constexpr std::array kCpuAllTypes = { - {DT_UINT32, DT_UINT64, DT_INT32, DT_INT64, DT_FLOAT, DT_DOUBLE, +constexpr std::array kCpuAllTypes = { + {DT_UINT32, DT_UINT64, DT_INT32, DT_INT64, DT_HALF, DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_BOOL}}; -constexpr std::array kGpuAllTypes = { - {DT_UINT32, DT_UINT64, DT_INT32, DT_INT64, DT_FLOAT, DT_DOUBLE, - DT_COMPLEX64, DT_BOOL}}; +constexpr std::array kGpuAllTypes = { + {DT_UINT32, DT_UINT64, DT_INT32, DT_INT64, DT_HALF, DT_FLOAT, DT_DOUBLE, + DT_COMPLEX64, DT_BOOL, DT_BFLOAT16}}; // Class that manages registrations of operators and devices for the XLA JIT. // Not thread-safe. 
diff --git a/tensorflow/compiler/xla/BUILD b/tensorflow/compiler/xla/BUILD index 751777222fcc7ec073958349aa2677d5b4e6757d..88f37433a552e0dff1587075f1d31fd5d050302b 100644 --- a/tensorflow/compiler/xla/BUILD +++ b/tensorflow/compiler/xla/BUILD @@ -443,6 +443,9 @@ cc_library( srcs = ["executable_run_options.cc"], hdrs = ["executable_run_options.h"], visibility = ["//visibility:public"], + deps = [ + ":types", + ], ) cc_library( diff --git a/tensorflow/compiler/xla/client/BUILD b/tensorflow/compiler/xla/client/BUILD index a299c2afd45aa6b785964b8a8e1400ddf54083a4..286d06d12ffca7410067f2d33398497576986807 100644 --- a/tensorflow/compiler/xla/client/BUILD +++ b/tensorflow/compiler/xla/client/BUILD @@ -130,6 +130,7 @@ cc_library( "//tensorflow/compiler/xla:statusor", "//tensorflow/compiler/xla:util", "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/client/xla_client:xla_computation", "//tensorflow/compiler/xla/service:compile_only_service", "//tensorflow/compiler/xla/service:compiler", "//tensorflow/core:stream_executor_no_cuda", diff --git a/tensorflow/compiler/xla/client/client.cc b/tensorflow/compiler/xla/client/client.cc index c4c889437465dc0edf313bb7e5130f1a9c311ebf..f0f94298a05f7c4bdc41cbfb8572454fbedd371d 100644 --- a/tensorflow/compiler/xla/client/client.cc +++ b/tensorflow/compiler/xla/client/client.cc @@ -193,6 +193,34 @@ StatusOr> Client::ExecuteAndTransfer( return Transfer(*data, shape_with_output_layout); } +StatusOr> Client::ComputeConstant( + const XlaComputation& computation, const Layout* output_layout) const { + ComputeConstantGraphRequest request; + *request.mutable_computation() = computation.proto(); + if (output_layout != nullptr) { + *request.mutable_output_layout() = *output_layout; + } + + ComputeConstantResponse response; + + VLOG(2) << "making compute-constant-graph request"; + Status s = stub_->ComputeConstantGraph(&request, &response); + VLOG(2) << "done with request"; + + if (!s.ok()) { + return s; + } + + VLOG(3) << 
"ComputeConstant: {" << response.DebugString() << "}"; + + if (!response.has_literal()) { + return InternalError( + "no computed literal in the provided response in ComputeConstantGraph " + "request"); + } + return Literal::CreateFromProto(response.literal()); +} + StatusOr Client::LoadSnapshot(const SessionModule& module) { LoadComputationSnapshotRequest request; *request.mutable_module() = module; @@ -324,8 +352,38 @@ StatusOr>> Client::ExecuteParallel( StatusOr>> Client::ExecuteParallel( tensorflow::gtl::ArraySlice computations) { - return Unimplemented( - "ExecuteParallel is not yet implemented for XlaComputation."); + ExecuteGraphParallelRequest request; + + for (const XlaComputationInstance& computation : computations) { + ExecuteGraphRequest single_request; + *single_request.mutable_computation() = computation.computation.proto(); + for (GlobalData* argument : computation.arguments) { + *single_request.add_arguments() = argument->handle(); + } + *single_request.mutable_execution_options() = computation.execution_options; + *request.add_requests() = single_request; + } + + ExecuteParallelResponse response; + VLOG(1) << "making execute-graph-parallel request: " + << request.ShortDebugString(); + tensorflow::Status s = stub_->ExecuteGraphParallel(&request, &response); + VLOG(1) << "done with request"; + + if (!s.ok()) { + return s; + } + + std::vector> outputs; + for (size_t i = 0; i < computations.size(); ++i) { + outputs.push_back( + MakeUnique(stub_, response.responses(i).output())); + if (computations[i].execution_profile != nullptr) { + *computations[i].execution_profile = response.responses(i).profile(); + } + } + + return std::move(outputs); } StatusOr> Client::GetDeviceHandles( diff --git a/tensorflow/compiler/xla/client/client.h b/tensorflow/compiler/xla/client/client.h index 05d707dab1533f44ce827157e888720e218d4c9c..14c685d94ea31c382d84223ca4e2eba544420d78 100644 --- a/tensorflow/compiler/xla/client/client.h +++ 
b/tensorflow/compiler/xla/client/client.h @@ -194,6 +194,27 @@ class Client { const ExecutionOptions* execution_options = nullptr, ExecutionProfile* execution_profile = nullptr); + // Computes the value of the given computation using a non-optimized + // interpreter on the host. + // + // The computation must not depend on any parameters, or on stateful operators + // such as `RngNormal` or `Infeed`. + // + // This functionality can be useful when translating a computation into XLA + // where something that looked dynamic is required by XLA to be specified as a + // constant. E.g. the source computation (outside of XLA) may include a + // dynamic computation of the shape of something and ComputeConstant lets you + // determine what the value of that computation is in the case where the value + // can be determined at compile time. + // + // If output_layout is non-null, then the output of the computation will be + // stored using that layout. + // + // TODO(b/74197823): This is a part of a NOT YET ready refactor. + StatusOr> ComputeConstant( + const XlaComputation& computation, + const Layout* output_layout = nullptr) const; + // Unregister the memory for the given GlobalData on the device. Status Unregister(const GlobalData& data); diff --git a/tensorflow/compiler/xla/client/client_library.cc b/tensorflow/compiler/xla/client/client_library.cc index b1663bc815719c3da75b37593ac665b1f3493db8..803a9e40094391ba47ed27713f4538caf875c4f6 100644 --- a/tensorflow/compiler/xla/client/client_library.cc +++ b/tensorflow/compiler/xla/client/client_library.cc @@ -23,22 +23,19 @@ limitations under the License. 
namespace xla { -LocalClientOptions::LocalClientOptions(perftools::gputools::Platform* platform, +LocalClientOptions::LocalClientOptions(se::Platform* platform, int number_of_replicas, int intra_op_parallelism_threads) : platform_(platform), number_of_replicas_(number_of_replicas), intra_op_parallelism_threads_(intra_op_parallelism_threads) {} -LocalClientOptions& LocalClientOptions::set_platform( - perftools::gputools::Platform* platform) { +LocalClientOptions& LocalClientOptions::set_platform(se::Platform* platform) { platform_ = platform; return *this; } -perftools::gputools::Platform* LocalClientOptions::platform() const { - return platform_; -} +se::Platform* LocalClientOptions::platform() const { return platform_; } LocalClientOptions& LocalClientOptions::set_number_of_replicas( int number_of_replicas) { @@ -69,7 +66,7 @@ ClientLibrary::ClientLibrary() = default; ClientLibrary::~ClientLibrary() = default; /* static */ StatusOr ClientLibrary::GetOrCreateLocalClient( - perftools::gputools::Platform* platform) { + se::Platform* platform) { LocalClientOptions default_options; default_options.set_platform(platform); return GetOrCreateLocalClient(default_options); @@ -77,7 +74,7 @@ ClientLibrary::~ClientLibrary() = default; /* static */ StatusOr ClientLibrary::GetOrCreateLocalClient( const LocalClientOptions& options) { - perftools::gputools::Platform* platform = options.platform(); + se::Platform* platform = options.platform(); int replica_count = options.number_of_replicas(); ClientLibrary& client_library = Singleton(); tensorflow::mutex_lock lock(client_library.service_mutex_); @@ -115,7 +112,7 @@ ClientLibrary::~ClientLibrary() = default; } /* static */ LocalService* ClientLibrary::GetXlaService( - perftools::gputools::Platform* platform) { + se::Platform* platform) { ClientLibrary& client_library = Singleton(); tensorflow::mutex_lock lock(client_library.service_mutex_); auto it = client_library.local_instances_.find(platform->id()); @@ -124,8 +121,7 @@ 
ClientLibrary::~ClientLibrary() = default; } /* static */ StatusOr -ClientLibrary::GetOrCreateCompileOnlyClient( - perftools::gputools::Platform* platform) { +ClientLibrary::GetOrCreateCompileOnlyClient(se::Platform* platform) { ClientLibrary& client_library = Singleton(); tensorflow::mutex_lock lock(client_library.service_mutex_); diff --git a/tensorflow/compiler/xla/client/client_library.h b/tensorflow/compiler/xla/client/client_library.h index a6f30d82e43587135697e76e8bc7d122edc0f602..3ad558fa532931937fab898f7b855f0a3370eaec 100644 --- a/tensorflow/compiler/xla/client/client_library.h +++ b/tensorflow/compiler/xla/client/client_library.h @@ -43,13 +43,13 @@ namespace xla { // Options to configure the local client when it is created. class LocalClientOptions { public: - LocalClientOptions(perftools::gputools::Platform* platform = nullptr, + LocalClientOptions(se::Platform* platform = nullptr, int number_of_replicas = 1, int intra_op_parallelism_threads = -1); // Set the platform backing the service, or nullptr for the default platform. - LocalClientOptions& set_platform(perftools::gputools::Platform* platform); - perftools::gputools::Platform* platform() const; + LocalClientOptions& set_platform(se::Platform* platform); + se::Platform* platform() const; // Set the number of replicas to use when compiling replicated // programs. @@ -61,7 +61,7 @@ class LocalClientOptions { int intra_op_parallelism_threads() const; private: - perftools::gputools::Platform* platform_; + se::Platform* platform_; int number_of_replicas_; int intra_op_parallelism_threads_; }; @@ -74,7 +74,7 @@ class ClientLibrary { // platform : The platform the underlying XLA service should target. If // null then default platform is used. 
static StatusOr GetOrCreateLocalClient( - perftools::gputools::Platform* platform = nullptr); + se::Platform* platform = nullptr); static StatusOr GetOrCreateLocalClient( const LocalClientOptions& options); @@ -84,14 +84,14 @@ class ClientLibrary { // Returns the service from the service thread. Only used in unit tests to // access user computations from client. - static LocalService* GetXlaService(perftools::gputools::Platform* platform); + static LocalService* GetXlaService(se::Platform* platform); // Singleton constructor-or-accessor for compile-only clients. Arguments: // // platform : The platform the underlying XLA service should target. If // null then default platform is used. static StatusOr GetOrCreateCompileOnlyClient( - perftools::gputools::Platform* platform = nullptr); + se::Platform* platform = nullptr); // Clears the local instance and compile only instance caches. The client // pointers returned by the previous GetOrCreateLocalClient() or @@ -120,12 +120,10 @@ class ClientLibrary { }; tensorflow::mutex service_mutex_; // Guards the singleton creation state. 
- std::unordered_map> + std::unordered_map> local_instances_ GUARDED_BY(service_mutex_); - std::unordered_map> + std::unordered_map> compile_only_instances_ GUARDED_BY(service_mutex_); TF_DISALLOW_COPY_AND_ASSIGN(ClientLibrary); diff --git a/tensorflow/compiler/xla/client/compile_only_client.cc b/tensorflow/compiler/xla/client/compile_only_client.cc index 59662c95ac15e7c23790c5b5ff5d75a694613aeb..96e38bca01087991943aff40ed1cb3e21f9e6cba 100644 --- a/tensorflow/compiler/xla/client/compile_only_client.cc +++ b/tensorflow/compiler/xla/client/compile_only_client.cc @@ -39,6 +39,24 @@ CompileOnlyClient::CompileAheadOfTime( return compiler_service_->CompileAheadOfTime(service_instances, options); } +StatusOr>> +CompileOnlyClient::CompileAheadOfTime( + const tensorflow::gtl::ArraySlice computations, + const AotCompilationOptions& options) { + std::vector service_instances; + service_instances.reserve(computations.size()); + for (const AotXlaComputationInstance& instance : computations) { + service_instances.emplace_back(); + CompileOnlyService::AotXlaComputationInstance& service_instance = + service_instances.back(); + TF_RET_CHECK(instance.computation != nullptr); + service_instance.computation = instance.computation->proto(); + service_instance.argument_layouts = instance.argument_layouts; + service_instance.result_layout = instance.result_layout; + } + return compiler_service_->CompileAheadOfTime(service_instances, options); +} + int64 CompileOnlyClient::PointerSizeForTriple(tensorflow::StringPiece triple) { llvm::Triple llvm_triple( llvm::Triple::normalize(llvm::StringRef(triple.data(), triple.size()))); diff --git a/tensorflow/compiler/xla/client/compile_only_client.h b/tensorflow/compiler/xla/client/compile_only_client.h index 5900048711384e0240a3cd502260eb388eb40f51..c8725b8517484acdaf093bc3b34adb00f69155b1 100644 --- a/tensorflow/compiler/xla/client/compile_only_client.h +++ b/tensorflow/compiler/xla/client/compile_only_client.h @@ -18,6 +18,7 @@ limitations under 
the License. #include "tensorflow/compiler/xla/client/client.h" #include "tensorflow/compiler/xla/client/computation.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_computation.h" #include "tensorflow/compiler/xla/service/compile_only_service.h" #include "tensorflow/compiler/xla/service/compiler.h" #include "tensorflow/compiler/xla/statusor.h" @@ -54,6 +55,27 @@ class CompileOnlyClient : public Client { const tensorflow::gtl::ArraySlice computations, const AotCompilationOptions& options); + // A description of an xla computation to compile using CompileAheadOfTime. + // + // TODO(b/74197823): This is a part of a NOT YET ready refactor. + struct AotXlaComputationInstance { + const XlaComputation* computation; + // Inform the compiler of the expected layout for arguments. + std::vector argument_layouts; + // Specifies the expected result layout. + const Shape* result_layout; + }; + + // Compiles a list of xla computations for ahead-of-time execution. This is + // intended for use in static compilation. The |options| parameter describes + // the target for which the compiler should emit code. + // + // TODO(b/74197823): This is a part of a NOT YET ready refactor. + StatusOr>> + CompileAheadOfTime( + const tensorflow::gtl::ArraySlice computations, + const AotCompilationOptions& options); + // Returns the size of a pointer in bytes for a given triple. 
static int64 PointerSizeForTriple(tensorflow::StringPiece triple); diff --git a/tensorflow/compiler/xla/client/computation_builder.cc b/tensorflow/compiler/xla/client/computation_builder.cc index 4d3b0ee0d6e9ba82cfa09af0fbff0ae1efa0ac64..83c7cb174402133706fbde6a734a29afd8edfe80 100644 --- a/tensorflow/compiler/xla/client/computation_builder.cc +++ b/tensorflow/compiler/xla/client/computation_builder.cc @@ -1046,6 +1046,11 @@ ComputationDataHandle ComputationBuilder::Neg( return UnaryOp(UNOP_NEGATE, operand); } +ComputationDataHandle ComputationBuilder::Clz( + const ComputationDataHandle& operand) { + return UnaryOp(UNOP_CLZ, operand); +} + ComputationDataHandle ComputationBuilder::Clamp( const ComputationDataHandle& min, const ComputationDataHandle& operand, const ComputationDataHandle& max) { diff --git a/tensorflow/compiler/xla/client/computation_builder.h b/tensorflow/compiler/xla/client/computation_builder.h index 019c6f3afb5d57bfe453988ded19120a4483cf36..9431c2c459a564e3cf509d9dae16e71fc27ee2c0 100644 --- a/tensorflow/compiler/xla/client/computation_builder.h +++ b/tensorflow/compiler/xla/client/computation_builder.h @@ -657,6 +657,9 @@ class ComputationBuilder { // Enqueues a negate instruction onto the computation. ComputationDataHandle Neg(const ComputationDataHandle& operand); + // Enqueues a count-leading-zeros instruction onto the computation. + ComputationDataHandle Clz(const ComputationDataHandle& operand); + // Enqueues a transpose instruction onto the computation. 
ComputationDataHandle Transpose( const ComputationDataHandle& operand, diff --git a/tensorflow/compiler/xla/client/lib/BUILD b/tensorflow/compiler/xla/client/lib/BUILD index d02972f2c0d18e00cd4b554e2125647fe80c64c5..59c4a53c05a45490a7c8e732840a4e70767c46c2 100644 --- a/tensorflow/compiler/xla/client/lib/BUILD +++ b/tensorflow/compiler/xla/client/lib/BUILD @@ -24,6 +24,8 @@ cc_library( "//tensorflow/compiler/xla:xla_data_proto", "//tensorflow/compiler/xla/client:computation", "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_computation", "//tensorflow/core:lib", ], ) @@ -44,6 +46,7 @@ cc_library( "//tensorflow/compiler/xla/client:computation", "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client:global_data", + "//tensorflow/compiler/xla/client/xla_client:xla_computation", "//tensorflow/compiler/xla/tests:test_utils", "//tensorflow/core:lib", ], diff --git a/tensorflow/compiler/xla/client/lib/arithmetic.cc b/tensorflow/compiler/xla/client/lib/arithmetic.cc index 24048a1e5a782661ba577ba50e3b5b2914f17c0a..63df449e0b3bdd642d548319dd7d621ca2f59b1d 100644 --- a/tensorflow/compiler/xla/client/lib/arithmetic.cc +++ b/tensorflow/compiler/xla/client/lib/arithmetic.cc @@ -26,6 +26,7 @@ limitations under the License. 
namespace xla { namespace { + using InstructionGenerator = ComputationDataHandle (*)(ComputationBuilder*, const ComputationDataHandle&, const ComputationDataHandle&); @@ -47,6 +48,27 @@ Computation CreateScalarComputation(const string& name, PrimitiveType type, generator(b.get(), lhs, rhs); return b->BuildAndNoteError(); } + +using XlaOpGenerator = XlaOp (*)(XlaBuilder*, const XlaOp&, const XlaOp&); + +XlaComputation CreateScalarComputation(const string& name, PrimitiveType type, + XlaBuilder* builder, + XlaOpGenerator generator) { + std::unique_ptr b; + if (type == PRED) { + b = builder->CreateSubBuilder(name); + } else { + b = builder->CreateSubBuilder( + tensorflow::strings::StrCat(name, "_", PrimitiveType_Name(type))); + } + + const Shape scalar = ShapeUtil::MakeShape(type, {}); + auto lhs = b->Parameter(0, scalar, "lhs"); + auto rhs = b->Parameter(1, scalar, "rhs"); + generator(b.get(), lhs, rhs); + return b->BuildAndNoteError(); +} + } // namespace Computation CreateScalarAddComputation(PrimitiveType type, @@ -60,7 +82,7 @@ Computation CreateScalarAddComputation(PrimitiveType type, Computation CreateScalarMultiplyComputation(PrimitiveType type, ComputationBuilder* builder) { return CreateScalarComputation( - "add", type, builder, + "mul", type, builder, [](ComputationBuilder* b, const ComputationDataHandle& lhs, const ComputationDataHandle& rhs) { return b->Mul(lhs, rhs); }); } @@ -114,4 +136,75 @@ StatusOr Any(const ComputationDataHandle& predicates, return builder->Reduce(predicates, f, logical_or, all_dimensions); } +XlaComputation CreateScalarAddComputation(PrimitiveType type, + XlaBuilder* builder) { + return CreateScalarComputation( + "add", type, builder, + [](XlaBuilder* b, const XlaOp& lhs, const XlaOp& rhs) { + return b->Add(lhs, rhs); + }); +} + +XlaComputation CreateScalarMultiplyComputation(PrimitiveType type, + XlaBuilder* builder) { + return CreateScalarComputation( + "mul", type, builder, + [](XlaBuilder* b, const XlaOp& lhs, const XlaOp& rhs) 
{ + return b->Mul(lhs, rhs); + }); +} + +XlaComputation CreateScalarGeComputation(PrimitiveType type, + XlaBuilder* builder) { + return CreateScalarComputation( + "ge", type, builder, + [](XlaBuilder* b, const XlaOp& lhs, const XlaOp& rhs) { + return b->Ge(lhs, rhs); + }); +} + +XlaComputation CreateScalarMaxComputation(PrimitiveType type, + XlaBuilder* builder) { + return CreateScalarComputation( + "max", type, builder, + [](XlaBuilder* b, const XlaOp& lhs, const XlaOp& rhs) { + return b->Max(lhs, rhs); + }); +} + +XlaComputation CreateScalarMinComputation(PrimitiveType type, + XlaBuilder* builder) { + return CreateScalarComputation( + "min", type, builder, + [](XlaBuilder* b, const XlaOp& lhs, const XlaOp& rhs) { + return b->Min(lhs, rhs); + }); +} + +XlaComputation CreateScalarAndComputation(XlaBuilder* builder) { + return CreateScalarComputation( + "and", PRED, builder, + [](XlaBuilder* b, const XlaOp& lhs, const XlaOp& rhs) { + return b->And(lhs, rhs); + }); +} + +XlaComputation CreateScalarOrComputation(XlaBuilder* builder) { + return CreateScalarComputation( + "or", PRED, builder, + [](XlaBuilder* b, const XlaOp& lhs, const XlaOp& rhs) { + return b->Or(lhs, rhs); + }); +} + +StatusOr Any(const XlaOp& predicates, XlaBuilder* builder) { + auto f = builder->ConstantR0(false); + XlaComputation logical_or = CreateScalarOrComputation(builder); + TF_ASSIGN_OR_RETURN(const Shape& predicates_shape, + builder->GetShape(predicates)); + std::vector all_dimensions(ShapeUtil::Rank(predicates_shape)); + std::iota(all_dimensions.begin(), all_dimensions.end(), 0); + return builder->Reduce(predicates, f, logical_or, all_dimensions); +} + } // namespace xla diff --git a/tensorflow/compiler/xla/client/lib/arithmetic.h b/tensorflow/compiler/xla/client/lib/arithmetic.h index ae89784bc227d837cf15f0a89687dd00dccc2745..f4d3fc801590fedbb84ed3d6283e62f47c56d5c7 100644 --- a/tensorflow/compiler/xla/client/lib/arithmetic.h +++ b/tensorflow/compiler/xla/client/lib/arithmetic.h @@ -20,6 
+20,8 @@ limitations under the License. #include "tensorflow/compiler/xla/client/computation.h" #include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_computation.h" #include "tensorflow/compiler/xla/xla_data.pb.h" namespace xla { @@ -56,6 +58,48 @@ Computation CreateScalarOrComputation(ComputationBuilder* builder); StatusOr Any(const ComputationDataHandle& predicates, ComputationBuilder* builder); +// TODO(b/74197823): This is a part of a NOT YET ready refactor. +// +// Creates a scalar add computation and returns it. +XlaComputation CreateScalarAddComputation(PrimitiveType type, + XlaBuilder* builder); +// TODO(b/74197823): This is a part of a NOT YET ready refactor. +// +// Creates a scalar multiply computation and returns it. +XlaComputation CreateScalarMultiplyComputation(PrimitiveType type, + XlaBuilder* builder); +// TODO(b/74197823): This is a part of a NOT YET ready refactor. +// +// Creates a scalar ge computation and returns it. +XlaComputation CreateScalarGeComputation(PrimitiveType type, + XlaBuilder* builder); +// TODO(b/74197823): This is a part of a NOT YET ready refactor. +// +// Creates a scalar max computation and returns it. +XlaComputation CreateScalarMaxComputation(PrimitiveType type, + XlaBuilder* builder); +// TODO(b/74197823): This is a part of a NOT YET ready refactor. +// +// Creates a scalar min computation and returns it. +XlaComputation CreateScalarMinComputation(PrimitiveType type, + XlaBuilder* builder); +// TODO(b/74197823): This is a part of a NOT YET ready refactor. +// +// Creates a scalar logical AND computation and returns it. +XlaComputation CreateScalarAndComputation(XlaBuilder* builder); + +// TODO(b/74197823): This is a part of a NOT YET ready refactor. +// +// Creates a scalar logical OR computation and returns it. 
+XlaComputation CreateScalarOrComputation(XlaBuilder* builder); + +// TODO(b/74197823): This is a part of a NOT YET ready refactor. +// +// Returns whether any predicate in "predicates" is set. +// +// Note: if predicates is zero-sized, Any() vacuously returns false. +StatusOr Any(const XlaOp& predicates, XlaBuilder* builder); + } // namespace xla #endif // TENSORFLOW_COMPILER_XLA_CLIENT_LIB_ARITHMETIC_H_ diff --git a/tensorflow/compiler/xla/client/lib/testing.cc b/tensorflow/compiler/xla/client/lib/testing.cc index b63a1465ea755b906853860d47768ecbeaa0dcdd..311dc4bdd72cfd7999e83a26e11614d6ca005bce 100644 --- a/tensorflow/compiler/xla/client/lib/testing.cc +++ b/tensorflow/compiler/xla/client/lib/testing.cc @@ -111,4 +111,20 @@ std::vector> MakeFakeArgumentsOrDie( return fake_arguments; } +std::vector> MakeFakeArgumentsOrDie( + const XlaComputation& computation, Client* client) { + CHECK(computation.proto().has_program_shape()) + << "Computation should have progran shape."; + auto program_shape = computation.proto().program_shape(); + + // For every (unbound) parameter that the computation wants, we manufacture + // some arbitrary data so that we can invoke the computation. + std::vector> fake_arguments; + for (const Shape& parameter : program_shape.parameters()) { + fake_arguments.push_back(MakeFakeDataOrDie(parameter, client)); + } + + return fake_arguments; +} + } // namespace xla diff --git a/tensorflow/compiler/xla/client/lib/testing.h b/tensorflow/compiler/xla/client/lib/testing.h index 7e640d1307edcc3e2c021f4391c456f578a015ee..1dc2622972d5fd3da6991d70b800cc3fd5a638f4 100644 --- a/tensorflow/compiler/xla/client/lib/testing.h +++ b/tensorflow/compiler/xla/client/lib/testing.h @@ -22,6 +22,7 @@ limitations under the License. 
#include "tensorflow/compiler/xla/client/client.h" #include "tensorflow/compiler/xla/client/computation.h" #include "tensorflow/compiler/xla/client/global_data.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_computation.h" #include "tensorflow/compiler/xla/xla_data.pb.h" namespace xla { @@ -38,6 +39,12 @@ std::unique_ptr MakeFakeDataOrDie(const Shape& shape, std::vector> MakeFakeArgumentsOrDie( const Computation& computation, Client* client); +// Returns vector of GlobalData handles of fake data (created using +// MakeFakeDataOrDie) that are correctly shaped arguments for the given +// xla computation. +std::vector> MakeFakeArgumentsOrDie( + const XlaComputation& computation, Client* client); + } // namespace xla #endif // TENSORFLOW_COMPILER_XLA_CLIENT_LIB_TESTING_H_ diff --git a/tensorflow/compiler/xla/client/local_client.cc b/tensorflow/compiler/xla/client/local_client.cc index 30594243dcf51d2b5312b9dcb2bea7d0cd78524d..d0e945b70fdb3c7abdcf94cb03d3b962f8556576 100644 --- a/tensorflow/compiler/xla/client/local_client.cc +++ b/tensorflow/compiler/xla/client/local_client.cc @@ -24,8 +24,6 @@ limitations under the License. 
#include "tensorflow/compiler/xla/service/source_map_util.h" #include "tensorflow/compiler/xla/status_macros.h" -namespace se = ::perftools::gputools; - using xla::source_map_util::InvalidParameterArgument; namespace xla { @@ -136,7 +134,7 @@ tensorflow::Status LocalExecutable::ValidateExecutionOptions( return Status::OK(); } -StatusOr> LocalExecutable::Run( +StatusOr LocalExecutable::Run( const tensorflow::gtl::ArraySlice arguments, ExecutableRunOptions run_options) { TF_RETURN_IF_ERROR( @@ -169,27 +167,26 @@ StatusOr> LocalExecutable::Run( return ExecuteAndDump(&service_options, arguments); } TF_ASSIGN_OR_RETURN( - std::unique_ptr result, + ShapedBuffer result, executable_->ExecuteOnStreamWrapper( &service_options, run_options.execution_profile(), arguments)); - return MakeUnique(std::move(*result), - run_options.allocator()); + return ScopedShapedBuffer(std::move(result), run_options.allocator()); } -StatusOr> LocalExecutable::ExecuteAndDump( +StatusOr LocalExecutable::ExecuteAndDump( const ServiceExecutableRunOptions* run_options, const tensorflow::gtl::ArraySlice arguments) { executable_->session_module()->set_execution_platform( backend_->platform()->Name()); TF_RETURN_IF_ERROR(RecordArguments(arguments, executable_->session_module())); TF_ASSIGN_OR_RETURN( - std::unique_ptr result, + ShapedBuffer result, executable_->ExecuteOnStream(run_options, arguments, /*hlo_execution_profile=*/nullptr)); - TF_RETURN_IF_ERROR(RecordResult(result.get(), executable_->session_module())); + TF_RETURN_IF_ERROR(RecordResult(&result, executable_->session_module())); TF_RETURN_IF_ERROR(executable_->DumpSessionModule()); - return ScopedShapedBuffer::MakeScoped(result.get(), run_options->allocator()); + return ScopedShapedBuffer(std::move(result), run_options->allocator()); } tensorflow::Status LocalExecutable::RecordArguments( @@ -283,9 +280,9 @@ StatusOr> LocalClient::Compile( updated_options)); } -StatusOr> -LocalClient::LiteralToShapedBuffer(const Literal& literal, int 
device_ordinal, - DeviceMemoryAllocator* allocator) { +StatusOr LocalClient::LiteralToShapedBuffer( + const Literal& literal, int device_ordinal, + DeviceMemoryAllocator* allocator) { if (allocator == nullptr) { allocator = backend().memory_allocator(); } @@ -295,7 +292,7 @@ LocalClient::LiteralToShapedBuffer(const Literal& literal, int device_ordinal, TF_ASSIGN_OR_RETURN(se::StreamExecutor * executor, backend().stream_executor(device_ordinal)); TF_RETURN_IF_ERROR(backend().transfer_manager()->TransferLiteralToDevice( - executor, literal, *scoped_buffer)); + executor, literal, scoped_buffer)); return std::move(scoped_buffer); } diff --git a/tensorflow/compiler/xla/client/local_client.h b/tensorflow/compiler/xla/client/local_client.h index 98ee7c62c94be7c618cedd3dc12ecbfc812ee180..f306c520ede0014be52d1b952849c8894b092baf 100644 --- a/tensorflow/compiler/xla/client/local_client.h +++ b/tensorflow/compiler/xla/client/local_client.h @@ -38,7 +38,7 @@ class LocalExecutable { public: // Run the compiled computation with the given arguments and options and // return the result. - StatusOr> Run( + StatusOr Run( const tensorflow::gtl::ArraySlice arguments, ExecutableRunOptions run_options); @@ -73,7 +73,7 @@ class LocalExecutable { // Records the computation in a SessionModule proto with the arguments used to // invoke it, and the result. Enabled by flag: --tla_dump_executions_to. - StatusOr> ExecuteAndDump( + StatusOr ExecuteAndDump( const ServiceExecutableRunOptions* run_options, const tensorflow::gtl::ArraySlice arguments); @@ -136,7 +136,7 @@ class LocalClient : public Client { // ScopedShapedBuffer. If non-null the given memory allocator is used for // device memory allocation. If null, the default memory allocator for the // device is used. 
- StatusOr> LiteralToShapedBuffer( + StatusOr LiteralToShapedBuffer( const Literal& literal, int device_ordinal, DeviceMemoryAllocator* allocator = nullptr); @@ -167,7 +167,7 @@ class LocalClient : public Client { StatusOr ReplicaNumberToDeviceOrdinal(int replica_number); // Returns the platform that the underlying service targets. - perftools::gputools::Platform* platform() const; + se::Platform* platform() const; // Returns the number of devices on the system of the service platform // type. Not all devices may be supported by the service (see diff --git a/tensorflow/compiler/xla/client/xla_client/BUILD b/tensorflow/compiler/xla/client/xla_client/BUILD index b1dba168565cca86cba0403604736fecd00d6f29..31fa1241ee474a31575c45cf7652063dfc818fac 100644 --- a/tensorflow/compiler/xla/client/xla_client/BUILD +++ b/tensorflow/compiler/xla/client/xla_client/BUILD @@ -44,6 +44,7 @@ cc_library( hdrs = ["xla_builder.h"], deps = [ ":xla_computation", + "//tensorflow/compiler/xla:execution_options_util", "//tensorflow/compiler/xla:literal_util", "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla:status_macros", diff --git a/tensorflow/compiler/xla/client/xla_client/xla_builder.cc b/tensorflow/compiler/xla/client/xla_client/xla_builder.cc index e51a8b14c029fbc62ae1381f3e0216d1f7b4ca40..1899983e442116d3ebf8a3e79b0515653cd624cb 100644 --- a/tensorflow/compiler/xla/client/xla_client/xla_builder.cc +++ b/tensorflow/compiler/xla/client/xla_client/xla_builder.cc @@ -15,13 +15,17 @@ limitations under the License. 
#include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" +#include #include +#include #include #include +#include "tensorflow/compiler/xla/execution_options_util.h" #include "tensorflow/compiler/xla/service/hlo_opcode.h" #include "tensorflow/compiler/xla/service/shape_inference.h" #include "tensorflow/compiler/xla/util.h" +#include "tensorflow/core/lib/gtl/flatset.h" #include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/mutex.h" @@ -44,6 +48,7 @@ int64 GetUniqueId() { bool CanBeRoot(HloOpcode opcode) { switch (opcode) { case HloOpcode::kSend: + case HloOpcode::kSendDone: case HloOpcode::kOutfeed: case HloOpcode::kTrace: return false; @@ -52,15 +57,30 @@ bool CanBeRoot(HloOpcode opcode) { } } +StatusOr> GetOperandShapes( + tensorflow::gtl::ArraySlice operands) { + std::vector operand_shapes; + for (const XlaOp& operand : operands) { + TF_ASSIGN_OR_RETURN(const Shape& shape, operand.GetShape()); + operand_shapes.push_back(shape); + } + return operand_shapes; +} + } // namespace StatusOr XlaBuilder::GetShape(const XlaOp& op) const { + TF_RETURN_IF_ERROR(first_error_); + TF_ASSIGN_OR_RETURN(auto instr, LookUpInstruction(op)); return instr->shape(); } StatusOr XlaOp::GetShape() const { - TF_RET_CHECK(builder_ != nullptr); + if (builder_ == nullptr) { + return InvalidArgument( + "cannot GetShape for an invalid XlaOp with handle %lld", handle()); + } return builder_->GetShape(*this); } @@ -81,8 +101,24 @@ void XlaBuilder::NoteError(const Status& error) { } } -StatusOr XlaBuilder::GetProgramShape(int64* root_id) { +XlaOp XlaBuilder::NoteErrorOrReturn( + const std::function()>& op_creator) { + if (!first_error_.ok()) { + return {}; + } + auto op = op_creator(); + if (!op.ok()) { + NoteError(op.status()); + return {}; + } + return op.ConsumeValueOrDie(); +} + +StatusOr XlaBuilder::GetProgramShape(int64* root_id) const { + TF_RETURN_IF_ERROR(first_error_); + TF_RET_CHECK(root_id != nullptr); + ProgramShape program_shape; // Not all 
instructions can be roots. Walk backwards from the last added @@ -123,9 +159,56 @@ StatusOr XlaBuilder::GetProgramShape(int64* root_id) { return program_shape; } -StatusOr XlaBuilder::GetProgramShape() { - int64 root_id; - return GetProgramShape(&root_id); +StatusOr XlaBuilder::GetProgramShape() const { + int64 root; + return GetProgramShape(&root); +} + +void XlaBuilder::IsConstantVisitor(const int64 op_handle, + std::set* visited, + bool* is_constant) const { + if (visited->count(op_handle) != 0 || !*is_constant) { + return; + } + + CHECK(op_handle < instructions_.size() && op_handle >= 0); + + const HloInstructionProto& instr = instructions_[op_handle]; + const HloOpcode opcode = StringToHloOpcode(instr.opcode()).ValueOrDie(); + switch (opcode) { + default: + for (const int64 operand_id : instr.operand_ids()) { + IsConstantVisitor(operand_id, visited, is_constant); + } + // TODO(b/32495713): We aren't checking the called computations. + break; + + // Non functional ops. + case HloOpcode::kRng: + case HloOpcode::kCrossReplicaSum: + // TODO(b/33009255): Implmement constant folding for cross replica sum. + case HloOpcode::kInfeed: + case HloOpcode::kOutfeed: + case HloOpcode::kHostCompute: + case HloOpcode::kCall: + // TODO(b/32495713): We aren't checking the to_apply computation itself, + // so we conservatively say that computations containing the Call op + // cannot be constant. We cannot set is_functional=false in other similar + // cases since we're already relying on IsConstant to return true. + case HloOpcode::kCustomCall: + case HloOpcode::kWhile: + // TODO(b/32495713): We aren't checking the condition and body + // computations themselves. 
+ case HloOpcode::kSend: + case HloOpcode::kRecv: + case HloOpcode::kParameter: + *is_constant = false; + break; + } + if (!*is_constant) { + VLOG(1) << "Non-constant: " << instr.name(); + } + visited->insert(op_handle); } XlaComputation XlaBuilder::BuildAndNoteError() { @@ -148,23 +231,25 @@ StatusOr XlaBuilder::Build() { } HloComputationProto entry; - entry.set_name(name_); + entry.set_id(GetUniqueId()); // Give the computation a global unique id. + entry.set_name(StrCat(name_, entry.id())); // Ensure that the name is unique. { int64 root_id; - ProgramShape program_shape; - TF_ASSIGN_OR_RETURN(program_shape, GetProgramShape(&root_id)); - entry.mutable_program_shape()->Swap(&program_shape); + TF_ASSIGN_OR_RETURN(*entry.mutable_program_shape(), + GetProgramShape(&root_id)); entry.set_root_id(root_id); } for (auto& instruction : instructions_) { + // Ensures that the instruction names are unique among the whole graph. + const string& new_name = + StrCat(instruction.name(), ".", entry.id(), ".", instruction.id()); + instruction.set_name(new_name); entry.add_instructions()->Swap(&instruction); } - const int64 id = GetUniqueId(); - entry.set_id(id); - XlaComputation computation(id); + XlaComputation computation(entry.id()); HloModuleProto* module = computation.mutable_proto(); module->set_name(entry.name()); module->set_id(entry.id()); @@ -187,6 +272,8 @@ StatusOr XlaBuilder::Build() { StatusOr XlaBuilder::InDimBroadcast( const Shape& shape, const XlaOp& operand, tensorflow::gtl::ArraySlice broadcast_dimensions) { + TF_RETURN_IF_ERROR(first_error_); + HloInstructionProto instr; *instr.mutable_shape() = shape; for (int64 dim : broadcast_dimensions) { @@ -197,6 +284,8 @@ StatusOr XlaBuilder::InDimBroadcast( StatusOr XlaBuilder::AddBroadcastSequence(const Shape& output_shape, const XlaOp& operand) { + TF_RETURN_IF_ERROR(first_error_); + TF_ASSIGN_OR_RETURN(const Shape& operand_shape, operand.GetShape()); CHECK(ShapeUtil::IsScalar(operand_shape) || @@ -240,7 +329,7 @@ 
XlaOp XlaBuilder::UnaryOp(HloOpcode unop, const XlaOp& operand) { TF_ASSIGN_OR_RETURN(*instr.mutable_shape(), ShapeInference::InferUnaryOpShape(unop, operand_shape)); return AddInstruction(std::move(instr), unop, {operand}); - }()); + }); } XlaOp XlaBuilder::BinaryOp( @@ -297,7 +386,7 @@ XlaOp XlaBuilder::BinaryOp( } return AddInstruction(std::move(instr), binop, {updated_lhs, updated_rhs}); - }()); + }); } XlaOp XlaBuilder::TernaryOp(HloOpcode triop, const XlaOp& lhs, const XlaOp& rhs, @@ -335,7 +424,7 @@ XlaOp XlaBuilder::TernaryOp(HloOpcode triop, const XlaOp& lhs, const XlaOp& rhs, } return AddInstruction(std::move(instr), triop, {updated_lhs, updated_rhs, updated_ehs}); - }()); + }); } XlaOp XlaBuilder::Add(const XlaOp& lhs, const XlaOp& rhs, @@ -354,7 +443,7 @@ XlaOp XlaBuilder::ConstantLiteral(const Literal& literal) { *instr.mutable_shape() = literal.shape(); *instr.mutable_literal() = literal.ToProto(); return AddInstruction(std::move(instr), HloOpcode::kConstant); - }()); + }); } XlaOp XlaBuilder::Call(const XlaComputation& computation, @@ -362,11 +451,7 @@ XlaOp XlaBuilder::Call(const XlaComputation& computation, return NoteErrorOrReturn([&]() -> StatusOr { HloInstructionProto instr; std::vector operand_shape_ptrs; - std::vector operand_shapes; - for (const auto& operand : operands) { - TF_ASSIGN_OR_RETURN(const Shape& shape, operand.GetShape()); - operand_shapes.push_back(shape); - } + TF_ASSIGN_OR_RETURN(const auto& operand_shapes, GetOperandShapes(operands)); c_transform(operand_shapes, std::back_inserter(operand_shape_ptrs), [](const Shape& shape) { return &shape; }); TF_ASSIGN_OR_RETURN(const ProgramShape& called_program_shape, @@ -376,31 +461,25 @@ XlaOp XlaBuilder::Call(const XlaComputation& computation, ShapeInference::InferCallShape(operand_shape_ptrs, /*to_apply=*/called_program_shape)); - // Add called computation. 
- instr.add_called_computation_ids( - computation.proto().entry_computation_id()); - for (const HloComputationProto& e : computation.proto().computations()) { - embedded_.insert({e.id(), e}); - } + AddCalledComputation(computation, &instr); return AddInstruction(std::move(instr), HloOpcode::kCall, operands); - }()); + }); } XlaOp XlaBuilder::Parameter(int64 parameter_number, const Shape& shape, const string& name) { return NoteErrorOrReturn([&]() -> StatusOr { HloInstructionProto instr; - if (parameter_numbers_.find(parameter_number) != parameter_numbers_.end()) { + if (!parameter_numbers_.insert(parameter_number).second) { return InvalidArgument("parameter %lld already registered", parameter_number); } - parameter_numbers_.insert(parameter_number); instr.set_parameter_number(parameter_number); instr.set_name(name); *instr.mutable_shape() = shape; return AddInstruction(std::move(instr), HloOpcode::kParameter); - }()); + }); } XlaOp XlaBuilder::Broadcast( @@ -424,10 +503,12 @@ XlaOp XlaBuilder::Broadcast( dimensions[i] = i + ShapeUtil::Rank(shape) - operand_rank; } return InDimBroadcast(shape, operand, dimensions); - }()); + }); } StatusOr XlaBuilder::Reshape(const Shape& shape, const XlaOp& operand) { + TF_RETURN_IF_ERROR(first_error_); + HloInstructionProto instr; *instr.mutable_shape() = shape; return AddInstruction(std::move(instr), HloOpcode::kReshape, {operand}); @@ -437,32 +518,115 @@ XlaOp XlaBuilder::Slice(const XlaOp& operand, tensorflow::gtl::ArraySlice start_indices, tensorflow::gtl::ArraySlice limit_indices, tensorflow::gtl::ArraySlice strides) { - return UnimplementedOp(); + return NoteErrorOrReturn([&]() -> StatusOr { + HloInstructionProto instr; + TF_ASSIGN_OR_RETURN(const Shape& operand_shape, GetShape(operand)); + TF_ASSIGN_OR_RETURN( + *instr.mutable_shape(), + ShapeInference::InferSliceShape(operand_shape, start_indices, + limit_indices, strides)); + for (int i = 0; i < start_indices.size(); i++) { + auto* slice_config = 
instr.add_slice_dimensions(); + slice_config->set_start(start_indices[i]); + slice_config->set_limit(limit_indices[i]); + slice_config->set_stride(strides[i]); + } + + return AddInstruction(std::move(instr), HloOpcode::kSlice, {operand}); + }); } XlaOp XlaBuilder::SliceInDim(const XlaOp& operand, int64 start_index, int64 limit_index, int64 stride, int64 dimno) { - return UnimplementedOp(); + return NoteErrorOrReturn([&]() -> StatusOr { + TF_ASSIGN_OR_RETURN(const Shape& shape, GetShape(operand)); + std::vector starts(ShapeUtil::Rank(shape), 0); + std::vector limits(shape.dimensions().begin(), + shape.dimensions().end()); + std::vector strides(ShapeUtil::Rank(shape), 1); + starts[dimno] = start_index; + limits[dimno] = limit_index; + strides[dimno] = stride; + return Slice(operand, starts, limits, strides); + }); } XlaOp XlaBuilder::DynamicSlice(const XlaOp& operand, const XlaOp& start_indices, tensorflow::gtl::ArraySlice slice_sizes) { - return UnimplementedOp(); + return NoteErrorOrReturn([&]() -> StatusOr { + HloInstructionProto instr; + + TF_ASSIGN_OR_RETURN(const Shape& operand_shape, GetShape(operand)); + TF_ASSIGN_OR_RETURN(const Shape& start_indices_shape, + GetShape(start_indices)); + TF_ASSIGN_OR_RETURN(*instr.mutable_shape(), + ShapeInference::InferDynamicSliceShape( + operand_shape, start_indices_shape, slice_sizes)); + + for (int64 size : slice_sizes) { + instr.add_dynamic_slice_sizes(size); + } + + return AddInstruction(std::move(instr), HloOpcode::kDynamicSlice, + {operand, start_indices}); + }); } XlaOp XlaBuilder::DynamicUpdateSlice(const XlaOp& operand, const XlaOp& update, const XlaOp& start_indices) { - return UnimplementedOp(); + return NoteErrorOrReturn([&]() -> StatusOr { + HloInstructionProto instr; + + TF_ASSIGN_OR_RETURN(const Shape& operand_shape, GetShape(operand)); + TF_ASSIGN_OR_RETURN(const Shape& update_shape, GetShape(update)); + TF_ASSIGN_OR_RETURN(const Shape& start_indices_shape, + GetShape(start_indices)); + 
TF_ASSIGN_OR_RETURN(*instr.mutable_shape(), + ShapeInference::InferDynamicUpdateSliceShape( + operand_shape, update_shape, start_indices_shape)); + + return AddInstruction(std::move(instr), HloOpcode::kDynamicUpdateSlice, + {operand, update, start_indices}); + }); } XlaOp XlaBuilder::ConcatInDim(tensorflow::gtl::ArraySlice operands, int64 dimension) { - return UnimplementedOp(); + return NoteErrorOrReturn([&]() -> StatusOr { + HloInstructionProto instr; + + std::vector operand_shape_ptrs; + TF_ASSIGN_OR_RETURN(const auto& operand_shapes, GetOperandShapes(operands)); + c_transform(operand_shapes, std::back_inserter(operand_shape_ptrs), + [](const Shape& shape) { return &shape; }); + TF_ASSIGN_OR_RETURN( + *instr.mutable_shape(), + ShapeInference::InferConcatOpShape(operand_shape_ptrs, dimension)); + + instr.add_dimensions(dimension); + + return AddInstruction(std::move(instr), HloOpcode::kConcatenate, operands); + }); } XlaOp XlaBuilder::Pad(const XlaOp& operand, const XlaOp& padding_value, const PaddingConfig& padding_config) { - return UnimplementedOp(); + return NoteErrorOrReturn([&]() -> StatusOr { + HloInstructionProto instr; + + TF_ASSIGN_OR_RETURN(const Shape& operand_shape, GetShape(operand)); + TF_ASSIGN_OR_RETURN(const Shape& padding_value_shape, + GetShape(padding_value)); + TF_ASSIGN_OR_RETURN( + *instr.mutable_shape(), + ShapeInference::InferPadShape(operand_shape, padding_value_shape, + padding_config)); + + *instr.mutable_padding_config() = padding_config; + + return AddInstruction(std::move(instr), HloOpcode::kPad, + {operand, padding_value}); + }); } XlaOp XlaBuilder::Reshape(const XlaOp& operand, @@ -477,7 +641,7 @@ XlaOp XlaBuilder::Reshape(const XlaOp& operand, ? 
operand : Transpose(operand, dimensions); return Reshape(shape, transposed); - }()); + }); } XlaOp XlaBuilder::Reshape(const XlaOp& operand, @@ -487,16 +651,59 @@ XlaOp XlaBuilder::Reshape(const XlaOp& operand, std::vector dimensions(shape.dimensions_size()); std::iota(dimensions.begin(), dimensions.end(), 0); return Reshape(operand, dimensions, new_sizes); - }()); + }); } XlaOp XlaBuilder::Collapse(const XlaOp& operand, tensorflow::gtl::ArraySlice dimensions) { - return UnimplementedOp(); + return NoteErrorOrReturn([&]() -> StatusOr { + if (dimensions.size() <= 1) { + // Not collapsing anything, trivially we can return the operand versus + // enqueueing a trivial reshape. + return operand; + } + + // Out-of-order collapse is not supported. + // Checks that the collapsed dimensions are in order and consecutive. + for (tensorflow::gtl::ArraySlice::size_type i = 1; + i < dimensions.size(); ++i) { + if (dimensions[i] - 1 != dimensions[i - 1]) { + return InvalidArgument( + "Collapsed dimensions are not in consecutive order."); + } + } + + // Create a new sizes vector from the old shape, replacing the collapsed + // dimensions by the product of their sizes. 
+ TF_ASSIGN_OR_RETURN(const Shape& original_shape, GetShape(operand)); + + VLOG(3) << "original shape: " << ShapeUtil::HumanString(original_shape); + VLOG(3) << "dims to collapse: " + << tensorflow::str_util::Join(dimensions, ","); + + std::vector new_sizes; + for (int i = 0; i < ShapeUtil::Rank(original_shape); ++i) { + if (i <= dimensions.front() || i > dimensions.back()) { + new_sizes.push_back(original_shape.dimensions(i)); + } else { + new_sizes.back() *= original_shape.dimensions(i); + } + } + + VLOG(3) << "new sizes: [" << tensorflow::str_util::Join(new_sizes, ",") + << "]"; + + return Reshape(operand, new_sizes); + }); } void XlaBuilder::Trace(const string& tag, const XlaOp& operand) { - UnimplementedOp(); + NoteErrorOrReturn([&]() -> StatusOr { + HloInstructionProto instr; + *instr.mutable_shape() = ShapeUtil::MakeNil(); + *instr.mutable_literal() = Literal::CreateR1U8(tag)->ToProto(); + return AddInstruction(std::move(instr), HloOpcode::kTrace, {operand}); + }); } XlaOp XlaBuilder::Select(const XlaOp& pred, const XlaOp& on_true, @@ -508,18 +715,14 @@ XlaOp XlaBuilder::Tuple(tensorflow::gtl::ArraySlice elements) { return NoteErrorOrReturn([&]() -> StatusOr { HloInstructionProto instr; std::vector operand_shape_ptrs; - std::vector operand_shapes; - for (const XlaOp& e : elements) { - TF_ASSIGN_OR_RETURN(const Shape& shape, GetShape(e)); - operand_shapes.push_back(shape); - } + TF_ASSIGN_OR_RETURN(const auto& operand_shapes, GetOperandShapes(elements)); c_transform(operand_shapes, std::back_inserter(operand_shape_ptrs), [](const Shape& shape) { return &shape; }); TF_ASSIGN_OR_RETURN(*instr.mutable_shape(), ShapeInference::InferVariadicOpShape( HloOpcode::kTuple, operand_shape_ptrs)); return AddInstruction(std::move(instr), HloOpcode::kTuple, elements); - }()); + }); } XlaOp XlaBuilder::GetTupleElement(const XlaOp& tuple_data, int64 index) { @@ -538,7 +741,7 @@ XlaOp XlaBuilder::GetTupleElement(const XlaOp& tuple_data, int64 index) { return 
AddInstruction(std::move(instr), HloOpcode::kGetTupleElement, {tuple_data}); - }()); + }); } XlaOp XlaBuilder::Eq(const XlaOp& lhs, const XlaOp& rhs, @@ -572,32 +775,126 @@ XlaOp XlaBuilder::Lt(const XlaOp& lhs, const XlaOp& rhs, } XlaOp XlaBuilder::Dot(const XlaOp& lhs, const XlaOp& rhs) { - return UnimplementedOp(); + return NoteErrorOrReturn([&]() -> StatusOr { + TF_ASSIGN_OR_RETURN(const Shape& lhs_shape, GetShape(lhs)); + + DotDimensionNumbers dimension_numbers; + dimension_numbers.add_lhs_contracting_dimensions( + lhs_shape.dimensions_size() == 1 ? 0 : 1); + dimension_numbers.add_rhs_contracting_dimensions(0); + return DotGeneral(lhs, rhs, dimension_numbers); + }); } XlaOp XlaBuilder::DotGeneral(const XlaOp& lhs, const XlaOp& rhs, const DotDimensionNumbers& dimension_numbers) { - return UnimplementedOp(); + return NoteErrorOrReturn([&]() -> StatusOr { + HloInstructionProto instr; + TF_ASSIGN_OR_RETURN(const Shape& lhs_shape, GetShape(lhs)); + TF_ASSIGN_OR_RETURN(const Shape& rhs_shape, GetShape(rhs)); + TF_ASSIGN_OR_RETURN(*instr.mutable_shape(), + ShapeInference::InferDotOpShape(lhs_shape, rhs_shape, + dimension_numbers)); + *instr.mutable_dot_dimension_numbers() = dimension_numbers; + return AddInstruction(std::move(instr), HloOpcode::kDot, {lhs, rhs}); + }); +} + +Status XlaBuilder::VerifyConvolution( + const Shape& lhs_shape, const Shape& rhs_shape, + const ConvolutionDimensionNumbers& dimension_numbers) const { + if (ShapeUtil::Rank(lhs_shape) != ShapeUtil::Rank(rhs_shape)) { + return InvalidArgument( + "Convolution arguments must have same number of " + "dimensions. Got: %s and %s", + ShapeUtil::HumanString(lhs_shape).c_str(), + ShapeUtil::HumanString(rhs_shape).c_str()); + } + int num_dims = ShapeUtil::Rank(lhs_shape); + if (num_dims < 2) { + return InvalidArgument( + "Convolution expects argument arrays with >= 3 dimensions. 
" + "Got: %s and %s", + ShapeUtil::HumanString(lhs_shape).c_str(), + ShapeUtil::HumanString(rhs_shape).c_str()); + } + int num_spatial_dims = num_dims - 2; + + const auto check_spatial_dimensions = + [&](const char* const field_name, + const tensorflow::protobuf::RepeatedField& + numbers) { + if (numbers.size() != num_spatial_dims) { + return InvalidArgument("Expected %d elements for %s, but got %d.", + num_spatial_dims, field_name, numbers.size()); + } + for (int i = 0; i < numbers.size(); ++i) { + if (numbers.Get(i) < 0 || numbers.Get(i) >= num_dims) { + return InvalidArgument("Convolution %s[%d] is out of bounds: %lld", + field_name, i, numbers.Get(i)); + } + } + return Status::OK(); + }; + TF_RETURN_IF_ERROR( + check_spatial_dimensions("input_spatial_dimensions", + dimension_numbers.input_spatial_dimensions())); + TF_RETURN_IF_ERROR( + check_spatial_dimensions("kernel_spatial_dimensions", + dimension_numbers.kernel_spatial_dimensions())); + return check_spatial_dimensions( + "output_spatial_dimensions", + dimension_numbers.output_spatial_dimensions()); } XlaOp XlaBuilder::Conv(const XlaOp& lhs, const XlaOp& rhs, tensorflow::gtl::ArraySlice window_strides, Padding padding) { - return UnimplementedOp(); + return ConvWithGeneralDimensions( + lhs, rhs, window_strides, padding, + CreateDefaultConvDimensionNumbers(window_strides.size())); } XlaOp XlaBuilder::ConvWithGeneralPadding( const XlaOp& lhs, const XlaOp& rhs, tensorflow::gtl::ArraySlice window_strides, tensorflow::gtl::ArraySlice> padding) { - return UnimplementedOp(); + return ConvGeneral(lhs, rhs, window_strides, padding, + CreateDefaultConvDimensionNumbers(window_strides.size())); } XlaOp XlaBuilder::ConvWithGeneralDimensions( const XlaOp& lhs, const XlaOp& rhs, tensorflow::gtl::ArraySlice window_strides, Padding padding, const ConvolutionDimensionNumbers& dimension_numbers) { - return UnimplementedOp(); + return NoteErrorOrReturn([&]() -> StatusOr { + TF_ASSIGN_OR_RETURN(const Shape& lhs_shape, 
GetShape(lhs)); + TF_ASSIGN_OR_RETURN(const Shape& rhs_shape, GetShape(rhs)); + + TF_RETURN_IF_ERROR( + VerifyConvolution(lhs_shape, rhs_shape, dimension_numbers)); + + std::vector base_area_dimensions( + dimension_numbers.input_spatial_dimensions_size()); + for (std::vector::size_type i = 0; i < base_area_dimensions.size(); + ++i) { + base_area_dimensions[i] = + lhs_shape.dimensions(dimension_numbers.input_spatial_dimensions(i)); + } + + std::vector window_dimensions( + dimension_numbers.kernel_spatial_dimensions_size()); + for (std::vector::size_type i = 0; i < window_dimensions.size(); + ++i) { + window_dimensions[i] = + rhs_shape.dimensions(dimension_numbers.kernel_spatial_dimensions(i)); + } + + return ConvGeneral(lhs, rhs, window_strides, + MakePadding(base_area_dimensions, window_dimensions, + window_strides, padding), + dimension_numbers); + }); } XlaOp XlaBuilder::ConvGeneral( @@ -605,7 +902,8 @@ XlaOp XlaBuilder::ConvGeneral( tensorflow::gtl::ArraySlice window_strides, tensorflow::gtl::ArraySlice> padding, const ConvolutionDimensionNumbers& dimension_numbers) { - return UnimplementedOp(); + return ConvGeneralDilated(lhs, rhs, window_strides, padding, {}, {}, + dimension_numbers); } XlaOp XlaBuilder::ConvGeneralDilated( @@ -615,33 +913,174 @@ XlaOp XlaBuilder::ConvGeneralDilated( tensorflow::gtl::ArraySlice lhs_dilation, tensorflow::gtl::ArraySlice rhs_dilation, const ConvolutionDimensionNumbers& dimension_numbers) { - return UnimplementedOp(); + return NoteErrorOrReturn([&]() -> StatusOr { + HloInstructionProto instr; + TF_ASSIGN_OR_RETURN(const Shape& lhs_shape, GetShape(lhs)); + TF_ASSIGN_OR_RETURN(const Shape& rhs_shape, GetShape(rhs)); + TF_RETURN_IF_ERROR( + VerifyConvolution(lhs_shape, rhs_shape, dimension_numbers)); + + std::vector window_dimensions( + dimension_numbers.kernel_spatial_dimensions_size()); + for (std::vector::size_type i = 0; i < window_dimensions.size(); + ++i) { + window_dimensions[i] = + 
rhs_shape.dimensions(dimension_numbers.kernel_spatial_dimensions(i)); + } + TF_ASSIGN_OR_RETURN(*instr.mutable_window(), + MakeWindow(window_dimensions, window_strides, padding, + lhs_dilation, rhs_dilation)); + + TF_ASSIGN_OR_RETURN( + *instr.mutable_shape(), + ShapeInference::InferConvolveShape(lhs_shape, rhs_shape, instr.window(), + dimension_numbers)); + + *instr.mutable_convolution_dimension_numbers() = dimension_numbers; + + return AddInstruction(std::move(instr), HloOpcode::kConvolution, + {lhs, rhs}); + }); +} + +StatusOr XlaBuilder::MakeWindow( + tensorflow::gtl::ArraySlice window_dimensions, + tensorflow::gtl::ArraySlice window_strides, + tensorflow::gtl::ArraySlice> padding, + tensorflow::gtl::ArraySlice lhs_dilation, + tensorflow::gtl::ArraySlice rhs_dilation) const { + const auto verify_size = [&](const size_t x, const char* x_name) { + if (x == 0 || x == window_dimensions.size()) { + return Status::OK(); + } else { + return InvalidArgument( + "%s", tensorflow::strings::StrCat( + "Window has different number of window dimensions than of ", + x_name, + "\nNumber of window dimensions: ", window_dimensions.size(), + "\nNumber of ", x_name, ": ", x, "\n") + .c_str()); + } + }; + TF_RETURN_IF_ERROR(verify_size(window_strides.size(), "window strides")); + TF_RETURN_IF_ERROR(verify_size(padding.size(), "padding entries")); + TF_RETURN_IF_ERROR(verify_size(lhs_dilation.size(), "lhs dilation factors")); + TF_RETURN_IF_ERROR(verify_size(rhs_dilation.size(), "rhs dilation factors")); + + Window window; + for (size_t i = 0; i < window_dimensions.size(); i++) { + auto dim = window.add_dimensions(); + dim->set_size(window_dimensions[i]); + if (!window_strides.empty()) { + dim->set_stride(window_strides[i]); + } else { + dim->set_stride(1); + } + if (!padding.empty()) { + dim->set_padding_low(padding[i].first); + dim->set_padding_high(padding[i].second); + } else { + dim->set_padding_low(0); + dim->set_padding_high(0); + } + if (!lhs_dilation.empty()) { + 
dim->set_base_dilation(lhs_dilation[i]); + } else { + dim->set_base_dilation(1); + } + if (!rhs_dilation.empty()) { + dim->set_window_dilation(rhs_dilation[i]); + } else { + dim->set_window_dilation(1); + } + dim->set_window_reversal(false); + } + return window; } XlaOp XlaBuilder::Fft(const XlaOp& operand, const FftType fft_type, const tensorflow::gtl::ArraySlice fft_length) { - return UnimplementedOp(); + return NoteErrorOrReturn([&]() -> StatusOr { + HloInstructionProto instr; + TF_ASSIGN_OR_RETURN(const Shape& operand_shape, operand.GetShape()); + TF_ASSIGN_OR_RETURN( + *instr.mutable_shape(), + ShapeInference::InferFftShape(operand_shape, fft_type, fft_length)); + + instr.set_fft_type(fft_type); + for (int64 i : fft_length) { + instr.add_fft_length(i); + } + + return AddInstruction(std::move(instr), HloOpcode::kFft, {operand}); + }); } XlaOp XlaBuilder::Infeed(const Shape& shape, const string& config) { - return UnimplementedOp(); + return NoteErrorOrReturn([&]() -> StatusOr { + HloInstructionProto instr; + if (!LayoutUtil::HasLayout(shape)) { + return InvalidArgument("Given shape to Infeed must have a layout"); + } + *instr.mutable_shape() = shape; + instr.set_infeed_config(config); + return AddInstruction(std::move(instr), HloOpcode::kInfeed); + }); } void XlaBuilder::Outfeed(const XlaOp& operand, const Shape& shape_with_layout, const string& outfeed_config) { - UnimplementedOp(); + NoteErrorOrReturn([&]() -> StatusOr { + HloInstructionProto instr; + + *instr.mutable_shape() = ShapeUtil::MakeNil(); + + // Check and set outfeed shape. 
+ if (!LayoutUtil::HasLayout(shape_with_layout)) { + return InvalidArgument("Given shape to Outfeed must have a layout"); + } + TF_ASSIGN_OR_RETURN(const Shape& operand_shape, GetShape(operand)); + if (!ShapeUtil::Compatible(operand_shape, shape_with_layout)) { + return InvalidArgument( + "Outfeed shape %s must be compatible with operand shape %s", + ShapeUtil::HumanStringWithLayout(shape_with_layout).c_str(), + ShapeUtil::HumanStringWithLayout(operand_shape).c_str()); + } + *instr.mutable_outfeed_shape() = shape_with_layout; + + instr.set_outfeed_config(outfeed_config); + + return AddInstruction(std::move(instr), HloOpcode::kOutfeed, {operand}); + }); } XlaOp XlaBuilder::CustomCall(const string& call_target_name, tensorflow::gtl::ArraySlice operands, const Shape& shape) { - return UnimplementedOp(); + return NoteErrorOrReturn([&]() -> StatusOr { + HloInstructionProto instr; + if (tensorflow::str_util::StartsWith(call_target_name, "$")) { + return InvalidArgument( + "Invalid custom_call_target \"%s\": Call targets that start with '$' " + "are reserved for internal use.", + call_target_name.c_str()); + } + *instr.mutable_shape() = shape; + instr.set_custom_call_target(call_target_name); + return AddInstruction(std::move(instr), HloOpcode::kCustomCall, operands); + }); } XlaOp XlaBuilder::HostCompute(tensorflow::gtl::ArraySlice operands, const string& channel_name, int64 cost_estimate_ns, const Shape& shape) { - return UnimplementedOp(); + return NoteErrorOrReturn([&]() -> StatusOr { + HloInstructionProto instr; + *instr.mutable_shape() = shape; + instr.set_channel_name(channel_name); + instr.set_cost_estimate_ns(cost_estimate_ns); + return AddInstruction(std::move(instr), HloOpcode::kHostCompute, operands); + }); } XlaOp XlaBuilder::Complex( @@ -650,7 +1089,9 @@ XlaOp XlaBuilder::Complex( return BinaryOp(HloOpcode::kComplex, real, imag, broadcast_dimensions); } -XlaOp XlaBuilder::Conj(const XlaOp& operand) { return UnimplementedOp(); } +XlaOp XlaBuilder::Conj(const 
XlaOp& operand) { + return Complex(Real(operand), Neg(Imag(operand))); +} XlaOp XlaBuilder::Sub(const XlaOp& lhs, const XlaOp& rhs, tensorflow::gtl::ArraySlice broadcast_dimensions) { @@ -752,6 +1193,10 @@ XlaOp XlaBuilder::Sign(const XlaOp& operand) { return UnaryOp(HloOpcode::kSign, operand); } +XlaOp XlaBuilder::Clz(const XlaOp& operand) { + return UnaryOp(HloOpcode::kClz, operand); +} + XlaOp XlaBuilder::Cos(const XlaOp& operand) { return UnaryOp(HloOpcode::kCos, operand); } @@ -788,12 +1233,22 @@ XlaOp XlaBuilder::Transpose(const XlaOp& operand, instr.add_dimensions(dim); } return AddInstruction(std::move(instr), HloOpcode::kTranspose, {operand}); - }()); + }); } XlaOp XlaBuilder::Rev(const XlaOp& operand, tensorflow::gtl::ArraySlice dimensions) { - return UnimplementedOp(); + return NoteErrorOrReturn([&]() -> StatusOr { + HloInstructionProto instr; + TF_ASSIGN_OR_RETURN(const Shape& operand_shape, GetShape(operand)); + TF_ASSIGN_OR_RETURN( + *instr.mutable_shape(), + ShapeInference::InferReverseShape(operand_shape, dimensions)); + for (int64 dim : dimensions) { + instr.add_dimensions(dim); + } + return AddInstruction(std::move(instr), HloOpcode::kReverse, {operand}); + }); } XlaOp XlaBuilder::Sort(const XlaOp& operand) { @@ -812,12 +1267,27 @@ XlaOp XlaBuilder::Pow(const XlaOp& lhs, const XlaOp& rhs, XlaOp XlaBuilder::ConvertElementType(const XlaOp& operand, PrimitiveType new_element_type) { - return UnimplementedOp(); + return NoteErrorOrReturn([&]() -> StatusOr { + HloInstructionProto instr; + TF_ASSIGN_OR_RETURN(const Shape& operand_shape, GetShape(operand)); + TF_ASSIGN_OR_RETURN( + *instr.mutable_shape(), + ShapeInference::InferConvertShape(operand_shape, new_element_type)); + return AddInstruction(std::move(instr), HloOpcode::kConvert, {operand}); + }); } XlaOp XlaBuilder::BitcastConvertType(const XlaOp& operand, PrimitiveType new_element_type) { - return UnimplementedOp(); + return NoteErrorOrReturn([&]() -> StatusOr { + HloInstructionProto instr; + 
TF_ASSIGN_OR_RETURN(const Shape& operand_shape, GetShape(operand)); + TF_ASSIGN_OR_RETURN( + *instr.mutable_shape(), + ShapeInference::InferConvertShape(operand_shape, new_element_type)); + return AddInstruction(std::move(instr), HloOpcode::kBitcastConvert, + {operand}); + }); } XlaOp XlaBuilder::SquareF32(const XlaOp& operand) { @@ -843,47 +1313,181 @@ XlaOp XlaBuilder::Map(tensorflow::gtl::ArraySlice operands, const XlaComputation& computation, tensorflow::gtl::ArraySlice dimensions, tensorflow::gtl::ArraySlice static_operands) { - return UnimplementedOp(); + return NoteErrorOrReturn([&]() -> StatusOr { + if (!static_operands.empty()) { + return Unimplemented("static_operands is not supported in Map"); + } + + HloInstructionProto instr; + + std::vector operand_shape_ptrs; + TF_ASSIGN_OR_RETURN(const auto& operand_shapes, GetOperandShapes(operands)); + c_transform(operand_shapes, std::back_inserter(operand_shape_ptrs), + [](const Shape& shape) { return &shape; }); + TF_ASSIGN_OR_RETURN(const ProgramShape& called_program_shape, + computation.GetProgramShape()); + TF_ASSIGN_OR_RETURN( + *instr.mutable_shape(), + ShapeInference::InferMapShape(operand_shape_ptrs, called_program_shape, + dimensions)); + + AddCalledComputation(computation, &instr); + + return AddInstruction(std::move(instr), HloOpcode::kMap, operands); + }); +} + +XlaOp XlaBuilder::RngOp(RandomDistribution distribution, + tensorflow::gtl::ArraySlice parameters, + const Shape& shape) { + return NoteErrorOrReturn([&]() -> StatusOr { + HloInstructionProto instr; + + // Check the number of parameters per RNG distribution. 
+ switch (distribution) { + case RandomDistribution::RNG_NORMAL: + case RandomDistribution::RNG_UNIFORM: + if (parameters.size() != 2) { + return InvalidArgument( + "RNG distribution (%s) expects 2 parameters, but got %ld", + RandomDistribution_Name(distribution).c_str(), parameters.size()); + } + break; + default: + LOG(FATAL) << "unhandled distribution " << distribution; + } + + TF_RETURN_IF_ERROR(ShapeUtil::ValidateShapeWithOptionalLayout(shape)); + *instr.mutable_shape() = shape; + + instr.set_distribution(distribution); + + return AddInstruction(std::move(instr), HloOpcode::kRng, parameters); + }); } XlaOp XlaBuilder::RngNormal(const XlaOp& mu, const XlaOp& sigma, const Shape& shape) { - return UnimplementedOp(); + return RngOp(RandomDistribution::RNG_NORMAL, {mu, sigma}, shape); } XlaOp XlaBuilder::RngUniform(const XlaOp& a, const XlaOp& b, const Shape& shape) { - return UnimplementedOp(); + return RngOp(RandomDistribution::RNG_UNIFORM, {a, b}, shape); } XlaOp XlaBuilder::While(const XlaComputation& condition, const XlaComputation& body, const XlaOp& init) { - return UnimplementedOp(); + return NoteErrorOrReturn([&]() -> StatusOr { + HloInstructionProto instr; + + // Infer shape. + TF_ASSIGN_OR_RETURN(const auto& body_program_shape, body.GetProgramShape()); + TF_ASSIGN_OR_RETURN(const auto& condition_program_shape, + condition.GetProgramShape()); + TF_ASSIGN_OR_RETURN(const Shape& init_shape, GetShape(init)); + TF_ASSIGN_OR_RETURN( + *instr.mutable_shape(), + ShapeInference::InferWhileShape(condition_program_shape, + body_program_shape, init_shape)); + // Body comes before condition computation in the vector. 
+ AddCalledComputation(body, &instr); + AddCalledComputation(condition, &instr); + return AddInstruction(std::move(instr), HloOpcode::kWhile, {init}); + }); } XlaOp XlaBuilder::Gather(const XlaOp& input, const XlaOp& gather_indices, const GatherDimensionNumbers& dimension_numbers, tensorflow::gtl::ArraySlice window_bounds) { - return UnimplementedOp(); + return NoteErrorOrReturn([&]() -> StatusOr { + HloInstructionProto instr; + + TF_ASSIGN_OR_RETURN(const Shape& input_shape, GetShape(input)); + TF_ASSIGN_OR_RETURN(const Shape& gather_indices_shape, + GetShape(gather_indices)); + TF_ASSIGN_OR_RETURN( + *instr.mutable_shape(), + ShapeInference::InferGatherShape(input_shape, gather_indices_shape, + dimension_numbers, window_bounds)); + + *instr.mutable_gather_dimension_numbers() = dimension_numbers; + for (int64 bound : window_bounds) { + instr.add_gather_window_bounds(bound); + } + + return AddInstruction(std::move(instr), HloOpcode::kGather, + {input, gather_indices}); + }); } XlaOp XlaBuilder::Conditional(const XlaOp& predicate, const XlaOp& true_operand, const XlaComputation& true_computation, const XlaOp& false_operand, const XlaComputation& false_computation) { - return UnimplementedOp(); + return NoteErrorOrReturn([&]() -> StatusOr { + HloInstructionProto instr; + + TF_ASSIGN_OR_RETURN(const Shape& predicate_shape, GetShape(predicate)); + TF_ASSIGN_OR_RETURN(const Shape& true_operand_shape, + GetShape(true_operand)); + TF_ASSIGN_OR_RETURN(const ProgramShape& true_computation_shape, + true_computation.GetProgramShape()); + TF_ASSIGN_OR_RETURN(const Shape& false_operand_shape, + GetShape(false_operand)); + TF_ASSIGN_OR_RETURN(const ProgramShape& false_computation_shape, + false_computation.GetProgramShape()); + TF_ASSIGN_OR_RETURN( + *instr.mutable_shape(), + ShapeInference::InferConditionalShape( + predicate_shape, true_operand_shape, false_operand_shape, + true_computation_shape, false_computation_shape)); + + // The index of true_computation must be 0 and 
that of false computation + // must be 1. + AddCalledComputation(true_computation, &instr); + AddCalledComputation(false_computation, &instr); + + return AddInstruction(std::move(instr), HloOpcode::kConditional, + {predicate, true_operand, false_operand}); + }); } XlaOp XlaBuilder::Reduce( const XlaOp& operand, const XlaOp& init_value, const XlaComputation& computation, tensorflow::gtl::ArraySlice dimensions_to_reduce) { - return UnimplementedOp(); + return NoteErrorOrReturn([&]() -> StatusOr { + HloInstructionProto instr; + + TF_ASSIGN_OR_RETURN(const Shape& operand_shape, GetShape(operand)); + TF_ASSIGN_OR_RETURN(const Shape& init_shape, GetShape(init_value)); + TF_ASSIGN_OR_RETURN(const ProgramShape& called_program_shape, + computation.GetProgramShape()); + TF_ASSIGN_OR_RETURN(*instr.mutable_shape(), + ShapeInference::InferReduceShape( + operand_shape, init_shape, dimensions_to_reduce, + called_program_shape)); + + for (int64 dim : dimensions_to_reduce) { + instr.add_dimensions(dim); + } + + AddCalledComputation(computation, &instr); + + return AddInstruction(std::move(instr), HloOpcode::kReduce, + {operand, init_value}); + }); } XlaOp XlaBuilder::ReduceAll(const XlaOp& operand, const XlaOp& init_value, const XlaComputation& computation) { - return UnimplementedOp(); + return NoteErrorOrReturn([&]() -> StatusOr { + TF_ASSIGN_OR_RETURN(const Shape& operand_shape, GetShape(operand)); + std::vector all_dimnos(ShapeUtil::Rank(operand_shape)); + std::iota(all_dimnos.begin(), all_dimnos.end(), 0); + return Reduce(operand, init_value, computation, all_dimnos); + }); } XlaOp XlaBuilder::ReduceWindow( @@ -891,7 +1495,21 @@ XlaOp XlaBuilder::ReduceWindow( const XlaComputation& computation, tensorflow::gtl::ArraySlice window_dimensions, tensorflow::gtl::ArraySlice window_strides, Padding padding) { - return UnimplementedOp(); + return NoteErrorOrReturn([&]() -> StatusOr { + HloInstructionProto instr; + + TF_ASSIGN_OR_RETURN(const Shape& operand_shape, GetShape(operand)); + 
TF_RETURN_IF_ERROR( + ValidatePaddingValues(AsInt64Slice(operand_shape.dimensions()), + window_dimensions, window_strides)); + + std::vector> padding_values = + MakePadding(AsInt64Slice(operand_shape.dimensions()), window_dimensions, + window_strides, padding); + return ReduceWindowWithGeneralPadding(operand, init_value, computation, + window_dimensions, window_strides, + padding_values); + }); } XlaOp XlaBuilder::ReduceWindowWithGeneralPadding( @@ -900,31 +1518,111 @@ XlaOp XlaBuilder::ReduceWindowWithGeneralPadding( tensorflow::gtl::ArraySlice window_dimensions, tensorflow::gtl::ArraySlice window_strides, tensorflow::gtl::ArraySlice> padding) { - return UnimplementedOp(); + return NoteErrorOrReturn([&]() -> StatusOr { + HloInstructionProto instr; + + TF_ASSIGN_OR_RETURN(const Shape& operand_shape, GetShape(operand)); + TF_ASSIGN_OR_RETURN(const Shape& init_shape, GetShape(init_value)); + TF_ASSIGN_OR_RETURN(const ProgramShape& to_apply_shape, + computation.GetProgramShape()); + TF_ASSIGN_OR_RETURN(*instr.mutable_window(), + MakeWindow(window_dimensions, window_strides, padding, + /*lhs_dilation=*/{}, /*rhs_dilation=*/{})); + TF_ASSIGN_OR_RETURN( + *instr.mutable_shape(), + ShapeInference::InferReduceWindowShape(operand_shape, init_shape, + instr.window(), to_apply_shape)); + + AddCalledComputation(computation, &instr); + return AddInstruction(std::move(instr), HloOpcode::kReduceWindow, + {operand, init_value}); + }); } XlaOp XlaBuilder::BatchNormTraining(const XlaOp& operand, const XlaOp& scale, const XlaOp& offset, float epsilon, int64 feature_index) { - return UnimplementedOp(); + return NoteErrorOrReturn([&]() -> StatusOr { + HloInstructionProto instr; + + TF_ASSIGN_OR_RETURN(const Shape& operand_shape, GetShape(operand)); + TF_ASSIGN_OR_RETURN(const Shape& scale_shape, GetShape(scale)); + TF_ASSIGN_OR_RETURN(const Shape& offset_shape, GetShape(offset)); + TF_ASSIGN_OR_RETURN( + *instr.mutable_shape(), + ShapeInference::InferBatchNormTrainingShape( + 
operand_shape, scale_shape, offset_shape, feature_index)); + + instr.set_epsilon(epsilon); + instr.set_feature_index(feature_index); + + return AddInstruction(std::move(instr), HloOpcode::kBatchNormTraining, + {operand, scale, offset}); + }); } XlaOp XlaBuilder::BatchNormInference(const XlaOp& operand, const XlaOp& scale, const XlaOp& offset, const XlaOp& mean, const XlaOp& variance, float epsilon, int64 feature_index) { - return UnimplementedOp(); + return NoteErrorOrReturn([&]() -> StatusOr { + HloInstructionProto instr; + + TF_ASSIGN_OR_RETURN(const Shape& operand_shape, GetShape(operand)); + TF_ASSIGN_OR_RETURN(const Shape& scale_shape, GetShape(scale)); + TF_ASSIGN_OR_RETURN(const Shape& offset_shape, GetShape(offset)); + TF_ASSIGN_OR_RETURN(const Shape& mean_shape, GetShape(mean)); + TF_ASSIGN_OR_RETURN(const Shape& variance_shape, GetShape(variance)); + TF_ASSIGN_OR_RETURN(*instr.mutable_shape(), + ShapeInference::InferBatchNormInferenceShape( + operand_shape, scale_shape, offset_shape, + mean_shape, variance_shape, feature_index)); + + instr.set_epsilon(epsilon); + instr.set_feature_index(feature_index); + + return AddInstruction(std::move(instr), HloOpcode::kBatchNormInference, + {operand, scale, offset, mean, variance}); + }); } XlaOp XlaBuilder::BatchNormGrad(const XlaOp& operand, const XlaOp& scale, const XlaOp& batch_mean, const XlaOp& batch_var, const XlaOp& grad_output, float epsilon, int64 feature_index) { - return UnimplementedOp(); + return NoteErrorOrReturn([&]() -> StatusOr { + HloInstructionProto instr; + + TF_ASSIGN_OR_RETURN(const Shape& operand_shape, GetShape(operand)); + TF_ASSIGN_OR_RETURN(const Shape& scale_shape, GetShape(scale)); + TF_ASSIGN_OR_RETURN(const Shape& batch_mean_shape, GetShape(batch_mean)); + TF_ASSIGN_OR_RETURN(const Shape& batch_var_shape, GetShape(batch_var)); + TF_ASSIGN_OR_RETURN(const Shape& grad_output_shape, GetShape(grad_output)); + TF_ASSIGN_OR_RETURN(*instr.mutable_shape(), + 
ShapeInference::InferBatchNormGradShape( + operand_shape, scale_shape, batch_mean_shape, + batch_var_shape, grad_output_shape, feature_index)); + + instr.set_epsilon(epsilon); + instr.set_feature_index(feature_index); + + return AddInstruction(std::move(instr), HloOpcode::kBatchNormGrad, + {operand, scale, batch_mean, batch_var, grad_output}); + }); } XlaOp XlaBuilder::CrossReplicaSum(const XlaOp& operand) { - return UnimplementedOp(); + return NoteErrorOrReturn([&]() -> StatusOr { + HloInstructionProto instr; + + TF_ASSIGN_OR_RETURN(const Shape& operand_shape, GetShape(operand)); + TF_ASSIGN_OR_RETURN( + *instr.mutable_shape(), + ShapeInference::InferCrossReplicaSumShape({&operand_shape})); + + return AddInstruction(std::move(instr), HloOpcode::kCrossReplicaSum, + {operand}); + }); } XlaOp XlaBuilder::SelectAndScatter( @@ -933,7 +1631,14 @@ XlaOp XlaBuilder::SelectAndScatter( tensorflow::gtl::ArraySlice window_strides, Padding padding, const XlaOp& source, const XlaOp& init_value, const XlaComputation& scatter) { - return UnimplementedOp(); + return NoteErrorOrReturn([&]() -> StatusOr { + TF_ASSIGN_OR_RETURN(const Shape& operand_shape, GetShape(operand)); + return SelectAndScatterWithGeneralPadding( + operand, select, window_dimensions, window_strides, + MakePadding(AsInt64Slice(operand_shape.dimensions()), window_dimensions, + window_strides, padding), + source, init_value, scatter); + }); } XlaOp XlaBuilder::SelectAndScatterWithGeneralPadding( @@ -943,31 +1648,179 @@ XlaOp XlaBuilder::SelectAndScatterWithGeneralPadding( tensorflow::gtl::ArraySlice> padding, const XlaOp& source, const XlaOp& init_value, const XlaComputation& scatter) { - return UnimplementedOp(); + return NoteErrorOrReturn([&]() -> StatusOr { + HloInstructionProto instr; + + TF_ASSIGN_OR_RETURN(const Shape& operand_shape, GetShape(operand)); + TF_ASSIGN_OR_RETURN(const Shape& source_shape, GetShape(source)); + TF_ASSIGN_OR_RETURN(const Shape& init_shape, GetShape(init_value)); + 
TF_ASSIGN_OR_RETURN(const ProgramShape& select_shape, + select.GetProgramShape()); + TF_ASSIGN_OR_RETURN(const ProgramShape& scatter_shape, + scatter.GetProgramShape()); + TF_ASSIGN_OR_RETURN(*instr.mutable_window(), + MakeWindow(window_dimensions, window_strides, padding, + /*lhs_dilation=*/{}, /*rhs_dilation=*/{})); + TF_ASSIGN_OR_RETURN(*instr.mutable_shape(), + ShapeInference::InferSelectAndScatterShape( + operand_shape, select_shape, instr.window(), + source_shape, init_shape, scatter_shape)); + + AddCalledComputation(select, &instr); + AddCalledComputation(scatter, &instr); + + return AddInstruction(std::move(instr), HloOpcode::kSelectAndScatter, + {operand, source, init_value}); + }); } XlaOp XlaBuilder::ReducePrecision(const XlaOp& operand, const int exponent_bits, const int mantissa_bits) { - return UnimplementedOp(); + return NoteErrorOrReturn([&]() -> StatusOr { + HloInstructionProto instr; + TF_ASSIGN_OR_RETURN(const Shape& operand_shape, GetShape(operand)); + TF_ASSIGN_OR_RETURN(*instr.mutable_shape(), + ShapeInference::InferReducePrecisionShape( + operand_shape, exponent_bits, mantissa_bits)); + instr.set_exponent_bits(exponent_bits); + instr.set_mantissa_bits(mantissa_bits); + return AddInstruction(std::move(instr), HloOpcode::kReducePrecision, + {operand}); + }); } void XlaBuilder::Send(const XlaOp& operand, const ChannelHandle& handle) { - UnimplementedOp(); + NoteErrorOrReturn([&]() -> StatusOr { + HloInstructionProto instr; + + // Send instruction produces a tuple of {aliased operand, U32 context}. 
+ TF_ASSIGN_OR_RETURN(const Shape& shape, GetShape(operand)); + *instr.mutable_shape() = + ShapeUtil::MakeTupleShape({shape, ShapeUtil::MakeShape(U32, {})}); + instr.set_channel_id(handle.handle()); + TF_ASSIGN_OR_RETURN( + XlaOp send, + AddInstruction(std::move(instr), HloOpcode::kSend, {operand})); + + HloInstructionProto send_done_instr; + *send_done_instr.mutable_shape() = ShapeUtil::MakeNil(); + send_done_instr.set_channel_id(handle.handle()); + return AddInstruction(std::move(send_done_instr), HloOpcode::kSendDone, + {send}); + }); } XlaOp XlaBuilder::Recv(const Shape& shape, const ChannelHandle& handle) { - return UnimplementedOp(); -} + return NoteErrorOrReturn([&]() -> StatusOr { + HloInstructionProto instr; -StatusOr XlaBuilder::IsConstant(const XlaOp& operand, - int64 num_parameters) { - return Unimplemented("IsConstant is not implemented."); -} + // Recv instruction produces a tuple of {receive buffer, U32 context}. + *instr.mutable_shape() = + ShapeUtil::MakeTupleShape({shape, ShapeUtil::MakeShape(U32, {})}); + instr.set_channel_id(handle.handle()); + TF_ASSIGN_OR_RETURN(XlaOp recv, + AddInstruction(std::move(instr), HloOpcode::kRecv, {})); + + HloInstructionProto recv_done_instr; + *recv_done_instr.mutable_shape() = shape; + recv_done_instr.set_channel_id(handle.handle()); + return AddInstruction(std::move(recv_done_instr), HloOpcode::kRecvDone, + {recv}); + }); +} + +StatusOr XlaBuilder::IsConstant(const XlaOp& operand) const { + TF_RETURN_IF_ERROR(first_error_); + + // Verify that the handle is valid. + TF_RETURN_IF_ERROR(LookUpInstruction(operand).status()); + + bool is_constant = true; + std::set visited; + IsConstantVisitor(operand.handle(), &visited, &is_constant); + return is_constant; +} + +StatusOr XlaBuilder::BuildConstantSubGraph( + const XlaOp& root_op) const { + TF_ASSIGN_OR_RETURN(bool is_constant, IsConstant(root_op)); + if (!is_constant) { + auto op_status = LookUpInstruction(root_op); + string op_string = + op_status.ok() ? 
op_status.ValueOrDie()->name() : ""; + return InvalidArgument( + "Operand to BuildConstantSubGraph depends on a parameter.\n\n" + " op requested for constant subgraph: %s\n\n" + "This is an internal error that typically happens when the XLA user " + "(e.g. TensorFlow) is attempting to determine a value that must be a " + "compile-time constant (e.g. an array dimension) but it is not capable " + "of being evaluated at XLA compile time.\n\n" + "Please file a usability bug with the framework being used (e.g. " + "TensorFlow).", + op_string.c_str()); + } + + TF_ASSIGN_OR_RETURN(const HloInstructionProto* root, + LookUpInstruction(root_op)); + TF_ASSIGN_OR_RETURN(HloOpcode opcode, StringToHloOpcode(root->opcode())); + if (!CanBeRoot(opcode)) { + return InvalidArgument("the operand with opcode %s cannot be root", + root->opcode().c_str()); + } -StatusOr> XlaBuilder::ComputeConstant( - const XlaOp& operand, const Layout* output_layout, - tensorflow::gtl::ArraySlice parameters) { - return Unimplemented("ComputeConstant is not implemented"); + HloComputationProto entry; + entry.set_id(GetUniqueId()); // Give the computation a global unique id. + entry.set_name(StrCat(name_, entry.id(), "_compute_constant")); + entry.set_root_id(root->id()); + ProgramShape* program_shape = entry.mutable_program_shape(); + *program_shape->mutable_result() = root->shape(); + + // We use std::set to keep the instruction ids in ascending order (which is + // also a valid denpendency order). The related ops will be added to the + // subgraph in the same order. + std::set related_ops; + tensorflow::gtl::FlatSet related_calls; // Related computations. 
+ std::queue worklist; + worklist.push(root->id()); + related_ops.insert(root->id()); + while (!worklist.empty()) { + int64 node = worklist.front(); + worklist.pop(); + for (int64 id : instructions_[node].operand_ids()) { + if (related_ops.insert(id).second) { + worklist.push(id); + } + } + for (int64 called_id : instructions_[node].called_computation_ids()) { + related_calls.insert(called_id); + } + } + + // Add related ops to the computation. + for (int64 id : related_ops) { + auto* instr = entry.add_instructions(); + *instr = instructions_[id]; + // Ensures that the instruction names are unique among the graph. + const string& new_name = + StrCat(instr->name(), ".", entry.id(), ".", instr->id()); + instr->set_name(new_name); + } + + XlaComputation computation(entry.id()); + HloModuleProto* module = computation.mutable_proto(); + module->set_name(entry.name()); + module->set_id(entry.id()); + module->set_entry_computation_name(entry.name()); + module->set_entry_computation_id(entry.id()); + *module->mutable_program_shape() = *program_shape; + for (auto& e : embedded_) { + if (related_calls.find(e.second.id()) != related_calls.end()) { + *module->add_computations() = e.second; + } + } + *module->add_computations() = std::move(entry); + + return std::move(computation); } std::unique_ptr XlaBuilder::CreateSubBuilder( @@ -978,10 +1831,6 @@ std::unique_ptr XlaBuilder::CreateSubBuilder( return sub_builder; } -Status XlaBuilder::SetReturnValue(const XlaOp& operand) { - return Unimplemented("SetReturnValue is not implemented."); -} - /* static */ ConvolutionDimensionNumbers XlaBuilder::CreateDefaultConvDimensionNumbers(int num_spatial_dims) { ConvolutionDimensionNumbers dimension_numbers; @@ -1055,20 +1904,24 @@ XlaBuilder::CreateDefaultConvDimensionNumbers(int num_spatial_dims) { StatusOr XlaBuilder::AddInstruction( HloInstructionProto&& instr, HloOpcode opcode, tensorflow::gtl::ArraySlice operands) { + TF_RETURN_IF_ERROR(first_error_); + const int64 handle = 
instructions_.size(); instr.set_id(handle); instr.set_opcode(HloOpcodeString(opcode)); if (instr.name().empty()) { - instr.set_name(StrCat(instr.opcode(), ".", handle)); - } else { - // Append the handle to make sure the name is unique. - instr.set_name(StrCat(instr.name(), ".", handle)); + instr.set_name(StrCat(instr.opcode())); } for (const auto& operand : operands) { - TF_RET_CHECK(operand.builder_ != nullptr); - TF_RET_CHECK(operand.builder_ == this) - << "Do not add XlaOp from builder " << operand.builder_->name() - << " to builder " << this->name(); + if (operand.builder_ == nullptr) { + return InvalidArgument("invalid XlaOp with handle %lld", + operand.handle()); + } + if (operand.builder_ != this) { + return InvalidArgument("Do not add XlaOp from builder %s to builder %s", + operand.builder_->name().c_str(), + this->name().c_str()); + } instr.add_operand_ids(operand.handle()); } @@ -1083,8 +1936,22 @@ StatusOr XlaBuilder::AddInstruction( return op; } +void XlaBuilder::AddCalledComputation(const XlaComputation& computation, + HloInstructionProto* instr) { + instr->add_called_computation_ids(computation.proto().entry_computation_id()); + for (const HloComputationProto& e : computation.proto().computations()) { + embedded_.insert({e.id(), e}); + } +} + StatusOr XlaBuilder::LookUpInstruction( const XlaOp& op) const { + TF_RETURN_IF_ERROR(first_error_); + + if (op.builder_ != this) { + return InvalidArgument("invalid XlaOp with handle %lld", op.handle()); + } + TF_RET_CHECK(op.builder_ == this); if (op.handle() >= instructions_.size() || op.handle() < 0) { return InvalidArgument("no XlaOp value %lld", op.handle()); diff --git a/tensorflow/compiler/xla/client/xla_client/xla_builder.h b/tensorflow/compiler/xla/client/xla_client/xla_builder.h index f66feb93ceec479037a966562f0bd3f58f76a4f5..4955f1515d66af00ddf72e4c7621292a590e662c 100644 --- a/tensorflow/compiler/xla/client/xla_client/xla_builder.h +++ b/tensorflow/compiler/xla/client/xla_client/xla_builder.h @@ 
-53,14 +53,31 @@ class XlaBuilder; class XlaOp { public: XlaOp() : handle_(0), builder_(nullptr) {} + ~XlaOp() {} StatusOr GetShape() const; + const XlaBuilder* builder() const { return builder_; } + + bool operator==(const XlaOp& rhs) const { + return handle_ == rhs.handle_ && builder_ == rhs.builder_; + } + + bool operator!=(const XlaOp& rhs) const { + return handle_ != rhs.handle_ || builder_ != rhs.builder_; + } + + friend std::ostream& operator<<(std::ostream& out, const XlaOp& op) { + out << op.handle(); + return out; + } + private: XlaOp(int64 handle, XlaBuilder* builder) : handle_(handle), builder_(builder) {} int64 handle() const { return handle_; } + friend class XlaBuilder; int64 handle_; @@ -570,6 +587,9 @@ class XlaBuilder { // Enqueues a sign instruction onto the computation. XlaOp Sign(const XlaOp& operand); + // Enqueues a count leading zeros instruction onto the computation. + XlaOp Clz(const XlaOp& operand); + // Enqueues a cosine instruction onto the computation. XlaOp Cos(const XlaOp& operand); @@ -687,11 +707,12 @@ class XlaBuilder { XlaOp Recv(const Shape& shape, const ChannelHandle& handle); // Returns true if 'operand' is a compile-time constant. A compile-time - // constant does not depend on parameters with index greater than or equal to - // `num_parameters`, or on stateful operators such as `RngNormal` or `Infeed`. - // Unlike `ComputeConstant`, `IsConstant` tests whether a computation is a - // compile-time constant without evaluating the computation. - StatusOr IsConstant(const XlaOp& operand, int64 num_parameters = 0); + // constant does not depend on any parameters, or on stateful operators such + // as `RngNormal` or `Infeed`. + // + // This tests whether a computation is a compile-time constant without + // evaluating the computation. + StatusOr IsConstant(const XlaOp& operand) const; // Normalizes operand across spatial and batch dimensions for each feature. 
// @@ -731,47 +752,14 @@ class XlaBuilder { const XlaOp& grad_output, float epsilon, int64 feature_index); - // Computes the value of a constant indicated by a XlaOp using a non-optimized - // interpreter on the host. - // - // The operand must represent a constant value, which in this case - // means that it must not statically depend on any parameter of the - // computation that is being built other then the ones specified on the - // parameter list. The parameters in the list will be indexed by their - // parameter id property so the number of parameters specified should be at - // least as many as the largest used parameter index. - // - // `IsConstant` can be used to test whether a computation is a compile-time - // constant without evaluation it. `ComputeConstant` only succeeds for - // computations where `IsConstant` returns true. - // - // This functionality can be useful when translating a computation - // into XLA where something that looked dynamic is required by - // XLA to be specified as a constant. E.g. the source - // computation (outside of XLA) may include a dynamic - // computation of the shape of something and ComputeConstant lets - // you determine what the value of that computation is in the case - // where the value can be determined at compile time. - // - // If output_layout is non-null, then the output of the computation - // will be stored using that layout. - StatusOr> ComputeConstant( - const XlaOp& operand, const Layout* output_layout = nullptr, - tensorflow::gtl::ArraySlice parameters = {}); - // Returns a new XlaBuilder whose resultant Computation is used only by this // XlaBuilder. The sub-XlaBuilder has the same die_immediately_on_error // behavior as the parent. std::unique_ptr CreateSubBuilder(const string& computation_name); - // Modifies the computation being built so that executions of it will return - // the value associated with operand, rather than the last expression enqueued - // on the XlaBuilder. 
Any subsequent operations added to the XlaBuilder will - // not have any effect unless SetReturnValue is called again. - Status SetReturnValue(const XlaOp& operand); - // Builds the computation with the requested operations, or returns a non-ok - // status. + // status. Note that all ops that have been enqueued will be moved to the + // computation being returned. StatusOr Build(); // Builds the computation with the requested operations, or notes an error in @@ -784,6 +772,12 @@ class XlaBuilder { // instead. XlaComputation BuildAndNoteError(); + // Returns a subgraph that roots on the given root. If the root is not a + // compile-time constant (see `IsConstant`), returns an error. + // + // This will copy the needed ops/computations to the subgraph. + StatusOr BuildConstantSubGraph(const XlaOp& root_op) const; + // Returns the first error that was encountered while building the // computation. When an error is encountered, by default we return a vacuous // XlaOp and inform the user of the error that occurred while @@ -796,26 +790,23 @@ class XlaBuilder { StatusOr GetShape(const XlaOp& op) const; // Returns the (inferred) result for the current computation's shape. 
- StatusOr GetProgramShape(); + StatusOr GetProgramShape() const; private: StatusOr AddInstruction( HloInstructionProto&& instr, HloOpcode opcode, tensorflow::gtl::ArraySlice operands = {}); + void AddCalledComputation(const XlaComputation& computation, + HloInstructionProto* instr); + // Notes that the error occurred by: // * storing it internally and capturing a backtrace if it's the first error // (this deferred value will be produced on the call to Build()) // * dying if die_immediately_on_error_ is true void NoteError(const Status& error); - XlaOp NoteErrorOrReturn(StatusOr&& op) { - if (!op.ok()) { - NoteError(op.status()); - return XlaOp(); - } - return op.ConsumeValueOrDie(); - } + XlaOp NoteErrorOrReturn(const std::function()>& op_creator); // Helper method that creates an empty op and notes error. XlaOp UnimplementedOp(); @@ -835,6 +826,10 @@ class XlaBuilder { XlaOp TernaryOp(HloOpcode triop, const XlaOp& lhs, const XlaOp& rhs, const XlaOp& ehs); + XlaOp RngOp(RandomDistribution distribution, + tensorflow::gtl::ArraySlice parameters, + const Shape& shape); + StatusOr InDimBroadcast( const Shape& shape, const XlaOp& operand, tensorflow::gtl::ArraySlice broadcast_dimensions); @@ -850,7 +845,29 @@ class XlaBuilder { // Returns the (inferred) result for the program shape for the current // computation and fills the root_id in the pointer. - StatusOr GetProgramShape(int64* root_id); + StatusOr GetProgramShape(int64* root_id) const; + + // A visitor which checks whether an operation is a compile-time constant, + // meaning that it doesn't depend on any parameters, or on any stateful + // operation such as `RngNormal` or `Infeed`. The visitor walks the + // computation starting at a given operation and sets is_constant to false iff + // a parameter or stateful operation is encountered. + void IsConstantVisitor(const int64 op_handle, std::set* visited, + bool* is_constant) const; + + // Checks bounds for convolution parameters. 
+ Status VerifyConvolution( + const Shape& lhs_shape, const Shape& rhs_shape, + const ConvolutionDimensionNumbers& dimension_numbers) const; + + // Helper function for creating a Window proto from user-supplied data. + // Returns error if the user-supplied data was invalid. + StatusOr MakeWindow( + tensorflow::gtl::ArraySlice window_dimensions, + tensorflow::gtl::ArraySlice window_strides, + tensorflow::gtl::ArraySlice> padding, + tensorflow::gtl::ArraySlice lhs_dilation, + tensorflow::gtl::ArraySlice rhs_dilation) const; string name_; // Name to use for the built computation. @@ -962,6 +979,37 @@ XlaOp XlaBuilder::ConstantR4FromArray4D(const Array4D& values) { return ConstantFromArray(values); } +// RAII-style object: sets the current sharding assignment in builder on +// construction, and sets back to the previous assignment on destruction. +// +// TODO(b/74197823): This is a part of a NOT YET ready refactor. +class XlaScopedShardingAssignment { + public: + XlaScopedShardingAssignment(xla::XlaBuilder* builder, + tensorflow::gtl::optional sharding) + : builder_(builder), prev_sharding_(builder->sharding()) { + SetSharding(sharding); + } + + XlaScopedShardingAssignment(const XlaScopedShardingAssignment&) = delete; + XlaScopedShardingAssignment& operator=(const XlaScopedShardingAssignment&) = + delete; + + ~XlaScopedShardingAssignment() { SetSharding(prev_sharding_); } + + private: + void SetSharding(const tensorflow::gtl::optional& sharding) { + if (sharding.has_value()) { + builder_->SetSharding(sharding.value()); + } else { + builder_->ClearSharding(); + } + } + + xla::XlaBuilder* const builder_; + tensorflow::gtl::optional prev_sharding_; +}; + } // namespace xla #endif // TENSORFLOW_COMPILER_XLA_CLIENT_XLA_CLIENT_XLA_BUILDER_H_ diff --git a/tensorflow/compiler/xla/client/xla_client/xla_computation.h b/tensorflow/compiler/xla/client/xla_client/xla_computation.h index 2a3c6952667a434b68ca0c5e4e9874397da173d3..7ad212aa24cd32d104cc4db7aa164c22c9f5be8f 100644 --- 
a/tensorflow/compiler/xla/client/xla_client/xla_computation.h +++ b/tensorflow/compiler/xla/client/xla_client/xla_computation.h @@ -30,6 +30,10 @@ namespace xla { class XlaComputation { public: XlaComputation() : unique_id_(-1) {} + XlaComputation(const HloModuleProto& proto) + : unique_id_(proto.id()), proto_(proto) {} + + ~XlaComputation() {} XlaComputation(const XlaComputation&) = delete; XlaComputation& operator=(const XlaComputation&) = delete; @@ -44,6 +48,9 @@ class XlaComputation { const HloModuleProto& proto() const { return proto_; } + // Returns true if this object is a null Computation. + bool IsNull() const { return unique_id_ == -1; } + private: XlaComputation(const int64 unique_id) : unique_id_(unique_id) {} HloModuleProto* mutable_proto() { return &proto_; } diff --git a/tensorflow/compiler/xla/device_util.h b/tensorflow/compiler/xla/device_util.h index 23a622b1ad0e2f3b220645f62767271f28df24e9..1a51fdee680721a4a03fa5de79a81746d92af76b 100644 --- a/tensorflow/compiler/xla/device_util.h +++ b/tensorflow/compiler/xla/device_util.h @@ -29,7 +29,7 @@ namespace xla { // Returns a string that represents the device in terms of platform and ordinal; // e.g. 
the first CUDA device will be "cuda:0" -string DeviceIdentifier(perftools::gputools::StreamExecutor* stream_exec) { +string DeviceIdentifier(se::StreamExecutor* stream_exec) { return tensorflow::strings::StrCat(stream_exec->platform()->Name(), ":", stream_exec->device_ordinal()); } diff --git a/tensorflow/compiler/xla/executable_run_options.cc b/tensorflow/compiler/xla/executable_run_options.cc index 392ad9010ab81923a089c7b00a79ddc281af92bb..99b8f0558e6e39649c0e2e6ef345332e2aa55736 100644 --- a/tensorflow/compiler/xla/executable_run_options.cc +++ b/tensorflow/compiler/xla/executable_run_options.cc @@ -36,12 +36,12 @@ DeviceMemoryAllocator* ExecutableRunOptions::allocator() const { } ExecutableRunOptions& ExecutableRunOptions::set_stream( - perftools::gputools::Stream* stream) { + stream_executor::Stream* stream) { stream_ = stream; return *this; } -perftools::gputools::Stream* ExecutableRunOptions::stream() const { +stream_executor::Stream* ExecutableRunOptions::stream() const { return stream_; } @@ -87,4 +87,11 @@ const DeviceAssignment* ExecutableRunOptions::device_assignment() const { return device_assignment_; } +ExecutableRunOptions& ExecutableRunOptions::set_rng_seed(int rng_seed) { + rng_seed_ = rng_seed; + return *this; +} + +int ExecutableRunOptions::rng_seed() const { return rng_seed_; } + } // namespace xla diff --git a/tensorflow/compiler/xla/executable_run_options.h b/tensorflow/compiler/xla/executable_run_options.h index d4fcbf0493c936ebcd0639a432e56b62ee15672c..a306ae16ba4aee87e565ec3d119b84d90bc69c6c 100644 --- a/tensorflow/compiler/xla/executable_run_options.h +++ b/tensorflow/compiler/xla/executable_run_options.h @@ -16,26 +16,27 @@ limitations under the License. #ifndef TENSORFLOW_COMPILER_XLA_EXECUTABLE_RUN_OPTIONS_H_ #define TENSORFLOW_COMPILER_XLA_EXECUTABLE_RUN_OPTIONS_H_ -// Intentionally forward declared so that ExecutableRunOptions can be linked +// Pulls in the ::stream_executor -> ::xla::se namespace alias. 
+#include "tensorflow/compiler/xla/types.h" + +// These classes are forward declared so that ExecutableRunOptions can be linked // into an XLA-compiled binary without having to link all of the pointed-to // objects (e.g., for an ahead-of-time compiled CPU binary, the gpu tools don't // need to be linked). -namespace perftools { -namespace gputools { +namespace stream_executor { class Stream; class Platform; -} -} +} // namespace stream_executor namespace tensorflow { namespace thread { class ThreadPool; -} -} +} // namespace thread +} // namespace tensorflow namespace Eigen { struct ThreadPoolDevice; -} +} // namespace Eigen namespace xla { @@ -61,8 +62,8 @@ class ExecutableRunOptions { // If set, this is the stream to run the computation on. The platform of the // stream must match the platform the executable was built for. A value of // nullptr indicates the option has not been set. - ExecutableRunOptions& set_stream(perftools::gputools::Stream* stream); - perftools::gputools::Stream* stream() const; + ExecutableRunOptions& set_stream(stream_executor::Stream* stream); + stream_executor::Stream* stream() const; // Sets the thread pool on which to run parallel CPU backend // computations. Does not take ownership. 
@@ -84,14 +85,18 @@ class ExecutableRunOptions { DeviceAssignment* device_assignment); const DeviceAssignment* device_assignment() const; + ExecutableRunOptions& set_rng_seed(int rng_seed); + int rng_seed() const; + private: DeviceMemoryAllocator* allocator_ = nullptr; int device_ordinal_ = -1; DeviceAssignment* device_assignment_ = nullptr; - perftools::gputools::Stream* stream_ = nullptr; + stream_executor::Stream* stream_ = nullptr; tensorflow::thread::ThreadPool* inter_op_thread_pool_ = nullptr; const Eigen::ThreadPoolDevice* intra_op_thread_pool_ = nullptr; ExecutionProfile* execution_profile_ = nullptr; + int rng_seed_ = 0; }; } // namespace xla diff --git a/tensorflow/compiler/xla/legacy_flags/debug_options_flags.cc b/tensorflow/compiler/xla/legacy_flags/debug_options_flags.cc index c8ed3e3a2b009ddffdfb79a9a6ced8d5e736bee6..70ae95bf47398589e3c20f72c1f2084a738f253a 100644 --- a/tensorflow/compiler/xla/legacy_flags/debug_options_flags.cc +++ b/tensorflow/compiler/xla/legacy_flags/debug_options_flags.cc @@ -40,7 +40,10 @@ void SetDebugOptionsDefaults(DebugOptions* flags) { flags->set_xla_cpu_multi_thread_eigen(true); flags->set_xla_gpu_cuda_data_dir("./cuda_sdk_lib"); flags->set_xla_eliminate_hlo_implicit_broadcast(true); - +#ifdef INTEL_MKL + flags->set_xla_cpu_use_mkl_dnn(true); +#endif // INTEL_MKL + flags->set_xla_gpu_max_kernel_unroll_factor(1); // Set cudnn batchnorm off by default; it does not provide a performance win // on average. 
flags->set_xla_gpu_use_cudnn_batchnorm(false); @@ -220,6 +223,11 @@ void AllocateFlags() { bool_setter_for(&DebugOptions::set_xla_gpu_disable_multi_streaming), flag_values->xla_gpu_disable_multi_streaming(), "If true, multi-streaming in the GPU backend is disabled."), + tensorflow::Flag( + "xla_gpu_max_kernel_unroll_factor", + int32_setter_for(&DebugOptions::set_xla_gpu_max_kernel_unroll_factor), + flag_values->xla_gpu_max_kernel_unroll_factor(), + "Specify the maximum kernel unroll factor for the GPU backend."), tensorflow::Flag( "xla_dump_optimized_hlo_proto_to", flag_values->mutable_xla_dump_optimized_hlo_proto_to(), @@ -288,6 +296,10 @@ void AllocateFlags() { flag_values->xla_gpu_use_cudnn_batchnorm(), "Allows the GPU backend to implement batchnorm HLOs using cudnn, " "rather than expanding them to a soup of HLOs."), + tensorflow::Flag("xla_cpu_use_mkl_dnn", + bool_setter_for(&DebugOptions::set_xla_cpu_use_mkl_dnn), + flag_values->xla_cpu_use_mkl_dnn(), + "Generate calls to MKL-DNN in the CPU backend."), }); ParseFlagsFromEnv(*flag_objects); } diff --git a/tensorflow/compiler/xla/literal_util.cc b/tensorflow/compiler/xla/literal_util.cc index 13675b7d0074592043b7e12de0aad948a3e9848f..bb6dd4f9098aefc1c2bbb1b1c41b3cee856b67de 100644 --- a/tensorflow/compiler/xla/literal_util.cc +++ b/tensorflow/compiler/xla/literal_util.cc @@ -44,8 +44,16 @@ namespace { constexpr bool kLittleEndian = __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__; -// Converts between little and big endian, assuming elements in the array are 16 -// bits long. +// Converts between little and big endian. 
+// +// Precondition: size % 2 == 0 (elements in the array are 16 bits long) +void ConvertEndianShort(string* bytes) { + CHECK_EQ(bytes->size() / 2, 0); + for (int64 i = 0; i < bytes->size(); i += 2) { + std::swap((*bytes)[i], (*bytes)[i + 1]); + } +} + void ConvertEndianShort(char* bytes, int64 size) { CHECK_EQ(size / 2, 0); for (int64 i = 0; i < size; i += 2) { @@ -97,11 +105,18 @@ Literal::Literal(const Shape& shape, bool allocate_arrays) const Shape& subshape = piece.subshape(); if (ShapeUtil::IsArray(subshape)) { if (allocate_arrays) { - piece.set_buffer(new char[piece.size_bytes()]); if (LayoutUtil::IsSparseArray(subshape)) { + // For sparse arrays, the buffer must be of the size of the maximum + // number of sparse elements possible. + const int64 max_sparse_elements = + LayoutUtil::MaxSparseElements(subshape.layout()); + piece.set_buffer( + new char[max_sparse_elements * ShapeUtil::ByteSizeOfPrimitiveType( + subshape.element_type())]); piece.set_sparse_indices(new SparseIndexArray( - LayoutUtil::MaxSparseElements(subshape.layout()), - ShapeUtil::Rank(subshape))); + max_sparse_elements, ShapeUtil::Rank(subshape))); + } else { + piece.set_buffer(new char[piece.size_bytes()]); } } else { piece.set_buffer(nullptr); @@ -1409,6 +1424,28 @@ std::unique_ptr ConvertBetweenNativeTypes(const Literal& src_literal) { src_literal, converter); } +template +typename std::enable_if<(sizeof(NativeSrcT) == sizeof(NativeDestT)), + std::unique_ptr>::type +BitcastBetweenNativeTypes(const Literal& src_literal) { + auto converter = [](NativeSrcT src) { + return tensorflow::bit_cast(src); + }; + return ConvertBetweenNativeTypesWithConverter( + src_literal, converter); +} + +// This template specialization is here to make the compiler happy. bit_cast has +// a static check that the types are the same size. This specialization should +// never be used because the source and destination types are checked for +// identical sizes higher up. 
+template +typename std::enable_if<(sizeof(NativeSrcT) != sizeof(NativeDestT)), + std::unique_ptr>::type +BitcastBetweenNativeTypes(const Literal& src_literal) { + LOG(FATAL) << "Invalid bitcast between types of different sizes."; +} + template std::unique_ptr ConvertToC64(const Literal& src_literal) { CHECK(ShapeUtil::IsArray(src_literal.shape())); @@ -1428,21 +1465,33 @@ std::unique_ptr ConvertToC64(const Literal& src_literal) { } template -std::unique_ptr ConvertIfTypesMatch(const Literal& src_literal) { +std::unique_ptr ConvertIfTypesMatch(const Literal& src_literal, + bool bitcast) { CHECK_EQ(primitive_src_type, src_literal.shape().element_type()); - return ConvertBetweenNativeTypes< - typename primitive_util::PrimitiveTypeToNative::type, - typename primitive_util::PrimitiveTypeToNative< - primitive_dest_type>::type>(src_literal); + if (bitcast) { + return BitcastBetweenNativeTypes< + typename primitive_util::PrimitiveTypeToNative< + primitive_src_type>::type, + typename primitive_util::PrimitiveTypeToNative< + primitive_dest_type>::type>(src_literal); + } else { + return ConvertBetweenNativeTypes< + typename primitive_util::PrimitiveTypeToNative< + primitive_src_type>::type, + typename primitive_util::PrimitiveTypeToNative< + primitive_dest_type>::type>(src_literal); + } } template StatusOr> ConvertIfDestTypeMatches( - const Literal& src_literal, PrimitiveType primitive_dest_type) { + const Literal& src_literal, PrimitiveType primitive_dest_type, + bool bitcast) { switch (primitive_dest_type) { -#define CONVERT_IF_TYPES_MATCH(type) \ - case (type): \ - return ConvertIfTypesMatch(src_literal); +#define CONVERT_IF_TYPES_MATCH(type) \ + case (type): \ + return ConvertIfTypesMatch(src_literal, \ + bitcast); CONVERT_IF_TYPES_MATCH(PRED) CONVERT_IF_TYPES_MATCH(S8) CONVERT_IF_TYPES_MATCH(S32) @@ -1456,28 +1505,31 @@ StatusOr> ConvertIfDestTypeMatches( CONVERT_IF_TYPES_MATCH(BF16) #undef CONVERT_IF_TYPES_MATCH case C64: - return ConvertToC64(src_literal); + if 
(!bitcast) { + return ConvertToC64(src_literal); + } + break; // Other types are not yet supported. default: - return Unimplemented( - "Converting from type %s to type %s is not implemented.", - PrimitiveType_Name(src_literal.shape().element_type()).c_str(), - PrimitiveType_Name(primitive_dest_type).c_str()); - } -} - -} // namespace - -StatusOr> Literal::Convert( - PrimitiveType primitive_dest_type) const { - TF_RET_CHECK(ShapeUtil::IsArray(shape())); - if (shape().element_type() == primitive_dest_type) { - return CloneToUnique(); + break; } - switch (shape().element_type()) { -#define CONVERT_IF_DEST_TYPE_MATCHES(type) \ - case (type): \ - return ConvertIfDestTypeMatches<(type)>(*this, primitive_dest_type); + return Unimplemented( + "Converting from type %s to type %s is not implemented.", + PrimitiveType_Name(src_literal.shape().element_type()).c_str(), + PrimitiveType_Name(primitive_dest_type).c_str()); +} + +StatusOr> ConvertSwitch( + const Literal& literal, PrimitiveType primitive_dest_type, bool bitcast) { + TF_RET_CHECK(ShapeUtil::IsArray(literal.shape())); + if (literal.shape().element_type() == primitive_dest_type) { + return literal.CloneToUnique(); + } + switch (literal.shape().element_type()) { +#define CONVERT_IF_DEST_TYPE_MATCHES(type) \ + case (type): \ + return ConvertIfDestTypeMatches<(type)>(literal, primitive_dest_type, \ + bitcast); CONVERT_IF_DEST_TYPE_MATCHES(PRED) CONVERT_IF_DEST_TYPE_MATCHES(S8) CONVERT_IF_DEST_TYPE_MATCHES(S32) @@ -1493,12 +1545,35 @@ StatusOr> Literal::Convert( // Other types are not yet supported. default: return Unimplemented( - "Converting from type %s to type %s is not implemented.", - PrimitiveType_Name(shape().element_type()).c_str(), + "%s from type %s to type %s is not implemented.", + (bitcast ? 
"Bitcast converting" : "Converting"), + PrimitiveType_Name(literal.shape().element_type()).c_str(), PrimitiveType_Name(primitive_dest_type).c_str()); } } +} // namespace + +StatusOr> Literal::Convert( + PrimitiveType primitive_dest_type) const { + return ConvertSwitch(*this, primitive_dest_type, /*bitcast=*/false); +} + +StatusOr> Literal::BitcastConvert( + PrimitiveType primitive_dest_type) const { + if (primitive_util::BitWidth(shape().element_type()) != + primitive_util::BitWidth(primitive_dest_type)) { + return InvalidArgument( + "Cannot bitcast convert from %s to %s, bit widths are different: %d != " + "%d", + PrimitiveType_Name(shape().element_type()).c_str(), + PrimitiveType_Name(primitive_dest_type).c_str(), + primitive_util::BitWidth(shape().element_type()), + primitive_util::BitWidth(primitive_dest_type)); + } + return ConvertSwitch(*this, primitive_dest_type, /*bitcast=*/true); +} + StatusOr> Literal::ConvertToShape( const Shape& dest_shape, bool round_f32_to_bf16) const { if (!ShapeUtil::IsTuple(dest_shape)) { @@ -1863,16 +1938,14 @@ void Literal::Piece::WriteToProto(LiteralProto* proto) const { *proto->mutable_f16s() = string( reinterpret_cast(data().data()), size_bytes()); if (!kLittleEndian) { - ConvertEndianShort(const_cast(proto->mutable_f16s()->data()), - proto->f16s().size()); + ConvertEndianShort(proto->mutable_f16s()); } break; case BF16: *proto->mutable_bf16s() = string( reinterpret_cast(data().data()), size_bytes()); if (!kLittleEndian) { - ConvertEndianShort(const_cast(proto->mutable_bf16s()->data()), - proto->bf16s().size()); + ConvertEndianShort(proto->mutable_bf16s()); } break; case F32: diff --git a/tensorflow/compiler/xla/literal_util.h b/tensorflow/compiler/xla/literal_util.h index a96a76fbb4e1a46e225d33b715f073c05fe6275a..8aa19222dc4b9175ec72128dfdad448f65c23e91 100644 --- a/tensorflow/compiler/xla/literal_util.h +++ b/tensorflow/compiler/xla/literal_util.h @@ -333,11 +333,19 @@ class Literal { template std::unique_ptr Replicate(int64 
times) const; - // Converts this literal to another primitive type. Returns an error if the - // conversion is not possible. This literal must be array-shaped. + // Converts this literal to another primitive type using + // static_cast<>. Returns an error if the conversion is not possible. This + // literal must be array-shaped. StatusOr> Convert( PrimitiveType primitive_dest_type) const; + // Converts this literal to another primitive type using a bitcast + // conversion. The to and from primitive types must have the same bit + // width. Returns an error if the conversion is not possible. This literal + // must be array-shaped. + StatusOr> BitcastConvert( + PrimitiveType primitive_dest_type) const; + // Converts this literal to the given shape. Returns an error is the // conversion is not possible. // @@ -587,6 +595,12 @@ class Literal { template Status Populate(const FnType& generator); + // A parallel version of Populate(). This can be used if the generator is + // thread-safe and the values for the shape's different elements are + // independent. + template + Status PopulateParallel(const FnType& generator); + // Fills this literal with the given value. template void PopulateWithValue(NativeT value); @@ -727,7 +741,13 @@ class Literal { int64 size_bytes() const { return ShapeUtil::ByteSizeOf(subshape()); } // Returns the number of elements in this piece's array. - int64 element_count() const { return ShapeUtil::ElementsIn(subshape()); } + int64 element_count() const { + // If this is a sparse array, use the number of elements represented by + // the indices in the associated SparseIndexArray. + return LayoutUtil::IsSparseArray(subshape()) + ? sparse_indices()->index_count() + : ShapeUtil::ElementsIn(subshape()); + } // Copy the data from 'src' into this piece's buffer. Shapes of this piece // and src must be compatible. @@ -785,6 +805,10 @@ class Literal { // buffer). 
void DeallocateBuffers(); + // Implementation details shared between Populate() and PopulateParallel() + template + Status PopulateInternal(const FnType& generator, bool parallel); + Shape shape_; ShapeTree pieces_; @@ -835,8 +859,7 @@ tensorflow::gtl::ArraySlice Literal::Piece::data() const { << " type, but literal element type is " << PrimitiveType_Name(subshape().element_type()); return tensorflow::gtl::ArraySlice( - reinterpret_cast(buffer()), - ShapeUtil::ElementsIn(subshape())); + reinterpret_cast(buffer()), element_count()); } template @@ -849,7 +872,7 @@ tensorflow::gtl::MutableArraySlice Literal::Piece::data() { << " type, but literal element type is " << PrimitiveType_Name(subshape().element_type()); return tensorflow::gtl::MutableArraySlice( - reinterpret_cast(buffer()), ShapeUtil::ElementsIn(subshape())); + reinterpret_cast(buffer()), element_count()); } template @@ -1264,19 +1287,20 @@ void Literal::PopulateSparse(SparseIndexArray indices, CHECK_LE(num_elements, max_elements); CHECK_EQ(num_elements, indices.index_count()); auto root_data = root_piece().data(); - root_data.remove_suffix(max_elements - values.size()); + // Piece::data() returns an ArraySlice of size equal to the number of indices + // in the SparseIndexArray. So there is no need to adjust the size of the data + // here. It is enough to just copy the incoming values into the data buffer. 
std::copy(values.begin(), values.end(), root_data.begin()); *this->root_piece().sparse_indices() = std::move(indices); if (sort) { auto root_data = this->root_piece().data(); - root_data.remove_suffix(root_data.size() - num_elements); this->root_piece().sparse_indices()->SortWithValues(root_data); } DCHECK(this->root_piece().sparse_indices()->Validate(shape())); } template -Status Literal::Populate(const FnType& generator) { +Status Literal::PopulateInternal(const FnType& generator, bool parallel) { const Shape& this_shape = shape(); const int64 rank = ShapeUtil::Rank(this_shape); TF_RET_CHECK(LayoutUtil::IsDenseArray(this_shape)); @@ -1286,11 +1310,11 @@ Status Literal::Populate(const FnType& generator) { if (rank > 0) { StrideConfig stride_config(this_shape, this_shape, AsInt64Slice(this_shape.dimensions())); - DimensionVector minor_scan_indexes(rank, 0); int64 minor_dimension_size = ShapeUtil::GetDimension(this_shape, stride_config.minor_dimension); auto init_function = [&](tensorflow::gtl::ArraySlice indexes) { + DimensionVector minor_scan_indexes(rank, 0); const int64 index = IndexUtil::MultidimensionalIndexToLinearIndex(shape(), indexes); std::copy(indexes.begin(), indexes.end(), minor_scan_indexes.begin()); @@ -1298,17 +1322,35 @@ Status Literal::Populate(const FnType& generator) { minor_scan_indexes[stride_config.minor_dimension] = i; literal_data.at(index + i) = generator(minor_scan_indexes); } - return true; }; - ShapeUtil::ForEachIndex(this_shape, stride_config.base, - stride_config.dimensions, stride_config.step, - init_function); + if (parallel) { + ShapeUtil::ForEachIndexParallel(this_shape, stride_config.base, + stride_config.dimensions, + stride_config.step, init_function); + } else { + ShapeUtil::ForEachIndex( + this_shape, stride_config.base, stride_config.dimensions, + stride_config.step, + [&init_function](tensorflow::gtl::ArraySlice indexes) { + init_function(indexes); + return true; + }); + } } else { // For scalars. 
literal_data.at(0) = generator({}); } return Status::OK(); } +template +Status Literal::Populate(const FnType& generator) { + return PopulateInternal(generator, /*parallel=*/false); +} + +template +Status Literal::PopulateParallel(const FnType& generator) { + return PopulateInternal(generator, /*parallel=*/true); +} template void Literal::PopulateWithValue(NativeT value) { diff --git a/tensorflow/compiler/xla/literal_util_test.cc b/tensorflow/compiler/xla/literal_util_test.cc index 7627762074b6132655c58690a7fffbaf2717e279..61046784e05623cd3117c24ecc6d6c474739bbd5 100644 --- a/tensorflow/compiler/xla/literal_util_test.cc +++ b/tensorflow/compiler/xla/literal_util_test.cc @@ -23,6 +23,7 @@ limitations under the License. #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/test.h" #include "tensorflow/compiler/xla/types.h" +#include "tensorflow/core/lib/core/casts.h" #include "tensorflow/core/lib/core/status_test_util.h" #include "tensorflow/core/platform/macros.h" #include "tensorflow/core/platform/types.h" @@ -217,9 +218,7 @@ TEST_F(LiteralUtilTest, CreateSparse) { EXPECT_EQ(literal->sparse_indices()->data(), ArraySlice(expected_indices.data(), expected_indices.num_elements())); - EXPECT_EQ( - ArraySlice(literal->data().data(), expected_values.size()), - ArraySlice(expected_values)); + EXPECT_EQ(literal->data(), ArraySlice(expected_values)); } TEST_F(LiteralUtilTest, LiteralR4F32ProjectedStringifies) { @@ -1090,6 +1089,48 @@ TEST_F(LiteralUtilTest, Populate) { } } +TEST_F(LiteralUtilTest, PopulateParallel) { + struct PopulateData { + std::vector dimensions; + std::vector layout; + } populate_data[] = { + {{}, {}}, + {{0}, {0}}, + {{16}, {0}}, + {{2, 0}, {1, 0}}, + {{4, 16}, {1, 0}}, + {{21, 12}, {0, 1}}, + {{6, 11, 17}, {2, 0, 1}}, + {{6, 11, 5, 17}, {3, 2, 0, 1}}, + }; + for (const auto& data : populate_data) { + Shape shape = ShapeUtil::MakeShapeWithLayout( + primitive_util::NativeToPrimitiveType(), data.dimensions, + data.layout); + 
auto literal = Literal::CreateFromShape(shape); + auto generator = [&](ArraySlice indexes) -> uint32 { + // Offsets from linear index just to avoid R0 literals to be initialized + // with zero. + return IndexUtil::MultidimensionalIndexToLinearIndex(literal->shape(), + indexes) + + 17; + }; + TF_EXPECT_OK(literal->PopulateParallel(generator)); + + std::vector zero_base(data.dimensions.size(), 0); + std::vector step(data.dimensions.size(), 1); + bool matched = true; + auto check_function = [&](ArraySlice indexes) { + auto value = literal->Get(indexes); + matched = matched && (value == generator(indexes)); + return matched; + }; + ShapeUtil::ForEachIndex(literal->shape(), zero_base, data.dimensions, step, + check_function); + EXPECT_TRUE(matched); + } +} + TEST_F(LiteralUtilTest, ConvertR4) { // clang-format off auto original = Literal::CreateR4WithLayout({{ @@ -1243,6 +1284,25 @@ TEST_F(LiteralUtilTest, ConvertIfTypesMatch) { tensorflow::error::UNIMPLEMENTED); } +TEST_F(LiteralUtilTest, BitcastConvert) { + auto original = + Literal::CreateR1({tensorflow::bit_cast(2.5f), + tensorflow::bit_cast(-42.25f), + tensorflow::bit_cast(100.f), 0xbeef}); + auto expected = Literal::CreateR1( + {2.5f, -42.25f, 100.0f, tensorflow::bit_cast(0xbeef)}); + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr converted, + original->BitcastConvert(F32)); +} + +TEST_F(LiteralUtilTest, BitcastConvertBetweenInvalidTypes) { + auto literal = Literal::CreateR0(1234); + Status status = literal->BitcastConvert(F64).status(); + EXPECT_NE(Status::OK(), status); + EXPECT_TRUE(tensorflow::str_util::StrContains(status.error_message(), + "bit widths are different")); +} + TEST_F(LiteralUtilTest, CopyFromProto_Bool) { LiteralProto p; p.mutable_shape()->set_element_type(PRED); diff --git a/tensorflow/compiler/xla/python/BUILD b/tensorflow/compiler/xla/python/BUILD index 0517a5502e686def4ffea59f929aef225186a8aa..ecb87bd8893276fbb9ecffaa0f8a3233d2e0043f 100644 --- a/tensorflow/compiler/xla/python/BUILD +++ 
b/tensorflow/compiler/xla/python/BUILD @@ -20,6 +20,7 @@ py_test( srcs = ["xla_client_test.py"], main = "xla_client_test.py", srcs_version = "PY2AND3", + tags = ["no_oss"], deps = [ ":xla_client", "//tensorflow/python:platform_test", diff --git a/tensorflow/compiler/xla/python/local_computation_builder.cc b/tensorflow/compiler/xla/python/local_computation_builder.cc index b21ab3044fae7136071f50bdba6e74b799a309d5..24e17abbe06197d2a090421508132e611da3cfa0 100644 --- a/tensorflow/compiler/xla/python/local_computation_builder.cc +++ b/tensorflow/compiler/xla/python/local_computation_builder.cc @@ -89,17 +89,16 @@ StatusOr> TransferFromOutfeedLocalReplica( return client->TransferFromOutfeedLocal(shape, device_ordinal); } -LocalShapedBuffer::LocalShapedBuffer( - std::unique_ptr shaped_buffer) +LocalShapedBuffer::LocalShapedBuffer(ScopedShapedBuffer shaped_buffer) : shaped_buffer_(std::move(shaped_buffer)) {} -const std::unique_ptr& LocalShapedBuffer::shaped_buffer() - const { - return shaped_buffer_; +const ScopedShapedBuffer* LocalShapedBuffer::shaped_buffer() const { + return &shaped_buffer_; } -static StatusOr> ToBuffer( - LocalClient* client, int device_ordinal, const Literal& arg) { +static StatusOr ToBuffer(LocalClient* client, + int device_ordinal, + const Literal& arg) { return client->LiteralToShapedBuffer(arg, device_ordinal, client->backend().memory_allocator()); } @@ -109,14 +108,15 @@ LocalShapedBuffer* LocalShapedBuffer::FromLiteral( const Literal& argument, const tensorflow::gtl::optional& shape_with_layout) { LocalClient* client = GetOrCreateLocalClient(); - std::unique_ptr buf; - if (shape_with_layout) { - std::unique_ptr relaid = - argument.Relayout(shape_with_layout.value()); - buf = ToBuffer(client, /*device_ordinal=*/0, *relaid).ConsumeValueOrDie(); - } else { - buf = ToBuffer(client, /*device_ordinal=*/0, argument).ConsumeValueOrDie(); - } + ScopedShapedBuffer buf = [&] { + if (shape_with_layout) { + std::unique_ptr relaid = + 
argument.Relayout(shape_with_layout.value()); + return ToBuffer(client, /*device_ordinal=*/0, *relaid) + .ConsumeValueOrDie(); + } + return ToBuffer(client, /*device_ordinal=*/0, argument).ConsumeValueOrDie(); + }(); return new LocalShapedBuffer(std::move(buf)); } @@ -158,14 +158,14 @@ StatusOr> CompiledLocalComputation::Execute( << device_ordinal; // Transfer arguments in - std::vector> scoped_buffers; + std::vector scoped_buffers; scoped_buffers.reserve(arguments.size()); for (int i = 0; i < arguments.size(); ++i) { const Literal& argument = arguments[i]; const tensorflow::gtl::optional& shape_with_layout = shapes_with_layout[i]; - StatusOr> pushed; + StatusOr pushed; if (shape_with_layout) { std::unique_ptr relaid = argument.Relayout(shape_with_layout.value()); @@ -185,7 +185,7 @@ StatusOr> CompiledLocalComputation::Execute( std::vector argument_buffers; argument_buffers.reserve(scoped_buffers.size()); for (auto& buffer : scoped_buffers) { - argument_buffers.push_back(buffer.get()); + argument_buffers.push_back(&buffer); } DeviceAssignment device_assignment = @@ -202,7 +202,7 @@ StatusOr> CompiledLocalComputation::Execute( options.set_intra_op_thread_pool( client->backend().eigen_intra_op_thread_pool_device()); options.set_device_assignment(&device_assignment); - StatusOr> result_buffer_status = + StatusOr result_buffer_status = executable_->Run(argument_buffers, options); if (!result_buffer_status.ok()) { results[replica] = result_buffer_status.status(); @@ -210,8 +210,8 @@ StatusOr> CompiledLocalComputation::Execute( } // Transfer result out - results[replica] = - client->ShapedBufferToLiteral(*result_buffer_status.ValueOrDie()); + results[replica] = client->ShapedBufferToLiteral( + std::move(result_buffer_status).ValueOrDie()); }); } } @@ -236,7 +236,7 @@ LocalShapedBuffer* CompiledLocalComputation::ExecuteWithShapedBuffers( std::vector argument_buffers; argument_buffers.reserve(argument_handles.size()); for (auto& handle : argument_handles) { - 
argument_buffers.push_back(handle->shaped_buffer().get()); + argument_buffers.push_back(handle->shaped_buffer()); } // Execute @@ -245,7 +245,7 @@ LocalShapedBuffer* CompiledLocalComputation::ExecuteWithShapedBuffers( options.set_inter_op_thread_pool(client->backend().inter_op_thread_pool()); options.set_intra_op_thread_pool( client->backend().eigen_intra_op_thread_pool_device()); - std::unique_ptr result_buffer = + ScopedShapedBuffer result_buffer = executable_->Run(argument_buffers, options).ConsumeValueOrDie(); return new LocalShapedBuffer(std::move(result_buffer)); @@ -521,6 +521,17 @@ ComputationDataHandle LocalComputationBuilder::Conditional( false_computation.computation()); } +StatusOr LocalComputationBuilder::IsConstant( + const ComputationDataHandle& operand, int64 num_parameters) { + return builder_.IsConstant(operand, num_parameters); +} + +StatusOr> LocalComputationBuilder::ComputeConstant( + const ComputationDataHandle& operand, const Layout* output_layout, + tensorflow::gtl::ArraySlice parameters) { + return builder_.ComputeConstant(operand, output_layout, parameters); +} + #define _FORWARD(method_name, return_sig, args_sig, args) \ return_sig LocalComputationBuilder::method_name args_sig { \ return builder_.method_name args; \ diff --git a/tensorflow/compiler/xla/python/local_computation_builder.h b/tensorflow/compiler/xla/python/local_computation_builder.h index a7375c8965e9041226ffee08dab6ffafa25312af..e1048909ab29c2147a37ed72844391400d99e90d 100644 --- a/tensorflow/compiler/xla/python/local_computation_builder.h +++ b/tensorflow/compiler/xla/python/local_computation_builder.h @@ -62,12 +62,12 @@ class LocalShapedBuffer { static LocalShapedBuffer* FromLiteral( const Literal& argument, const tensorflow::gtl::optional& shape_with_layout); - LocalShapedBuffer(std::unique_ptr shaped_buffer); - const std::unique_ptr& shaped_buffer() const; + LocalShapedBuffer(ScopedShapedBuffer shaped_buffer); + const ScopedShapedBuffer* shaped_buffer() const; 
std::unique_ptr ToLiteral() const; private: - std::unique_ptr shaped_buffer_; + ScopedShapedBuffer shaped_buffer_; }; // Wraps a LocalExecutable produced by compiling a @@ -268,6 +268,13 @@ class LocalComputationBuilder { const ComputationDataHandle& false_operand, const LocalComputation& false_computation); + StatusOr IsConstant(const ComputationDataHandle& operand, + int64 num_parameters); + + StatusOr > ComputeConstant( + const ComputationDataHandle& operand, const Layout* output_layout, + tensorflow::gtl::ArraySlice parameters); + #define _FORWARD(method_name, return_sig, args_sig) \ return_sig method_name args_sig; diff --git a/tensorflow/compiler/xla/python/local_computation_builder.i b/tensorflow/compiler/xla/python/local_computation_builder.i index 8f231d1a12d92ecd93908771019c1440da6855e3..ac792e8189bda9eda472e7d282db86ac988c57b9 100644 --- a/tensorflow/compiler/xla/python/local_computation_builder.i +++ b/tensorflow/compiler/xla/python/local_computation_builder.i @@ -182,7 +182,7 @@ tensorflow::ImportNumpy(); %typemap(in) const ComputationDataHandle& (ComputationDataHandle temp) { const int64 handle = numpy::PyIntOrPyLongToLong($input); if (handle == -1 && PyErr_Occurred()) { - return NULL; + SWIG_fail; } temp.set_handle(handle); $1 = &temp; @@ -201,7 +201,7 @@ tensorflow::ImportNumpy(); } } else { PyErr_SetString(PyExc_RuntimeError, $1.status().ToString().c_str()); - return NULL; + SWIG_fail; } } @@ -211,7 +211,7 @@ tensorflow::ImportNumpy(); $result = numpy::PyObjectFromXlaLiteral(*value); } else { PyErr_SetString(PyExc_RuntimeError, $1.status().ToString().c_str()); - return NULL; + SWIG_fail; } } @@ -224,7 +224,7 @@ tensorflow::ImportNumpy(); } } else { PyErr_SetString(PyExc_RuntimeError, $1.status().ToString().c_str()); - return NULL; + SWIG_fail; } } @@ -233,7 +233,16 @@ tensorflow::ImportNumpy(); $result = numpy::PyShapeInfoFromXlaShape($1.ConsumeValueOrDie()); } else { PyErr_SetString(PyExc_RuntimeError, $1.status().ToString().c_str()); - return 
NULL; + SWIG_fail; + } +} + +%typemap(out) StatusOr { + if ($1.ok()) { + $result = PyBool_FromLong($1.ConsumeValueOrDie()); + } else { + PyErr_SetString(PyExc_RuntimeError, $1.status().ToString().c_str()); + SWIG_fail; } } @@ -241,7 +250,7 @@ tensorflow::ImportNumpy(); if (!$1.ok()) { PyErr_SetString( PyExc_RuntimeError, $1.ToString().c_str()); - return NULL; + SWIG_fail; } Py_INCREF(Py_None); $result = Py_None; @@ -253,7 +262,7 @@ tensorflow::ImportNumpy(); (std::vector temps) { if (!PySequence_Check($input)) { PyErr_SetString(PyExc_TypeError, "Argument is not a sequence"); - return NULL; + SWIG_fail; } const int size = PySequence_Size($input); temps.resize(size); @@ -265,13 +274,13 @@ tensorflow::ImportNumpy(); PyExc_TypeError, "Argument sequence element cannot be converted to int"); Py_DECREF(o); - return NULL; + SWIG_fail; } temps[i] = numpy::PyIntOrPyLongToLong(py_int); if (temps[i] == -1 && PyErr_Occurred()) { Py_DECREF(py_int); Py_DECREF(o); - return NULL; + SWIG_fail; } Py_DECREF(py_int); Py_DECREF(o); @@ -285,7 +294,7 @@ tensorflow::ImportNumpy(); (std::vector temps) { if (!PySequence_Check($input)) { PyErr_SetString(PyExc_TypeError, "Argument is not a sequence"); - return NULL; + SWIG_fail; } const int size = PySequence_Size($input); temps.resize(size); @@ -296,13 +305,13 @@ tensorflow::ImportNumpy(); PyErr_SetString( PyExc_TypeError, "Argument sequence element cannot be converted to int"); - return NULL; + SWIG_fail; } const int64 handle = numpy::PyIntOrPyLongToLong(py_int); if (handle == -1 && PyErr_Occurred()) { Py_DECREF(py_int); Py_DECREF(o); - return NULL; + SWIG_fail; } temps[i].set_handle(handle); Py_DECREF(py_int); @@ -317,7 +326,7 @@ tensorflow::ImportNumpy(); (std::vector temps) { if (!PySequence_Check($input)) { PyErr_SetString(PyExc_TypeError, "Argument is not a sequence"); - return NULL; + SWIG_fail; } const int size = PySequence_Size($input); temps.reserve(size); @@ -326,7 +335,7 @@ tensorflow::ImportNumpy(); LocalShapedBuffer* lsbp; if 
((SWIG_ConvertPtr(o, (void**) &lsbp, $descriptor(xla::swig::LocalShapedBuffer*), SWIG_POINTER_EXCEPTION)) == -1) { - return NULL; + SWIG_fail; } temps.push_back(lsbp); Py_DECREF(o); @@ -340,7 +349,7 @@ tensorflow::ImportNumpy(); literal_status = numpy::XlaLiteralFromPyObject($input); if (!literal_status.ok()) { PyErr_SetString(PyExc_RuntimeError, literal_status.status().ToString().c_str()); - return NULL; + SWIG_fail; } $1 = literal_status.ValueOrDie().get(); } @@ -352,7 +361,7 @@ tensorflow::ImportNumpy(); %typemap(out) StatusOr< std::unique_ptr > { if (!$1.ok()) { PyErr_SetString(PyExc_RuntimeError, $1.status().ToString().c_str()); - return NULL; + SWIG_fail; } $result = numpy::PyObjectFromXlaLiteral(*$1.ValueOrDie()); } @@ -360,7 +369,7 @@ tensorflow::ImportNumpy(); %typemap(in) const std::vector& (std::vector temps) { if (!PySequence_Check($input)) { PyErr_SetString(PyExc_TypeError, "Argument is not a sequence"); - return NULL; + SWIG_fail; } const int size = PySequence_Size($input); for (int i = 0; i < size; ++i) { @@ -369,7 +378,7 @@ tensorflow::ImportNumpy(); if (!literal_status.ok()) { PyErr_SetString(PyExc_RuntimeError, literal_status.status().ToString().c_str()); Py_DECREF(o); - return NULL; + SWIG_fail; } temps.push_back(std::move(*literal_status.ConsumeValueOrDie())); Py_DECREF(o); @@ -383,7 +392,7 @@ tensorflow::ImportNumpy(); StatusOr statusor = numpy::OpMetadataFromPyObject($input); if (!statusor.ok()) { PyErr_SetString(PyExc_RuntimeError, statusor.status().ToString().c_str()); - return NULL; + SWIG_fail; } temp = std::move(statusor).ValueOrDie(); $1 = &temp; @@ -395,7 +404,7 @@ tensorflow::ImportNumpy(); StatusOr statusor = numpy::XlaShapeFromPyShape($input); if (!statusor.ok()) { PyErr_SetString(PyExc_RuntimeError, statusor.status().ToString().c_str()); - return NULL; + SWIG_fail; } temp = std::move(statusor).ValueOrDie(); $1 = &temp; @@ -410,7 +419,7 @@ tensorflow::ImportNumpy(); StatusOr statusor = numpy::XlaShapeFromPyShape($input); if 
(!statusor.ok()) { PyErr_SetString(PyExc_RuntimeError, statusor.status().ToString().c_str()); - return NULL; + SWIG_fail; } temp = std::move(statusor).ValueOrDie(); $1 = &temp; @@ -424,7 +433,7 @@ tensorflow::ImportNumpy(); %typemap(in) const std::vector& (std::vector temps) { if (!PySequence_Check($input)) { PyErr_SetString(PyExc_TypeError, "Argument is not a sequence"); - return NULL; + SWIG_fail; } const int size = PySequence_Size($input); for (int i = 0; i < size; ++i) { @@ -433,7 +442,7 @@ tensorflow::ImportNumpy(); Py_DECREF(o); if (!statusor.ok()) { PyErr_SetString(PyExc_RuntimeError, statusor.status().ToString().c_str()); - return NULL; + SWIG_fail; } temps.push_back(statusor.ConsumeValueOrDie()); } @@ -444,7 +453,7 @@ tensorflow::ImportNumpy(); std::vector > temps) { if (!PySequence_Check($input)) { PyErr_SetString(PyExc_TypeError, "Argument is not a sequence"); - return NULL; + SWIG_fail; } const int size = PySequence_Size($input); for (int i = 0; i < size; ++i) { @@ -456,7 +465,7 @@ tensorflow::ImportNumpy(); Py_DECREF(o); if (!statusor.ok()) { PyErr_SetString(PyExc_RuntimeError, statusor.status().ToString().c_str()); - return NULL; + SWIG_fail; } temps.push_back(statusor.ConsumeValueOrDie()); } @@ -470,18 +479,18 @@ tensorflow::ImportNumpy(); PyObject* py_int = numpy::PyNumberToPyInt($input); if (!py_int) { PyErr_SetString(PyExc_TypeError, "Argument cannot be converted to int"); - return NULL; + SWIG_fail; } const long value = numpy::PyIntOrPyLongToLong(py_int); if (value == -1 && PyErr_Occurred()) { Py_DECREF(py_int); - return NULL; + SWIG_fail; } if (!PrimitiveType_IsValid(value)) { PyErr_SetString( PyExc_TypeError, "Argument not valid for PrimitiveType enum"); Py_DECREF(py_int); - return NULL; + SWIG_fail; } $1 = static_cast(value); } @@ -492,19 +501,19 @@ tensorflow::ImportNumpy(); (std::vector > temps) { if (!PySequence_Check($input)) { PyErr_SetString(PyExc_TypeError, "Argument is not a sequence"); - return NULL; + SWIG_fail; } const int size = 
PySequence_Size($input); temps.reserve(size); for (int i = 0; i < size; ++i) { PyObject* o = PySequence_GetItem($input, i); if (!o) { - return NULL; + SWIG_fail; } PyObject* first = PyTuple_GetItem(o, 0); if (!first) { Py_DECREF(o); - return NULL; + SWIG_fail; } PyObject* first_pyint = numpy::PyNumberToPyInt(first); if (!first_pyint) { @@ -512,13 +521,13 @@ tensorflow::ImportNumpy(); PyExc_TypeError, "First pair item cannot be converted to int"); Py_DECREF(o); - return NULL; + SWIG_fail; } PyObject* second = PyTuple_GetItem(o, 1); if (!second) { Py_DECREF(o); Py_DECREF(first_pyint); - return NULL; + SWIG_fail; } PyObject* second_pyint = numpy::PyNumberToPyInt(second); if (!second_pyint) { @@ -527,21 +536,21 @@ tensorflow::ImportNumpy(); "Second pair item cannot be converted to int"); Py_DECREF(o); Py_DECREF(first_pyint); - return NULL; + SWIG_fail; } const int64 first_value = numpy::PyIntOrPyLongToLong(first_pyint); if (first_value == -1 && PyErr_Occurred()) { Py_DECREF(o); Py_DECREF(first_pyint); Py_DECREF(second_pyint); - return NULL; + SWIG_fail; } const int64 second_value = numpy::PyIntOrPyLongToLong(second_pyint); if (second_value == -1 && PyErr_Occurred()) { Py_DECREF(o); Py_DECREF(first_pyint); Py_DECREF(second_pyint); - return NULL; + SWIG_fail; } temps.push_back(std::make_pair(first_value, second_value)); Py_DECREF(o); @@ -559,26 +568,26 @@ tensorflow::ImportNumpy(); PyObject* lhs_contracting_dimensions = PyObject_GetAttrString( $input, "lhs_contracting_dimensions"); if (!lhs_contracting_dimensions) { - return NULL; + SWIG_fail; } length = PySequence_Size(lhs_contracting_dimensions); if (length == -1) { Py_DECREF(lhs_contracting_dimensions); - return NULL; + SWIG_fail; } for (int i = 0; i < length; ++i) { PyObject* item = PySequence_GetItem(lhs_contracting_dimensions, i); if (!item) { Py_DECREF(lhs_contracting_dimensions); - return NULL; + SWIG_fail; } const int64 dimension = numpy::PyIntOrPyLongToLong(item); if (dimension == -1 && PyErr_Occurred()) { 
Py_DECREF(item); Py_DECREF(lhs_contracting_dimensions); - return NULL; + SWIG_fail; } dimension_numbers.add_lhs_contracting_dimensions(dimension); Py_DECREF(item); @@ -589,26 +598,26 @@ tensorflow::ImportNumpy(); PyObject* rhs_contracting_dimensions = PyObject_GetAttrString( $input, "rhs_contracting_dimensions"); if (!lhs_contracting_dimensions) { - return NULL; + SWIG_fail; } length = PySequence_Size(rhs_contracting_dimensions); if (length == -1) { Py_DECREF(rhs_contracting_dimensions); - return NULL; + SWIG_fail; } for (int i = 0; i < length; ++i) { PyObject* item = PySequence_GetItem(rhs_contracting_dimensions, i); if (!item) { Py_DECREF(rhs_contracting_dimensions); - return NULL; + SWIG_fail; } const int64 dimension = numpy::PyIntOrPyLongToLong(item); if (dimension == -1 && PyErr_Occurred()) { Py_DECREF(item); Py_DECREF(rhs_contracting_dimensions); - return NULL; + SWIG_fail; } dimension_numbers.add_rhs_contracting_dimensions(dimension); Py_DECREF(item); @@ -619,26 +628,26 @@ tensorflow::ImportNumpy(); PyObject* lhs_batch_dimensions = PyObject_GetAttrString( $input, "lhs_batch_dimensions"); if (!lhs_batch_dimensions) { - return NULL; + SWIG_fail; } length = PySequence_Size(lhs_batch_dimensions); if (length == -1) { Py_DECREF(lhs_batch_dimensions); - return NULL; + SWIG_fail; } for (int i = 0; i < length; ++i) { PyObject* item = PySequence_GetItem(lhs_batch_dimensions, i); if (!item) { Py_DECREF(lhs_batch_dimensions); - return NULL; + SWIG_fail; } const int64 dimension = numpy::PyIntOrPyLongToLong(item); if (dimension == -1 && PyErr_Occurred()) { Py_DECREF(item); Py_DECREF(lhs_batch_dimensions); - return NULL; + SWIG_fail; } dimension_numbers.add_lhs_batch_dimensions(dimension); Py_DECREF(item); @@ -649,26 +658,26 @@ tensorflow::ImportNumpy(); PyObject* rhs_batch_dimensions = PyObject_GetAttrString( $input, "rhs_batch_dimensions"); if (!rhs_batch_dimensions) { - return NULL; + SWIG_fail; } length = PySequence_Size(rhs_batch_dimensions); if (length == -1) { 
Py_DECREF(rhs_batch_dimensions); - return NULL; + SWIG_fail; } for (int i = 0; i < length; ++i) { PyObject* item = PySequence_GetItem(rhs_batch_dimensions, i); if (!item) { Py_DECREF(rhs_batch_dimensions); - return NULL; + SWIG_fail; } const int64 dimension = numpy::PyIntOrPyLongToLong(item); if (dimension == -1 && PyErr_Occurred()) { Py_DECREF(item); Py_DECREF(rhs_batch_dimensions); - return NULL; + SWIG_fail; } dimension_numbers.add_rhs_batch_dimensions(dimension); Py_DECREF(item); @@ -684,20 +693,20 @@ tensorflow::ImportNumpy(); (PaddingConfig padding_config) { PyObject* dimensions = PyObject_GetAttrString($input, "dimensions"); if (!dimensions) { - return NULL; + SWIG_fail; } int length = PySequence_Size(dimensions); if (length == -1) { Py_DECREF(dimensions); - return NULL; + SWIG_fail; } for (int i = 0; i < length; ++i) { PyObject* item = PySequence_GetItem(dimensions, i); if (!item) { Py_DECREF(dimensions); - return NULL; + SWIG_fail; } int64 edge_padding_low, edge_padding_high, interior_padding; if (!GetIntAttr(item, "edge_padding_low", &edge_padding_low) @@ -705,7 +714,7 @@ tensorflow::ImportNumpy(); || !GetIntAttr(item, "interior_padding", &interior_padding)) { Py_DECREF(item); Py_DECREF(dimensions); - return NULL; + SWIG_fail; } Py_DECREF(item); @@ -727,32 +736,32 @@ tensorflow::ImportNumpy(); int64 value; if (!GetIntAttr($input, "input_batch_dimension", &value)) { - return NULL; + SWIG_fail; } dimension_numbers.set_input_batch_dimension(value); if (!GetIntAttr($input, "input_feature_dimension", &value)) { - return NULL; + SWIG_fail; } dimension_numbers.set_input_feature_dimension(value); if (!GetIntAttr($input, "output_batch_dimension", &value)) { - return NULL; + SWIG_fail; } dimension_numbers.set_output_batch_dimension(value); if (!GetIntAttr($input, "output_feature_dimension", &value)) { - return NULL; + SWIG_fail; } dimension_numbers.set_output_feature_dimension(value); if (!GetIntAttr($input, "kernel_output_feature_dimension", &value)) { - return 
NULL; + SWIG_fail; } dimension_numbers.set_kernel_output_feature_dimension(value); if (!GetIntAttr($input, "kernel_input_feature_dimension", &value)) { - return NULL; + SWIG_fail; } dimension_numbers.set_kernel_input_feature_dimension(value); @@ -761,24 +770,24 @@ tensorflow::ImportNumpy(); o = PyObject_GetAttrString($input, "input_spatial_dimensions"); if (!o) { - return NULL; + SWIG_fail; } length = PySequence_Size(o); if (length == -1) { Py_DECREF(o); - return NULL; + SWIG_fail; } for (int i = 0; i < length; ++i) { PyObject* item = PySequence_GetItem(o, i); if (!item) { Py_DECREF(o); - return NULL; + SWIG_fail; } const int64 dimension = numpy::PyIntOrPyLongToLong(item); if (dimension == -1 && PyErr_Occurred()) { Py_DECREF(item); Py_DECREF(o); - return NULL; + SWIG_fail; } dimension_numbers.add_input_spatial_dimensions(dimension); Py_DECREF(item); @@ -787,24 +796,24 @@ tensorflow::ImportNumpy(); o = PyObject_GetAttrString($input, "kernel_spatial_dimensions"); if (!o) { - return NULL; + SWIG_fail; } length = PySequence_Size(o); if (length == -1) { Py_DECREF(o); - return NULL; + SWIG_fail; } for (int i = 0; i < length; ++i) { PyObject* item = PySequence_GetItem(o, i); if (!item) { Py_DECREF(o); - return NULL; + SWIG_fail; } const int64 dimension = numpy::PyIntOrPyLongToLong(item); if (dimension == -1 && PyErr_Occurred()) { Py_DECREF(item); Py_DECREF(o); - return NULL; + SWIG_fail; } dimension_numbers.add_kernel_spatial_dimensions(dimension); Py_DECREF(item); @@ -813,24 +822,24 @@ tensorflow::ImportNumpy(); o = PyObject_GetAttrString($input, "output_spatial_dimensions"); if (!o) { - return NULL; + SWIG_fail; } length = PySequence_Size(o); if (length == -1) { Py_DECREF(o); - return NULL; + SWIG_fail; } for (int i = 0; i < length; ++i) { PyObject* item = PySequence_GetItem(o, i); if (!item) { Py_DECREF(o); - return NULL; + SWIG_fail; } const int64 dimension = numpy::PyIntOrPyLongToLong(item); if (dimension == -1 && PyErr_Occurred()) { Py_DECREF(item); Py_DECREF(o); - 
return NULL; + SWIG_fail; } dimension_numbers.add_output_spatial_dimensions(dimension); Py_DECREF(item); @@ -865,12 +874,12 @@ tensorflow::ImportNumpy(); PyObject* o = PyObject_GetAttrString($input, "hlo_profile"); if (o == NULL) { - return NULL; + SWIG_fail; } if (o != Py_None) { if (!PyBool_Check(o)) { PyErr_SetString(PyExc_TypeError, "ExecutableBuildOptions.hlo_profile must be a bool or None."); - return NULL; + SWIG_fail; } build_options.set_hlo_profile(o == Py_True); } @@ -885,7 +894,7 @@ tensorflow::ImportNumpy(); if (!statusor.ok()) { PyErr_SetString(PyExc_TypeError, tensorflow::strings::StrCat("ExecutableBuildOptions.result_shape could not be created from Python shape value: ", statusor.status().ToString()).c_str()); Py_DECREF(o); - return NULL; + SWIG_fail; } build_options.set_result_layout(statusor.ValueOrDie()); } @@ -951,6 +960,7 @@ tensorflow::ImportNumpy(); %unignore xla::swig::LocalComputationBuilder::RngBernoulli; %unignore xla::swig::LocalComputationBuilder::While; %unignore xla::swig::LocalComputationBuilder::Conditional; +%unignore xla::swig::LocalComputationBuilder::IsConstant; %unignore xla::swig::LocalComputationBuilder::Eq; %unignore xla::swig::LocalComputationBuilder::Ne; %unignore xla::swig::LocalComputationBuilder::Ge; diff --git a/tensorflow/compiler/xla/python/numpy_bridge.cc b/tensorflow/compiler/xla/python/numpy_bridge.cc index eec48479c929ab0823fef342fc284bfdc4b1f339..dc6f5fe5fcc067c99ced01812f9f2388a00766d0 100644 --- a/tensorflow/compiler/xla/python/numpy_bridge.cc +++ b/tensorflow/compiler/xla/python/numpy_bridge.cc @@ -181,16 +181,6 @@ StatusOr XlaShapeFromPyShape(PyObject* o) { PyObjectCppRepr(o).c_str()); }; - auto get_attr = [o, &error](const string& field) -> StatusOr { - PyObject* result = - PyObject_GetAttrString(o, const_cast(field.c_str())); - if (result == nullptr) { - return error(tensorflow::strings::StrCat( - "Failed to get attribute of Shape object:", field)); - } - return result; - }; - auto call_method = [o, 
&error](const string& method) -> StatusOr { PyObject* result = PyObject_CallMethod(o, const_cast(method.c_str()), nullptr); @@ -202,12 +192,16 @@ StatusOr XlaShapeFromPyShape(PyObject* o) { }; PyObject* np_type; - TF_ASSIGN_OR_RETURN(np_type, get_attr("np_dtype")); + TF_ASSIGN_OR_RETURN(np_type, call_method("numpy_dtype")); if (np_type->ob_type != &PyArrayDescr_Type) { - return error("Shape attribute np_dtype is not an integer numpy dtype"); + return error( + "Return value of shape method numpy_dtype " + "is not an integer numpy dtype"); } if (!NumpyTypeIsValid(NumpyTypenum(np_type))) { - return error("Shape attribute np_dtype is not a valid integer numpy dtype"); + return error( + "Return value of shape method numpy_dtype " + "is not a valid integer numpy dtype"); } const PrimitiveType element_type = NumpyTypeToPrimitiveType(NumpyTypenum(np_type)); diff --git a/tensorflow/compiler/xla/python/xla_client.py b/tensorflow/compiler/xla/python/xla_client.py index e548d420f4614d3b3fff6034f9a174d553ebea66..f6809b6b871d7e246dd43811c7e8c08378d53989 100644 --- a/tensorflow/compiler/xla/python/xla_client.py +++ b/tensorflow/compiler/xla/python/xla_client.py @@ -166,14 +166,14 @@ class LocalBuffer(object): self._delete = c_api.DeleteLocalShapedBuffer @staticmethod - def from_py(npval, layout_fn=None): - npval = require_numpy_array_layout(npval) + def from_pyval(pyval, layout_fn=None): + pyval = require_numpy_array_layout(pyval) if layout_fn: - shape = Shape.from_numpy(npval) + shape = Shape.from_pyval(pyval) shape = shape.map_leaves(layout_fn) else: shape = None - return LocalBuffer(c_api.LocalShapedBuffer.FromLiteral(npval, shape)) + return LocalBuffer(c_api.LocalShapedBuffer.FromLiteral(pyval, shape)) def to_py(self): return self.c_local_shaped_buffer.ToLiteral() @@ -191,53 +191,104 @@ class LocalBuffer(object): class Shape(object): - """XLA shape. + """Represents an XLA shape. 
- Represents an XLA shape by a corresponding Python/Numpy type and a - list of dimensions, which are themselves Shapes in case this one - represents an XLA tuple. + A shape is either an array shape, having rank-many integer + dimensions and an element type (represented by a Numpy dtype), or it + is a tuple shape, having a shape for every tuple component: + + type shape = + TupleShape of shape list + | ArrayShape of { dimensions: int list; element_type: dtype } + + Callers are expected to instantiate this class only via the static + constructors: tuple_shape, array_shape, and from_pyval. """ - def __init__(self, np_dtype, dimensions, minor_to_major=None): + @staticmethod + def tuple_shape(tuple_shapes): + """Construct a tuple shape.""" + if (not isinstance(tuple_shapes, (tuple, list)) or + not all(isinstance(t, Shape) for t in tuple_shapes)): + raise TypeError('tuple_shapes must be a tuple of Shapes') + return Shape(tuple_shapes, tuple) + + @staticmethod + def array_shape(element_type, dimensions, minor_to_major=None): + """Construct an array shape.""" + if (not isinstance(dimensions, tuple) or + not all(isinstance(i, int) for i in dimensions)): + dimensions = tuple(int(i) for i in dimensions) + return Shape(dimensions, np.dtype(element_type), + minor_to_major=minor_to_major) + + @staticmethod + def from_pyval(pyval): + def convert(pyval): + if isinstance(pyval, tuple): + return Shape.tuple_shape(tuple(convert(elt) for elt in pyval)) + else: + pyval = require_numpy_array_layout(pyval) + return Shape.array_shape(pyval.dtype, np.shape(pyval)) + return convert(pyval) + + def __init__(self, dimensions, dtype, minor_to_major=None): assert isinstance(dimensions, tuple) - self.np_dtype = np_dtype self._dimensions = dimensions + self._dtype = dtype + self._is_tuple = dtype == tuple self._minor_to_major = minor_to_major self._check_minor_to_major() def __eq__(self, other): # pylint: disable=protected-access - return (self.np_dtype == other.np_dtype and + return (self._dtype 
== other._dtype and self._dimensions == other._dimensions and self._minor_to_major == other._minor_to_major) def __repr__(self): - return ('xla_client.Shape(np_dtype={!r}, dimensions={!r}, ' - 'minor_to_major={!r})').format(self.np_dtype, self._dimensions, - self._minor_to_major) - - def element_type(self): - return DTYPE_TO_XLA_ELEMENT_TYPE[str(self.np_dtype)] + return ('xla_client.Shape(_dtype={!r}, _dimensions={!r}, ' + '_is_tuple={!r}), _minor_to_major={!r}').format( + self._dtype, self._dimensions, self._is_tuple, + self._minor_to_major) def is_tuple(self): - return self.element_type() == xla_data_pb2.TUPLE - - def dimensions(self): - if self.is_tuple(): - raise ValueError('Tuple shape has no dimensions') - return self._dimensions + return self._is_tuple - def minor_to_major(self): - return self._minor_to_major + def is_array(self): + return not self._is_tuple def tuple_shapes(self): if not self.is_tuple(): - raise ValueError('Shape is not a tuple shape') + raise ValueError('not a tuple shape') + return self._dimensions + + def numpy_dtype(self): + """Like element_type(), but returns dtype('O') in case of a tuple shape.""" + if self.is_tuple(): + return np.dtype(np.object) + else: + return self.element_type() + + def xla_element_type(self): + return DTYPE_TO_XLA_ELEMENT_TYPE[str(self.numpy_dtype())] + + def element_type(self): + if not self.is_array(): + raise ValueError('not an array shape') + return self._dtype + + def dimensions(self): + if not self.is_array(): + raise ValueError('not an array shape') return self._dimensions def rank(self): return len(self.dimensions()) + def minor_to_major(self): + return self._minor_to_major + def map_leaves(self, f): """Map f over each leaf-level array subshape. 
@@ -250,7 +301,7 @@ class Shape(object): """ if self.is_tuple(): children = tuple(child.map_leaves(f) for child in self.tuple_shapes()) - return Shape(np.dtype('O'), children) + return Shape.tuple_shape(children) else: mapped = f(self) return self if mapped is None else mapped @@ -264,30 +315,24 @@ class Shape(object): assert sorted(mtm) == range(len(mtm)), self def update_minor_to_major(self, minor_to_major): + if not self.is_array(): + raise ValueError('not an array shape') if not isinstance(minor_to_major, tuple): raise TypeError('minor_to_major must be a tuple') - updated = Shape(self.np_dtype, tuple(self.dimensions()), minor_to_major) + updated = Shape.array_shape( + self.element_type(), self.dimensions(), minor_to_major) updated._check_minor_to_major() # pylint: disable=protected-access return updated - @staticmethod - def from_numpy(npval): - - def convert(npval): - if isinstance(npval, tuple): - return Shape(np.dtype('O'), tuple(convert(elt) for elt in npval)) - else: - return Shape(npval.dtype, np.shape(npval)) - - return convert(require_numpy_array_layout(npval)) - def _wrap_shape(shape_info): dtype, dims = shape_info element_type = DTYPE_TO_XLA_ELEMENT_TYPE[str(dtype)] if element_type == xla_data_pb2.TUPLE: - dims = tuple(_wrap_shape(subshape_info) for subshape_info in dims) - return Shape(dtype, dims) + shapes = tuple(_wrap_shape(subshape_info) for subshape_info in dims) + return Shape.tuple_shape(shapes) + else: + return Shape.array_shape(dtype, dims) def _wrap_data_handle(handle): @@ -420,7 +465,7 @@ class LocalComputation(object): compile_options=None, layout_fn=None): return self.Compile( - argument_shapes=[Shape.from_numpy(arg) for arg in arguments], + argument_shapes=[Shape.from_pyval(arg) for arg in arguments], compile_options=compile_options, layout_fn=layout_fn) @@ -428,7 +473,7 @@ class LocalComputation(object): """Execute with Python values as arguments and return value.""" if not self.is_compiled: raise ValueError('Cannot execute an 
uncompiled local XLA computation.') - argument_shapes = [Shape.from_numpy(arg) for arg in arguments] + argument_shapes = [Shape.from_pyval(arg) for arg in arguments] if layout_fn: argument_shapes = [ shape.map_leaves(layout_fn) for shape in argument_shapes @@ -607,7 +652,7 @@ class ComputationBuilder(object): A ComputationDataHandle message. """ return self.ParameterWithShape( - Shape.from_numpy(value), name=name, parameter_num=parameter_num) + Shape.from_pyval(value), name=name, parameter_num=parameter_num) def Broadcast(self, operand, sizes): """Enqueues a broadcast operation onto the computation. @@ -968,7 +1013,7 @@ class ComputationBuilder(object): Returns: a ComputationDataHandle to the generated array of F32 values. """ - shape = Shape(self.GetShape(mu).np_dtype, dims) + shape = Shape.array_shape(self.GetShape(mu).element_type(), dims) return _wrap_data_handle( self._client.RngNormal( _unwrap_data_handle(mu), _unwrap_data_handle(sigma), shape)) @@ -988,7 +1033,7 @@ class ComputationBuilder(object): Returns: a ComputationDataHandle to the generated array of values with the same numeric type (F32, S32, or U32) as the arguments a and b. """ - shape = Shape(self.GetShape(a).np_dtype, dims) + shape = Shape.array_shape(self.GetShape(a).element_type(), dims) return _wrap_data_handle( self._client.RngUniform( _unwrap_data_handle(a), _unwrap_data_handle(b), shape)) @@ -1028,6 +1073,20 @@ class ComputationBuilder(object): _unwrap_data_handle(false_operand), false_computation.c_local_computation)) + def IsConstant(self, operand, num_parameters=0): + """Enqueues an IsConstant operation onto the computation. + + Args: + operand: a ComputationDataHandle to test. + num_parameters: optional int, number of computation parameters to treat as + constant (default 0). + + Returns: bool indicating whether `operand` is a compile-time constant, + meaning its value does not depend on parameters with index greater than or + equal to `num_parameters`. 
+ """ + return self._client.IsConstant(_unwrap_data_handle(operand), num_parameters) + def Dot(self, lhs, rhs): """Enqueues a dot operation onto the computation. diff --git a/tensorflow/compiler/xla/python/xla_client_test.py b/tensorflow/compiler/xla/python/xla_client_test.py index 4c16c1f8b07a28d8098e92e27f81a126ed9bdf0c..c073c02040e4d260cf760ea2b25f70d60ddd41a1 100644 --- a/tensorflow/compiler/xla/python/xla_client_test.py +++ b/tensorflow/compiler/xla/python/xla_client_test.py @@ -319,7 +319,7 @@ class LocalBufferTest(LocalComputationTest): def _Execute(self, c, arguments): compiled_c = c.Build().CompileWithExampleArguments(arguments) - arg_buffers = [xla_client.LocalBuffer.from_py(arg) for arg in arguments] + arg_buffers = [xla_client.LocalBuffer.from_pyval(arg) for arg in arguments] result_buffer = compiled_c.ExecuteWithLocalBuffers(arg_buffers) return result_buffer.to_py() @@ -350,7 +350,7 @@ class LocalBufferTest(LocalComputationTest): c.Add(c.ParameterFromNumpy(NumpyArrayF32(0.)), c.ConstantF32Scalar(3.14)) arg = NumpyArrayF32(1.11) compiled_c = c.Build().CompileWithExampleArguments([arg]) - arg_buffer = xla_client.LocalBuffer.from_py(arg) + arg_buffer = xla_client.LocalBuffer.from_pyval(arg) arg_buffer.delete() with self.assertRaises(ValueError): compiled_c.ExecuteWithLocalBuffers([arg_buffer]) @@ -855,6 +855,17 @@ class SingleOpTest(LocalComputationTest): self.assertTrue(np.all(lo <= result)) self.assertTrue(np.all(result < hi)) + def testIsConstant(self): + c = self._NewComputation() + a = c.ConstantS32Scalar(3) + b = c.ConstantS32Scalar(1) + x = c.ParameterFromNumpy(NumpyArrayS32(0)) + const_expr = c.Sub(b, a) + non_const_expr = c.Mul(const_expr, x) + self.assertTrue(c.IsConstant(const_expr)) + self.assertFalse(c.IsConstant(non_const_expr)) + # self.assertTrue(c.IsConstant(c.Sub(c.Add(x, a), x))) # TODO(b/77245564) + class EmbeddedComputationsTest(LocalComputationTest): """Tests for XLA graphs with embedded computations (such as maps).""" @@ -1149,7 
+1160,6 @@ class EmbeddedComputationsTest(LocalComputationTest): self._ExecuteAndCompareClose( c, expected=np.sum(input_array, axis=tuple(dims))) - _ReduceAndTest(0) _ReduceAndTest(0) _ReduceAndTest(0, 1) _ReduceAndTest(0, 2) @@ -1277,7 +1287,7 @@ class EmbeddedComputationsTest(LocalComputationTest): def testInfeedS32Values(self): to_infeed = NumpyArrayS32([1, 2, 3, 4]) c = self._NewComputation() - c.Infeed(xla_client.Shape.from_numpy(to_infeed[0])) + c.Infeed(xla_client.Shape.from_pyval(to_infeed[0])) compiled_c = c.Build().CompileWithExampleArguments() for item in to_infeed: xla_client.transfer_to_infeed(item) @@ -1289,7 +1299,7 @@ class EmbeddedComputationsTest(LocalComputationTest): def testInfeedThenOutfeedS32(self): to_round_trip = NumpyArrayS32([1, 2, 3, 4]) c = self._NewComputation() - x = c.Infeed(xla_client.Shape.from_numpy(to_round_trip[0])) + x = c.Infeed(xla_client.Shape.from_pyval(to_round_trip[0])) c.Outfeed(x) compiled_c = c.Build().CompileWithExampleArguments() @@ -1299,7 +1309,7 @@ class EmbeddedComputationsTest(LocalComputationTest): execution.start() xla_client.transfer_to_infeed(want) got = xla_client.transfer_from_outfeed( - xla_client.Shape.from_numpy(to_round_trip[0])) + xla_client.Shape.from_pyval(to_round_trip[0])) execution.join() self.assertEqual(want, got) diff --git a/tensorflow/compiler/xla/rpc/BUILD b/tensorflow/compiler/xla/rpc/BUILD new file mode 100644 index 0000000000000000000000000000000000000000..977f8637873a4b6555798f533010a28ff36e8679 --- /dev/null +++ b/tensorflow/compiler/xla/rpc/BUILD @@ -0,0 +1,79 @@ +licenses(["notice"]) # Apache 2.0 + +package(default_visibility = ["//tensorflow:internal"]) + +load("//tensorflow:tensorflow.bzl", "tf_cc_test") +load("//tensorflow:tensorflow.bzl", "tf_cc_binary") +load( + "//tensorflow/compiler/xla:xla.bzl", + "xla_proto_library", + "xla_py_grpc_library", +) + +xla_proto_library( + name = "xla_service_proto", + srcs = ["xla_service.proto"], + use_grpc_plugin = True, + visibility = 
["//visibility:public"], + deps = [ + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla:xla_proto", + ], +) + +cc_library( + name = "grpc_stub", + srcs = ["grpc_stub.cc"], + hdrs = ["grpc_stub.h"], + deps = [ + ":xla_service_proto", + "//tensorflow/compiler/xla:service_interface", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/core:lib", + "//tensorflow/core/distributed_runtime/rpc:grpc_util", + ], +) + +tf_cc_binary( + name = "grpc_service_main_cpu", + srcs = ["grpc_service_main.cc"], + deps = [ + ":grpc_service", + "//tensorflow/compiler/xla/service:cpu_plugin", + "//tensorflow/core:framework_internal", + "//tensorflow/core:lib", + "@grpc//:grpc++_unsecure", + ], +) + +tf_cc_test( + name = "grpc_client_test", + srcs = ["grpc_client_test.cc"], + data = [ + "//tensorflow/compiler/xla/rpc:grpc_service_main_cpu", + ], + deps = [ + ":grpc_stub", + "//tensorflow/compiler/xla/client", + "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/compiler/xla/tests:literal_test_util", + "//tensorflow/core:framework_internal", + "//tensorflow/core:lib", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "@grpc//:grpc++_unsecure", + ], +) + +cc_library( + name = "grpc_service", + srcs = ["grpc_service.cc"], + hdrs = ["grpc_service.h"], + deps = [ + ":xla_service_proto", + "//tensorflow/compiler/xla/service", + "//tensorflow/compiler/xla/service:platform_util", + "//tensorflow/core/distributed_runtime/rpc:grpc_util", + "@grpc//:grpc++_unsecure", + ], +) diff --git a/tensorflow/compiler/xla/rpc/grpc_client_test.cc b/tensorflow/compiler/xla/rpc/grpc_client_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..b559ee4b5a345dbb2cc481b571562a0a630b3294 --- /dev/null +++ b/tensorflow/compiler/xla/rpc/grpc_client_test.cc @@ -0,0 +1,109 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// Simple C++ test to exercise the GRPC capabilities of XLA.
+//
+// Launches an RPC service in a subprocess and connects to it over a socket
+// using a GRPCStub.
+#include <memory>
+#include <vector>
+
+#include "grpc++/create_channel.h"
+#include "grpc++/security/credentials.h"
+
+#include "tensorflow/compiler/xla/client/client.h"
+#include "tensorflow/compiler/xla/client/computation_builder.h"
+#include "tensorflow/compiler/xla/rpc/grpc_stub.h"
+#include "tensorflow/compiler/xla/tests/literal_test_util.h"
+#include "tensorflow/core/lib/io/path.h"
+#include "tensorflow/core/lib/strings/stringprintf.h"
+#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/platform/net.h"
+#include "tensorflow/core/platform/subprocess.h"
+#include "tensorflow/core/platform/test.h"
+
+namespace xla {
+namespace {
+
+class GRPCClientTestBase : public ::testing::Test {
+ protected:
+  GRPCClientTestBase() {
+    string test_srcdir = tensorflow::testing::TensorFlowSrcRoot();
+    string service_main_path = tensorflow::io::JoinPath(
+        test_srcdir, "compiler/xla/rpc/grpc_service_main_cpu");
+    int port = tensorflow::internal::PickUnusedPortOrDie();
+    subprocess_.SetProgram(
+        service_main_path,
+        {service_main_path, tensorflow::strings::Printf("--port=%d", port)});
+    subprocess_.SetChannelAction(tensorflow::CHAN_STDOUT,
+                                 tensorflow::ACTION_DUPPARENT);
+    subprocess_.SetChannelAction(tensorflow::CHAN_STDERR,
+                                 tensorflow::ACTION_DUPPARENT);
+    CHECK(subprocess_.Start());
+    LOG(INFO) << "Launched subprocess";
+
+    auto channel =
+        ::grpc::CreateChannel(tensorflow::strings::Printf("localhost:%d", port),
+                              ::grpc::InsecureChannelCredentials());
+    channel->WaitForConnected(gpr_time_add(
+        gpr_now(GPR_CLOCK_REALTIME), gpr_time_from_seconds(10, GPR_TIMESPAN)));
+    LOG(INFO) << "Channel to server is connected on port " << port;
+
+    xla_service_ = grpc::XlaService::NewStub(channel);
+    stub_.reset(new GRPCStub(xla_service_.get()));
+    client_.reset(new Client(stub_.get()));
+  }
+
+  ~GRPCClientTestBase() override {
+    LOG(INFO) << "Killing subprocess";
+    subprocess_.Kill(SIGKILL);
+  }
+
+  tensorflow::SubProcess subprocess_;
+  std::unique_ptr<grpc::XlaService::Stub> xla_service_;
+  std::unique_ptr<GRPCStub> stub_;
+  std::unique_ptr<Client> client_;
+};
+
+TEST_F(GRPCClientTestBase, ItsAlive) {
+  ASSERT_NE(xla_service_, nullptr);
+  ASSERT_NE(stub_, nullptr);
+  ASSERT_NE(client_, nullptr);
+}
+
+TEST_F(GRPCClientTestBase, AxpyTenValues) {
+  ComputationBuilder builder(client_.get(), "axpy_10");
+  auto alpha = builder.ConstantR0<float>(3.1415926535);
+  auto x = builder.ConstantR1<float>(
+      {-1.0, 1.0, 2.0, -2.0, -3.0, 3.0, 4.0, -4.0, -5.0, 5.0});
+  auto y = builder.ConstantR1<float>(
+      {5.0, -5.0, -4.0, 4.0, 3.0, -3.0, -2.0, 2.0, 1.0, -1.0});
+  auto ax = builder.Mul(alpha, x);
+  auto axpy = builder.Add(ax, y);
+
+  std::vector<float> expected = {
+      1.85840735, -1.85840735, 2.28318531,   -2.28318531,  -6.42477796,
+      6.42477796, 10.56637061, -10.56637061, -14.70796327, 14.70796327};
+  std::unique_ptr<Literal> expected_literal =
+      Literal::CreateR1<float>(expected);
+  TF_ASSERT_OK_AND_ASSIGN(auto computation, builder.Build());
+  TF_ASSERT_OK_AND_ASSIGN(auto result_literal, client_->ExecuteAndTransfer(
+                                                   computation, {}, nullptr));
+  LiteralTestUtil::ExpectNear(*expected_literal, *result_literal,
+                              ErrorSpec(0.0001));
+}
+
+}  // namespace
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/rpc/grpc_service.cc
b/tensorflow/compiler/xla/rpc/grpc_service.cc
new file mode 100644
index 0000000000000000000000000000000000000000..0b100bd108e239964483ed5ba279dff61bce0023
--- /dev/null
+++ b/tensorflow/compiler/xla/rpc/grpc_service.cc
@@ -0,0 +1,192 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/rpc/grpc_service.h"
+#include "tensorflow/compiler/xla/service/platform_util.h"
+#include "tensorflow/core/distributed_runtime/rpc/grpc_util.h"
+
+namespace xla {
+
+/* static */ StatusOr<std::unique_ptr<GRPCService>> GRPCService::NewService(
+    se::Platform* platform) {
+  std::unique_ptr<GRPCService> grpc_service(new GRPCService());
+  TF_ASSIGN_OR_RETURN(grpc_service->service_,
+                      ::xla::Service::NewService(platform));
+  return std::move(grpc_service);
+}
+
+::grpc::Status DelegateRPC(std::function<tensorflow::Status()> op) {
+  tensorflow::Status s = op();
+  return tensorflow::ToGrpcStatus(s);
+}
+
+::grpc::Status GRPCService::Computation(::grpc::ServerContext* context,
+                                        const ComputationRequest* arg,
+                                        ComputationResponse* result) {
+  return DelegateRPC(
+      [this, arg, result]() { return service_->Computation(arg, result); });
+}
+
+::grpc::Status GRPCService::CreateOp(::grpc::ServerContext* context,
+                                     const OpRequest* arg, OpResponse* result) {
+  return DelegateRPC(
+      [this, arg, result]() { return service_->Op(arg, result); });
+}
+
+::grpc::Status GRPCService::Unregister(::grpc::ServerContext* context,
+                                       const UnregisterRequest* arg,
+                                       UnregisterResponse* result) {
+  return DelegateRPC(
+      [this, arg, result]() { return service_->Unregister(arg, result); });
+}
+
+::grpc::Status GRPCService::DeconstructTuple(::grpc::ServerContext* context,
+                                             const DeconstructTupleRequest* arg,
+                                             DeconstructTupleResponse* result) {
+  return DelegateRPC([this, arg, result]() {
+    return service_->DeconstructTuple(arg, result);
+  });
+}
+
+::grpc::Status GRPCService::SetReturnValue(::grpc::ServerContext* context,
+                                           const SetReturnValueRequest* arg,
+                                           SetReturnValueResponse* results) {
+  return DelegateRPC([this, arg, results]() {
+    return service_->SetReturnValue(arg, results);
+  });
+}
+
+::grpc::Status GRPCService::Execute(::grpc::ServerContext* context,
+                                    const ExecuteRequest* arg,
+                                    ExecuteResponse* result) {
+  return DelegateRPC(
+      [this, arg, result]() { return service_->Execute(arg, result); });
+}
+
+::grpc::Status GRPCService::ExecuteAsync(::grpc::ServerContext* context,
+                                         const ExecuteAsyncRequest* arg,
+                                         ExecuteAsyncResponse* result) {
+  return DelegateRPC(
+      [this, arg, result]() { return service_->ExecuteAsync(arg, result); });
+}
+
+::grpc::Status GRPCService::WaitForExecution(::grpc::ServerContext* context,
+                                             const WaitForExecutionRequest* arg,
+                                             WaitForExecutionResponse* result) {
+  return DelegateRPC([this, arg, result]() {
+    return service_->WaitForExecution(arg, result);
+  });
+}
+
+::grpc::Status GRPCService::TransferToClient(::grpc::ServerContext* context,
+                                             const TransferToClientRequest* arg,
+                                             TransferToClientResponse* result) {
+  return DelegateRPC([this, arg, result]() {
+    return service_->TransferToClient(arg, result);
+  });
+}
+
+::grpc::Status GRPCService::TransferToServer(::grpc::ServerContext* context,
+                                             const TransferToServerRequest* arg,
+                                             TransferToServerResponse* result) {
+  return DelegateRPC([this, arg, result]() {
+    return service_->TransferToServer(arg, result);
+  });
+}
+
+::grpc::Status GRPCService::TransferToInfeed(::grpc::ServerContext* context,
+                                             const TransferToInfeedRequest* arg,
+                                             TransferToInfeedResponse* result) {
+  return DelegateRPC([this, arg, result]() {
+    return service_->TransferToInfeed(arg, result);
+  });
+}
+
+::grpc::Status GRPCService::TransferFromOutfeed(
+    ::grpc::ServerContext* context, const TransferFromOutfeedRequest* arg,
+    TransferFromOutfeedResponse* result) {
+  return DelegateRPC([this, arg, result]() {
+    return service_->TransferFromOutfeed(arg, result);
+  });
+}
+
+::grpc::Status GRPCService::ResetDevice(::grpc::ServerContext* context,
+                                        const ResetDeviceRequest* arg,
+                                        ResetDeviceResponse* result) {
+  return DelegateRPC(
+      [this, arg, result]() { return service_->ResetDevice(arg, result); });
+}
+
+::grpc::Status GRPCService::IsConstant(::grpc::ServerContext* context,
+                                       const IsConstantRequest* arg,
+                                       IsConstantResponse* result) {
+  return DelegateRPC(
+      [this, arg, result]() { return service_->IsConstant(arg, result); });
+}
+
+::grpc::Status GRPCService::ComputeConstant(::grpc::ServerContext* context,
+                                            const ComputeConstantRequest* arg,
+                                            ComputeConstantResponse* result) {
+  return DelegateRPC(
+      [this, arg, result]() { return service_->ComputeConstant(arg, result); });
+}
+
+::grpc::Status GRPCService::GetShape(::grpc::ServerContext* context,
+                                     const GetShapeRequest* arg,
+                                     GetShapeResponse* result) {
+  return DelegateRPC(
+      [this, arg, result]() { return service_->GetShape(arg, result); });
+}
+
+::grpc::Status GRPCService::GetComputationShape(
+    ::grpc::ServerContext* context, const GetComputationShapeRequest* arg,
+    GetComputationShapeResponse* result) {
+  return DelegateRPC([this, arg, result]() {
+    return service_->GetComputationShape(arg, result);
+  });
+}
+
+::grpc::Status GRPCService::GetLocalShape(::grpc::ServerContext* context,
+                                          const GetLocalShapeRequest* arg,
+                                          GetLocalShapeResponse* result) {
+  return DelegateRPC(
+      [this, arg, result]() { return service_->GetLocalShape(arg, result); });
+}
+
+::grpc::Status GRPCService::GetComputationStats(
+    ::grpc::ServerContext* context, const ComputationStatsRequest* arg,
+    ComputationStatsResponse* result) {
+  return DelegateRPC([this, arg, result]() {
+    return service_->GetComputationStats(arg, result);
+  });
+}
+
+::grpc::Status GRPCService::SnapshotComputation(
+    ::grpc::ServerContext* context, const SnapshotComputationRequest* arg,
+    SnapshotComputationResponse* result) {
+  return DelegateRPC([this, arg, result]() {
+    return service_->SnapshotComputation(arg, result);
+  });
+}
+
+::grpc::Status GRPCService::LoadComputationSnapshot(
+    ::grpc::ServerContext* context, const LoadComputationSnapshotRequest* arg,
+    LoadComputationSnapshotResponse* result) {
+  return DelegateRPC([this, arg, result]() {
+    return service_->LoadComputationSnapshot(arg, result);
+  });
+}
+
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/rpc/grpc_service.h b/tensorflow/compiler/xla/rpc/grpc_service.h
new file mode 100644
index 0000000000000000000000000000000000000000..fad74375bd59f7254d97c4adbc6b3d2f5fbf6b29
--- /dev/null
+++ b/tensorflow/compiler/xla/rpc/grpc_service.h
@@ -0,0 +1,126 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_COMPILER_XLA_RPC_GRPC_SERVICE_H_
+#define TENSORFLOW_COMPILER_XLA_RPC_GRPC_SERVICE_H_
+
+#include "grpc++/server_context.h"
+#include "tensorflow/compiler/xla/rpc/xla_service.grpc.pb.h"
+#include "tensorflow/compiler/xla/service/service.h"
+
+namespace xla {
+
+// Service implementation which wraps an XLA Service with a GRPC interface.
+class GRPCService : public grpc::XlaService::Service {
+ public:
+  // Factory for creating a GRPCService. The parameter platform is the platform
+  // that the service should target. If platform is null then the default
+  // platform is used.
+  static StatusOr<std::unique_ptr<GRPCService>> NewService(
+      se::Platform* platform = nullptr);
+
+  ::grpc::Status Computation(::grpc::ServerContext* context,
+                             const ComputationRequest* arg,
+                             ComputationResponse* result) override;
+
+  ::grpc::Status CreateOp(::grpc::ServerContext* context, const OpRequest* arg,
+                          OpResponse* result) override;
+
+  ::grpc::Status Unregister(::grpc::ServerContext* context,
+                            const UnregisterRequest* arg,
+                            UnregisterResponse* result) override;
+
+  ::grpc::Status DeconstructTuple(::grpc::ServerContext* context,
+                                  const DeconstructTupleRequest* arg,
+                                  DeconstructTupleResponse* result) override;
+
+  ::grpc::Status SetReturnValue(::grpc::ServerContext* context,
+                                const SetReturnValueRequest* arg,
+                                SetReturnValueResponse* results) override;
+
+  ::grpc::Status Execute(::grpc::ServerContext* context,
+                         const ExecuteRequest* arg,
+                         ExecuteResponse* result) override;
+
+  ::grpc::Status ExecuteAsync(::grpc::ServerContext* context,
+                              const ExecuteAsyncRequest* arg,
+                              ExecuteAsyncResponse* result) override;
+
+  ::grpc::Status WaitForExecution(::grpc::ServerContext* context,
+                                  const WaitForExecutionRequest* arg,
+                                  WaitForExecutionResponse* result) override;
+
+  ::grpc::Status TransferToClient(::grpc::ServerContext* context,
+                                  const TransferToClientRequest* arg,
+                                  TransferToClientResponse* result) override;
+
+  ::grpc::Status TransferToServer(::grpc::ServerContext* context,
+                                  const TransferToServerRequest* arg,
+                                  TransferToServerResponse* result) override;
+
+  ::grpc::Status TransferToInfeed(::grpc::ServerContext* context,
+                                  const TransferToInfeedRequest* arg,
+                                  TransferToInfeedResponse* result) override;
+
+  ::grpc::Status TransferFromOutfeed(
+      ::grpc::ServerContext* context, const TransferFromOutfeedRequest* arg,
+      TransferFromOutfeedResponse* result) override;
+
+  ::grpc::Status ResetDevice(::grpc::ServerContext* context,
+                             const ResetDeviceRequest* arg,
+                             ResetDeviceResponse* result) override;
+
+  ::grpc::Status IsConstant(::grpc::ServerContext* context,
+                            const IsConstantRequest* arg,
+                            IsConstantResponse* result) override;
+
+  ::grpc::Status ComputeConstant(::grpc::ServerContext* context,
+                                 const ComputeConstantRequest* arg,
+                                 ComputeConstantResponse* result) override;
+
+  ::grpc::Status GetShape(::grpc::ServerContext* context,
+                          const GetShapeRequest* arg,
+                          GetShapeResponse* result) override;
+
+  ::grpc::Status GetComputationShape(
+      ::grpc::ServerContext* context, const GetComputationShapeRequest* arg,
+      GetComputationShapeResponse* result) override;
+
+  ::grpc::Status GetLocalShape(::grpc::ServerContext* context,
+                               const GetLocalShapeRequest* arg,
+                               GetLocalShapeResponse* result) override;
+
+  ::grpc::Status GetComputationStats(::grpc::ServerContext* context,
+                                     const ComputationStatsRequest* arg,
+                                     ComputationStatsResponse* result) override;
+
+  ::grpc::Status SnapshotComputation(
+      ::grpc::ServerContext* context, const SnapshotComputationRequest* arg,
+      SnapshotComputationResponse* result) override;
+
+  ::grpc::Status LoadComputationSnapshot(
+      ::grpc::ServerContext* context, const LoadComputationSnapshotRequest* arg,
+      LoadComputationSnapshotResponse* result) override;
+
+ private:
+  std::unique_ptr<::xla::Service> service_;
+
+  GRPCService() {}
+  GRPCService(const GRPCService&) = delete;
+  void operator=(const GRPCService&) = delete;
+};
+}  // namespace xla
+
+#endif  // TENSORFLOW_COMPILER_XLA_RPC_GRPC_SERVICE_H_
diff --git a/tensorflow/compiler/xla/rpc/grpc_service_main.cc b/tensorflow/compiler/xla/rpc/grpc_service_main.cc
new file mode 100644
index 0000000000000000000000000000000000000000..e29908ccec80db76e3b5b856e57382c56430c379
--- /dev/null
+++ b/tensorflow/compiler/xla/rpc/grpc_service_main.cc
@@ -0,0 +1,62 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// Basic server binary that exposes an xla::Service through a GRPC interface
+// on a configurable port.
+#include "grpc++/security/server_credentials.h"
+#include "grpc++/server.h"
+#include "grpc++/server_builder.h"
+#include "tensorflow/compiler/xla/rpc/grpc_service.h"
+#include "tensorflow/core/lib/strings/stringprintf.h"
+#include "tensorflow/core/platform/init_main.h"
+#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/util/command_line_flags.h"
+
+namespace xla {
+namespace {
+
+int RealMain(int argc, char** argv) {
+  int32 port = 1685;
+  std::vector<tensorflow::Flag> flag_list = {
+      tensorflow::Flag("port", &port, "port to listen on"),
+  };
+  string usage = tensorflow::Flags::Usage(argv[0], flag_list);
+  bool parsed_values_ok = tensorflow::Flags::Parse(&argc, argv, flag_list);
+  if (!parsed_values_ok) {
+    LOG(ERROR) << usage;
+    return 2;
+  }
+  tensorflow::port::InitMain(argv[0], &argc, &argv);
+
+  std::unique_ptr<xla::GRPCService> service =
+      xla::GRPCService::NewService().ConsumeValueOrDie();
+
+  ::grpc::ServerBuilder builder;
+  string server_address(tensorflow::strings::Printf("localhost:%d", port));
+
+  builder.AddListeningPort(server_address, ::grpc::InsecureServerCredentials());
+  builder.RegisterService(service.get());
+  std::unique_ptr<::grpc::Server> server(builder.BuildAndStart());
+
+  LOG(INFO) << "Server listening on " << server_address;
+  server->Wait();
+
+  return 0;
+}
+
+}  // namespace
+}  // namespace xla
+
+int main(int argc, char** argv) { return xla::RealMain(argc, argv); }
diff --git a/tensorflow/compiler/xla/rpc/grpc_stub.cc b/tensorflow/compiler/xla/rpc/grpc_stub.cc
new file mode 100644
index 0000000000000000000000000000000000000000..e1f2b0abe39b10dd82b700941748bc4f4e8cb2f8
--- /dev/null
+++ b/tensorflow/compiler/xla/rpc/grpc_stub.cc
@@ -0,0 +1,244 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/rpc/grpc_stub.h"
+#include "tensorflow/core/distributed_runtime/rpc/grpc_util.h"
+
+namespace xla {
+
+GRPCStub::~GRPCStub() = default;
+
+tensorflow::Status MakeRPC(
+    const std::function<::grpc::Status(::grpc::ClientContext*)>& rpc_method) {
+  ::grpc::ClientContext client_context;
+  const ::grpc::Status rpc_status = rpc_method(&client_context);
+  return tensorflow::FromGrpcStatus(rpc_status);
+}
+
+tensorflow::Status GRPCStub::TransferToClient(
+    const TransferToClientRequest* request,
+    TransferToClientResponse* response) {
+  return MakeRPC([this, request, response](::grpc::ClientContext* ctx) {
+    return grpc_stub_->TransferToClient(ctx, *request, response);
+  });
+}
+
+tensorflow::Status GRPCStub::TransferToServer(
+    const TransferToServerRequest* request,
+    TransferToServerResponse* response) {
+  return MakeRPC([this, request, response](::grpc::ClientContext* ctx) {
+    return grpc_stub_->TransferToServer(ctx, *request, response);
+  });
+}
+
+tensorflow::Status GRPCStub::TransferToInfeed(
+    const TransferToInfeedRequest* request,
+    TransferToInfeedResponse* response) {
+  return MakeRPC([this, request, response](::grpc::ClientContext* ctx) {
+    return grpc_stub_->TransferToInfeed(ctx, *request, response);
+  });
+}
+
+tensorflow::Status GRPCStub::TransferFromOutfeed(
+    const TransferFromOutfeedRequest* request,
+    TransferFromOutfeedResponse* response) {
+  return MakeRPC([this, request, response](::grpc::ClientContext* ctx) {
+    return grpc_stub_->TransferFromOutfeed(ctx, *request, response);
+  });
+}
+
+tensorflow::Status GRPCStub::ResetDevice(const ResetDeviceRequest* request,
+                                         ResetDeviceResponse* response) {
+  return MakeRPC([this, request, response](::grpc::ClientContext* ctx) {
+    return grpc_stub_->ResetDevice(ctx, *request, response);
+  });
+}
+
+tensorflow::Status GRPCStub::LoadComputationSnapshot(
+    const LoadComputationSnapshotRequest* request,
+    LoadComputationSnapshotResponse* response) {
+  return MakeRPC([this, request, response](::grpc::ClientContext* ctx) {
+    return grpc_stub_->LoadComputationSnapshot(ctx, *request, response);
+  });
+}
+
+tensorflow::Status GRPCStub::Execute(const ExecuteRequest* request,
+                                     ExecuteResponse* response) {
+  return MakeRPC([this, request, response](::grpc::ClientContext* ctx) {
+    return grpc_stub_->Execute(ctx, *request, response);
+  });
+}
+
+tensorflow::Status GRPCStub::ExecuteGraph(const ExecuteGraphRequest* request,
+                                          ExecuteResponse* response) {
+  return MakeRPC([this, request, response](::grpc::ClientContext* ctx) {
+    return grpc_stub_->ExecuteGraph(ctx, *request, response);
+  });
+}
+
+tensorflow::Status GRPCStub::ExecuteParallel(
+    const ExecuteParallelRequest* request, ExecuteParallelResponse* response) {
+  return MakeRPC([this, request, response](::grpc::ClientContext* ctx) {
+    return grpc_stub_->ExecuteParallel(ctx, *request, response);
+  });
+}
+
+tensorflow::Status GRPCStub::ExecuteGraphParallel(
+    const ExecuteGraphParallelRequest* request,
+    ExecuteParallelResponse* response) {
+  return MakeRPC([this, request, response](::grpc::ClientContext* ctx) {
+    return grpc_stub_->ExecuteGraphParallel(ctx, *request, response);
+  });
+}
+
+tensorflow::Status GRPCStub::ExecuteAsync(const ExecuteAsyncRequest* request,
+                                          ExecuteAsyncResponse* response) {
+  return MakeRPC([this, request, response](::grpc::ClientContext* ctx) {
+    return grpc_stub_->ExecuteAsync(ctx, *request, response);
+  });
+}
+
+tensorflow::Status GRPCStub::WaitForExecution(
+    const WaitForExecutionRequest* request,
+    WaitForExecutionResponse* response) {
+  return MakeRPC([this, request, response](::grpc::ClientContext* ctx) {
+    return grpc_stub_->WaitForExecution(ctx, *request, response);
+  });
+}
+
+tensorflow::Status GRPCStub::DeconstructTuple(
+    const DeconstructTupleRequest* request,
+    DeconstructTupleResponse* response) {
+  return MakeRPC([this, request, response](::grpc::ClientContext* ctx) {
+    return grpc_stub_->DeconstructTuple(ctx, *request, response);
+  });
+}
+
+tensorflow::Status GRPCStub::GetComputationStats(
+    const ComputationStatsRequest* request,
+    ComputationStatsResponse* response) {
+  return MakeRPC([this, request, response](::grpc::ClientContext* ctx) {
+    return grpc_stub_->GetComputationStats(ctx, *request, response);
+  });
+}
+
+tensorflow::Status GRPCStub::GetComputationGraphStats(
+    const ComputationGraphStatsRequest* request,
+    ComputationStatsResponse* response) {
+  return MakeRPC([this, request, response](::grpc::ClientContext* ctx) {
+    return grpc_stub_->GetComputationGraphStats(ctx, *request, response);
+  });
+}
+
+tensorflow::Status GRPCStub::GetComputationShape(
+    const GetComputationShapeRequest* request,
+    GetComputationShapeResponse* response) {
+  return MakeRPC([this, request, response](::grpc::ClientContext* ctx) {
+    return grpc_stub_->GetComputationShape(ctx, *request, response);
+  });
+}
+
+tensorflow::Status GRPCStub::GetShape(const GetShapeRequest* request,
+                                      GetShapeResponse* response) {
+  return MakeRPC([this, request, response](::grpc::ClientContext* ctx) {
+    return grpc_stub_->GetShape(ctx, *request, response);
+  });
+}
+
+tensorflow::Status GRPCStub::GetDeviceHandles(
+    const GetDeviceHandlesRequest* request,
+    GetDeviceHandlesResponse* response) {
+  return MakeRPC([this, request, response](::grpc::ClientContext* ctx) {
+    return grpc_stub_->GetDeviceHandles(ctx, *request, response);
+  });
+}
+
+tensorflow::Status GRPCStub::CreateChannelHandle(
+    const CreateChannelHandleRequest* request,
+    CreateChannelHandleResponse* response) {
+  return MakeRPC([this, request, response](::grpc::ClientContext* ctx) {
+    return grpc_stub_->CreateChannelHandle(ctx, *request, response);
+  });
+}
+
+// RPCs issued on behalf of ComputationBuilder.
+tensorflow::Status GRPCStub::Computation(const ComputationRequest* request,
+                                         ComputationResponse* response) {
+  return MakeRPC([this, request, response](::grpc::ClientContext* ctx) {
+    return grpc_stub_->Computation(ctx, *request, response);
+  });
+}
+
+tensorflow::Status GRPCStub::Op(const OpRequest* request,
+                                OpResponse* response) {
+  return MakeRPC([this, request, response](::grpc::ClientContext* ctx) {
+    return grpc_stub_->CreateOp(ctx, *request, response);
+  });
+}
+
+tensorflow::Status GRPCStub::GetLocalShape(const GetLocalShapeRequest* request,
+                                           GetLocalShapeResponse* response) {
+  return MakeRPC([this, request, response](::grpc::ClientContext* ctx) {
+    return grpc_stub_->GetLocalShape(ctx, *request, response);
+  });
+}
+
+tensorflow::Status GRPCStub::SetReturnValue(
+    const SetReturnValueRequest* request, SetReturnValueResponse* responses) {
+  return MakeRPC([this, request, responses](::grpc::ClientContext* ctx) {
+    return grpc_stub_->SetReturnValue(ctx, *request, responses);
+  });
+}
+
+tensorflow::Status GRPCStub::IsConstant(const IsConstantRequest* request,
+                                        IsConstantResponse* response) {
+  return MakeRPC([this, request, response](::grpc::ClientContext* ctx) {
+    return grpc_stub_->IsConstant(ctx, *request, response);
+  });
+}
+
+tensorflow::Status GRPCStub::ComputeConstant(
+    const ComputeConstantRequest* request, ComputeConstantResponse* response) {
+  return MakeRPC([this, request, response](::grpc::ClientContext* ctx) {
+    return grpc_stub_->ComputeConstant(ctx, *request, response);
+  });
+}
+
+tensorflow::Status GRPCStub::ComputeConstantGraph(
+    const ComputeConstantGraphRequest* request,
+    ComputeConstantResponse* response) {
+  return MakeRPC([this, request, response](::grpc::ClientContext* ctx) {
+    return grpc_stub_->ComputeConstantGraph(ctx, *request, response);
+  });
+}
+
+// RPCs issued on behalf of Computation.
+tensorflow::Status GRPCStub::SnapshotComputation(
+    const SnapshotComputationRequest* request,
+    SnapshotComputationResponse* response) {
+  return MakeRPC([this, request, response](::grpc::ClientContext* ctx) {
+    return grpc_stub_->SnapshotComputation(ctx, *request, response);
+  });
+}
+
+// RPCs issued on behalf of GlobalData.
+tensorflow::Status GRPCStub::Unregister(const UnregisterRequest* request,
+                                        UnregisterResponse* response) {
+  return MakeRPC([this, request, response](::grpc::ClientContext* ctx) {
+    return grpc_stub_->Unregister(ctx, *request, response);
+  });
+}
+
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/rpc/grpc_stub.h b/tensorflow/compiler/xla/rpc/grpc_stub.h
new file mode 100644
index 0000000000000000000000000000000000000000..fd9810d4f1a5e084b73e83007ea7f9f8b0462c72
--- /dev/null
+++ b/tensorflow/compiler/xla/rpc/grpc_stub.h
@@ -0,0 +1,141 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_COMPILER_XLA_RPC_GRPC_STUB_H_
+#define TENSORFLOW_COMPILER_XLA_RPC_GRPC_STUB_H_
+
+#include "tensorflow/compiler/xla/rpc/xla_service.grpc.pb.h"
+#include "tensorflow/compiler/xla/service_interface.h"
+#include "tensorflow/compiler/xla/xla_data.pb.h"
+#include "tensorflow/core/platform/macros.h"
+
+namespace xla {
+
+class GRPCStub : public ServiceInterface {
+ public:
+  explicit GRPCStub(grpc::XlaService::Stub* stub) : grpc_stub_(stub) {}
+  ~GRPCStub() override;
+
+  tensorflow::Status TransferToClient(
+      const TransferToClientRequest* arg,
+      TransferToClientResponse* result) override;
+
+  tensorflow::Status TransferToServer(
+      const TransferToServerRequest* arg,
+      TransferToServerResponse* result) override;
+
+  tensorflow::Status TransferToInfeed(
+      const TransferToInfeedRequest* arg,
+      TransferToInfeedResponse* result) override;
+
+  tensorflow::Status TransferFromOutfeed(
+      const TransferFromOutfeedRequest* arg,
+      TransferFromOutfeedResponse* result) override;
+
+  tensorflow::Status ResetDevice(const ResetDeviceRequest* arg,
+                                 ResetDeviceResponse* result) override;
+
+  tensorflow::Status LoadComputationSnapshot(
+      const LoadComputationSnapshotRequest* request,
+      LoadComputationSnapshotResponse* result) override;
+
+  tensorflow::Status Execute(const ExecuteRequest* arg,
+                             ExecuteResponse* result) override;
+
+  tensorflow::Status ExecuteGraph(const ExecuteGraphRequest* request,
+                                  ExecuteResponse* response) override;
+
+  tensorflow::Status ExecuteParallel(const ExecuteParallelRequest* arg,
+                                     ExecuteParallelResponse* result) override;
+
+  tensorflow::Status ExecuteGraphParallel(
+      const ExecuteGraphParallelRequest* request,
+      ExecuteParallelResponse* response) override;
+
+  tensorflow::Status ExecuteAsync(const ExecuteAsyncRequest* arg,
+                                  ExecuteAsyncResponse* result) override;
+
+  tensorflow::Status WaitForExecution(
+      const WaitForExecutionRequest* arg,
+      WaitForExecutionResponse* result) override;
+
+  tensorflow::Status DeconstructTuple(
+      const DeconstructTupleRequest* arg,
+      DeconstructTupleResponse* result) override;
+
+  tensorflow::Status GetComputationStats(
+      const ComputationStatsRequest* arg,
+      ComputationStatsResponse* result) override;
+
+  tensorflow::Status GetComputationGraphStats(
+      const ComputationGraphStatsRequest* request,
+      ComputationStatsResponse* response) override;
+
+  tensorflow::Status GetComputationShape(
+      const GetComputationShapeRequest* arg,
+      GetComputationShapeResponse* result) override;
+
+  tensorflow::Status GetShape(const GetShapeRequest* arg,
+                              GetShapeResponse* result) override;
+
+  tensorflow::Status GetDeviceHandles(
+      const GetDeviceHandlesRequest* arg,
+      GetDeviceHandlesResponse* result) override;
+
+  tensorflow::Status CreateChannelHandle(
+      const CreateChannelHandleRequest* arg,
+      CreateChannelHandleResponse* result) override;
+
+  // Methods used by ComputationBuilder.
+  tensorflow::Status Computation(const ComputationRequest* arg,
+                                 ComputationResponse* result) override;
+
+  tensorflow::Status Op(const OpRequest* arg, OpResponse* result) override;
+  tensorflow::Status GetLocalShape(const GetLocalShapeRequest* arg,
+                                   GetLocalShapeResponse* result) override;
+
+  tensorflow::Status SetReturnValue(const SetReturnValueRequest* arg,
+                                    SetReturnValueResponse* results) override;
+
+  tensorflow::Status IsConstant(const IsConstantRequest* arg,
+                                IsConstantResponse* result) override;
+
+  tensorflow::Status ComputeConstant(const ComputeConstantRequest* arg,
+                                     ComputeConstantResponse* result) override;
+
+  tensorflow::Status ComputeConstantGraph(
+      const ComputeConstantGraphRequest* arg,
+      ComputeConstantResponse* result) override;
+
+  // Methods used by Computation.
+  tensorflow::Status SnapshotComputation(
+      const SnapshotComputationRequest* arg,
+      SnapshotComputationResponse* result) override;
+
+  // Methods used by GlobalData.
+  tensorflow::Status Unregister(const UnregisterRequest* arg,
+                                UnregisterResponse* result) override;
+
+  grpc::XlaService::Stub* service() { return grpc_stub_; }
+
+ private:
+  grpc::XlaService::Stub* grpc_stub_;
+
+  TF_DISALLOW_COPY_AND_ASSIGN(GRPCStub);
+};
+
+}  // namespace xla
+
+#endif  // TENSORFLOW_COMPILER_XLA_RPC_GRPC_STUB_H_
diff --git a/tensorflow/compiler/xla/rpc/xla_service.proto b/tensorflow/compiler/xla/rpc/xla_service.proto
new file mode 100644
index 0000000000000000000000000000000000000000..c47164ee1b7657ae378a053f553442bee751753e
--- /dev/null
+++ b/tensorflow/compiler/xla/rpc/xla_service.proto
@@ -0,0 +1,225 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// XLA service API.
+//
+// Users 1) build up computations and 2) create allocations via this API.
+// Computations are composed of data flowing between arbitrarily-sized
+// vector-oriented operations.
+//
+// Users build up computations using a ComputationHandle, and talk about
+// allocations using GlobalDataHandles.
+//
+// There are currently no checkpointing capabilities or distribution/replication
+// guarantees. The service runs on a single machine (e.g. one task) and that is
+// its failure domain.
+//
+// Canonical example of "alpha * X + Y":
+// * Make a computation.
+// * Add alpha and X and Y as parameters.
+// * Request the multiplication of alpha and X.
+// * Request the addition of that result and Y. +// +// Then, pass the computation and appropriately shaped inputs to the XLA +// service's Execute method, which provides a result as a GlobalDataHandle. +// +// All data in XLA computations are conceptually immutable. +// +// Note: this API is subject to change / refinement over time -- use the +// provided client libraries to insulate code from changes to this service API. + +syntax = "proto3"; + +import "tensorflow/compiler/xla/xla.proto"; +import "tensorflow/compiler/xla/xla_data.proto"; + +package xla; + +service XlaService { + ///////////////////////// + // Global data requests + + // Unregisters a global allocation. + // + // If the handle given is not currently allocated, a NOT_FOUND status is + // returned. + rpc Unregister(UnregisterRequest) returns (UnregisterResponse) { + } + + // Deconstructs a tuple. Returns a newly created GlobalDataHandle for each + // element in the tuple. + rpc DeconstructTuple(DeconstructTupleRequest) + returns (DeconstructTupleResponse) { + } + + // Unpack requests that a global data handle, with a tuple shape, has global + // data handles created for each of its constituent members. This is the + // equivalent of the "destructuring assignment" present in various programming + // languages. + rpc Unpack(UnpackRequest) returns (UnpackResponse) { + } + + // Requests the shape of the referenced global data. + rpc GetShape(GetShapeRequest) returns (GetShapeResponse) { + } + + // Requests the program shape of the referenced computation. + rpc GetComputationShape(GetComputationShapeRequest) + returns (GetComputationShapeResponse) { + } + + // Requests the statistics of the given computation. + rpc GetComputationStats(ComputationStatsRequest) + returns (ComputationStatsResponse) { + } + + // Requests the statistics of the given computation. + // + // TODO(b/74197823): This is a part of a NOT YET ready refactor. 
+ rpc GetComputationGraphStats(ComputationGraphStatsRequest) + returns (ComputationStatsResponse) { + } + + // Loads a variable number of values with a given element type from ColumnIO. + rpc LoadData(LoadDataRequest) returns (LoadDataResponse) { + } + + // Transfers the given global data to the client in the form of a Literal. + rpc TransferToClient(TransferToClientRequest) + returns (TransferToClientResponse) { + } + + // Transfers the given literal to the server to be stored in a global + // allocation, which is returned. + rpc TransferToServer(TransferToServerRequest) + returns (TransferToServerResponse) { + } + + // Transfers the given literal to the Infeed buffer of the device. + rpc TransferToInfeed(TransferToInfeedRequest) + returns (TransferToInfeedResponse) { + } + + // Transfers a literal from the Outfeed buffer of the device. + rpc TransferFromOutfeed(TransferFromOutfeedRequest) + returns (TransferFromOutfeedResponse) { + } + + // Resets the device, clearing all existing state on the device. + rpc ResetDevice(ResetDeviceRequest) returns (ResetDeviceResponse) { + } + + // Tests if an expression is a compile-time constant. + rpc IsConstant(IsConstantRequest) returns (IsConstantResponse) { + } + + // Computes the value of a constant expression. + rpc ComputeConstant(ComputeConstantRequest) + returns (ComputeConstantResponse) { + } + + // Computes the value of a constant expression. The request contains the + // computation graph for the constant expression. + rpc ComputeConstantGraph(ComputeConstantGraphRequest) + returns (ComputeConstantResponse) { + } + + // Retrieves the inferred shape for a value within a computation. + rpc GetLocalShape(GetLocalShapeRequest) returns (GetLocalShapeResponse) { + } + + // Requests one or more device handles from the target. The returned device + // handles can be used to specify the device on which to execute computations + // or transfer data. 
+ rpc GetDeviceHandles(GetDeviceHandlesRequest) + returns (GetDeviceHandlesResponse) { + } + + // Creates a channel handle that can be used to transfer data between + // two computations via a pair of Send and Recv instructions. + rpc CreateChannelHandle(CreateChannelHandleRequest) + returns (CreateChannelHandleResponse) { + } + + // Requests that the referenced computation be specialized for the provided + // arguments for subsequent execution. This permits things such as value + // specialization. + rpc Specialize(SpecializeRequest) returns (SpecializeResponse) { + } + + // Modifies the provided computation so that subsequent executions + // will compute the provided ComputationDataHandle, rather than the + // last expression enqueued on that Computation. + rpc SetReturnValue(SetReturnValueRequest) returns (SetReturnValueResponse) { + } + + // Computation creates a new computation with the given name. + // A unique ComputationHandle is returned. + rpc Computation(ComputationRequest) returns (ComputationResponse) { + } + + // Adds a new op to a computation. + rpc CreateOp(OpRequest) returns (OpResponse) { + } + + // Invokes the provided computation with the provided global data passed as + // immutable arguments. Returns global data output and execution timing. + rpc Execute(ExecuteRequest) returns (ExecuteResponse) { + } + + // Invokes the provided computation with the provided global data passed as + // immutable arguments. The request contains the whole computation graph. + // Returns global data output and execution timing. + rpc ExecuteGraph(ExecuteGraphRequest) returns (ExecuteResponse) { + } + + // Invokes the provided list of computations in parallel with the provided + // global data for each computation. Returns a list of global data output and + // execution timing. 
+ rpc ExecuteParallel(ExecuteParallelRequest) + returns (ExecuteParallelResponse) { + } + + // Invokes the provided list of computations in parallel with the provided + // global data for each computation. Returns a list of global data output and + // execution timing. + // + // TODO(b/74197823): This is a part of a NOT YET ready refactor. + rpc ExecuteGraphParallel(ExecuteGraphParallelRequest) + returns (ExecuteParallelResponse) { + } + + // Invokes the provided computation with the provided global data passed as + // immutable arguments. Returns a handle to the execution. + rpc ExecuteAsync(ExecuteAsyncRequest) returns (ExecuteAsyncResponse) { + } + + // Waits until the given execution (asynchronously launched) is complete, and + // returns the global data output. + rpc WaitForExecution(WaitForExecutionRequest) + returns (WaitForExecutionResponse) { + } + + // Serializes a computation to proto form, so it can be loaded via + // LoadComputationSnapshot. + rpc SnapshotComputation(SnapshotComputationRequest) + returns (SnapshotComputationResponse) { + } + + // Loads a computation from a captured snapshot. 
+ rpc LoadComputationSnapshot(LoadComputationSnapshotRequest) + returns (LoadComputationSnapshotResponse) { + } +} diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index 3a99d84bea63636870609a01c10f2bb3e0e5e8d7..afb344e5ae2aa5e420bf762dcfad4fad34247bf3 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -302,6 +302,29 @@ tf_cc_test( ], ) +cc_library( + name = "pattern_matcher", + hdrs = ["pattern_matcher.h"], + deps = [ + ":hlo", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/core:lib", + ], +) + +tf_cc_test( + name = "pattern_matcher_test", + srcs = ["pattern_matcher_test.cc"], + deps = [ + ":hlo", + ":pattern_matcher", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla/tests:xla_internal_test_main", + "//tensorflow/compiler/xla/tools/parser:hlo_parser", + "//tensorflow/core:test", + ], +) + cc_library( name = "hlo_reachability", srcs = ["hlo_reachability.cc"], @@ -732,6 +755,7 @@ cc_library( ":hlo", ":hlo_execution_profile", ":hlo_graph_dumper", + ":hlo_proto", ":pool", ":session_proto", ":shaped_buffer", @@ -1197,6 +1221,23 @@ cc_library( ], ) +tf_cc_test( + name = "hlo_creation_utils_test", + srcs = ["hlo_creation_utils_test.cc"], + deps = [ + ":hlo", + ":hlo_creation_utils", + ":hlo_evaluator", + ":hlo_matchers", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:test", + "//tensorflow/compiler/xla:util", + "//tensorflow/compiler/xla/tests:hlo_test_base", + "//tensorflow/compiler/xla/tests:xla_internal_test_main", + "//tensorflow/core:test", + ], +) + cc_library( name = "batchnorm_expander", srcs = ["batchnorm_expander.cc"], @@ -1260,6 +1301,7 @@ cc_library( ":hlo_creation_utils", ":hlo_pass", ":hlo_query", + ":pattern_matcher", "//tensorflow/compiler/xla:literal_util", "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla:status_macros", @@ -1912,10 +1954,12 @@ cc_library( deps = [ ":computation_layout", 
":hlo", + ":hlo_dce", ":hlo_graph_dumper", ":hlo_pass", ":logical_buffer", ":tuple_points_to_analysis", + ":tuple_simplifier", "//tensorflow/compiler/xla:shape_layout", "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla:status_macros", @@ -1991,6 +2035,7 @@ cc_library( srcs = ["hlo_verifier.cc"], hdrs = ["hlo_verifier.h"], deps = [ + ":hlo", ":hlo_pass", ":shape_inference", "//tensorflow/compiler/xla:status_macros", @@ -2391,6 +2436,7 @@ cc_library( "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla:status_macros", "//tensorflow/compiler/xla:util", + "//tensorflow/compiler/xla:xla_data_proto", "//tensorflow/core:lib", ], ) @@ -2535,6 +2581,7 @@ cc_library( srcs = ["hlo_runner.cc"], hdrs = ["hlo_runner.h"], deps = [ + ":computation_placer", ":executable", ":hlo", ":transfer_manager", @@ -2551,6 +2598,7 @@ cc_library( "//tensorflow/core:lib", "//tensorflow/core:stream_executor_no_cuda", "//third_party/eigen3", + "@com_google_absl//absl/memory", ], ) @@ -2639,6 +2687,21 @@ tf_cc_test( ], ) +cc_library( + name = "despecializer", + srcs = ["despecializer.cc"], + hdrs = ["despecializer.h"], + deps = [ + ":bfloat16_normalization", + ":defuser", + ":hlo", + ":hlo_pass", + ":hlo_pass_pipeline", + ":implicit_broadcast_remover", + "//tensorflow/compiler/xla:statusor", + ], +) + cc_library( name = "source_map_util", srcs = ["source_map_util.cc"], diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc index 0e4624fd69e623efca780937c5347dbf6bb9afe1..8e785de68cb1fbe4ce9fd58a661bdc208725483b 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc @@ -30,6 +30,7 @@ limitations under the License. 
#include "tensorflow/compiler/xla/service/hlo_instruction.h" #include "tensorflow/compiler/xla/service/hlo_opcode.h" #include "tensorflow/compiler/xla/service/hlo_query.h" +#include "tensorflow/compiler/xla/service/pattern_matcher.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/status_macros.h" #include "tensorflow/compiler/xla/types.h" @@ -44,8 +45,11 @@ limitations under the License. #include "tensorflow/core/platform/types.h" namespace xla { + namespace { +namespace m = match; + // Returns whether operand is a literal with the given value. bool IsLiteralWithValue(const HloInstruction* operand, int8 value) { return operand->opcode() == HloOpcode::kConstant && @@ -105,6 +109,7 @@ HloComputation* CreateScalarBinaryComputation(HloModule* module, module->AddEmbeddedComputation(b.Build(scalar_op)); return scalar_computation; } + } // namespace // AlgebraicSimplifierVisitor traverses the HLO computation and reduces certain @@ -350,8 +355,9 @@ bool AlgebraicSimplifierVisitor::ReplaceInstructionIfSameShape( } Status AlgebraicSimplifierVisitor::HandleAdd(HloInstruction* add) { - auto lhs = add->mutable_operand(0); - auto rhs = add->mutable_operand(1); + HloInstruction *lhs, *rhs; + CHECK(Match(add, m::Add(m::Op(&lhs), m::Op(&rhs)))); + // A + 0 => A VLOG(10) << "trying transform [A + 0 => A]: " << add->ToString(); if (IsAll(rhs, 0) && ReplaceInstructionIfSameShape(add, lhs)) { @@ -366,7 +372,7 @@ Status AlgebraicSimplifierVisitor::HandleAdd(HloInstruction* add) { // Canonicalization: Put constants on the right. This makes the reassociation // rules below simpler. 
VLOG(10) << "trying transform [Const + A => A + Const]"; - if (lhs->IsConstant() && !rhs->IsConstant()) { + if (Match(add, m::Add(m::Constant(), m::NonConstant()))) { return ReplaceWithNewInstruction( add, HloInstruction::CreateBinary(add->shape(), HloOpcode::kAdd, rhs, lhs)); @@ -379,16 +385,13 @@ Status AlgebraicSimplifierVisitor::HandleAdd(HloInstruction* add) { // (A + C1) + (B + C2) => A + B + (C1 + C2). // VLOG(10) << "trying transform [(A + C1) + C2 => A + (C1 + C2)]"; - if (rhs->IsConstant() && lhs->opcode() == HloOpcode::kAdd && - !lhs->operand(0)->IsConstant() && lhs->operand(1)->IsConstant()) { - auto* c1 = lhs->mutable_operand(1); - auto* c2 = rhs; - + HloInstruction *a, *c1, *c2; + if (Match(add, m::Add(m::Add(m::NonConstant(&a), m::Constant(&c1)), + m::Constant(&c2)))) { TF_ASSIGN_OR_RETURN(auto* sum_of_constants, MakeBinaryHlo(HloOpcode::kAdd, c1, c2)); return ReplaceWithNewInstruction( - add, HloInstruction::CreateBinary(add->shape(), HloOpcode::kAdd, - lhs->mutable_operand(0), + add, HloInstruction::CreateBinary(add->shape(), HloOpcode::kAdd, a, sum_of_constants)); } @@ -397,11 +400,11 @@ Status AlgebraicSimplifierVisitor::HandleAdd(HloInstruction* add) { Status AlgebraicSimplifierVisitor::HandleBitcast(HloInstruction* bitcast) { // If a bitcast feeds a bitcast, make it a single bitcast. - if (bitcast->operand(0)->opcode() == HloOpcode::kBitcast) { + HloInstruction* op; + if (Match(bitcast, m::Bitcast(m::Bitcast(m::Op(&op))))) { return ReplaceWithNewInstruction( - bitcast, HloInstruction::CreateUnary( - bitcast->shape(), HloOpcode::kBitcast, - bitcast->mutable_operand(0)->mutable_operand(0))); + bitcast, + HloInstruction::CreateUnary(bitcast->shape(), HloOpcode::kBitcast, op)); } // All bitcasts can be eliminated (assuming layout constraints are // satisified). 
@@ -418,11 +421,10 @@ Status AlgebraicSimplifierVisitor::HandleBitcastConvert( Status AlgebraicSimplifierVisitor::HandleCopy(HloInstruction* copy) { // If a copy feeds a copy, make it a single copy. - if (copy->operand(0)->opcode() == HloOpcode::kCopy) { + HloInstruction* op; + if (Match(copy, m::Copy(m::Copy(m::Op(&op))))) { return ReplaceWithNewInstruction( - copy, HloInstruction::CreateUnary( - copy->shape(), HloOpcode::kCopy, - copy->mutable_operand(0)->mutable_operand(0))); + copy, HloInstruction::CreateUnary(copy->shape(), HloOpcode::kCopy, op)); } // All copies can be eliminated (assuming layout constraints are satisified). ReplaceInstructionIfSameShape(copy, copy->mutable_operand(0)); @@ -462,12 +464,10 @@ Status AlgebraicSimplifierVisitor::HandleConcatenate( } else if (operands.size() == 2) { // A binary concat with a broadcasted scalar as an operand can be converted // into a pad which is simpler to fold into other operations. - bool is_effective_low_pad = - operands[0]->opcode() == HloOpcode::kBroadcast && - ShapeUtil::IsScalar(operands[0]->operand(0)->shape()); - bool is_effective_high_pad = - operands[1]->opcode() == HloOpcode::kBroadcast && - ShapeUtil::IsScalar(operands[1]->operand(0)->shape()); + bool is_effective_low_pad = Match( + operands[0], m::Broadcast(m::Op().WithShape(m::Shape().IsScalar()))); + bool is_effective_high_pad = Match( + operands[1], m::Broadcast(m::Op().WithShape(m::Shape().IsScalar()))); if (!is_effective_low_pad && !is_effective_high_pad) { return Status::OK(); } @@ -537,8 +537,8 @@ Status AlgebraicSimplifierVisitor::HandleConstant(HloInstruction* constant) { } Status AlgebraicSimplifierVisitor::HandleSubtract(HloInstruction* sub) { - auto lhs = sub->mutable_operand(0); - auto rhs = sub->mutable_operand(1); + HloInstruction *lhs, *rhs; + CHECK(Match(sub, m::Subtract(m::Op(&lhs), m::Op(&rhs)))); // A - 0 => A VLOG(10) << "trying transform [A - 0 => A]: " << sub->ToString(); if (IsAll(rhs, 0) && 
ReplaceInstructionIfSameShape(sub, lhs)) { @@ -547,7 +547,7 @@ Status AlgebraicSimplifierVisitor::HandleSubtract(HloInstruction* sub) { // Canonicalize subtraction of a constant to addition. VLOG(10) << "trying transform [A - Const => A + (-Const)]"; - if (rhs->IsConstant() && !lhs->IsConstant()) { + if (Match(sub, m::Subtract(m::NonConstant(&lhs), m::Constant(&rhs)))) { HloInstruction* negative_const = computation_->AddInstruction( HloInstruction::CreateUnary(rhs->shape(), HloOpcode::kNegate, rhs)); return ReplaceWithNewInstruction( @@ -559,56 +559,53 @@ Status AlgebraicSimplifierVisitor::HandleSubtract(HloInstruction* sub) { } Status AlgebraicSimplifierVisitor::HandleDivide(HloInstruction* divide) { - auto lhs = divide->mutable_operand(0); - auto rhs = divide->mutable_operand(1); + Shape* shape; + HloInstruction *a, *b, *c, *d; + CHECK(Match(divide, m::Divide(m::Op(&a), m::Op(&b)))); // A/1 => A VLOG(10) << "trying transform [A/1 => A]: " << divide->ToString(); - if (IsAll(rhs, 1) && ReplaceInstructionIfSameShape(divide, lhs)) { + if (IsAll(b, 1) && ReplaceInstructionIfSameShape(divide, a)) { return Status::OK(); } // exp(A)/exp(B) => exp(A-B) - if (lhs->opcode() == HloOpcode::kExp && rhs->opcode() == HloOpcode::kExp) { + if (Match(divide, m::Divide(m::Exp(m::Op(&a)), m::Exp(m::Op(&b))) + .WithShape(m::Shape(&shape)))) { VLOG(10) << "transform [exp(A)/exp(B) => exp(A-B)]: " << divide->ToString(); - HloInstruction* subtract = - computation_->AddInstruction(HloInstruction::CreateBinary( - divide->shape(), HloOpcode::kSubtract, lhs->mutable_operand(0), - rhs->mutable_operand(0))); + HloInstruction* subtract = computation_->AddInstruction( + HloInstruction::CreateBinary(*shape, HloOpcode::kSubtract, a, b)); return ReplaceWithNewInstruction( - divide, HloInstruction::CreateUnary(divide->shape(), HloOpcode::kExp, - subtract)); + divide, HloInstruction::CreateUnary(*shape, HloOpcode::kExp, subtract)); } // A/exp(B) => A*exp(-B) - if (rhs->opcode() == HloOpcode::kExp) { 
+ if (Match(divide, m::Divide(m::Op(&a), m::Exp(m::Op(&b))))) { VLOG(10) << "transform [A/exp(B) => A*exp(-B)]: " << divide->ToString(); - HloInstruction* negate = - computation_->AddInstruction(HloInstruction::CreateUnary( - divide->shape(), HloOpcode::kNegate, rhs->mutable_operand(0))); + HloInstruction* negate = computation_->AddInstruction( + HloInstruction::CreateUnary(divide->shape(), HloOpcode::kNegate, b)); HloInstruction* new_exp = computation_->AddInstruction( HloInstruction::CreateUnary(divide->shape(), HloOpcode::kExp, negate)); return ReplaceWithNewInstruction( - divide, HloInstruction::CreateBinary( - divide->shape(), HloOpcode::kMultiply, lhs, new_exp)); + divide, HloInstruction::CreateBinary(divide->shape(), + HloOpcode::kMultiply, a, new_exp)); } // A/pow(B,C) => A*pow(B,-C) - if (rhs->opcode() == HloOpcode::kPower) { + if (Match(divide, m::Divide(m::Op(&a), m::Power(m::Op(&b), m::Op(&c))))) { VLOG(10) << "transform [A/pow(B,C) => A*pow(B,-C)]: " << divide->ToString(); // The output shape of the created negate operator should be the same as the // input. - const Shape& negate_shape = rhs->operand(1)->shape(); - HloInstruction* negate = - computation_->AddInstruction(HloInstruction::CreateUnary( - negate_shape, HloOpcode::kNegate, rhs->mutable_operand(1))); + const Shape& negate_shape = c->shape(); + HloInstruction* negate = computation_->AddInstruction( + HloInstruction::CreateUnary(negate_shape, HloOpcode::kNegate, c)); // And the power operator should retain the output shape of the old one. 
- const Shape& new_power_shape = rhs->shape(); - HloInstruction* new_power = computation_->AddInstruction( - HloInstruction::CreateBinary(new_power_shape, HloOpcode::kPower, - rhs->mutable_operand(0), negate)); + const Shape& new_power_shape = b->shape(); + HloInstruction* new_power = + computation_->AddInstruction(HloInstruction::CreateBinary( + new_power_shape, HloOpcode::kPower, b, negate)); return ReplaceWithNewInstruction( divide, HloInstruction::CreateBinary( - divide->shape(), HloOpcode::kMultiply, lhs, new_power)); + divide->shape(), HloOpcode::kMultiply, a, new_power)); } // Simplifying integral division would produce unexpected results. @@ -620,28 +617,24 @@ Status AlgebraicSimplifierVisitor::HandleDivide(HloInstruction* divide) { // // (Backends can do this transformation, but generally only if the constant is // a scalar.) - if (lhs->opcode() != HloOpcode::kConstant && - rhs->opcode() == HloOpcode::kConstant) { + if (Match(divide, m::Divide(m::NonConstant(&a), m::Constant(&b)))) { HloInstruction* one = computation_->AddInstruction(HloInstruction::CreateConstant( - Literal::One(lhs->shape().element_type()).CloneToUnique())); - HloInstruction* inverse = - computation_->AddInstruction(HloInstruction::CreateBinary( - rhs->shape(), HloOpcode::kDivide, one, rhs)); + Literal::One(a->shape().element_type()).CloneToUnique())); + HloInstruction* inverse = computation_->AddInstruction( + HloInstruction::CreateBinary(b->shape(), HloOpcode::kDivide, one, b)); return ReplaceWithNewInstruction( - divide, HloInstruction::CreateBinary( - divide->shape(), HloOpcode::kMultiply, lhs, inverse)); + divide, HloInstruction::CreateBinary(divide->shape(), + HloOpcode::kMultiply, a, inverse)); } // (A / B) / (C / D) => (A / B)*(D / C) => (A * D) / (B * C) - if (lhs->opcode() == HloOpcode::kDivide && - rhs->opcode() == HloOpcode::kDivide) { - TF_ASSIGN_OR_RETURN(auto a_times_d, MakeBinaryHlo(HloOpcode::kMultiply, - lhs->mutable_operand(0), - rhs->mutable_operand(1))); - 
TF_ASSIGN_OR_RETURN(auto b_times_c, MakeBinaryHlo(HloOpcode::kMultiply, - lhs->mutable_operand(1), - rhs->mutable_operand(0))); + if (Match(divide, m::Divide(m::Divide(m::Op(&a), m::Op(&b)), + m::Divide(m::Op(&c), m::Op(&d))))) { + TF_ASSIGN_OR_RETURN(auto a_times_d, + MakeBinaryHlo(HloOpcode::kMultiply, a, d)); + TF_ASSIGN_OR_RETURN(auto b_times_c, + MakeBinaryHlo(HloOpcode::kMultiply, b, c)); TF_ASSIGN_OR_RETURN(auto new_divide, MakeBinaryHlo(HloOpcode::kDivide, a_times_d, b_times_c)); @@ -649,24 +642,21 @@ Status AlgebraicSimplifierVisitor::HandleDivide(HloInstruction* divide) { } // (A / B) / C => A / (B * C) - if (lhs->opcode() == HloOpcode::kDivide) { - TF_ASSIGN_OR_RETURN( - auto b_times_c, - MakeBinaryHlo(HloOpcode::kMultiply, lhs->mutable_operand(1), rhs)); + if (Match(divide, m::Divide(m::Divide(m::Op(&a), m::Op(&b)), m::Op(&c)))) { + TF_ASSIGN_OR_RETURN(auto b_times_c, + MakeBinaryHlo(HloOpcode::kMultiply, b, c)); return ReplaceWithNewInstruction( - divide, - HloInstruction::CreateBinary(divide->shape(), HloOpcode::kDivide, - lhs->mutable_operand(0), b_times_c)); + divide, HloInstruction::CreateBinary(divide->shape(), + HloOpcode::kDivide, a, b_times_c)); } // A / (B / C) => (A*C) / B - if (rhs->opcode() == HloOpcode::kDivide) { - TF_ASSIGN_OR_RETURN(auto a_times_c, MakeBinaryHlo(HloOpcode::kMultiply, lhs, - rhs->mutable_operand(1))); + if (Match(divide, m::Divide(m::Op(&a), m::Divide(m::Op(&b), m::Op(&c))))) { + TF_ASSIGN_OR_RETURN(auto a_times_c, + MakeBinaryHlo(HloOpcode::kMultiply, a, c)); return ReplaceWithNewInstruction( - divide, - HloInstruction::CreateBinary(divide->shape(), HloOpcode::kDivide, - a_times_c, rhs->mutable_operand(0))); + divide, HloInstruction::CreateBinary(divide->shape(), + HloOpcode::kDivide, a_times_c, b)); } return Status::OK(); @@ -674,8 +664,8 @@ Status AlgebraicSimplifierVisitor::HandleDivide(HloInstruction* divide) { StatusOr AlgebraicSimplifierVisitor::HandleDotStrengthReduction( HloInstruction* dot) { - HloInstruction* 
lhs = dot->mutable_operand(0); - HloInstruction* rhs = dot->mutable_operand(1); + HloInstruction *lhs, *rhs; + CHECK(Match(dot, m::Dot(m::Op(&lhs), m::Op(&rhs)))); int64 lhs_collapsing_dim = dot->dot_dimension_numbers().lhs_contracting_dimensions(0); if (lhs->IsRank2Transpose()) { @@ -792,8 +782,8 @@ StatusOr AlgebraicSimplifierVisitor::OptimizeDotOfConcat( const int64 lhs_contracting_dim = dnums.lhs_contracting_dimensions(0); const int64 rhs_contracting_dim = dnums.rhs_contracting_dimensions(0); - HloInstruction* lhs = dot->mutable_operand(0); - HloInstruction* rhs = dot->mutable_operand(1); + HloInstruction *lhs, *rhs; + CHECK(Match(dot, m::Dot(m::Op(&lhs), m::Op(&rhs)))); TF_ASSIGN_OR_RETURN( HloInstruction * optimized_lhs_concat, @@ -923,8 +913,8 @@ StatusOr AlgebraicSimplifierVisitor::OptimizeDotOfConcatHelper( } Status AlgebraicSimplifierVisitor::HandleDot(HloInstruction* dot) { - auto lhs = dot->mutable_operand(0); - auto rhs = dot->mutable_operand(1); + HloInstruction *lhs, *rhs; + CHECK(Match(dot, m::Dot(m::Op(&lhs), m::Op(&rhs)))); // Only optimize F32 dot operations where the dot, rhs and lhs are rank 2 or // below. 
@@ -976,8 +966,8 @@ Status AlgebraicSimplifierVisitor::HandleDot(HloInstruction* dot) { } Status AlgebraicSimplifierVisitor::HandleMultiply(HloInstruction* multiply) { - auto lhs = multiply->mutable_operand(0); - auto rhs = multiply->mutable_operand(1); + HloInstruction *lhs, *rhs; + CHECK(Match(multiply, m::Multiply(m::Op(&lhs), m::Op(&rhs)))); // A*1 => A VLOG(10) << "trying transform [A*1 => A]: " << multiply->ToString(); if (IsAll(rhs, 1) && ReplaceInstructionIfSameShape(multiply, lhs)) { @@ -990,10 +980,9 @@ Status AlgebraicSimplifierVisitor::HandleMultiply(HloInstruction* multiply) { } // exp(A) * exp(B) => exp(A+B) - if (lhs->opcode() == HloOpcode::kExp && rhs->opcode() == HloOpcode::kExp) { + if (Match(multiply, m::Multiply(m::Exp(m::Op(&lhs)), m::Exp(m::Op(&rhs))))) { auto add = computation_->AddInstruction(HloInstruction::CreateBinary( - multiply->shape(), HloOpcode::kAdd, lhs->mutable_operand(0), - rhs->mutable_operand(0))); + multiply->shape(), HloOpcode::kAdd, lhs, rhs)); return ReplaceWithNewInstruction( multiply, HloInstruction::CreateUnary(multiply->shape(), HloOpcode::kExp, add)); @@ -1004,20 +993,19 @@ Status AlgebraicSimplifierVisitor::HandleMultiply(HloInstruction* multiply) { Status AlgebraicSimplifierVisitor::HandleLog(HloInstruction* log) { // ln(exp(A)) => A VLOG(10) << "trying transform [ln(exp(A)) => A]: " << log->ToString(); - auto operand = log->mutable_operand(0); - if (operand->opcode() == HloOpcode::kExp && - ReplaceInstructionIfSameShape(log, operand->mutable_operand(0))) { + HloInstruction *a, *b; + if (Match(log, m::Log(m::Exp(m::Op(&a)))) && + ReplaceInstructionIfSameShape(log, a)) { return Status::OK(); } // ln(pow(A,B)) => B*ln(A) - if (operand->opcode() == HloOpcode::kPower) { - auto new_log = computation_->AddInstruction(HloInstruction::CreateUnary( - log->shape(), HloOpcode::kLog, operand->mutable_operand(0))); + if (Match(log, m::Log(m::Power(m::Op(&a), m::Op(&b))))) { + auto new_log = computation_->AddInstruction( + 
HloInstruction::CreateUnary(log->shape(), HloOpcode::kLog, a)); return ReplaceWithNewInstruction( - log, - HloInstruction::CreateBinary(log->shape(), HloOpcode::kMultiply, - new_log, operand->mutable_operand(1))); + log, HloInstruction::CreateBinary(log->shape(), HloOpcode::kMultiply, + new_log, b)); } return Status::OK(); @@ -1120,7 +1108,8 @@ bool OutputIsSubsetOfOperandElements(HloInstruction* instruction, } // namespace Status AlgebraicSimplifierVisitor::HandleBroadcast(HloInstruction* broadcast) { - auto operand = broadcast->mutable_operand(0); + HloInstruction* operand; + CHECK(Match(broadcast, m::Broadcast(m::Op(&operand)))); auto dims = broadcast->dimensions(); // A degenerate broadcast of a reshape that does not change the number of // elements can be replaced by a reshape. @@ -1231,30 +1220,28 @@ Status AlgebraicSimplifierVisitor::HandleConvert(HloInstruction* convert) { // Complex(Real(c), Imag(c)) -> c Status AlgebraicSimplifierVisitor::HandleComplex(HloInstruction* complex) { - auto real = complex->mutable_operand(0); - auto imag = complex->mutable_operand(1); - if (real->opcode() == HloOpcode::kReal && - imag->opcode() == HloOpcode::kImag && - real->operand(0) == imag->operand(0)) { - return ReplaceInstruction(complex, real->mutable_operand(0)); + HloInstruction *c0, *c1; + if (Match(complex, m::Complex(m::Real(m::Op(&c0)), m::Imag(m::Op(&c1)))) && + c0 == c1) { + return ReplaceInstruction(complex, c0); } return Status::OK(); } // Real(Complex(r, i)) -> r Status AlgebraicSimplifierVisitor::HandleReal(HloInstruction* real) { - auto operand = real->mutable_operand(0); - if (operand->opcode() == HloOpcode::kComplex) { - return ReplaceInstruction(real, operand->mutable_operand(0)); + HloInstruction* op; + if (Match(real, m::Real(m::Complex(m::Op(&op), m::Op())))) { + return ReplaceInstruction(real, op); } return Status::OK(); } // Imag(Complex(r, i)) -> i Status AlgebraicSimplifierVisitor::HandleImag(HloInstruction* imag) { - auto operand = 
imag->mutable_operand(0); - if (operand->opcode() == HloOpcode::kComplex) { - return ReplaceInstruction(imag, operand->mutable_operand(1)); + HloInstruction* op; + if (Match(imag, m::Imag(m::Complex(m::Op(), m::Op(&op))))) { + return ReplaceInstruction(imag, op); } return Status::OK(); } @@ -1351,8 +1338,8 @@ Status AlgebraicSimplifierVisitor::HandlePad(HloInstruction* pad) { Status AlgebraicSimplifierVisitor::HandlePower(HloInstruction* power) { VLOG(10) << "trying transform [pow(A, 0) => 1]: " << power->ToString(); - auto lhs = power->mutable_operand(0); - auto rhs = power->mutable_operand(1); + HloInstruction *lhs, *rhs; + CHECK(Match(power, m::Power(m::Op(&lhs), m::Op(&rhs)))); if (IsAll(rhs, 0)) { auto one = HloInstruction::CreateConstant( Literal::One(power->shape().element_type()).CloneToUnique()); @@ -1372,9 +1359,10 @@ Status AlgebraicSimplifierVisitor::HandlePower(HloInstruction* power) { } // pow(exp(A),B) => exp(A*B) - if (lhs->opcode() == HloOpcode::kExp) { + HloInstruction *a, *b; + if (Match(power, m::Power(m::Exp(m::Op(&a)), m::Op(&b)))) { auto a_times_b = computation_->AddInstruction(HloInstruction::CreateBinary( - power->shape(), HloOpcode::kMultiply, lhs->operands()[0], rhs)); + power->shape(), HloOpcode::kMultiply, a, b)); return ReplaceWithNewInstruction( power, HloInstruction::CreateUnary(power->shape(), HloOpcode::kExp, a_times_b)); @@ -1706,7 +1694,7 @@ Status AlgebraicSimplifierVisitor::HandleReduce(HloInstruction* reduce) { HloInstruction::CreateReshape(reduce->shape(), arg)); return ReplaceWithNewInstruction( reduce, HloInstruction::CreateMap(reduce->shape(), - {reshape, init_value}, function)); + {init_value, reshape}, function)); } return Status::OK(); } diff --git a/tensorflow/compiler/xla/service/allocation_tracker.cc b/tensorflow/compiler/xla/service/allocation_tracker.cc index 4f819a743c48f30df8dde00ece72a0b4e1748802..6bf65825cd02a77e51e9fc521516164d93483084 100644 --- a/tensorflow/compiler/xla/service/allocation_tracker.cc +++ 
b/tensorflow/compiler/xla/service/allocation_tracker.cc @@ -31,52 +31,51 @@ limitations under the License. namespace xla { StatusOr AllocationTracker::Register( - std::unique_ptr shaped_buffer, const string& tag) { + ShapedBuffer shaped_buffer, const string& tag) { tensorflow::mutex_lock lock(mutex_); VLOG(2) << "Register"; - std::vector> replicated_buffers; + std::vector replicated_buffers; replicated_buffers.emplace_back(std::move(shaped_buffer)); return RegisterInternal(std::move(replicated_buffers), tag); } StatusOr AllocationTracker::RegisterReplicatedBuffers( - std::vector> replicated_buffers, - const string& tag) { + std::vector replicated_buffers, const string& tag) { tensorflow::mutex_lock lock(mutex_); VLOG(2) << "RegisterReplicatedBuffers"; return RegisterInternal(std::move(replicated_buffers), tag); } StatusOr AllocationTracker::RegisterInternal( - std::vector> replicated_buffers, - const string& tag) { + std::vector replicated_buffers, const string& tag) { VLOG(2) << "RegisterInternal(" << "tag: \"" << tag << "\" with " << replicated_buffers.size() << " shaped_buffers."; for (const auto& shaped_buffer : replicated_buffers) { - VLOG(2) << "shaped_buffer:" << *shaped_buffer; - if (shaped_buffer->platform() != backend_->platform()) { + VLOG(2) << "shaped_buffer:" << shaped_buffer; + if (shaped_buffer.platform() != backend_->platform()) { return InvalidArgument( "AllocationTracker for platform %s cannot register buffer from " "platform %s", backend_->platform()->Name().c_str(), - shaped_buffer->platform()->Name().c_str()); + shaped_buffer.platform()->Name().c_str()); } } int64 handle = next_handle_++; for (auto& shaped_buffer : replicated_buffers) { std::vector shape_indices; - ShapeUtil::ForEachSubshape(shaped_buffer->on_device_shape(), + ShapeUtil::ForEachSubshape(shaped_buffer.on_device_shape(), [this, &shape_indices](const Shape& /*subshape*/, const ShapeIndex& index) { shape_indices.push_back(index); }); for (const ShapeIndex& index : shape_indices) { 
- AddAllocationOrIncrementRefCount(shaped_buffer->buffer(index), - shaped_buffer->device_ordinal()); + AddAllocationOrIncrementRefCount(shaped_buffer.buffer(index), + shaped_buffer.device_ordinal()); } - handle_to_shaped_buffers_[handle].emplace_back(std::move(shaped_buffer)); + handle_to_shaped_buffers_[handle].emplace_back( + MakeUnique(std::move(shaped_buffer))); } GlobalDataHandle result; @@ -146,13 +145,13 @@ StatusOr> AllocationTracker::DeconstructTuple( for (int i = 0; i < ShapeUtil::TupleElementCount(shaped_buffer->on_device_shape()); ++i) { - auto element_buffer = MakeUnique( + auto element_buffer = ShapedBuffer( ShapeUtil::GetTupleElementShape(shaped_buffer->on_host_shape(), i), ShapeUtil::GetTupleElementShape(shaped_buffer->on_device_shape(), i), shaped_buffer->platform(), shaped_buffer->device_ordinal()); - element_buffer->set_buffer(shaped_buffer->buffer(/*index=*/{i}), - /*index=*/{}); - std::vector> replicated_buffers; + element_buffer.set_buffer(shaped_buffer->buffer(/*index=*/{i}), + /*index=*/{}); + std::vector replicated_buffers; replicated_buffers.emplace_back(std::move(element_buffer)); TF_ASSIGN_OR_RETURN( GlobalDataHandle element_handle, @@ -204,7 +203,7 @@ StatusOr> AllocationTracker::ResolveInternal( } void AllocationTracker::AddAllocationOrIncrementRefCount( - perftools::gputools::DeviceMemoryBase device_memory, int device_ordinal) { + se::DeviceMemoryBase device_memory, int device_ordinal) { AllocationMap& allocation_map = opaque_to_allocation_map_[device_ordinal]; auto it = allocation_map.find(device_memory.opaque()); if (it == allocation_map.end()) { @@ -215,8 +214,8 @@ void AllocationTracker::AddAllocationOrIncrementRefCount( } } -Status AllocationTracker::DecrementRefCount( - perftools::gputools::DeviceMemoryBase device_memory, int device_ordinal) { +Status AllocationTracker::DecrementRefCount(se::DeviceMemoryBase device_memory, + int device_ordinal) { AllocationMap& allocation_map = opaque_to_allocation_map_[device_ordinal]; auto it 
= allocation_map.find(device_memory.opaque()); TF_RET_CHECK(it != allocation_map.end()); diff --git a/tensorflow/compiler/xla/service/allocation_tracker.h b/tensorflow/compiler/xla/service/allocation_tracker.h index 038aee8541b297d6f91fe2b3bce7455fd9a7084e..2bfcd5371293635f6525d0c5506c606e2762b943 100644 --- a/tensorflow/compiler/xla/service/allocation_tracker.h +++ b/tensorflow/compiler/xla/service/allocation_tracker.h @@ -45,14 +45,13 @@ class AllocationTracker { // Registers a shaped buffer of device memory, and returns a corresponding // handle that can be used for talking to XLA clients. The given shaped buffer // will be treated as the buffer corresponding to the only replica. - StatusOr Register( - std::unique_ptr shaped_buffer, const string& tag); + StatusOr Register(ShapedBuffer shaped_buffer, + const string& tag); // Registers a vector of shaped buffers of device memory, one per replica, and // returns a corresponding handle that can be used for talking to XLA clients. StatusOr RegisterReplicatedBuffers( - std::vector> replicated_buffers, - const string& tag); + std::vector replicated_buffers, const string& tag); // Unregister the allocation for the given data handle. Status Unregister(const GlobalDataHandle& data); @@ -77,7 +76,7 @@ class AllocationTracker { // Data structure encapsulating single memory allocation on the device. struct Allocation { // The pointer to this allocation. - perftools::gputools::DeviceMemoryBase device_memory; + se::DeviceMemoryBase device_memory; // The device that the memory is allocated on. int device_ordinal; @@ -95,21 +94,21 @@ class AllocationTracker { // Internal helper which registers a vector of shaped buffers, one per // replica. StatusOr RegisterInternal( - std::vector> replicated_buffers, - const string& tag) EXCLUSIVE_LOCKS_REQUIRED(mutex_); + std::vector replicated_buffers, const string& tag) + EXCLUSIVE_LOCKS_REQUIRED(mutex_); // Resets the shaped buffers corresponding to the given handle. 
Status Reset(const GlobalDataHandle& data) EXCLUSIVE_LOCKS_REQUIRED(mutex_); // Adds the given device address to the allocation tracker, or if it already // exists, then increment it's reference count. - void AddAllocationOrIncrementRefCount( - perftools::gputools::DeviceMemoryBase device_memory, int device_ordinal) + void AddAllocationOrIncrementRefCount(se::DeviceMemoryBase device_memory, + int device_ordinal) EXCLUSIVE_LOCKS_REQUIRED(mutex_); // Decrements the reference count of the given device memory. Then, if it is // zero, deallocate the memory. - Status DecrementRefCount(perftools::gputools::DeviceMemoryBase device_memory, + Status DecrementRefCount(se::DeviceMemoryBase device_memory, int device_ordinal) EXCLUSIVE_LOCKS_REQUIRED(mutex_); // A map from device memory opaque value to allocation. One such map is @@ -132,6 +131,9 @@ class AllocationTracker { // A map from data handle to a vector of shaped buffers that represent the // buffers for different replicas. + // + // The ShapedBuffers in this map's vectors need to be unique_ptrs, because our + // public API returns pointers to them. tensorflow::gtl::FlatMap>> handle_to_shaped_buffers_ GUARDED_BY(mutex_); diff --git a/tensorflow/compiler/xla/service/backend.cc b/tensorflow/compiler/xla/service/backend.cc index 05f2d062784147108a94ffb7bb0ca42ddfe4f010..a582dbffd688a4ca904539a462aa268a0b4d3cb6 100644 --- a/tensorflow/compiler/xla/service/backend.cc +++ b/tensorflow/compiler/xla/service/backend.cc @@ -36,19 +36,14 @@ limitations under the License. 
#include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/stream_executor_no_cuda.h" -namespace se = ::perftools::gputools; - namespace xla { -BackendOptions& BackendOptions::set_platform( - perftools::gputools::Platform* platform) { +BackendOptions& BackendOptions::set_platform(se::Platform* platform) { platform_ = platform; return *this; } -perftools::gputools::Platform* BackendOptions::platform() const { - return platform_; -} +se::Platform* BackendOptions::platform() const { return platform_; } BackendOptions& BackendOptions::set_intra_op_parallelism_threads( int num_threads) { @@ -77,7 +72,7 @@ struct Backend::EigenThreadPoolWrapper { /* static */ StatusOr> Backend::CreateBackend( const BackendOptions& options) { - perftools::gputools::Platform* platform = options.platform(); + se::Platform* platform = options.platform(); TF_ASSIGN_OR_RETURN(auto compiler, Compiler::GetForPlatform(platform)); TF_ASSIGN_OR_RETURN(auto stream_executors, PlatformUtil::GetStreamExecutors(platform)); @@ -121,7 +116,7 @@ StatusOr Backend::BorrowStream( } Backend::Backend( - perftools::gputools::Platform* platform, Compiler* compiler, + se::Platform* platform, Compiler* compiler, tensorflow::gtl::ArraySlice stream_executors, TransferManager* transfer_manager, ComputationPlacer* computation_placer, int intra_op_parallelism_threads) @@ -178,7 +173,7 @@ tensorflow::thread::ThreadPool* Backend::eigen_intra_op_thread_pool() const { return intra_op_thread_pool_wrapper_->pool.get(); } -StatusOr Backend::stream_executor( +StatusOr Backend::stream_executor( int device_ordinal) const { if (device_ordinal < 0 || device_ordinal > stream_executors_.back()->device_ordinal()) { @@ -201,9 +196,9 @@ StatusOr Backend::devices_equivalent(int device_ordinal_a, // bit crude but works for GPUs which is the important case where we compile // an executable for one GPU and want to know if it will run (well) on // another. 
- TF_ASSIGN_OR_RETURN(perftools::gputools::StreamExecutor * executor_a, + TF_ASSIGN_OR_RETURN(se::StreamExecutor * executor_a, stream_executor(device_ordinal_a)); - TF_ASSIGN_OR_RETURN(perftools::gputools::StreamExecutor * executor_b, + TF_ASSIGN_OR_RETURN(se::StreamExecutor * executor_b, stream_executor(device_ordinal_b)); return (executor_a->GetDeviceDescription().name() == executor_b->GetDeviceDescription().name()); diff --git a/tensorflow/compiler/xla/service/backend.h b/tensorflow/compiler/xla/service/backend.h index b5ca483b7274d20c31e932d748b6a4c9dea926f9..d32a0a400d8bd51fed6bfb6c6a581e16c8249a5c 100644 --- a/tensorflow/compiler/xla/service/backend.h +++ b/tensorflow/compiler/xla/service/backend.h @@ -44,8 +44,8 @@ namespace xla { class BackendOptions { public: // Set the platform backing the backend, or nullptr for the default platform. - BackendOptions& set_platform(perftools::gputools::Platform* platform); - perftools::gputools::Platform* platform() const; + BackendOptions& set_platform(se::Platform* platform); + se::Platform* platform() const; // Sets the thread pool size for parallel execution of an individual operator. // The default value of -1 will result in initializing the thread pool with @@ -54,7 +54,7 @@ class BackendOptions { int intra_op_parallelism_threads() const; private: - perftools::gputools::Platform* platform_ = nullptr; + se::Platform* platform_ = nullptr; int intra_op_parallelism_threads_ = -1; }; @@ -66,7 +66,7 @@ class BackendOptions { // StreamPtr stream = backend->BorrowStream().ConsumeValueOrDie(); class Backend { public: - using StreamPtr = Pool::SmartPtr; + using StreamPtr = Pool::SmartPtr; // Creates a new backend. static StatusOr> CreateBackend( @@ -79,7 +79,7 @@ class Backend { ~Backend(); // Accessors for the various objects. 
- perftools::gputools::Platform* platform() const { return platform_; } + se::Platform* platform() const { return platform_; } Compiler* compiler() const { return compiler_; } DeviceMemoryAllocator* memory_allocator() const { return memory_allocator_.get(); @@ -96,19 +96,17 @@ class Backend { // Returns stream executors of all supported devices for this backend. The // executors are ordered by the device ordinal. - const std::vector& stream_executors() - const { + const std::vector& stream_executors() const { return stream_executors_; } // Returns the stream executor for the given device ordinal. - StatusOr stream_executor( - int device_ordinal) const; + StatusOr stream_executor(int device_ordinal) const; // Returns the stream executor for the default device ordinal. This stream // executor can only be used when the number of computations is 1 (replication // can be > 1). - perftools::gputools::StreamExecutor* default_stream_executor() const { + se::StreamExecutor* default_stream_executor() const { CHECK(!stream_executors_.empty()); return stream_executors_[0]; } @@ -117,8 +115,7 @@ class Backend { // internal pool, or by constructing/initializating it, and returns the result // to the caller. StatusOr BorrowStream(int device_ordinal); - StatusOr BorrowStream( - perftools::gputools::StreamExecutor* executor); + StatusOr BorrowStream(se::StreamExecutor* executor); // Returns a function to borrow a stream, as `BorrowStream` above does. 
// Purely for convenience, the caller could rather make this anonymous @@ -157,29 +154,26 @@ class Backend { private: struct EigenThreadPoolWrapper; - Backend(perftools::gputools::Platform* platform, Compiler* compiler, - tensorflow::gtl::ArraySlice - stream_executors, + Backend(se::Platform* platform, Compiler* compiler, + tensorflow::gtl::ArraySlice stream_executors, TransferManager* transfer_manager, ComputationPlacer* computation_placer, int intra_op_parallelism_threads); Backend(const Backend&) = delete; Backend& operator=(const Backend&) = delete; - perftools::gputools::Platform* platform_; + se::Platform* platform_; Compiler* compiler_; TransferManager* transfer_manager_; ComputationPlacer* computation_placer_; // Vector of stream executors. stream_executors_[0] is the default executor. - std::vector stream_executors_; + std::vector stream_executors_; tensorflow::mutex mu_; // Mapping from stream executor to stream pools, used by `BorrowStream` above. - std::map> - stream_pools_ GUARDED_BY(mu_); + std::map> stream_pools_ GUARDED_BY(mu_); // The default memory allocator to use. 
std::unique_ptr memory_allocator_; diff --git a/tensorflow/compiler/xla/service/bfloat16_propagation.cc b/tensorflow/compiler/xla/service/bfloat16_propagation.cc index c26d2feef584faeff013a602409cdd58c2d44a5a..43ebe92c5ec1c945780f76ca4178a94f948a81b9 100644 --- a/tensorflow/compiler/xla/service/bfloat16_propagation.cc +++ b/tensorflow/compiler/xla/service/bfloat16_propagation.cc @@ -392,7 +392,6 @@ void BFloat16Propagation::AdjustCalledComputationRoot(HloInstruction* hlo) { adjust_computation(hlo->fused_instructions_computation(), hlo->shape()); break; case HloOpcode::kWhile: - adjust_computation(hlo->while_condition(), hlo->shape()); adjust_computation(hlo->while_body(), hlo->shape()); break; default: diff --git a/tensorflow/compiler/xla/service/bfloat16_propagation_test.cc b/tensorflow/compiler/xla/service/bfloat16_propagation_test.cc index 88f83014164ff726a11e45e762b9c082cf12720d..183db1652e498edb0b94e9c9a272e2b8a7fc53ba 100644 --- a/tensorflow/compiler/xla/service/bfloat16_propagation_test.cc +++ b/tensorflow/compiler/xla/service/bfloat16_propagation_test.cc @@ -426,8 +426,62 @@ TEST_F(BFloat16PropagationTest, SelectOverTuples) { EXPECT_TRUE(OutputsBF16(xpose)); } -// Tests that BF16 is propagated properly through while computations. -TEST_F(BFloat16PropagationTest, PropagateThroughWhile) { +// Tests that BF16 is propagated properly through a while computation with +// non-tuple input/output. 
+TEST_F(BFloat16PropagationTest, PropagateThroughSimpleWhile) { + auto module = CreateNewModule(); + auto builder = HloComputation::Builder(TestName()); + Shape shape = ShapeUtil::MakeShape(F32, {4, 4}); + + HloInstruction* param0 = builder.AddInstruction( + HloInstruction::CreateParameter(0, shape, "param0")); + HloInstruction* param1 = builder.AddInstruction( + HloInstruction::CreateParameter(1, shape, "param1")); + HloInstruction* add = builder.AddInstruction( + HloInstruction::CreateBinary(shape, HloOpcode::kAdd, param0, param1)); + + auto builder_cond = HloComputation::Builder("cond"); + auto cond_param = builder_cond.AddInstruction( + HloInstruction::CreateParameter(0, shape, "cond_param")); + auto cond_dot = builder_cond.AddInstruction(HloInstruction::CreateBinary( + shape, HloOpcode::kDot, cond_param, cond_param)); + auto cond_root = builder_cond.AddInstruction(HloInstruction::CreateBinary( + ShapeUtil::MakeShape(PRED, {}), HloOpcode::kGt, + builder_cond.AddInstruction(HloInstruction::CreateSlice( + ShapeUtil::MakeShape(F32, {}), cond_dot, {0, 0}, {1, 1}, {1, 1})), + builder_cond.AddInstruction(HloInstruction::CreateSlice( + ShapeUtil::MakeShape(F32, {}), cond_dot, {1, 1}, {2, 2}, {1, 1})))); + auto cond = module->AddEmbeddedComputation(builder_cond.Build()); + + auto builder_body = HloComputation::Builder("body"); + auto body_param = builder_body.AddInstruction( + HloInstruction::CreateParameter(0, shape, "body_param")); + auto body_dot = builder_body.AddInstruction(HloInstruction::CreateBinary( + shape, HloOpcode::kDot, body_param, body_param)); + auto body = module->AddEmbeddedComputation(builder_body.Build()); + + auto while_hlo = builder.AddInstruction( + HloInstruction::CreateWhile(shape, cond, body, add)); + + auto dot = builder.AddInstruction(HloInstruction::CreateBinary( + shape, HloOpcode::kDot, while_hlo, while_hlo)); + auto computation = module->AddEntryComputation(builder.Build()); + + EXPECT_TRUE(PropagatePrecision(module.get())); + + 
EXPECT_EQ(computation->root_instruction(), dot); + EXPECT_TRUE( + ShapeUtil::Equal(cond_root->shape(), ShapeUtil::MakeShape(PRED, {}))); + EXPECT_TRUE(OutputsBF16(add)); + EXPECT_TRUE(OutputsBF16(body_dot)); + EXPECT_TRUE(OutputsBF16(body_param)); + EXPECT_TRUE(OutputsBF16(cond_param)); + EXPECT_FALSE(OutputsBF16(dot)); +} + +// Tests that BF16 is propagated properly through while computations with +// tuple-shaped input/output. +TEST_F(BFloat16PropagationTest, PropagateThroughTupleWhile) { auto module = CreateNewModule(); auto builder = HloComputation::Builder(TestName()); Shape shape = ShapeUtil::MakeShape(F32, {4, 4}); diff --git a/tensorflow/compiler/xla/service/call_graph.cc b/tensorflow/compiler/xla/service/call_graph.cc index 13eb02ca012f44b2b5ed7c6f5becb7d54b07c33c..a8053d15e124319c5c898f0034b9aaa95a007a89 100644 --- a/tensorflow/compiler/xla/service/call_graph.cc +++ b/tensorflow/compiler/xla/service/call_graph.cc @@ -51,8 +51,8 @@ std::ostream& operator<<(std::ostream& out, const CallContext& context) { return out; } -CallContext GetInstructionCallContext(const HloInstruction* instruction) { - switch (instruction->opcode()) { +CallContext GetInstructionCallContext(HloOpcode opcode) { + switch (opcode) { case HloOpcode::kCall: case HloOpcode::kConditional: case HloOpcode::kWhile: @@ -101,7 +101,7 @@ void CallGraphNode::AddCallerCallSite(const CallSite& caller_callsite) { void CallGraphNode::AddCallSiteForInstruction(HloInstruction* instruction) { CHECK_EQ(instruction->parent(), computation()); - const CallContext context = GetInstructionCallContext(instruction); + const CallContext context = GetInstructionCallContext(instruction->opcode()); if (!instruction->called_computations().empty()) { CHECK(context == CallContext::kSequential || context == CallContext::kParallel); diff --git a/tensorflow/compiler/xla/service/call_graph.h b/tensorflow/compiler/xla/service/call_graph.h index 
688c4085dfb4f47d3e08a4abee5e7b645f595b11..97d3811508adee1bf2d0942bcc69e3e34a41c8c3 100644 --- a/tensorflow/compiler/xla/service/call_graph.h +++ b/tensorflow/compiler/xla/service/call_graph.h @@ -53,7 +53,7 @@ enum class CallContext { string CallContextToString(CallContext context); std::ostream& operator<<(std::ostream& out, const CallContext& context); -CallContext GetInstructionCallContext(const HloInstruction* instruction); +CallContext GetInstructionCallContext(HloOpcode opcode); // Represents an HLO instruction which calls one or more computations. class CallSite { diff --git a/tensorflow/compiler/xla/service/compile_only_service.cc b/tensorflow/compiler/xla/service/compile_only_service.cc index c83da9eddc8f8b156dd9acfc99b393bf844575da..c9f78a0f9f1c0e889cd2c761e3129ec329a7b647 100644 --- a/tensorflow/compiler/xla/service/compile_only_service.cc +++ b/tensorflow/compiler/xla/service/compile_only_service.cc @@ -37,7 +37,7 @@ limitations under the License. namespace xla { /* static */ StatusOr> -CompileOnlyService::NewService(perftools::gputools::Platform* platform) { +CompileOnlyService::NewService(se::Platform* platform) { ServiceOptions default_options; default_options.set_platform(platform); return NewService(default_options); @@ -45,7 +45,7 @@ CompileOnlyService::NewService(perftools::gputools::Platform* platform) { /* static */ StatusOr> CompileOnlyService::NewService(const ServiceOptions& options) { - perftools::gputools::Platform* platform = options.platform(); + se::Platform* platform = options.platform(); if (platform == nullptr) { TF_ASSIGN_OR_RETURN(platform, PlatformUtil::GetDefaultPlatform()); } @@ -61,6 +61,33 @@ CompileOnlyService::CompileOnlyService(const ServiceOptions& options, Compiler* compiler) : Service(options, /*execute_backend=*/nullptr), compiler_(compiler) {} +StatusOr>> +CompileOnlyService::CompileAheadOfTime( + const tensorflow::gtl::ArraySlice computations, + const AotCompilationOptions& options) { + std::vector> hlo_modules; + for 
(const AotXlaComputationInstance& instance : computations) { + TF_RET_CHECK(instance.computation.has_program_shape()); + + const DebugOptions& debug_options = options.debug_options(); + const auto& program_shape = instance.computation.program_shape(); + ExecutionOptions execution_options; + *execution_options.mutable_debug_options() = debug_options; + TF_ASSIGN_OR_RETURN( + std::unique_ptr module_config, + CreateModuleConfig(program_shape, instance.argument_layouts, + &execution_options)); + + TF_ASSIGN_OR_RETURN( + std::unique_ptr hlo_module, + HloModule::CreateFromProto(instance.computation, *module_config)); + TF_RETURN_IF_ERROR(MaybeDumpHloModule(*hlo_module)); + hlo_modules.push_back(std::move(hlo_module)); + } + + return compiler_->CompileAheadOfTime(std::move(hlo_modules), options); +} + StatusOr>> CompileOnlyService::CompileAheadOfTime( const tensorflow::gtl::ArraySlice computations, diff --git a/tensorflow/compiler/xla/service/compile_only_service.h b/tensorflow/compiler/xla/service/compile_only_service.h index 9859941c6c17460939e5b6817f1c7c415e63443c..c10609e67fcdec459baf25a95173bbf700994be9 100644 --- a/tensorflow/compiler/xla/service/compile_only_service.h +++ b/tensorflow/compiler/xla/service/compile_only_service.h @@ -34,7 +34,7 @@ class CompileOnlyService : public Service { // platform that the service should target. If platform is null then the // default platform is used. static StatusOr> NewService( - perftools::gputools::Platform* platform); + se::Platform* platform); static StatusOr> NewService( const ServiceOptions& options); @@ -53,6 +53,25 @@ class CompileOnlyService : public Service { const tensorflow::gtl::ArraySlice computations, const AotCompilationOptions& Options); + // A description of a xla computation to compile using CompileAheadOfTime. + // + // TODO(b/74197823): This is a part of a NOT YET ready refactor. 
+ struct AotXlaComputationInstance { + HloModuleProto computation; + std::vector argument_layouts; + const Shape* result_layout = nullptr; + }; + + // Compiles a list of xla computations for ahead-of-time execution. This is + // intended for use in static compilation. See + // |CompileOnlyClient::CompileAheadOfTime| for additional details. + // + // TODO(b/74197823): This is a part of a NOT YET ready refactor. + StatusOr>> + CompileAheadOfTime( + const tensorflow::gtl::ArraySlice computations, + const AotCompilationOptions& options); + // Override Service methods that require or imply the existence of an // execute backend. Note that this does not include TransferToClient, as // computing constants produces global data that we may wish to transfer. diff --git a/tensorflow/compiler/xla/service/compiler.cc b/tensorflow/compiler/xla/service/compiler.cc index 0392d4af48a040c4a648f7bf9bf21a62ce03a990..8b01a6c4b5004d03e6e7d23b99b923fdcdeaff99 100644 --- a/tensorflow/compiler/xla/service/compiler.cc +++ b/tensorflow/compiler/xla/service/compiler.cc @@ -23,26 +23,21 @@ limitations under the License. 
#include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" -namespace se = ::perftools::gputools; - namespace xla { /* static */ tensorflow::mutex Compiler::platform_compiler_mutex_( tensorflow::LINKER_INITIALIZED); -/* static */ std::map* +/* static */ std::map* Compiler::GetPlatformCompilerFactories() { - static auto* r = - new std::map; + static auto* r = new std::map; return r; } /* static */ -std::map>* +std::map>* Compiler::GetPlatformCompilers() { - static auto* r = new std::map>; + static auto* r = new std::map>; return r; } diff --git a/tensorflow/compiler/xla/service/compiler.h b/tensorflow/compiler/xla/service/compiler.h index b4b53ae2ed425a48de5bcb6ba5c37b5d37e1f371..5c14591d93cc995a0b75efb14da8ec98d5859ff5 100644 --- a/tensorflow/compiler/xla/service/compiler.h +++ b/tensorflow/compiler/xla/service/compiler.h @@ -70,7 +70,7 @@ class AotCompilationOptions { virtual ~AotCompilationOptions() = default; // Returns the ID of the platform to which these options apply. - virtual perftools::gputools::Platform::Id PlatformId() const = 0; + virtual se::Platform::Id PlatformId() const = 0; // Optional allocator that may be used for allocating temp space on the device // during compilation. @@ -109,7 +109,7 @@ class Compiler { virtual ~Compiler() {} // Returns the ID of the platform that this compiler targets. - virtual perftools::gputools::Platform::Id PlatformId() const = 0; + virtual se::Platform::Id PlatformId() const = 0; // Runs Hlo passes to optimize the given Hlo module, returns the optimized // module. @@ -120,8 +120,7 @@ class Compiler { // algorithm over those buffers, to see which variant is fastest. Any space // allocated should be deallocated before this function returns. 
virtual StatusOr> RunHloPasses( - std::unique_ptr module, - perftools::gputools::StreamExecutor* executor, + std::unique_ptr module, se::StreamExecutor* executor, DeviceMemoryAllocator* device_allocator) = 0; // Compiles the HLO module for execution on a device given by the executor, @@ -137,8 +136,7 @@ class Compiler { // // Use the overload below to compile computations that run in parallel. virtual StatusOr> RunBackend( - std::unique_ptr module, - perftools::gputools::StreamExecutor* executor, + std::unique_ptr module, se::StreamExecutor* executor, DeviceMemoryAllocator* device_allocator) = 0; // Compiles a set of HLO modules that can run in parallel, potentially @@ -151,8 +149,7 @@ class Compiler { // modules to RunHloPasses and RunBackends. virtual StatusOr>> Compile( std::vector> modules, - std::vector> - stream_exec, + std::vector> stream_exec, DeviceMemoryAllocator* device_allocator) = 0; // Compiles the HLO module for ahead-of-time execution. This is intended for @@ -171,14 +168,12 @@ class Compiler { // be a singleton, so no ownership is transferred. // // Precondition: a platform kind must not be registered more than once. - static void RegisterCompilerFactory( - perftools::gputools::Platform::Id platform_id, - CompilerFactory compiler_factory); + static void RegisterCompilerFactory(se::Platform::Id platform_id, + CompilerFactory compiler_factory); // Returns the compiler singleton pointer if it is available for the given // platform, or an error status if it is not. - static StatusOr GetForPlatform( - const perftools::gputools::Platform* platform); + static StatusOr GetForPlatform(const se::Platform* platform); // Returns a function that computes the size in bytes of the logical // buffer that contains a shape. @@ -198,12 +193,12 @@ class Compiler { static tensorflow::mutex platform_compiler_mutex_; // Map from platform kind to compiler factory. 
- static std::map* + static std::map* GetPlatformCompilerFactories(); // Map from platform kind to compiler instance, if we made one already (based // on the factories above). - static std::map>* + static std::map>* GetPlatformCompilers(); }; diff --git a/tensorflow/compiler/xla/service/computation_layout.cc b/tensorflow/compiler/xla/service/computation_layout.cc index d2d4f14fcec35f5b51a2670a646154ce8bb9bfc1..cb61f3da39fb8eef69fd81066d87a1da91a62935 100644 --- a/tensorflow/compiler/xla/service/computation_layout.cc +++ b/tensorflow/compiler/xla/service/computation_layout.cc @@ -23,12 +23,15 @@ limitations under the License. namespace xla { -ComputationLayout::ComputationLayout(const ProgramShape& program_shape) +ComputationLayout::ComputationLayout(const ProgramShape& program_shape, + bool ignore_layouts) : result_layout_(program_shape.result()) { for (auto& shape : program_shape.parameters()) { parameter_layouts_.emplace_back(shape); } - SetToDefaultLayout(); + if (ignore_layouts) { + SetToDefaultLayout(); + } } void ComputationLayout::SetToDefaultLayout() { diff --git a/tensorflow/compiler/xla/service/computation_layout.h b/tensorflow/compiler/xla/service/computation_layout.h index 80e102411c7885669947d89f378b1ec61e3e4e96..53c3a3f7b738687db3098acfaef1ae87860d0440 100644 --- a/tensorflow/compiler/xla/service/computation_layout.h +++ b/tensorflow/compiler/xla/service/computation_layout.h @@ -34,8 +34,9 @@ class ComputationLayout { public: // Constructs a ComputationLayout from a ProgramShape. The layouts of the // parameters and results are set to the default layout. Layouts in the - // ProgramShape are ignored. - explicit ComputationLayout(const ProgramShape& program_shape); + // ProgramShape are ignored if ignore_layouts is true. + explicit ComputationLayout(const ProgramShape& program_shape, + bool ignore_layouts = true); // Returns the layout of a particular parameter. 
const ShapeLayout& parameter_layout(int64 param_no) const { diff --git a/tensorflow/compiler/xla/service/computation_placer.cc b/tensorflow/compiler/xla/service/computation_placer.cc index 657fba6b6231104bf47f9dec80f7cd36a0ba3efd..7c1bacff92b231661477b9931a3066fd91110445 100644 --- a/tensorflow/compiler/xla/service/computation_placer.cc +++ b/tensorflow/compiler/xla/service/computation_placer.cc @@ -32,8 +32,6 @@ limitations under the License. #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/stream_executor_no_cuda.h" -namespace se = ::perftools::gputools; - namespace xla { Status DeviceAssignment::Serialize(DeviceAssignmentProto* proto) const { @@ -132,11 +130,9 @@ StatusOr ComputationPlacer::AssignDevices( ComputationPlacer::platform_computation_placer_mutex_( tensorflow::LINKER_INITIALIZED); -/* static */ std::map* +/* static */ std::map* ComputationPlacer::GetPlatformComputationPlacers() { - static auto* r = - new std::map; + static auto* r = new std::map; return r; } @@ -147,10 +143,10 @@ static std::unique_ptr CreateComputationPlacer() { } static bool InitModule() { - xla::ComputationPlacer::RegisterComputationPlacer(se::host::kHostPlatformId, - &CreateComputationPlacer); - xla::ComputationPlacer::RegisterComputationPlacer(se::cuda::kCudaPlatformId, - &CreateComputationPlacer); + xla::ComputationPlacer::RegisterComputationPlacer( + stream_executor::host::kHostPlatformId, &CreateComputationPlacer); + xla::ComputationPlacer::RegisterComputationPlacer( + stream_executor::cuda::kCudaPlatformId, &CreateComputationPlacer); return true; } static bool module_initialized = InitModule(); diff --git a/tensorflow/compiler/xla/service/computation_placer.h b/tensorflow/compiler/xla/service/computation_placer.h index 737ccabaa7a61931b6e2787f75b02857562d4820..737d00e93ecb51a9bd544bbcbe99d93374d108fb 100644 --- a/tensorflow/compiler/xla/service/computation_placer.h +++ b/tensorflow/compiler/xla/service/computation_placer.h @@ -80,13 +80,13 @@ 
class ComputationPlacer { // Registers a computation placer creation function for a particular platform. static void RegisterComputationPlacer( - perftools::gputools::Platform::Id platform_id, + se::Platform::Id platform_id, ComputationPlacerCreationFunction creation_function); // Returns the computation placer singleton pointer if it is available for the // given platform, or an error status if it is not. static StatusOr GetForPlatform( - const perftools::gputools::Platform* platform); + const se::Platform* platform); private: // The mutex that guards the platform-to-computation placer map. @@ -101,10 +101,9 @@ class ComputationPlacer { }; // Map from platform kind to computation placer singleton. - static std::map* - GetPlatformComputationPlacers(); + static std::map* GetPlatformComputationPlacers(); - perftools::gputools::Platform::Id platform_id_; + se::Platform::Id platform_id_; TF_DISALLOW_COPY_AND_ASSIGN(ComputationPlacer); }; diff --git a/tensorflow/compiler/xla/service/conditional_simplifier.cc b/tensorflow/compiler/xla/service/conditional_simplifier.cc index f35de080853f7ec986565cb2df1050946ac3f244..e560abc87f84566905333181c159edd3ca297563 100644 --- a/tensorflow/compiler/xla/service/conditional_simplifier.cc +++ b/tensorflow/compiler/xla/service/conditional_simplifier.cc @@ -69,7 +69,7 @@ static StatusOr TryRemoveConditional(HloInstruction* conditional) { conditional->shape(), {conditional->mutable_operand(2)}, conditional->false_computation())); } - + conditional->SetupDerivedInstruction(call_op); TF_RETURN_IF_ERROR(computation->ReplaceInstruction(conditional, call_op)); TF_RETURN_IF_ERROR(CallInliner::Inline(call_op).status()); diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD index 966e2d0fc5b5e21180795a07119cb028913dd176..246b80286189286dd29a306dd0bda495df9dad3e 100644 --- a/tensorflow/compiler/xla/service/cpu/BUILD +++ b/tensorflow/compiler/xla/service/cpu/BUILD @@ -18,6 +18,10 @@ 
load(":build_defs.bzl", "runtime_copts") load("//tensorflow:tensorflow.bzl", "tf_cc_test") load("//tensorflow:tensorflow.bzl", "tf_cc_binary") load("//tensorflow/compiler/xla:xla.bzl", "ORC_JIT_MEMORY_MAPPER_TARGETS") +load( + "//third_party/mkl:build_defs.bzl", + "if_mkl", +) # Filegroup used to collect source files for dependency checking. filegroup( @@ -170,6 +174,7 @@ cc_library( ":runtime_fft", ":runtime_fork_join", ":runtime_matmul", + ":runtime_matmul_mkl", ":runtime_single_threaded_conv2d", ":runtime_single_threaded_matmul", "@llvm//:execution_engine", @@ -538,6 +543,22 @@ cc_library( ], ) +cc_library( + name = "runtime_matmul_mkl", + srcs = ["runtime_matmul_mkl.cc"], + hdrs = ["runtime_matmul_mkl.h"], + copts = runtime_copts(), + visibility = ["//visibility:public"], + deps = [ + "//tensorflow/compiler/xla:executable_run_options", + "//tensorflow/core:framework_lite", + "//third_party/eigen3", + ] + if_mkl([ + "//third_party/mkl:intel_binary_blob", + "@mkl_dnn", + ]), +) + cc_library( name = "runtime_single_threaded_conv2d", srcs = [ @@ -584,10 +605,12 @@ cc_library( tf_cc_test( name = "cpu_runtime_test", srcs = ["cpu_runtime_test.cc"], + shard_count = 10, tags = ["optonly"], deps = [ ":cpu_runtime", ":runtime_matmul", + ":runtime_matmul_mkl", ":runtime_single_threaded_matmul", "//tensorflow/compiler/xla:array2d", "//tensorflow/compiler/xla:types", diff --git a/tensorflow/compiler/xla/service/cpu/compiler_functor.cc b/tensorflow/compiler/xla/service/cpu/compiler_functor.cc index 61b2da7a7dce7f6fba46a23cc8e5462a3899a18c..6a7eb85e3baec3517b8f3ddef6a8dcfae9c9e614 100644 --- a/tensorflow/compiler/xla/service/cpu/compiler_functor.cc +++ b/tensorflow/compiler/xla/service/cpu/compiler_functor.cc @@ -25,11 +25,11 @@ limitations under the License. 
#include "llvm/ADT/StringRef.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/ExecutionEngine/ObjectMemoryBuffer.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Verifier.h" #include "llvm/MC/MCContext.h" #include "llvm/Object/ObjectFile.h" +#include "llvm/Support/SmallVectorMemoryBuffer.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/IPO.h" @@ -158,7 +158,7 @@ std::unique_ptr CompilerFunctor::operator()( // Construct ObjectFile from machine code buffer. return std::unique_ptr( - new llvm::ObjectMemoryBuffer(std::move(stream_buffer))); + new llvm::SmallVectorMemoryBuffer(std::move(stream_buffer))); } static std::vector VectorFunctionsForTargetLibraryInfoImpl() { diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc index e43777c5e5e8afcf08e1e334c8847f6b94d0d047..e8472fd36b360fcb1f73cd97128fe7975bf2cd5c 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc @@ -100,8 +100,6 @@ limitations under the License. 
#include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/lib/strings/strcat.h" -namespace se = ::perftools::gputools; - namespace xla { namespace cpu { @@ -440,8 +438,7 @@ Status VerifyLlvmModule(const llvm::Module& llvm_module) { } // namespace StatusOr> CpuCompiler::RunHloPasses( - std::unique_ptr module, - perftools::gputools::StreamExecutor* /*stream_exec*/, + std::unique_ptr module, se::StreamExecutor* /*stream_exec*/, DeviceMemoryAllocator* /*device_allocator*/) { VLOG(2) << "Before optimization:"; XLA_VLOG_LINES(2, module->ToString()); @@ -454,8 +451,7 @@ StatusOr> CpuCompiler::RunHloPasses( } StatusOr> CpuCompiler::RunBackend( - std::unique_ptr module, - perftools::gputools::StreamExecutor* stream_exec, + std::unique_ptr module, se::StreamExecutor* stream_exec, DeviceMemoryAllocator* /*device_allocator*/) { const string timer_message = "Compiling [" + module->name() + "] for CPU using JIT"; @@ -938,9 +934,9 @@ HloCostAnalysis::ShapeSizeFunction CpuCompiler::ShapeSizeBytesFunction() const { } // namespace xla static bool InitModule() { - xla::Compiler::RegisterCompilerFactory(se::host::kHostPlatformId, []() { - return xla::MakeUnique(); - }); + xla::Compiler::RegisterCompilerFactory( + stream_executor::host::kHostPlatformId, + []() { return xla::MakeUnique(); }); return true; } static bool module_initialized = InitModule(); diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.h b/tensorflow/compiler/xla/service/cpu/cpu_compiler.h index 3498139ab95d21383c6dc008ae5614b7bfe91148..151af38438a980e40c06a1801a936cb620c6c4ba 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.h +++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.h @@ -53,7 +53,7 @@ class CpuAotCompilationOptions : public AotCompilationOptions { RelocationModel relocation_model); ~CpuAotCompilationOptions() override; - perftools::gputools::Platform::Id PlatformId() const override; + se::Platform::Id PlatformId() const override; // The triple used for 
compilation, similar to clang's -target flag. const string& triple() const { return triple_; } @@ -112,25 +112,23 @@ class CpuCompiler : public LLVMCompiler { // Bring in // StatusOr>> Compile( // std::vector> modules, - // std::vector> + // std::vector> // stream_execs) using LLVMCompiler::Compile; StatusOr> RunHloPasses( - std::unique_ptr module, - perftools::gputools::StreamExecutor* stream_exec, + std::unique_ptr module, se::StreamExecutor* stream_exec, DeviceMemoryAllocator* device_allocator) override; StatusOr> RunBackend( - std::unique_ptr module, - perftools::gputools::StreamExecutor* stream_exec, + std::unique_ptr module, se::StreamExecutor* stream_exec, DeviceMemoryAllocator* device_allocator) override; StatusOr>> CompileAheadOfTime(std::vector> modules, const AotCompilationOptions& options) override; - perftools::gputools::Platform::Id PlatformId() const override; + se::Platform::Id PlatformId() const override; HloCostAnalysis::ShapeSizeFunction ShapeSizeBytesFunction() const override; diff --git a/tensorflow/compiler/xla/service/cpu/cpu_executable.cc b/tensorflow/compiler/xla/service/cpu/cpu_executable.cc index c053703c3524a47ee1de9681c1b986edbf109430..97e550abe44a32f25b0aab039e95b0ce2eecde4c 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_executable.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_executable.cc @@ -45,8 +45,6 @@ limitations under the License. 
#include "tensorflow/core/platform/types.h" #include "tensorflow/stream_executor/host/host_stream.h" -namespace se = ::perftools::gputools; - namespace xla { namespace cpu { @@ -75,7 +73,7 @@ CpuExecutable::CpuExecutable( Status CpuExecutable::AllocateBuffers( DeviceMemoryAllocator* memory_allocator, int device_ordinal, - std::vector* buffers) { + std::vector* buffers) { CHECK_EQ(buffers->size(), assignment_->Allocations().size()); VLOG(3) << "Allocating " << assignment_->Allocations().size() << " allocations for module " << module().name(); @@ -245,19 +243,18 @@ static Status DeallocateTempBuffers( return Status::OK(); } -StatusOr> CpuExecutable::CreateResultShapedBuffer( +StatusOr CpuExecutable::CreateResultShapedBuffer( const ServiceExecutableRunOptions* run_options, - tensorflow::gtl::ArraySlice - allocated_buffers, + tensorflow::gtl::ArraySlice allocated_buffers, std::vector* buffers_in_result) { se::Stream* stream = run_options->stream(); - auto result_buffer = MakeUnique( + ShapedBuffer result_buffer( /*on_host_shape=*/result_shape(), /*on_device_shape=*/result_shape(), stream->parent()->platform(), stream->parent()->device_ordinal()); // Copy DeviceMemoryBase values which contain the array(s) of the result into // the respective location in ShapedBuffer which is returned to the caller. 
- TF_RETURN_IF_ERROR(result_buffer->buffers().ForEachMutableElementWithStatus( + TF_RETURN_IF_ERROR(result_buffer.buffers().ForEachMutableElementWithStatus( [&](const ShapeIndex& index, se::DeviceMemoryBase* device_memory) { const auto& sources = this->GetRootPointsToSet().element(index); // The points to set is unambiguous so the set should be a @@ -284,7 +281,7 @@ StatusOr> CpuExecutable::CreateResultShapedBuffer( return std::move(result_buffer); } -StatusOr> CpuExecutable::ExecuteOnStream( +StatusOr CpuExecutable::ExecuteOnStream( const ServiceExecutableRunOptions* run_options, tensorflow::gtl::ArraySlice arguments, HloExecutionProfile* hlo_execution_profile) { @@ -303,7 +300,7 @@ StatusOr> CpuExecutable::ExecuteOnStream( std::vector buffers_in_result(assignment_->Allocations().size(), false); TF_ASSIGN_OR_RETURN( - std::unique_ptr result_buffer, + ShapedBuffer result_buffer, CreateResultShapedBuffer(run_options, buffers, &buffers_in_result)); // Free all buffers not in the result. @@ -313,7 +310,7 @@ StatusOr> CpuExecutable::ExecuteOnStream( return std::move(result_buffer); } -StatusOr> CpuExecutable::ExecuteAsyncOnStream( +StatusOr CpuExecutable::ExecuteAsyncOnStream( const ServiceExecutableRunOptions* run_options, tensorflow::gtl::ArraySlice arguments) { if (hlo_profiling_enabled()) { @@ -322,7 +319,7 @@ StatusOr> CpuExecutable::ExecuteAsyncOnStream( "supported on CPU."); } - auto* host_stream = dynamic_cast( + auto* host_stream = dynamic_cast( run_options->stream()->implementation()); se::Stream* stream = run_options->stream(); DeviceMemoryAllocator* memory_allocator = run_options->allocator(); @@ -333,7 +330,7 @@ StatusOr> CpuExecutable::ExecuteAsyncOnStream( std::vector buffers_in_result(assignment_->Allocations().size(), false); TF_ASSIGN_OR_RETURN( - std::unique_ptr result_buffer, + ShapedBuffer result_buffer, CreateResultShapedBuffer(run_options, buffers, &buffers_in_result)); LogLiveAddresses(buffers, buffers_in_result); diff --git 
a/tensorflow/compiler/xla/service/cpu/cpu_executable.h b/tensorflow/compiler/xla/service/cpu/cpu_executable.h index d3502b3a03e27c8f90ed74c4d826dfab1c4e8b75..06b6943cb5a3f20f4d8aff8acfc4ba50b9067477 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_executable.h +++ b/tensorflow/compiler/xla/service/cpu/cpu_executable.h @@ -55,12 +55,12 @@ class CpuExecutable : public Executable { std::unique_ptr hlo_profile_index_map); ~CpuExecutable() override {} - StatusOr> ExecuteOnStream( + StatusOr ExecuteOnStream( const ServiceExecutableRunOptions* run_options, tensorflow::gtl::ArraySlice arguments, HloExecutionProfile* hlo_execution_profile) override; - StatusOr> ExecuteAsyncOnStream( + StatusOr ExecuteAsyncOnStream( const ServiceExecutableRunOptions* run_options, tensorflow::gtl::ArraySlice arguments) override; @@ -90,29 +90,27 @@ class CpuExecutable : public Executable { // assignment. Each vector element corresponds to a particular Index. If // a vector element already contains a non-null DeviceMemoryBase, then no // buffer is assigned for this element. - Status AllocateBuffers( - DeviceMemoryAllocator* memory_allocator, int device_ordinal, - std::vector* buffers); + Status AllocateBuffers(DeviceMemoryAllocator* memory_allocator, + int device_ordinal, + std::vector* buffers); // Calls the generated function performing the computation with the given // arguments using the supplied buffers. Status ExecuteComputeFunction( const ExecutableRunOptions* run_options, tensorflow::gtl::ArraySlice arguments, - tensorflow::gtl::ArraySlice - buffers, + tensorflow::gtl::ArraySlice buffers, HloExecutionProfile* hlo_execution_profile); - // Create a ShapedBuffer for holding the result of the computation. The + // Creates a ShapedBuffer for holding the result of the computation. The // addresses (DeviceMemoryBases) are set according to buffer assignment. // 'buffers_in_result' should point to a vector of the same size as // 'allocated_buffers'. 
An element in buffers_in_result is set to true if the // corresponding buffer is live out of the computation (and thus contained in // the returned ShapedBuffer). - StatusOr> CreateResultShapedBuffer( + StatusOr CreateResultShapedBuffer( const ServiceExecutableRunOptions* run_options, - tensorflow::gtl::ArraySlice - allocated_buffers, + tensorflow::gtl::ArraySlice allocated_buffers, std::vector* buffers_in_result); // Returns the points-to set of the root instruction of the entry diff --git a/tensorflow/compiler/xla/service/cpu/cpu_runtime.cc b/tensorflow/compiler/xla/service/cpu/cpu_runtime.cc index 9a3bd68c80c6e8bcdb231c63ba025d1f73619eb7..872b0be1f8a8ec317bf059fd1c4d2550e2ad161a 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_runtime.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_runtime.cc @@ -37,6 +37,14 @@ extern const char* const kEigenMatMulF32SymbolName = "__xla_cpu_runtime_EigenMatMulF32"; extern const char* const kEigenMatMulF64SymbolName = "__xla_cpu_runtime_EigenMatMulF64"; +extern const char* const kMKLMatMulF32SymbolName = + "__xla_cpu_runtime_MKLMatMulF32"; +extern const char* const kMKLMatMulF64SymbolName = + "__xla_cpu_runtime_MKLMatMulF64"; +extern const char* const kMKLSingleThreadedMatMulF32SymbolName = + "__xla_cpu_runtime_MKLSingleThreadedMatMulF32"; +extern const char* const kMKLSingleThreadedMatMulF64SymbolName = + "__xla_cpu_runtime_MKLSingleThreadedMatMulF64"; extern const char* const kEigenConvF16SymbolName = "__xla_cpu_runtime_EigenConvF16"; extern const char* const kEigenConvF32SymbolName = diff --git a/tensorflow/compiler/xla/service/cpu/cpu_runtime.h b/tensorflow/compiler/xla/service/cpu/cpu_runtime.h index e61d6ea28b633398863357541e056ee887582f9c..e392e231b4c71b2e206640a47b712de70a148582 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_runtime.h +++ b/tensorflow/compiler/xla/service/cpu/cpu_runtime.h @@ -44,6 +44,10 @@ namespace runtime { extern const char* const kEigenMatMulF16SymbolName; extern const char* const 
kEigenMatMulF32SymbolName; extern const char* const kEigenMatMulF64SymbolName; +extern const char* const kMKLMatMulF32SymbolName; +extern const char* const kMKLMatMulF64SymbolName; +extern const char* const kMKLSingleThreadedMatMulF32SymbolName; +extern const char* const kMKLSingleThreadedMatMulF64SymbolName; extern const char* const kEigenConvF16SymbolName; extern const char* const kEigenConvF32SymbolName; extern const char* const kEigenFftSymbolName; diff --git a/tensorflow/compiler/xla/service/cpu/cpu_runtime_test.cc b/tensorflow/compiler/xla/service/cpu/cpu_runtime_test.cc index f385829cdf5cafbd35e083f47106734cdd5dde88..2ac950e6d93ade315808f2ca1d0bdd7bc85f53b9 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_runtime_test.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_runtime_test.cc @@ -24,6 +24,7 @@ limitations under the License. #include "tensorflow/compiler/xla/client/local_client.h" #include "tensorflow/compiler/xla/ptr_util.h" #include "tensorflow/compiler/xla/service/cpu/runtime_matmul.h" +#include "tensorflow/compiler/xla/service/cpu/runtime_matmul_mkl.h" #include "tensorflow/compiler/xla/service/cpu/runtime_single_threaded_matmul.h" #include "tensorflow/compiler/xla/types.h" #include "tensorflow/core/common_runtime/eigen_thread_pool.h" @@ -130,25 +131,23 @@ MatMulShape MatMulShapes[] = { // * transpose_lhs // * transpose_rhs // * single_threaded -using EigenMatMulTestParam = std::tuple; +using MatMulTestParam = std::tuple; -class EigenMatMulTest - : public CpuRuntimeTest, - public ::testing::WithParamInterface { +class EigenMatMulTest : public CpuRuntimeTest, + public ::testing::WithParamInterface { public: - static string Name( - const ::testing::TestParamInfo& info) { + static string Name(const ::testing::TestParamInfo& info) { MatMulShape shape = std::get<0>(info.param); bool transpose_lhs = std::get<1>(info.param); bool transpose_rhs = std::get<2>(info.param); bool single_threaded = std::get<3>(info.param); return tensorflow::strings::Printf( 
- "MatMul_%lld_%lld_%lld_%s%s%s_threaded", shape.m, shape.k, shape.n, + "EigenMatMul_%lld_%lld_%lld_%s%s%s_threaded", shape.m, shape.k, shape.n, transpose_lhs ? "Tlhs_" : "", transpose_rhs ? "Trhs_" : "", single_threaded ? "single" : "multi"); } -}; // namespace xla +}; TEST_P(EigenMatMulTest, DoIt) { MatMulShape shape = std::get<0>(GetParam()); @@ -169,5 +168,74 @@ INSTANTIATE_TEST_CASE_P(EigenMatMulTestInstantiaion, EigenMatMulTest, ::testing::Bool()), EigenMatMulTest::Name); +#ifdef INTEL_MKL +class MKLMatMulTest : public CpuRuntimeTest, + public ::testing::WithParamInterface { + public: + static string Name(const ::testing::TestParamInfo& info) { + MatMulShape shape = std::get<0>(info.param); + bool transpose_lhs = std::get<1>(info.param); + bool transpose_rhs = std::get<2>(info.param); + bool single_threaded = std::get<3>(info.param); + + return tensorflow::strings::Printf( + "MKLMatMul_%lld_%lld_%lld_%s%s%s_threaded", shape.m, shape.k, shape.n, + transpose_lhs ? "Tlhs_" : "", transpose_rhs ? "Trhs_" : "", + single_threaded ? "single" : "multi"); + } +}; + +std::unique_ptr> MKLMatrixMultiply(const Array2D& a, + const Array2D& b, + bool transpose_lhs, + bool transpose_rhs, + bool single_threaded) { + CHECK_EQ(a.width(), b.height()); + int64 m = a.height(); + int64 n = b.width(); + int64 k = a.width(); + + // The MKL matmul runtime function expects the matrix to be in column major + // order and array2d is in row-major order. Create transposes of a and b. The + // 'data' buffer in the transposed array is the original array in column major + // order. + auto a_transpose = MaybeTransposeArray2D(a, !transpose_lhs); + auto b_transpose = MaybeTransposeArray2D(b, !transpose_rhs); + + // Since we're going to transpose c before returning it, swap the order of the + // dimension sizes to ensure the returned array is properly dimensioned. 
+ auto c_transpose = MakeUnique>(n, m); + if (single_threaded) { + __xla_cpu_runtime_MKLSingleThreadedMatMulF32( + nullptr, c_transpose->data(), a_transpose->data(), b_transpose->data(), + m, n, k, transpose_lhs, transpose_rhs); + } else { + __xla_cpu_runtime_MKLMatMulF32(nullptr, c_transpose->data(), + a_transpose->data(), b_transpose->data(), m, + n, k, transpose_lhs, transpose_rhs); + } + return MaybeTransposeArray2D(*c_transpose, true); +} + +TEST_P(MKLMatMulTest, DoIt) { + MatMulShape shape = std::get<0>(GetParam()); + bool transpose_lhs = std::get<1>(GetParam()); + bool transpose_rhs = std::get<2>(GetParam()); + bool single_threaded = std::get<3>(GetParam()); + + auto a = MakeLinspaceArray2D(0.0, 1.0, shape.m, shape.k); + auto b = MakeLinspaceArray2D(-2.0, 2.0, shape.k, shape.n); + auto c = + MKLMatrixMultiply(*a, *b, transpose_lhs, transpose_rhs, single_threaded); + CheckMatrixMultiply(*a, *b, *c); +} + +INSTANTIATE_TEST_CASE_P(MKLMatMulTestInstantiaion, MKLMatMulTest, + ::testing::Combine(::testing::ValuesIn(MatMulShapes), + ::testing::Bool(), ::testing::Bool(), + ::testing::Bool()), + MKLMatMulTest::Name); +#endif // INTEL_MKL + } // namespace } // namespace xla diff --git a/tensorflow/compiler/xla/service/cpu/cpu_transfer_manager.cc b/tensorflow/compiler/xla/service/cpu/cpu_transfer_manager.cc index f5e61aef534da57ce13d3ee9bbeaeaec31f53d2e..9b39e7f5765ae5eb6a25c06eef4d74b1c00e5c91 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_transfer_manager.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_transfer_manager.cc @@ -34,8 +34,6 @@ limitations under the License. 
#include "tensorflow/core/platform/notification.h" #include "tensorflow/core/platform/stream_executor_no_cuda.h" -namespace se = ::perftools::gputools; - namespace xla { namespace { @@ -241,21 +239,20 @@ Status CpuTransferManager::TransferLiteralFromOutfeed( } StatusOr CpuTransferManager::TransferTupleBuffersFromOutfeed( - perftools::gputools::StreamExecutor* executor, + se::StreamExecutor* executor, tensorflow::gtl::ArraySlice> buffer_data) { return TransferBuffersFromOutfeedInternal(executor, buffer_data, /*is_tuple=*/true); } StatusOr CpuTransferManager::TransferArrayBufferFromOutfeed( - perftools::gputools::StreamExecutor* executor, void* destination, - int64 size_bytes) { + se::StreamExecutor* executor, void* destination, int64 size_bytes) { return TransferBuffersFromOutfeedInternal( executor, {{destination, size_bytes}}, /*is_tuple=*/false); } StatusOr CpuTransferManager::TransferBuffersFromOutfeedInternal( - perftools::gputools::StreamExecutor* executor, + se::StreamExecutor* executor, tensorflow::gtl::ArraySlice> buffer_data, bool is_tuple) { std::vector> buffers; @@ -306,8 +303,8 @@ static std::unique_ptr CreateCpuTransferManager() { } static bool InitModule() { - xla::TransferManager::RegisterTransferManager(se::host::kHostPlatformId, - &CreateCpuTransferManager); + xla::TransferManager::RegisterTransferManager( + stream_executor::host::kHostPlatformId, &CreateCpuTransferManager); return true; } static bool module_initialized = InitModule(); diff --git a/tensorflow/compiler/xla/service/cpu/cpu_transfer_manager.h b/tensorflow/compiler/xla/service/cpu/cpu_transfer_manager.h index 6c7524d94716464218ba18ad9950f702d2759f89..3ecb0d236498371f48caf63249f9cd4e8777752b 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_transfer_manager.h +++ b/tensorflow/compiler/xla/service/cpu/cpu_transfer_manager.h @@ -37,36 +37,35 @@ class CpuTransferManager : public GenericTransferManager { CpuTransferManager(); ~CpuTransferManager() override {} - Status 
TransferLiteralToInfeed(perftools::gputools::StreamExecutor* executor, + Status TransferLiteralToInfeed(se::StreamExecutor* executor, const Literal& literal) override; - Status TransferBufferToInfeed(perftools::gputools::StreamExecutor* executor, - int64 size, const void* source) override; - Status TransferLiteralFromOutfeed( - perftools::gputools::StreamExecutor* executor, const Shape& literal_shape, - Literal* literal) override; + Status TransferBufferToInfeed(se::StreamExecutor* executor, int64 size, + const void* source) override; + Status TransferLiteralFromOutfeed(se::StreamExecutor* executor, + const Shape& literal_shape, + Literal* literal) override; private: // Transfers infeed data to device. InfeedBuffer->Done() must be // called to clean up the memory allocated for InfeedBuffer. StatusOr TransferBufferToInfeedInternal( - perftools::gputools::StreamExecutor* executor, int64 size, - const void* source); + se::StreamExecutor* executor, int64 size, const void* source); // Helper that transfers a tuple of element buffers from the device's outfeed. StatusOr TransferTupleBuffersFromOutfeed( - perftools::gputools::StreamExecutor* executor, + se::StreamExecutor* executor, tensorflow::gtl::ArraySlice> buffer_data); // Helper that transfers an array buffer from the device's outfeed. - StatusOr TransferArrayBufferFromOutfeed( - perftools::gputools::StreamExecutor* executor, void* destination, - int64 size_bytes); + StatusOr TransferArrayBufferFromOutfeed(se::StreamExecutor* executor, + void* destination, + int64 size_bytes); // On success, returns the shape that was transferred from the outfeed -- if // is_tuple is true, the returned shape will be a tuple of the returned shapes // for the given buffers. 
StatusOr TransferBuffersFromOutfeedInternal( - perftools::gputools::StreamExecutor* executor, + se::StreamExecutor* executor, tensorflow::gtl::ArraySlice> buffer_data, bool is_tuple); diff --git a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc index 8b1e20d79e90fcc32e985ffb855a1a10cdd2f2b9..495fecc4aa8b3cf8fcb3ab63d82d8146546854da 100644 --- a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc @@ -918,28 +918,35 @@ tensorflow::Status DotOpEmitter::EmitCallToRuntime() { // The two transpose_... parameters are actually booleans, but we use int32 // to avoid target-dependent calling convention details. - bool multi_threaded_eigen = + bool multi_threaded = hlo_module_config_.debug_options().xla_cpu_multi_thread_eigen(); + bool use_mkl_dnn = hlo_module_config_.debug_options().xla_cpu_use_mkl_dnn(); PrimitiveType type = target_array_.GetShape().element_type(); llvm::Type* float_type; const char* fn_name; switch (type) { case F16: - fn_name = multi_threaded_eigen + fn_name = multi_threaded ? runtime::kEigenMatMulF16SymbolName : runtime::kEigenSingleThreadedMatMulF16SymbolName; float_type = ir_builder_->getHalfTy(); break; case F32: - fn_name = multi_threaded_eigen - ? runtime::kEigenMatMulF32SymbolName - : runtime::kEigenSingleThreadedMatMulF32SymbolName; + fn_name = multi_threaded + ? (use_mkl_dnn ? runtime::kMKLMatMulF32SymbolName + : runtime::kEigenMatMulF32SymbolName) + : (use_mkl_dnn + ? runtime::kMKLSingleThreadedMatMulF32SymbolName + : runtime::kEigenSingleThreadedMatMulF32SymbolName); float_type = ir_builder_->getFloatTy(); break; case F64: - fn_name = multi_threaded_eigen - ? runtime::kEigenMatMulF64SymbolName - : runtime::kEigenSingleThreadedMatMulF64SymbolName; + fn_name = multi_threaded + ? (use_mkl_dnn ? runtime::kMKLMatMulF64SymbolName + : runtime::kEigenMatMulF64SymbolName) + : (use_mkl_dnn + ? 
runtime::kMKLSingleThreadedMatMulF64SymbolName + : runtime::kEigenSingleThreadedMatMulF64SymbolName); float_type = ir_builder_->getDoubleTy(); break; default: @@ -1063,7 +1070,8 @@ static bool AreValidGemmShapes(const Shape& lhs_shape, const Shape& rhs_shape, // 1) be matrices with no padding, and // 2) have an allowed element type. PrimitiveType output_primitive_type = output_shape.element_type(); - return (output_primitive_type == F32 || output_primitive_type == F16) && + return (output_primitive_type == F64 || output_primitive_type == F32 || + output_primitive_type == F16) && IsRank2WithNoPadding(lhs_shape) && IsRank2WithNoPadding(rhs_shape) && IsRank2WithNoPadding(output_shape); } diff --git a/tensorflow/compiler/xla/service/cpu/elemental_ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/elemental_ir_emitter.cc index 99c5e16db70c6a203b4751c1ed8a106c0dc260e6..e97113dfa0f59e791d614c0093d0781e49c48ee4 100644 --- a/tensorflow/compiler/xla/service/cpu/elemental_ir_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/elemental_ir_emitter.cc @@ -115,7 +115,7 @@ llvm_ir::ElementGenerator CpuElementalIrEmitter::MakeElementGenerator( for (int i = 0; i < hlo->operand_count(); i++) { TF_ASSIGN_OR_RETURN(llvm::Value * operand_value, operand_to_generator.at(hlo->operand(i))( - ElementwiseSourceIndex(index, *hlo, 0))); + ElementwiseSourceIndex(index, *hlo, i))); operands.push_back(operand_value); } return ir_emitter_->EmitScalarCall(hlo->shape().element_type(), diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc index 3405277d449f2d9e558f2d3f83277163655af592..f990ee278526c396d9b55ae49abdaa9bfd2f9d5f 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc @@ -2076,7 +2076,7 @@ Status IrEmitter::HandleFusion(HloInstruction* fusion) { TF_RETURN_IF_ERROR(ElementTypesSameAndSupported( /*instruction=*/*root, /*operands=*/{lhs, rhs}, - 
/*supported_types=*/{F16, F32})); + /*supported_types=*/{F16, F32, F64})); llvm_ir::IrArray lhs_array(GetIrArrayFor(lhs)); llvm_ir::IrArray rhs_array(GetIrArrayFor(rhs)); diff --git a/tensorflow/compiler/xla/service/cpu/parallel_cpu_executable.cc b/tensorflow/compiler/xla/service/cpu/parallel_cpu_executable.cc index 07a9f0efcb64db4b2ff0c6518d4b48eee9a505e0..a2bd4fa195bd93211914da172e7aed59b713ba8b 100644 --- a/tensorflow/compiler/xla/service/cpu/parallel_cpu_executable.cc +++ b/tensorflow/compiler/xla/service/cpu/parallel_cpu_executable.cc @@ -49,8 +49,6 @@ limitations under the License. #include "tensorflow/core/platform/mutex.h" #include "tensorflow/core/platform/types.h" -namespace se = ::perftools::gputools; - namespace xla { namespace cpu { @@ -325,7 +323,7 @@ const void** Executor::GetOperandBuffers(HloInstruction* instruction) { Status ParallelCpuExecutable::AllocateBuffers( DeviceMemoryAllocator* memory_allocator, int device_ordinal, - std::vector* buffers) { + std::vector* buffers) { CHECK_EQ(buffers->size(), assignment_->Allocations().size()); VLOG(3) << "Allocating " << assignment_->Allocations().size() << " allocations for module " << module().name(); @@ -449,7 +447,7 @@ Status ParallelCpuExecutable::ExecuteComputeFunctions( return Status::OK(); } -StatusOr> ParallelCpuExecutable::ExecuteOnStream( +StatusOr ParallelCpuExecutable::ExecuteOnStream( const ServiceExecutableRunOptions* run_options, tensorflow::gtl::ArraySlice arguments, HloExecutionProfile* hlo_execution_profile) { @@ -461,7 +459,7 @@ StatusOr> ParallelCpuExecutable::ExecuteOnStream( DeviceMemoryAllocator* memory_allocator = run_options->allocator(); std::vector buffers(assignment_->Allocations().size()); - auto result_buffer = MakeUnique( + ShapedBuffer result_buffer( /*on_host_shape=*/result_shape(), /*on_device_shape=*/result_shape(), stream->parent()->platform(), stream->parent()->device_ordinal()); @@ -474,7 +472,7 @@ StatusOr> ParallelCpuExecutable::ExecuteOnStream( // Copy 
DeviceMemoryBase values which into the respective location in // ShapedBuffer which is returned to the caller. std::vector buffers_in_result(assignment_->Allocations().size(), false); - TF_RETURN_IF_ERROR(result_buffer->buffers().ForEachMutableElementWithStatus( + TF_RETURN_IF_ERROR(result_buffer.buffers().ForEachMutableElementWithStatus( [&](const ShapeIndex& index, se::DeviceMemoryBase* device_memory) { const auto& sources = this->GetRootPointsToSet().element(index); @@ -513,8 +511,7 @@ StatusOr> ParallelCpuExecutable::ExecuteOnStream( return std::move(result_buffer); } -StatusOr> -ParallelCpuExecutable::ExecuteAsyncOnStream( +StatusOr ParallelCpuExecutable::ExecuteAsyncOnStream( const ServiceExecutableRunOptions* run_options, tensorflow::gtl::ArraySlice arguments) { // TODO(b/30671675): Implement asynchronous execution mode. diff --git a/tensorflow/compiler/xla/service/cpu/parallel_cpu_executable.h b/tensorflow/compiler/xla/service/cpu/parallel_cpu_executable.h index 87c0a3df458eb4b3f217192597e0de1576304367..5ce84fa99640c782d85e8bcedd19820788737a38 100644 --- a/tensorflow/compiler/xla/service/cpu/parallel_cpu_executable.h +++ b/tensorflow/compiler/xla/service/cpu/parallel_cpu_executable.h @@ -59,12 +59,12 @@ class ParallelCpuExecutable : public Executable { std::unique_ptr hlo_profile_index_map); ~ParallelCpuExecutable() override {} - StatusOr> ExecuteOnStream( + StatusOr ExecuteOnStream( const ServiceExecutableRunOptions* run_options, tensorflow::gtl::ArraySlice arguments, HloExecutionProfile* hlo_execution_profile) override; - StatusOr> ExecuteAsyncOnStream( + StatusOr ExecuteAsyncOnStream( const ServiceExecutableRunOptions* run_options, tensorflow::gtl::ArraySlice arguments) override; @@ -89,17 +89,16 @@ class ParallelCpuExecutable : public Executable { // assignment. Each vector element corresponds to a particular Index. If // a vector element already contains a non-null DeviceMemoryBase, then no // buffer is assigned for this element. 
- Status AllocateBuffers( - DeviceMemoryAllocator* memory_allocator, int device_ordinal, - std::vector* buffers); + Status AllocateBuffers(DeviceMemoryAllocator* memory_allocator, + int device_ordinal, + std::vector* buffers); // Calls the generated functions in 'function_names_', performing the // computation with the given arguments using the supplied buffers. Status ExecuteComputeFunctions( const ServiceExecutableRunOptions* run_options, tensorflow::gtl::ArraySlice arguments, - tensorflow::gtl::ArraySlice - buffers, + tensorflow::gtl::ArraySlice buffers, HloExecutionProfile* hlo_execution_profile); // Returns the points-to set of the root instruction of the entry diff --git a/tensorflow/compiler/xla/service/cpu/parallel_loop_emitter.cc b/tensorflow/compiler/xla/service/cpu/parallel_loop_emitter.cc index 1e439cde11cf74272101b80c867a308e51ab26a6..54af40506dab48b3c2a3a44eb0b5f5fb213a32ec 100644 --- a/tensorflow/compiler/xla/service/cpu/parallel_loop_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/parallel_loop_emitter.cc @@ -29,7 +29,8 @@ ParallelLoopEmitter::ParallelLoopEmitter( : LoopEmitter(target_element_generator, target_array, ir_builder), dynamic_loop_bounds_(dynamic_loop_bounds) {} -llvm_ir::IrArray::Index ParallelLoopEmitter::EmitIndexAndSetExitBasicBlock( +std::vector +ParallelLoopEmitter::EmitIndexAndSetExitBasicBlock( tensorflow::StringPiece loop_name) { CHECK(!ShapeUtil::IsTuple(shape_)); CHECK(!ShapeUtil::IsScalar(shape_)); @@ -69,7 +70,7 @@ llvm_ir::IrArray::Index ParallelLoopEmitter::EmitIndexAndSetExitBasicBlock( exit_bb_ = loop_nest.GetOuterLoopExitBasicBlock(); CHECK(exit_bb_ != nullptr); - return array_index; + return {array_index}; } } // namespace cpu diff --git a/tensorflow/compiler/xla/service/cpu/parallel_loop_emitter.h b/tensorflow/compiler/xla/service/cpu/parallel_loop_emitter.h index ce92e36a944de33b991d97460f0b2e859ad56081..755715634aa70a822b21d25dcae20a8fe053477a 100644 --- 
a/tensorflow/compiler/xla/service/cpu/parallel_loop_emitter.h +++ b/tensorflow/compiler/xla/service/cpu/parallel_loop_emitter.h @@ -60,7 +60,7 @@ class ParallelLoopEmitter : public llvm_ir::LoopEmitter { ParallelLoopEmitter& operator=(const ParallelLoopEmitter&) = delete; ~ParallelLoopEmitter() override = default; - llvm_ir::IrArray::Index EmitIndexAndSetExitBasicBlock( + std::vector EmitIndexAndSetExitBasicBlock( tensorflow::StringPiece loop_name) override; private: diff --git a/tensorflow/compiler/xla/service/cpu/runtime_matmul_mkl.cc b/tensorflow/compiler/xla/service/cpu/runtime_matmul_mkl.cc new file mode 100644 index 0000000000000000000000000000000000000000..92da5f71c23d5e1450b39ea8b7bb8345f6fabb3b --- /dev/null +++ b/tensorflow/compiler/xla/service/cpu/runtime_matmul_mkl.cc @@ -0,0 +1,128 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifdef INTEL_MKL +#include "tensorflow/compiler/xla/service/cpu/runtime_matmul_mkl.h" +#include "third_party/intel_mkl_ml/include/mkl_cblas.h" +#include "third_party/intel_mkl_ml/include/mkl_service.h" + +#include "tensorflow/compiler/xla/executable_run_options.h" +#include "tensorflow/core/platform/types.h" + +#define EIGEN_USE_THREADS +#include "third_party/eigen3/unsupported/Eigen/CXX11/ThreadPool" + +using tensorflow::int32; +using tensorflow::int64; + +namespace { +// BLAS GEMM API for 32-bit Matrix Multiplication. + +// MatMul function is defined as: c = alpha * op(a) * op(b) + beta * c. +// Since XLA MatMul does not use alpha, beta, we set them to 1.0 and 0.0. +// Matrix lhs, rhs and out are all column-major. +void MatMulF32(const void* run_options_ptr, float* out, float* lhs, float* rhs, + int64 m, int64 n, int64 k, int32 transpose_lhs, + int32 transpose_rhs) { + const float alpha = 1.0f, beta = 0.0f; + // lda, ldb, and ldc are the leading dimensions of matrices a, b, and c, + // respectively. For column-major matrices, the leading dimension is the + // stride between consecutive columns (which equals the number of rows). If + // the matrix is transposed, the leading dimension is the stride between + // consecutive rows (which equals the number of columns). + int lda = transpose_lhs ? k : m; + int ldb = transpose_rhs ? n : k; + int ldc = m; + cblas_sgemm(CblasColMajor, transpose_lhs ? CblasTrans : CblasNoTrans, + transpose_rhs ? CblasTrans : CblasNoTrans, m, n, k, alpha, lhs, + lda, rhs, ldb, beta, out, ldc); +} + +// BLAS GEMM API for 64-bit Matrix Multiplication. + +// MatMul function is defined as: c = alpha * op(a) * op(b) + beta * c. +// Since XLA MatMul does not use alpha, beta, we set them to 1.0 and 0.0. +// Matrix lhs, rhs and out are all column-major.
+void MatMulF64(const void* run_options_ptr, double* out, double* lhs, + double* rhs, int64 m, int64 n, int64 k, int32 transpose_lhs, + int32 transpose_rhs) { + const float alpha = 1.0f, beta = 0.0f; + // lda, ldb, and ldc are the leading dimensions of matrices a, b, and c, + // respectively. For a column-major matrix, the leading dimension is the + // stride between consecutive columns (which equals the number of rows). If + // the matrix is transposed, the leading dimension is the stride between + // consecutive rows (which equals the number of columns). + int lda = transpose_lhs ? k : m; + int ldb = transpose_rhs ? n : k; + int ldc = m; + cblas_dgemm(CblasColMajor, transpose_lhs ? CblasTrans : CblasNoTrans, + transpose_rhs ? CblasTrans : CblasNoTrans, m, n, k, alpha, lhs, + lda, rhs, ldb, beta, out, ldc); +} + +} // namespace + +void __xla_cpu_runtime_MKLMatMulF32(const void* run_options_ptr, float* out, + float* lhs, float* rhs, int64 m, int64 n, + int64 k, int32 transpose_lhs, + int32 transpose_rhs) { + const xla::ExecutableRunOptions* run_options = + static_cast(run_options_ptr); + // BLAS GEMM MatMul uses OpenMP for parallelization, so we pass the thread + // number specified in intra_op_thread_pool to MKL. + int prev_num_threads = mkl_set_num_threads_local( + run_options->intra_op_thread_pool()->numThreads()); + MatMulF32(nullptr, out, lhs, rhs, m, n, k, transpose_lhs, transpose_rhs); + // Set thread number back to the previous number. + mkl_set_num_threads_local(prev_num_threads); +} +// BLAS GEMM API for 64-bit Matrix Multiplication +void __xla_cpu_runtime_MKLMatMulF64(const void* run_options_ptr, double* out, + double* lhs, double* rhs, int64 m, int64 n, + int64 k, int32 transpose_lhs, + int32 transpose_rhs) { + const xla::ExecutableRunOptions* run_options = + static_cast(run_options_ptr); + // BLAS GEMM MatMul uses OpenMP for parallelization, so we pass the thread + // number specified in intra_op_thread_pool to MKL. 
+ int prev_num_threads = mkl_set_num_threads_local( + run_options->intra_op_thread_pool()->numThreads()); + MatMulF64(nullptr, out, lhs, rhs, m, n, k, transpose_lhs, transpose_rhs); + // Set thread number back to the previous number. + mkl_set_num_threads_local(prev_num_threads); +} +void __xla_cpu_runtime_MKLSingleThreadedMatMulF32(const void* run_options_ptr, + float* out, float* lhs, + float* rhs, int64 m, int64 n, + int64 k, int32 transpose_lhs, + int32 transpose_rhs) { + // Set the thread number to 1 for single threaded execution. + int prev_num_threads = mkl_set_num_threads_local(1); + MatMulF32(nullptr, out, lhs, rhs, m, n, k, transpose_lhs, transpose_rhs); + // Set thread number back to the previous number. + mkl_set_num_threads_local(prev_num_threads); +} +void __xla_cpu_runtime_MKLSingleThreadedMatMulF64(const void* run_options_ptr, + double* out, double* lhs, + double* rhs, int64 m, int64 n, + int64 k, int32 transpose_lhs, + int32 transpose_rhs) { + // Set the thread number to 1 for single threaded execution. + int prev_num_threads = mkl_set_num_threads_local(1); + MatMulF64(nullptr, out, lhs, rhs, m, n, k, transpose_lhs, transpose_rhs); + // Set thread number back to the previous number. + mkl_set_num_threads_local(prev_num_threads); +} +#endif // INTEL_MKL diff --git a/tensorflow/compiler/xla/service/cpu/runtime_matmul_mkl.h b/tensorflow/compiler/xla/service/cpu/runtime_matmul_mkl.h new file mode 100644 index 0000000000000000000000000000000000000000..831b796efb971f6fb0170e2321c00ac415f2830f --- /dev/null +++ b/tensorflow/compiler/xla/service/cpu/runtime_matmul_mkl.h @@ -0,0 +1,84 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License.
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_CPU_RUNTIME_MATMUL_MKL_H_ +#define TENSORFLOW_COMPILER_XLA_SERVICE_CPU_RUNTIME_MATMUL_MKL_H_ + +#include +#include "tensorflow/core/platform/types.h" +#ifdef INTEL_MKL +#include "third_party/intel_mkl_ml/include/mkl_cblas.h" + +extern void __xla_cpu_runtime_MKLMatMulF32( + const void* /* xla::ExecutableRunOptions* */ run_options_ptr, float* out, + float* lhs, float* rhs, tensorflow::int64 m, tensorflow::int64 n, + tensorflow::int64 k, tensorflow::int32 transpose_lhs, + tensorflow::int32 transpose_rhs); +extern void __xla_cpu_runtime_MKLMatMulF64( + const void* /* xla::ExecutableRunOptions* */ run_options_ptr, double* out, + double* lhs, double* rhs, tensorflow::int64 m, tensorflow::int64 n, + tensorflow::int64 k, tensorflow::int32 transpose_lhs, + tensorflow::int32 transpose_rhs); +extern void __xla_cpu_runtime_MKLSingleThreadedMatMulF32( + const void* /* xla::ExecutableRunOptions* */ run_options_ptr, float* out, + float* lhs, float* rhs, tensorflow::int64 m, tensorflow::int64 n, + tensorflow::int64 k, tensorflow::int32 transpose_lhs, + tensorflow::int32 transpose_rhs); +extern void __xla_cpu_runtime_MKLSingleThreadedMatMulF64( + const void* /* xla::ExecutableRunOptions* */ run_options_ptr, double* out, + double* lhs, double* rhs, tensorflow::int64 m, tensorflow::int64 n, + tensorflow::int64 k, tensorflow::int32 transpose_lhs, + tensorflow::int32 transpose_rhs); + +#else +extern void __xla_cpu_runtime_MKLMatMulF32( + const void* /* 
xla::ExecutableRunOptions* */ run_options_ptr, float* out, + float* lhs, float* rhs, tensorflow::int64 m, tensorflow::int64 n, + tensorflow::int64 k, tensorflow::int32 transpose_lhs, + tensorflow::int32 transpose_rhs) { + std::cerr << "Attempt to call MKL MatMul runtime library without defining " + "INTEL_MKL. Add --config=mkl to build with MKL."; + exit(1); +} +extern void __xla_cpu_runtime_MKLMatMulF64( + const void* /* xla::ExecutableRunOptions* */ run_options_ptr, double* out, + double* lhs, double* rhs, tensorflow::int64 m, tensorflow::int64 n, + tensorflow::int64 k, tensorflow::int32 transpose_lhs, + tensorflow::int32 transpose_rhs) { + std::cerr << "Attempt to call MKL MatMul runtime library without defining " + "INTEL_MKL. Add --config=mkl to build with MKL."; + exit(1); +} +extern void __xla_cpu_runtime_MKLSingleThreadedMatMulF32( + const void* /* xla::ExecutableRunOptions* */ run_options_ptr, float* out, + float* lhs, float* rhs, tensorflow::int64 m, tensorflow::int64 n, + tensorflow::int64 k, tensorflow::int32 transpose_lhs, + tensorflow::int32 transpose_rhs) { + std::cerr << "Attempt to call MKL MatMul runtime library without defining " + "INTEL_MKL. Add --config=mkl to build with MKL."; + exit(1); +} +extern void __xla_cpu_runtime_MKLSingleThreadedMatMulF64( + const void* /* xla::ExecutableRunOptions* */ run_options_ptr, double* out, + double* lhs, double* rhs, tensorflow::int64 m, tensorflow::int64 n, + tensorflow::int64 k, tensorflow::int32 transpose_lhs, + tensorflow::int32 transpose_rhs) { + std::cerr << "Attempt to call MKL MatMul runtime library without defining " + "INTEL_MKL. 
Add --config=mkl to build with MKL."; + exit(1); +} + +#endif // INTEL_MKL +#endif // TENSORFLOW_COMPILER_XLA_SERVICE_CPU_RUNTIME_MATMUL_MKL_H_ diff --git a/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc b/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc index 80c24eaccfc2a83f8f3f311d60860715668d0c08..b7ce5bbe47482320bfb9524c8f366a463b9579ed 100644 --- a/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc +++ b/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc @@ -35,6 +35,7 @@ limitations under the License. #include "tensorflow/compiler/xla/service/cpu/runtime_fork_join.h" #include "tensorflow/compiler/xla/service/cpu/runtime_fp16.h" #include "tensorflow/compiler/xla/service/cpu/runtime_matmul.h" +#include "tensorflow/compiler/xla/service/cpu/runtime_matmul_mkl.h" #include "tensorflow/compiler/xla/service/cpu/runtime_single_threaded_conv2d.h" #include "tensorflow/compiler/xla/service/cpu/runtime_single_threaded_matmul.h" #include "tensorflow/compiler/xla/service/cpu/windows_compatibility.h" @@ -87,7 +88,6 @@ SimpleOrcJIT::SimpleOrcJIT(const llvm::TargetOptions& target_options, /*MAttrs=*/DetectMachineAttributes()))), disassembler_(*target_machine_), data_layout_(target_machine_->createDataLayout()), - execution_session_(string_pool_), symbol_resolver_(llvm::orc::createLegacyLookupResolver( [this](const std::string& name) -> llvm::JITSymbol { return this->ResolveRuntimeSymbol(name); @@ -184,6 +184,10 @@ bool RegisterKnownJITSymbols() { REGISTER_CPU_RUNTIME_SYMBOL(EigenMatMulF16); REGISTER_CPU_RUNTIME_SYMBOL(EigenMatMulF32); REGISTER_CPU_RUNTIME_SYMBOL(EigenMatMulF64); + REGISTER_CPU_RUNTIME_SYMBOL(MKLMatMulF32); + REGISTER_CPU_RUNTIME_SYMBOL(MKLMatMulF64); + REGISTER_CPU_RUNTIME_SYMBOL(MKLSingleThreadedMatMulF32); + REGISTER_CPU_RUNTIME_SYMBOL(MKLSingleThreadedMatMulF64); REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedConvF16); REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedConvF32); REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedMatMulF16); diff 
--git a/tensorflow/compiler/xla/service/cpu/simple_orc_jit.h b/tensorflow/compiler/xla/service/cpu/simple_orc_jit.h index aaeff2de8785b99d271f13b261c63118bcf7bd4a..f4260a95bc45557b6cd969f7d3fff01c8b392575 100644 --- a/tensorflow/compiler/xla/service/cpu/simple_orc_jit.h +++ b/tensorflow/compiler/xla/service/cpu/simple_orc_jit.h @@ -102,7 +102,6 @@ class SimpleOrcJIT { std::unique_ptr target_machine_; const Disassembler disassembler_; const llvm::DataLayout data_layout_; - llvm::orc::SymbolStringPool string_pool_; llvm::orc::ExecutionSession execution_session_; std::shared_ptr symbol_resolver_; ObjLayerT object_layer_; diff --git a/tensorflow/compiler/xla/service/despecializer.cc b/tensorflow/compiler/xla/service/despecializer.cc new file mode 100644 index 0000000000000000000000000000000000000000..d938f3a2c4b5bfdd70d5a614b9890b4d7bf050f7 --- /dev/null +++ b/tensorflow/compiler/xla/service/despecializer.cc @@ -0,0 +1,35 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/compiler/xla/service/despecializer.h" + +#include "tensorflow/compiler/xla/service/bfloat16_normalization.h" +#include "tensorflow/compiler/xla/service/defuser.h" +#include "tensorflow/compiler/xla/service/implicit_broadcast_remover.h" + +namespace xla { + +Despecializer::Despecializer() : pipeline_("despecializer") { + // TODO(b/70588125): Also deal with window reversal in a fast way. + pipeline_.AddPass(); + pipeline_.AddPass(); + pipeline_.AddPass(); +} + +StatusOr Despecializer::Run(HloModule* module) { + return pipeline_.Run(module); +} + +} // namespace xla diff --git a/tensorflow/compiler/xla/service/despecializer.h b/tensorflow/compiler/xla/service/despecializer.h new file mode 100644 index 0000000000000000000000000000000000000000..af48f4ab6e506d295251239fe92db68cfec6dcfa --- /dev/null +++ b/tensorflow/compiler/xla/service/despecializer.h @@ -0,0 +1,45 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_DESPECIALIZER_H_ +#define TENSORFLOW_COMPILER_XLA_SERVICE_DESPECIALIZER_H_ + +#include "tensorflow/compiler/xla/service/hlo_module.h" +#include "tensorflow/compiler/xla/service/hlo_pass_interface.h" +#include "tensorflow/compiler/xla/service/hlo_pass_pipeline.h" +#include "tensorflow/compiler/xla/statusor.h" + +namespace xla { + +// Creates an HloPassPipeline containing multiple HloPasses that can +// despecialize an optimized HloModule. This is useful to run an HloModule +// optimized for one specific platform on a different platform (undoing platform +// specific passes) with matching numerics for comparison. +// +// Current despecialization passes are Defuser, ImplicitBroadcastRemover, +// and BFloat16MixedPrecisionRemoval. +class Despecializer : public HloPassInterface { + public: + Despecializer(); + tensorflow::StringPiece name() const override { return "despecializer"; } + StatusOr Run(HloModule* module) override; + + private: + HloPassPipeline pipeline_; +}; + +} // namespace xla + +#endif // TENSORFLOW_COMPILER_XLA_SERVICE_DESPECIALIZER_H_ diff --git a/tensorflow/compiler/xla/service/device_memory_allocator.cc b/tensorflow/compiler/xla/service/device_memory_allocator.cc index 78e7aa48accdbb51a8477455f5f9c004828c068f..35db4fd2a22cc1615ade77a801cb28c504db09a6 100644 --- a/tensorflow/compiler/xla/service/device_memory_allocator.cc +++ b/tensorflow/compiler/xla/service/device_memory_allocator.cc @@ -24,19 +24,16 @@ limitations under the License.
namespace xla { StreamExecutorMemoryAllocator::StreamExecutorMemoryAllocator( - const perftools::gputools::Platform* platform, - tensorflow::gtl::ArraySlice - stream_executors) + const se::Platform* platform, + tensorflow::gtl::ArraySlice stream_executors) : DeviceMemoryAllocator(platform), stream_executors_(stream_executors.begin(), stream_executors.end()) {} -StatusOr -StreamExecutorMemoryAllocator::Allocate(int device_ordinal, uint64 size, - bool retry_on_failure) { - TF_ASSIGN_OR_RETURN(perftools::gputools::StreamExecutor * stream_executor, +StatusOr StreamExecutorMemoryAllocator::Allocate( + int device_ordinal, uint64 size, bool retry_on_failure) { + TF_ASSIGN_OR_RETURN(se::StreamExecutor * stream_executor, GetStreamExecutor(device_ordinal)); - perftools::gputools::DeviceMemoryBase result = - stream_executor->AllocateArray(size); + se::DeviceMemoryBase result = stream_executor->AllocateArray(size); if (size > 0 && result == nullptr) { return ResourceExhausted( "Failed to allocate request for %s (%lluB) on device ordinal %d", @@ -47,22 +44,22 @@ StreamExecutorMemoryAllocator::Allocate(int device_ordinal, uint64 size, } tensorflow::Status StreamExecutorMemoryAllocator::Deallocate( - int device_ordinal, perftools::gputools::DeviceMemoryBase* mem) { + int device_ordinal, se::DeviceMemoryBase* mem) { if (!mem->is_null()) { - TF_ASSIGN_OR_RETURN(perftools::gputools::StreamExecutor * stream_executor, + TF_ASSIGN_OR_RETURN(se::StreamExecutor * stream_executor, GetStreamExecutor(device_ordinal)); // We make a local copy of 'mem' so the original is not zeroed out by the // Deallocate() call below. This gives us a better chance of // catching double-free bugs, since Deallocate silently succeeds for null // values. 
- perftools::gputools::DeviceMemoryBase mem_copy(*mem); + se::DeviceMemoryBase mem_copy(*mem); stream_executor->Deallocate(&mem_copy); } return tensorflow::Status::OK(); } -StatusOr -StreamExecutorMemoryAllocator::GetStreamExecutor(int device_ordinal) { +StatusOr StreamExecutorMemoryAllocator::GetStreamExecutor( + int device_ordinal) { if (device_ordinal < 0) { return InvalidArgument("device ordinal value (%d) must be non-negative", device_ordinal); diff --git a/tensorflow/compiler/xla/service/device_memory_allocator.h b/tensorflow/compiler/xla/service/device_memory_allocator.h index 39dfad84c1c1c1c461c24de555ecd919cea47d83..da45c4d45a1c56fd39b1e3e17ff131de59ceeced 100644 --- a/tensorflow/compiler/xla/service/device_memory_allocator.h +++ b/tensorflow/compiler/xla/service/device_memory_allocator.h @@ -33,30 +33,42 @@ class DeviceMemoryAllocator { public: // Parameter platform indicates which platform the allocator allocates memory // on. Must be non-null. - explicit DeviceMemoryAllocator(const perftools::gputools::Platform* platform) + explicit DeviceMemoryAllocator(const se::Platform* platform) : platform_(platform) {} virtual ~DeviceMemoryAllocator() {} // 'retry_on_failure': If false, and the first attempt to allocate the memory - // fails, the allocation should return immediately without retrying. - // An example use case is optional scratch spaces where a failure - // has only performance impact. + // fails, the allocation should return immediately without retrying. An + // example use case is optional scratch spaces where a failure has only + // performance impact. + // // Allocate() should return a null pointer for a size-0 allocation. // Deallocate() must be a no-op for null pointers. 
- virtual StatusOr Allocate( - int device_ordinal, uint64 size, bool retry_on_failure = true) = 0; - virtual tensorflow::Status Deallocate( - int device_ordinal, perftools::gputools::DeviceMemoryBase* mem) = 0; + virtual StatusOr Allocate(int device_ordinal, + uint64 size, + bool retry_on_failure) = 0; + + // Two-arg version of Allocate(), which sets retry-on-failure to true. + // + // (We don't simply use a default argument on the virtual Allocate function + // because default args on virtual functions are disallowed by the Google + // style guide.) + StatusOr Allocate(int device_ordinal, uint64 size) { + return Allocate(device_ordinal, size, /*retry_on_failure=*/true); + } + + virtual tensorflow::Status Deallocate(int device_ordinal, + se::DeviceMemoryBase* mem) = 0; // Return the platform that the allocator allocates memory on. - const perftools::gputools::Platform* platform() const { return platform_; } + const se::Platform* platform() const { return platform_; } // Can we call Deallocate() as soon as a computation has been scheduled on // a stream, or do we have to wait for the computation to complete first? 
virtual bool AllowsAsynchronousDeallocation() const = 0; protected: - const perftools::gputools::Platform* platform_; + const se::Platform* platform_; }; // Default memory allocator for a platform which uses @@ -64,25 +76,27 @@ class DeviceMemoryAllocator { class StreamExecutorMemoryAllocator : public DeviceMemoryAllocator { public: StreamExecutorMemoryAllocator( - const perftools::gputools::Platform* platform, - tensorflow::gtl::ArraySlice - stream_executors); + const se::Platform* platform, + tensorflow::gtl::ArraySlice stream_executors); - StatusOr Allocate( - int device_ordinal, uint64 size, bool retry_on_failure = true) override; - tensorflow::Status Deallocate( - int device_ordinal, perftools::gputools::DeviceMemoryBase* mem) override; + StatusOr Allocate(int device_ordinal, uint64 size, + bool retry_on_failure) override; + + // Pull in two-arg overload that sets retry_on_failure to true. + using DeviceMemoryAllocator::Allocate; + + tensorflow::Status Deallocate(int device_ordinal, + se::DeviceMemoryBase* mem) override; bool AllowsAsynchronousDeallocation() const override; private: - StatusOr GetStreamExecutor( - int device_ordinal); + StatusOr GetStreamExecutor(int device_ordinal); // A vector indexed by device ordinal of StreamExecutors for each device of // the allocator's platform type. If an element is nullptr, then the device // with the respective device ordinal is not supported by XLA. 
- std::vector stream_executors_; + std::vector stream_executors_; }; } // namespace xla diff --git a/tensorflow/compiler/xla/service/dfs_hlo_visitor.h b/tensorflow/compiler/xla/service/dfs_hlo_visitor.h index 56723e765048698baedc50ae7b189d0287ee56b8..0528b076027603796a445d8b0e9cbcebd1b513a7 100644 --- a/tensorflow/compiler/xla/service/dfs_hlo_visitor.h +++ b/tensorflow/compiler/xla/service/dfs_hlo_visitor.h @@ -147,6 +147,9 @@ class DfsHloVisitorBase { virtual Status HandleLog(HloInstructionPtr hlo) { return HandleElementwiseUnary(hlo); } + virtual Status HandleClz(HloInstructionPtr hlo) { + return HandleElementwiseUnary(hlo); + } virtual Status HandleCos(HloInstructionPtr hlo) { return HandleElementwiseUnary(hlo); } diff --git a/tensorflow/compiler/xla/service/elemental_ir_emitter.cc b/tensorflow/compiler/xla/service/elemental_ir_emitter.cc index b6a0903b0eeaa04d8bc1488378c148b2016c5d48..56e35e260469e383848db7bdbdbaacd9c9f9e56c 100644 --- a/tensorflow/compiler/xla/service/elemental_ir_emitter.cc +++ b/tensorflow/compiler/xla/service/elemental_ir_emitter.cc @@ -293,6 +293,12 @@ StatusOr ElementalIrEmitter::EmitIntegerUnaryOp( return operand_value; } } + case HloOpcode::kClz: { + auto is_zero_undef = ir_builder_->getFalse(); + return llvm_ir::EmitCallToIntrinsic( + llvm::Intrinsic::ctlz, {operand_value, is_zero_undef}, + {operand_value->getType()}, ir_builder_); + } case HloOpcode::kSign: { bool is_signed = primitive_util::IsSignedIntegralType(op->shape().element_type()); @@ -1334,6 +1340,7 @@ llvm_ir::ElementGenerator ElementalIrEmitter::MakeElementGenerator( case HloOpcode::kAbs: case HloOpcode::kRoundNearestAfz: case HloOpcode::kCeil: + case HloOpcode::kClz: case HloOpcode::kConvert: case HloOpcode::kBitcastConvert: case HloOpcode::kCopy: diff --git a/tensorflow/compiler/xla/service/executable.cc b/tensorflow/compiler/xla/service/executable.cc index be92b1629a2d8dae57b315751bd4f7f9ccddf171..8218b5f7c87a26e6eb4563e72fdb598eb2e87a99 100644 --- 
a/tensorflow/compiler/xla/service/executable.cc +++ b/tensorflow/compiler/xla/service/executable.cc @@ -29,18 +29,19 @@ using tensorflow::gtl::ArraySlice; namespace xla { -StatusOr>> -Executable::ExecuteOnStreams( +StatusOr> Executable::ExecuteOnStreams( ArraySlice run_options, ArraySlice> arguments) { TF_RET_CHECK(run_options.size() == arguments.size()); - std::vector> return_values(run_options.size()); + std::vector return_values; + return_values.reserve(run_options.size()); if (run_options.size() == 1) { - TF_ASSIGN_OR_RETURN(return_values[0], + TF_ASSIGN_OR_RETURN(auto rv, ExecuteOnStream(&run_options[0], arguments[0], /*hlo_execution_profile=*/nullptr)); + return_values.push_back(std::move(rv)); return std::move(return_values); } @@ -48,8 +49,9 @@ Executable::ExecuteOnStreams( // We cannot BlockHostUntilDone() on the already-launched executions in case // of error, since if the executions communicate, the initially launched // executions may never complete if not all executions are running. 
- TF_ASSIGN_OR_RETURN(return_values[i], + TF_ASSIGN_OR_RETURN(auto rv, ExecuteAsyncOnStream(&run_options[i], arguments[i])); + return_values.push_back(std::move(rv)); } for (const auto& options : run_options) { TF_RET_CHECK(options.stream() != nullptr); @@ -58,13 +60,13 @@ Executable::ExecuteOnStreams( return std::move(return_values); } -StatusOr> Executable::ExecuteOnStreamWrapper( +StatusOr Executable::ExecuteOnStreamWrapper( const ServiceExecutableRunOptions* run_options, ExecutionProfile* profile, ArraySlice arguments) { - perftools::gputools::Stream* stream = run_options->stream(); - std::unique_ptr timer; + se::Stream* stream = run_options->stream(); + std::unique_ptr timer; if (profile != nullptr) { - timer.reset(new perftools::gputools::Timer(stream->parent())); + timer.reset(new se::Timer(stream->parent())); stream->InitTimer(timer.get()).ThenStartTimer(timer.get()); } @@ -78,8 +80,9 @@ StatusOr> Executable::ExecuteOnStreamWrapper( &hlo_profile_index_map()) : nullptr; - StatusOr> return_value = + StatusOr return_value = ExecuteOnStream(run_options, arguments, profile_ptr.get()); + TF_RETURN_IF_ERROR(return_value.status()); if (profile != nullptr) { VLOG(1) << "enqueueing 'stop timer' and blocking host until done..."; @@ -160,4 +163,24 @@ Status Executable::DumpSessionModule() { result); } +/* static */ Status Executable::DumpToDirectory(const string& directory_path, + string filename, + const HloSession& hlo_session) { + tensorflow::Env* env = tensorflow::Env::Default(); + if (!env->IsDirectory(directory_path).ok()) { + // NB! CreateDir does not work reliably with multiple XLA threads -- two + // threads can race to observe the absence of the dump directory and + // simultaneously try to create it, causing the "losing" thread to get a + // "directory already exists" error. 
+ TF_RETURN_IF_ERROR(env->RecursivelyCreateDir(directory_path)); + } + filename = SanitizeFileName(std::move(filename)); + string file_path = tensorflow::io::JoinPath(directory_path, filename); + string result; + TF_RET_CHECK( + tensorflow::SerializeToStringDeterministic(hlo_session, &result)); + return tensorflow::WriteStringToFile(tensorflow::Env::Default(), file_path, + result); +} + } // namespace xla diff --git a/tensorflow/compiler/xla/service/executable.h b/tensorflow/compiler/xla/service/executable.h index a157235f8af6ea64a488510e427bbae502c46ca6..bdbe119120f233c8ee7e1c23de27c0021838c8f7 100644 --- a/tensorflow/compiler/xla/service/executable.h +++ b/tensorflow/compiler/xla/service/executable.h @@ -22,6 +22,7 @@ limitations under the License. #include "tensorflow/compiler/xla/legacy_flags/debug_options_flags.h" #include "tensorflow/compiler/xla/service/computation_layout.h" #include "tensorflow/compiler/xla/service/device_memory_allocator.h" +#include "tensorflow/compiler/xla/service/hlo.pb.h" #include "tensorflow/compiler/xla/service/hlo_execution_profile.h" #include "tensorflow/compiler/xla/service/hlo_graph_dumper.h" #include "tensorflow/compiler/xla/service/hlo_module.h" @@ -62,14 +63,14 @@ class Executable { // enabled. // // Returns a shaped buffer containing the result of the computation. - virtual StatusOr> ExecuteOnStream( + virtual StatusOr ExecuteOnStream( const ServiceExecutableRunOptions* run_options, tensorflow::gtl::ArraySlice arguments, HloExecutionProfile* hlo_execution_profile) = 0; // Same as ExecuteOnStream(), but this call is non-blocking and returns as // soon as all of the operations are enqueued for launch on the stream. - virtual StatusOr> ExecuteAsyncOnStream( + virtual StatusOr ExecuteAsyncOnStream( const ServiceExecutableRunOptions* run_options, tensorflow::gtl::ArraySlice arguments) = 0; @@ -77,7 +78,7 @@ class Executable { // streams. 
arguments[i] contains the arguments to the execution on // run_options[i]->stream() and the returned value is at index i of the // returned vector. - virtual StatusOr>> ExecuteOnStreams( + virtual StatusOr> ExecuteOnStreams( tensorflow::gtl::ArraySlice run_options, tensorflow::gtl::ArraySlice< @@ -90,14 +91,14 @@ class Executable { // has completed. virtual Status PopulateExecutionProfile( HloExecutionProfile* hlo_execution_profile, - perftools::gputools::StreamExecutor* executor) { + se::StreamExecutor* executor) { return Status::OK(); } // Convenience wrapper for calling Executable::ExecuteOnStream. Sets up a // timer for the execution, sets up HLO profiling if enabled, and fills in the // given ExecutionProfile if non-null. - StatusOr> ExecuteOnStreamWrapper( + StatusOr ExecuteOnStreamWrapper( const ServiceExecutableRunOptions* run_options, ExecutionProfile* profile, tensorflow::gtl::ArraySlice arguments); @@ -155,6 +156,10 @@ class Executable { static Status DumpToDirectory(const string& directory_path, string filename, const SessionModule& session_module); + // Dump hlo_session to directory_path/filename. 
+ static Status DumpToDirectory(const string& directory_path, string filename, + const HloSession& hlo_session); + protected: mutable tensorflow::mutex mutex_; diff --git a/tensorflow/compiler/xla/service/flatten_call_graph.cc b/tensorflow/compiler/xla/service/flatten_call_graph.cc index 2b6caa149439a86d6d047605099bc3ff7b295a8e..85409b330b11537158059dcce8c2a96c98d38f30 100644 --- a/tensorflow/compiler/xla/service/flatten_call_graph.cc +++ b/tensorflow/compiler/xla/service/flatten_call_graph.cc @@ -93,7 +93,7 @@ Status FlattenNode(const CallGraphNode& node) { auto current = worklist.back(); worklist.pop_back(); for (auto* instruction : current->instructions()) { - if (GetInstructionCallContext(instruction) != + if (GetInstructionCallContext(instruction->opcode()) != CallContext::kSequential) { continue; } diff --git a/tensorflow/compiler/xla/service/gather_expander.cc b/tensorflow/compiler/xla/service/gather_expander.cc index 221ff7900f398166c193c495848a2afcfd4edc81..2d3e4b1fcdf6675955714cab262a8b2ca8ff4297 100644 --- a/tensorflow/compiler/xla/service/gather_expander.cc +++ b/tensorflow/compiler/xla/service/gather_expander.cc @@ -28,9 +28,15 @@ using tensorflow::gtl::ArraySlice; static StatusOr TransposeIndexVectorDimToLast( HloInstruction* gather_indices, int64 index_vector_dim) { const Shape& gather_indices_shape = gather_indices->shape(); + + if (gather_indices_shape.dimensions_size() == index_vector_dim) { + return gather_indices; + } + if (index_vector_dim == (gather_indices_shape.dimensions_size() - 1)) { return gather_indices; } + std::vector permutation; permutation.reserve(gather_indices_shape.dimensions_size()); for (int64 i = 0, e = gather_indices_shape.dimensions_size(); i < e; i++) { @@ -42,55 +48,35 @@ static StatusOr TransposeIndexVectorDimToLast( return MakeTransposeHlo(gather_indices, permutation); } -// If the gather_indices holds scalar indices (i.e. 
gather_indices has rank N -// and index_vector_dim is N) then reshape it to have a trailing degenerate -// dimension. This makes the code for slicing out the index vector more -// uniform. -static StatusOr DeScalarizeGatherIndices( - HloInstruction* gather_indices, int64 index_vector_dim) { - const Shape& gather_indices_shape = gather_indices->shape(); - if (index_vector_dim != gather_indices_shape.dimensions_size()) { - return gather_indices; - } - - DCHECK_EQ(index_vector_dim, gather_indices_shape.dimensions_size()); - - std::vector result_shape_dims; - c_copy(gather_indices_shape.dimensions(), - std::back_inserter(result_shape_dims)); - result_shape_dims.push_back(1); - - return MakeReshapeHlo(result_shape_dims, gather_indices); -} - // Canonicalizes the gather_indices tensors so that we only have deal with some // specific cases in the while loop that does the heavy lifting. // // See the "High Level Algorithm" section for a broader picture. static StatusOr CanonicalizeGatherIndices( HloInstruction* gather_indices, int64 index_vector_dim) { - // If gather_indices holds scalar indices, normalize it to hold index vectors - // of size 1. + // Transpose the non-index-vector dimensions to the front. TF_ASSIGN_OR_RETURN( - HloInstruction * descalarized_gather_indices, - DeScalarizeGatherIndices(gather_indices, index_vector_dim)); + HloInstruction * transposed_gather_indices, + TransposeIndexVectorDimToLast(gather_indices, index_vector_dim)); + bool indices_are_scalar = + index_vector_dim == gather_indices->shape().dimensions_size(); - // Transpose the non-index-vector dimensions to the front. - TF_ASSIGN_OR_RETURN(HloInstruction * transposed_gather_indices, - TransposeIndexVectorDimToLast(descalarized_gather_indices, - index_vector_dim)); + // The number of dimensions in gather_indices that are index dimensions. + const int64 index_dims_in_gather_indices = indices_are_scalar ? 0 : 1; // If there is only one index (i.e. 
gather_indices has rank 1 and this gather // is really just a dynamic slice) add a leading degenerate dimension for // uniformity. Otherwise create a "collapsed" leading dimension that subsumes // all of the non-index-vector dimensions. const Shape& shape = transposed_gather_indices->shape(); - if (shape.dimensions_size() == 1) { - return ExpandFirstDimIntoNDims(transposed_gather_indices, - {1, shape.dimensions(0)}); + if (shape.dimensions_size() == index_dims_in_gather_indices) { + return PrependDegenerateDims(transposed_gather_indices, 1); } else { - return CollapseFirstNDims(transposed_gather_indices, - shape.dimensions_size() - 1); + // Collapse all but the dimensions (0 or 1) in gather_indices containing the + // index vectors. + return CollapseFirstNDims( + transposed_gather_indices, + shape.dimensions_size() - index_dims_in_gather_indices); } } @@ -112,11 +98,7 @@ static StatusOr AdjustGatherDimsInAccumulator( // dynamic-slice. In that case, there is a leading degenerate gather // dimension that we added to make this special case play well with the // general while loop which we need to remove now. 
- CHECK_EQ(accumulator->shape().dimensions(0), 1); - ArraySlice reshaped_dim_sizes = - AsInt64Slice(accumulator->shape().dimensions()); - reshaped_dim_sizes.remove_prefix(1); - return MakeReshapeHlo(reshaped_dim_sizes, accumulator); + return ElideDegenerateDims(accumulator, {0}); } return ExpandFirstDimIntoNDims(accumulator, output_gather_dim_bounds); @@ -161,50 +143,73 @@ static StatusOr ExpandIndexVectorIntoOperandSpace( static StatusOr> GatherLoopBody( const HloInstruction& gather, HloInstruction* induction_var, const std::vector& incoming_loop_state) { + const GatherDimensionNumbers& dim_numbers = gather.gather_dimension_numbers(); CHECK_EQ(incoming_loop_state.size(), 3); HloInstruction* const operand = incoming_loop_state[0]; HloInstruction* const gather_indices = incoming_loop_state[1]; HloInstruction* const output_accumulator = incoming_loop_state[2]; - int64 index_vector_size = gather_indices->shape().dimensions(1); + bool has_scalar_indices = gather_indices->shape().dimensions_size() == 1; + CHECK_EQ(has_scalar_indices, + dim_numbers.index_vector_dim() == + gather.operand(1)->shape().dimensions_size()); TF_ASSIGN_OR_RETURN( HloInstruction * induction_var_as_vector, MakeBroadcastHlo(induction_var, /*broadcast_dimensions=*/{}, /*result_shape_bounds=*/{1})); - TF_ASSIGN_OR_RETURN( - HloInstruction * index_into_gather_indices, - PadVectorWithZeros(induction_var_as_vector, - /*zeros_to_prepend=*/0, /*zeros_to_append=*/1)); - - TF_ASSIGN_OR_RETURN( - HloInstruction * index_vector_2d, - MakeDynamicSliceHlo(gather_indices, index_into_gather_indices, - {1, index_vector_size})); + HloInstruction* index_vector; - TF_ASSIGN_OR_RETURN(HloInstruction * index_vector, - ElideDegenerateDims(index_vector_2d, {0})); + if (has_scalar_indices) { + // In this case gather_indices has rank 1 and induction_var_as_vector (of + // shape {1}) is an index into this rank 1 tensor. 
+ TF_ASSIGN_OR_RETURN( + index_vector, + MakeDynamicSliceHlo(gather_indices, induction_var_as_vector, {1})); + } else { + // In this case gather_indices has rank 2 and induction_var_as_vector (of + // shape {1}) is an index into just the first dimension of this rank 2 + // tensor. + TF_ASSIGN_OR_RETURN( + HloInstruction * index_into_gather_indices, + PadVectorWithZeros(induction_var_as_vector, + /*zeros_to_prepend=*/0, /*zeros_to_append=*/1)); + + int64 index_vector_size = gather_indices->shape().dimensions(1); + TF_ASSIGN_OR_RETURN( + HloInstruction * index_vector_2d, + MakeDynamicSliceHlo(gather_indices, index_into_gather_indices, + {1, index_vector_size})); + + TF_ASSIGN_OR_RETURN(index_vector, + ElideDegenerateDims(index_vector_2d, {0})); + } - TF_ASSIGN_OR_RETURN(HloInstruction * gathered_slice_start, - ExpandIndexVectorIntoOperandSpace( - index_vector, gather.gather_dimension_numbers(), - operand->shape().dimensions_size())); + TF_ASSIGN_OR_RETURN( + HloInstruction * gathered_slice_start, + ExpandIndexVectorIntoOperandSpace(index_vector, dim_numbers, + operand->shape().dimensions_size())); TF_ASSIGN_OR_RETURN(HloInstruction * gathered_slice, MakeDynamicSliceHlo(operand, gathered_slice_start, gather.gather_window_bounds())); + TF_ASSIGN_OR_RETURN( + HloInstruction * gathered_slice_with_dims_elided, + ElideDegenerateDims(gathered_slice, + AsInt64Slice(dim_numbers.elided_window_dims()))); + TF_ASSIGN_OR_RETURN( HloInstruction * gathered_slice_for_update, - ExpandFirstDimIntoNDims(gathered_slice, - {1, gathered_slice->shape().dimensions(0)})); + PrependDegenerateDims(gathered_slice_with_dims_elided, 1)); TF_ASSIGN_OR_RETURN( HloInstruction * index_vector_into_accumulator, PadVectorWithZeros( induction_var_as_vector, /*zeros_to_prepend=*/0, - /*zeros_to_append=*/gathered_slice->shape().dimensions_size())); + /*zeros_to_append=*/ + gathered_slice_with_dims_elided->shape().dimensions_size())); TF_ASSIGN_OR_RETURN( HloInstruction * updated_accumulator, @@ -220,26 
+225,20 @@ static StatusOr> GatherLoopBody( static StatusOr CreateGatherLoopAccumulatorInitValue( HloComputation* computation, PrimitiveType element_type, - ArraySlice window_bounds, int64 gather_loop_trip_count) { + ArraySlice window_bounds, int64 gather_loop_trip_count, + const GatherDimensionNumbers& dim_numbers) { std::vector accumulator_state_shape_dims; accumulator_state_shape_dims.reserve(1 + window_bounds.size()); accumulator_state_shape_dims.push_back(gather_loop_trip_count); - c_copy(window_bounds, std::back_inserter(accumulator_state_shape_dims)); + for (int64 i = 0; i < window_bounds.size(); i++) { + if (!c_binary_search(dim_numbers.elided_window_dims(), i)) { + accumulator_state_shape_dims.push_back(window_bounds[i]); + } + } return BroadcastZeros(computation, element_type, accumulator_state_shape_dims); } -static StatusOr ElideWindowDimsFromAccumulator( - HloInstruction* accumulator, const GatherDimensionNumbers& dim_numbers) { - std::vector dims_to_elide; - dims_to_elide.reserve(dim_numbers.elided_window_dims_size()); - for (int64 elided_window_dim : dim_numbers.elided_window_dims()) { - dims_to_elide.push_back(elided_window_dim + 1); - } - - return ElideDegenerateDims(accumulator, dims_to_elide); -} - // `accumulator` is almost the tensor the gather operation would have produced, // except that it has the dimensions in the wrong order -- the gather dimensions // are the major dimensions and the window dimensions are the minor dimensions. 
@@ -338,7 +337,8 @@ StatusOr GatherExpander::ExpandGather( HloInstruction * accumulator_init, CreateGatherLoopAccumulatorInitValue( computation, output_shape.element_type(), - gather_instr->gather_window_bounds(), gather_loop_trip_count)); + gather_instr->gather_window_bounds(), gather_loop_trip_count, + gather_instr->gather_dimension_numbers())); StatusOr> gather_loop_result_or_error = WhileUtil::MakeCountedLoop( @@ -353,14 +353,10 @@ StatusOr GatherExpander::ExpandGather( gather_loop_result_or_error); HloInstruction* accumulator_result = gather_loop_result.back(); - TF_ASSIGN_OR_RETURN( - HloInstruction * accumulator_with_window_dims_elided, - ElideWindowDimsFromAccumulator(accumulator_result, dim_numbers)); TF_ASSIGN_OR_RETURN( HloInstruction * accumulator_with_output_gather_dims_decanonicalized, - AdjustGatherDimsInAccumulator(gather_indices->shape(), - accumulator_with_window_dims_elided, + AdjustGatherDimsInAccumulator(gather_indices->shape(), accumulator_result, dim_numbers.index_vector_dim())); return PermuteGatherAndWindowDims( diff --git a/tensorflow/compiler/xla/service/gather_expander_test.cc b/tensorflow/compiler/xla/service/gather_expander_test.cc index ba41ee8428cbe7132103df24d552565a8dc2f9f6..1c72ca066502eb549bf8638cdf0b7827b06f92d7 100644 --- a/tensorflow/compiler/xla/service/gather_expander_test.cc +++ b/tensorflow/compiler/xla/service/gather_expander_test.cc @@ -47,5 +47,62 @@ ENTRY main { "indices are not supported.")); } +TEST(GatherExpanderTest, AvoidDegenerateDims) { + const string hlo_text = R"( +HloModule TensorFlowGatherV2 + +ENTRY main { + operand = s32[3,3] parameter(0) + indices = s32[2] parameter(1) + ROOT gather = s32[3,2] gather(operand, indices), + output_window_dims={0}, + elided_window_dims={1}, + gather_dims_to_operand_dims={1}, + index_vector_dim=1, + window_bounds={3, 1} +} +)"; + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, + tools::Parse(hlo_text)); + TF_ASSERT_OK_AND_ASSIGN(bool changed, 
GatherExpander{}.Run(module.get())); + ASSERT_TRUE(changed); + + HloInstruction* while_instr = nullptr; + for (auto* instr : module->entry_computation()->instructions()) { + if (instr->opcode() == HloOpcode::kWhile) { + ASSERT_EQ(while_instr, nullptr) + << "Expected exactly one while instruction in the entry computation " + "after gather expansion"; + while_instr = instr; + } + } + + ASSERT_NE(while_instr, nullptr) + << "Expected exactly one while instruction in the entry computation " + "after gather expansion"; + + // We want to avoid create while loop with shapes that have degenerate + // dimensions for TF gather. In this case we expect the loop state to be of + // the shape (sNN[], s32[3,3]{1,0}, s32[2]{0}, s32[2,3]{1,0}). The leading + // sNN is an implementation detail from WhileUtil::MakeCountedLoop so we don't + // check it here (though in theory the form of the while loop state is itself + // an implementation detail from WhileUtil::MakeCountedLoop). + + const Shape& while_shape = while_instr->shape(); + ASSERT_TRUE(ShapeUtil::IsTuple(while_shape)); + ASSERT_EQ(ShapeUtil::TupleElementCount(while_shape), 4); + + EXPECT_TRUE(ShapeUtil::SameDimensions( + ShapeUtil::MakeShape(S32, {3, 3}), + ShapeUtil::GetTupleElementShape(while_shape, 1))); + + EXPECT_TRUE(ShapeUtil::SameDimensions( + ShapeUtil::MakeShape(S32, {2}), + ShapeUtil::GetTupleElementShape(while_shape, 2))); + + EXPECT_TRUE(ShapeUtil::SameDimensions( + ShapeUtil::MakeShape(S32, {2, 3}), + ShapeUtil::GetTupleElementShape(while_shape, 3))); +} } // namespace } // namespace xla diff --git a/tensorflow/compiler/xla/service/generic_transfer_manager.cc b/tensorflow/compiler/xla/service/generic_transfer_manager.cc index a99e2b7794a399047fb5a77a140bd333214e3f23..ddb687314ee8221ba9282f230db498b3a5d23499 100644 --- a/tensorflow/compiler/xla/service/generic_transfer_manager.cc +++ b/tensorflow/compiler/xla/service/generic_transfer_manager.cc @@ -32,8 +32,6 @@ limitations under the License. 
#include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/stream_executor_no_cuda.h" -namespace se = ::perftools::gputools; - namespace xla { GenericTransferManager::GenericTransferManager(se::Platform::Id platform_id, @@ -45,9 +43,9 @@ se::Platform::Id GenericTransferManager::PlatformId() const { } Status GenericTransferManager::WriteSingleTupleIndexTable( - perftools::gputools::StreamExecutor* executor, + se::StreamExecutor* executor, tensorflow::gtl::ArraySlice elements, - const Shape& shape, perftools::gputools::DeviceMemoryBase* region) { + const Shape& shape, se::DeviceMemoryBase* region) { TF_RET_CHECK(elements.size() == ShapeUtil::TupleElementCount(shape)); std::vector element_pointers; @@ -144,20 +142,19 @@ Status GenericTransferManager::TransferLiteralToInfeed( } Status GenericTransferManager::TransferBufferToInfeed( - perftools::gputools::StreamExecutor* executor, int64 size, - const void* source) { + se::StreamExecutor* executor, int64 size, const void* source) { return Unimplemented("Generic transfer to Infeed"); } Status GenericTransferManager::TransferLiteralFromOutfeed( - perftools::gputools::StreamExecutor* executor, const Shape& literal_shape, + se::StreamExecutor* executor, const Shape& literal_shape, Literal* literal) { return Unimplemented( "Outfeed is not supported on this platform (b/30467474)"); } Status GenericTransferManager::ResetDevices( - tensorflow::gtl::ArraySlice + tensorflow::gtl::ArraySlice /*executors*/) { return Unimplemented( "Device reset is not yet supported on this platform (b/30481585)"); diff --git a/tensorflow/compiler/xla/service/generic_transfer_manager.h b/tensorflow/compiler/xla/service/generic_transfer_manager.h index 63a7c820cf4e5fbbdf870086a4fb5316ac50d10b..0579099de40ba3e43f69e4e6474b56691064c692 100644 --- a/tensorflow/compiler/xla/service/generic_transfer_manager.h +++ b/tensorflow/compiler/xla/service/generic_transfer_manager.h @@ -36,46 +36,41 @@ namespace xla { // infeed. 
class GenericTransferManager : public TransferManager { public: - GenericTransferManager(perftools::gputools::Platform::Id platform_id, - size_t pointer_size); + GenericTransferManager(se::Platform::Id platform_id, size_t pointer_size); ~GenericTransferManager() override {} - perftools::gputools::Platform::Id PlatformId() const override; + se::Platform::Id PlatformId() const override; StatusOr> TransferLiteralFromDevice( - perftools::gputools::StreamExecutor* executor, - const ShapedBuffer& device_buffer) override; + se::StreamExecutor* executor, const ShapedBuffer& device_buffer) override; - Status TransferLiteralToDevice(perftools::gputools::StreamExecutor* executor, + Status TransferLiteralToDevice(se::StreamExecutor* executor, const Literal& literal, const ShapedBuffer& device_buffer) override; - Status TransferLiteralToInfeed(perftools::gputools::StreamExecutor* executor, + Status TransferLiteralToInfeed(se::StreamExecutor* executor, const Literal& literal) override; - Status TransferLiteralFromOutfeed( - perftools::gputools::StreamExecutor* executor, const Shape& literal_shape, - Literal* literal) override; + Status TransferLiteralFromOutfeed(se::StreamExecutor* executor, + const Shape& literal_shape, + Literal* literal) override; Status ResetDevices( - tensorflow::gtl::ArraySlice - executors) override; + tensorflow::gtl::ArraySlice executors) override; int64 GetByteSizeRequirement(const Shape& shape) const override; protected: - Status TransferBufferToInfeed(perftools::gputools::StreamExecutor* executor, - int64 size, const void* source) override; + Status TransferBufferToInfeed(se::StreamExecutor* executor, int64 size, + const void* source) override; Status WriteSingleTupleIndexTable( - perftools::gputools::StreamExecutor* executor, - tensorflow::gtl::ArraySlice - elements, - const Shape& shape, - perftools::gputools::DeviceMemoryBase* region) override; + se::StreamExecutor* executor, + tensorflow::gtl::ArraySlice elements, + const Shape& shape, 
se::DeviceMemoryBase* region) override; private: // The platform this transfer manager targets. - const perftools::gputools::Platform::Id platform_id_; + const se::Platform::Id platform_id_; // The size in bytes of pointers on this platform. const size_t pointer_size_; diff --git a/tensorflow/compiler/xla/service/gpu/buffer_allocations.cc b/tensorflow/compiler/xla/service/gpu/buffer_allocations.cc index 2029c303d47e9a62135b003c3bd9be6f8b3438d4..837f05244f7a8c71483cc30eeac9e1c219e6bbd2 100644 --- a/tensorflow/compiler/xla/service/gpu/buffer_allocations.cc +++ b/tensorflow/compiler/xla/service/gpu/buffer_allocations.cc @@ -28,8 +28,6 @@ limitations under the License. #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/types.h" -namespace se = ::perftools::gputools; - namespace xla { namespace gpu { diff --git a/tensorflow/compiler/xla/service/gpu/buffer_allocations.h b/tensorflow/compiler/xla/service/gpu/buffer_allocations.h index ea7f0eb3745f2e0e0bfd88c3dca79d6ad25884ed..c2fc35be4ca4bc6df85ee21fb6b564bfd6de3ec0 100644 --- a/tensorflow/compiler/xla/service/gpu/buffer_allocations.h +++ b/tensorflow/compiler/xla/service/gpu/buffer_allocations.h @@ -41,7 +41,7 @@ class BufferAllocations { // user-specified result buffers) to the given buffer index. The builder // will skip allocating buffers for registered buffer indices. void RegisterBuffer(BufferAllocation::Index index, - perftools::gputools::DeviceMemoryBase address); + se::DeviceMemoryBase address); // Builds a BufferAllocations object from the given buffer assignment. // `memory_allocator` is what this function uses to allocate device memory. @@ -52,8 +52,7 @@ class BufferAllocations { DeviceMemoryAllocator* memory_allocator); private: - std::map - registered_buffers_; + std::map registered_buffers_; }; BufferAllocations(const BufferAllocations&) = delete; @@ -65,22 +64,20 @@ class BufferAllocations { // Returns the device address of buffer `buffer_index`. 
`buffer_index` must be // a valid index, i.e., in [0, buffer_count). This function returns null if // `buffer_index` is not assigned to a buffer address. - perftools::gputools::DeviceMemoryBase GetDeviceAddress( + se::DeviceMemoryBase GetDeviceAddress( BufferAllocation::Index buffer_index) const; // Same as above, but also adjusts the returned address for the offset and // size contained in the given slice. - perftools::gputools::DeviceMemoryBase GetDeviceAddress( + se::DeviceMemoryBase GetDeviceAddress( const BufferAllocation::Slice& buffer_slice) const; - perftools::gputools::DeviceMemoryBase GetTempBufferBase() const { - return temp_buffer_base_; - } + se::DeviceMemoryBase GetTempBufferBase() const { return temp_buffer_base_; } // Tears down all buffers allocated by this object that are not in // `live_addresses`. tensorflow::Status TearDown( - const std::set& live_addresses, + const std::set& live_addresses, const BufferAssignment& buffer_assignment); private: @@ -92,15 +89,15 @@ class BufferAllocations { // Sets the device address of buffer `buffer_index`. void SetBuffer(BufferAllocation::Index buffer_index, - perftools::gputools::DeviceMemoryBase buffer); + se::DeviceMemoryBase buffer); // An array of device pointers that stores the address of each buffer // indexed by Index. Each element can point to a temporary buffer, an // input buffer, or nullptr if no buffer is needed for that Index. - std::vector buffers_; + std::vector buffers_; // The base address of the memory block that contains all temporary buffers. 
- perftools::gputools::DeviceMemoryBase temp_buffer_base_; + se::DeviceMemoryBase temp_buffer_base_; int device_ordinal_; diff --git a/tensorflow/compiler/xla/service/gpu/conditional_thunk.cc b/tensorflow/compiler/xla/service/gpu/conditional_thunk.cc index 790ca535b11ee47724ef6227de40726d940d6153..dce8de2e301ecfaa4674b8be48b8c02bfabf3f4b 100644 --- a/tensorflow/compiler/xla/service/gpu/conditional_thunk.cc +++ b/tensorflow/compiler/xla/service/gpu/conditional_thunk.cc @@ -42,11 +42,10 @@ Status ConditionalThunk::Initialize(const GpuExecutable& executable) { } Status ConditionalThunk::ExecuteOnStream( - const BufferAllocations& buffer_allocations, - perftools::gputools::Stream* stream) { + const BufferAllocations& buffer_allocations, se::Stream* stream) { // Copy the predicate value from device. bool predicate; - perftools::gputools::DeviceMemoryBase predicate_address = + se::DeviceMemoryBase predicate_address = buffer_allocations.GetDeviceAddress(predicate_buffer_index_); stream->ThenMemcpy(&predicate, predicate_address, sizeof(bool)); diff --git a/tensorflow/compiler/xla/service/gpu/conditional_thunk.h b/tensorflow/compiler/xla/service/gpu/conditional_thunk.h index 7725c46a3b4b51af34a4dd977885353ff32c21f6..e40872688fdad24d24db5f7cebb3206c77652dce 100644 --- a/tensorflow/compiler/xla/service/gpu/conditional_thunk.h +++ b/tensorflow/compiler/xla/service/gpu/conditional_thunk.h @@ -49,7 +49,7 @@ class ConditionalThunk : public Thunk { Status Initialize(const GpuExecutable& executable) override; Status ExecuteOnStream(const BufferAllocations& buffer_allocations, - perftools::gputools::Stream* stream) override; + se::Stream* stream) override; private: BufferAllocation::Slice predicate_buffer_index_; diff --git a/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc b/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc index 461747b699b542ae0c8735aea34cc9e57c1fb387..64d3b84b8c73d82800270aebcebf7f7a8fec5fe4 100644 --- 
a/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc +++ b/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc @@ -25,8 +25,6 @@ limitations under the License. #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/stream_executor_no_cuda.h" -namespace se = ::perftools::gputools; - namespace xla { namespace gpu { diff --git a/tensorflow/compiler/xla/service/gpu/convolution_thunk.h b/tensorflow/compiler/xla/service/gpu/convolution_thunk.h index 900d9cb6243088b56a1825fb3ab8c06cf8d74726..6d845025b1aef2b0a5f147401b6db0598ba94d6d 100644 --- a/tensorflow/compiler/xla/service/gpu/convolution_thunk.h +++ b/tensorflow/compiler/xla/service/gpu/convolution_thunk.h @@ -66,23 +66,21 @@ class ConvolutionThunk : public Thunk { // Does the convolution for the thunk on "stream". Status ExecuteOnStream(const BufferAllocations& buffer_allocations, - perftools::gputools::Stream* stream) override; + se::Stream* stream) override; private: class ScratchAllocator; - Status Convolve( - const perftools::gputools::dnn::BatchDescriptor& input_descriptor, - perftools::gputools::DeviceMemory input_data, - const perftools::gputools::dnn::FilterDescriptor& filter_descriptor, - perftools::gputools::DeviceMemory filter_data, - const perftools::gputools::dnn::BatchDescriptor& output_descriptor, - perftools::gputools::DeviceMemory output_data, - const perftools::gputools::dnn::ConvolutionDescriptor& - convolution_descriptor, - const perftools::gputools::dnn::AlgorithmConfig& algorithm_config, - perftools::gputools::Stream* stream, ScratchAllocator* scratch_allocator, - perftools::gputools::dnn::ProfileResult* profile_result); + Status Convolve(const se::dnn::BatchDescriptor& input_descriptor, + se::DeviceMemory input_data, + const se::dnn::FilterDescriptor& filter_descriptor, + se::DeviceMemory filter_data, + const se::dnn::BatchDescriptor& output_descriptor, + se::DeviceMemory output_data, + const se::dnn::ConvolutionDescriptor& convolution_descriptor, + const 
se::dnn::AlgorithmConfig& algorithm_config, + se::Stream* stream, ScratchAllocator* scratch_allocator, + se::dnn::ProfileResult* profile_result); const CudnnConvKind convolution_kind_; diff --git a/tensorflow/compiler/xla/service/gpu/copy_thunk.cc b/tensorflow/compiler/xla/service/gpu/copy_thunk.cc index f4498663b1c039b3175376baf8f27c4ecec678ec..bf912fbd14de5874062a79db28186ab233575233 100644 --- a/tensorflow/compiler/xla/service/gpu/copy_thunk.cc +++ b/tensorflow/compiler/xla/service/gpu/copy_thunk.cc @@ -30,9 +30,8 @@ HostToDeviceCopyThunk::HostToDeviceCopyThunk( mem_size_(mem_size) {} tensorflow::Status HostToDeviceCopyThunk::ExecuteOnStream( - const BufferAllocations& buffer_allocations, - perftools::gputools::Stream* stream) { - perftools::gputools::DeviceMemoryBase destination_data = + const BufferAllocations& buffer_allocations, se::Stream* stream) { + se::DeviceMemoryBase destination_data = buffer_allocations.GetDeviceAddress(destination_buffer_); stream->ThenMemcpy(&destination_data, source_address_, mem_size_); return tensorflow::Status::OK(); @@ -48,11 +47,10 @@ DeviceToDeviceCopyThunk::DeviceToDeviceCopyThunk( mem_size_(mem_size) {} tensorflow::Status DeviceToDeviceCopyThunk::ExecuteOnStream( - const BufferAllocations& buffer_allocations, - perftools::gputools::Stream* stream) { - perftools::gputools::DeviceMemoryBase destination_data = + const BufferAllocations& buffer_allocations, se::Stream* stream) { + se::DeviceMemoryBase destination_data = buffer_allocations.GetDeviceAddress(destination_buffer_); - perftools::gputools::DeviceMemoryBase source_data = + se::DeviceMemoryBase source_data = buffer_allocations.GetDeviceAddress(source_buffer_); stream->ThenMemcpy(&destination_data, source_data, mem_size_); return tensorflow::Status::OK(); diff --git a/tensorflow/compiler/xla/service/gpu/copy_thunk.h b/tensorflow/compiler/xla/service/gpu/copy_thunk.h index e2783fd255239d31edc89701ea208f33ebb8d3fb..2e7eb5f3445bc9294fa455ef31fb816cdba4726c 100644 --- 
a/tensorflow/compiler/xla/service/gpu/copy_thunk.h +++ b/tensorflow/compiler/xla/service/gpu/copy_thunk.h @@ -40,8 +40,7 @@ class HostToDeviceCopyThunk : public Thunk { HostToDeviceCopyThunk& operator=(const HostToDeviceCopyThunk&) = delete; tensorflow::Status ExecuteOnStream( - const BufferAllocations& buffer_allocations, - perftools::gputools::Stream* stream) override; + const BufferAllocations& buffer_allocations, se::Stream* stream) override; private: const void* source_address_; @@ -64,8 +63,7 @@ class DeviceToDeviceCopyThunk : public Thunk { DeviceToDeviceCopyThunk& operator=(const DeviceToDeviceCopyThunk&) = delete; tensorflow::Status ExecuteOnStream( - const BufferAllocations& buffer_allocations, - perftools::gputools::Stream* stream) override; + const BufferAllocations& buffer_allocations, se::Stream* stream) override; private: const BufferAllocation::Slice source_buffer_; diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_batchnorm_thunk.cc b/tensorflow/compiler/xla/service/gpu/cudnn_batchnorm_thunk.cc index 58d9c8caff31e878487fbef01afce566e6187fd9..68099fd63847ef9993f9bc7ac0e28b2939631b35 100644 --- a/tensorflow/compiler/xla/service/gpu/cudnn_batchnorm_thunk.cc +++ b/tensorflow/compiler/xla/service/gpu/cudnn_batchnorm_thunk.cc @@ -28,7 +28,6 @@ limitations under the License. namespace xla { namespace gpu { -namespace se = ::perftools::gputools; namespace dnn = se::dnn; static std::pair> ScratchAllocator::AllocateBytes( // Determines whether we can safely perform a winograd non-fused convolution for // the given input and output shapes. This works around b/68264959, an integer // overflow in cuDNNv5 and cuDNNv6. -// -// TODO(jlebar): We shouldn't need this check for cuDNNv7. 
-bool ShouldIncludeWinogradNonfusedAlgo( - const Shape& input_shape, const Shape& output_shape, - const ConvolutionDimensionNumbers& dnums) { +bool ShouldIncludeWinogradNonfusedAlgo(const Shape& input_shape, + const Shape& output_shape, + const ConvolutionDimensionNumbers& dnums, + se::StreamExecutor* stream_exec) { + // Skip this check for cudnn7 and newer. + auto version = + stream_exec->AsDnn()->GetVersion(); + if (version.ok() && version.ValueOrDie().major_version() >= 7) { + return true; + } + int64 batch = input_shape.dimensions(dnums.input_batch_dimension()); int64 in_depths = input_shape.dimensions(dnums.input_feature_dimension()); int64 in_rows = input_shape.dimensions(dnums.input_spatial_dimensions(0)); @@ -118,20 +122,20 @@ bool ShouldIncludeWinogradNonfusedAlgo( std::vector GetAlgorithms(CudnnConvKind kind, bool with_winograd_nonfused, - se::StreamExecutor* stream_exec_) { + se::StreamExecutor* stream_exec) { std::vector algorithms; switch (kind) { case CudnnConvKind::kBackwardFilter: - CHECK(stream_exec_->GetConvolveBackwardFilterAlgorithms( + CHECK(stream_exec->GetConvolveBackwardFilterAlgorithms( with_winograd_nonfused, &algorithms)); break; case CudnnConvKind::kBackwardInput: - CHECK(stream_exec_->GetConvolveBackwardDataAlgorithms( + CHECK(stream_exec->GetConvolveBackwardDataAlgorithms( with_winograd_nonfused, &algorithms)); break; case CudnnConvKind::kForward: - CHECK(stream_exec_->GetConvolveAlgorithms(with_winograd_nonfused, - &algorithms)); + CHECK(stream_exec->GetConvolveAlgorithms(with_winograd_nonfused, + &algorithms)); break; } @@ -209,8 +213,8 @@ CudnnConvolutionAlgorithmPicker::PickBestAlgorithm( return nullopt; } - const bool use_winograd_nonfused = - ShouldIncludeWinogradNonfusedAlgo(input_shape, output_shape, dnums); + const bool use_winograd_nonfused = ShouldIncludeWinogradNonfusedAlgo( + input_shape, output_shape, dnums, stream_exec_); se::dnn::ProfileResult best_result; int64 best_result_bytes_used = 0; diff --git 
a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.h b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.h index 516210ec2e500cf03774d27408300ac3346e7b4f..bc5d1ce94afd2075a006899f0f6bcf64352e5e99 100644 --- a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.h +++ b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.h @@ -33,9 +33,8 @@ class CudnnConvolutionAlgorithmPicker : public HloPassInterface { // If the `allocator` parameter is not null, we will use it to allocate temp // memory while timing the various convolution algorithms. If it's null, // we'll use the default allocator on the StreamExecutor. - CudnnConvolutionAlgorithmPicker( - perftools::gputools::StreamExecutor* stream_exec, - DeviceMemoryAllocator* allocator) + CudnnConvolutionAlgorithmPicker(se::StreamExecutor* stream_exec, + DeviceMemoryAllocator* allocator) : stream_exec_(stream_exec), allocator_(allocator) {} tensorflow::StringPiece name() const override { @@ -52,7 +51,7 @@ class CudnnConvolutionAlgorithmPicker : public HloPassInterface { const Shape& output_shape, const Window& window, const ConvolutionDimensionNumbers& dnums, HloInstruction* instr); - perftools::gputools::StreamExecutor* stream_exec_; // never null + se::StreamExecutor* stream_exec_; // never null DeviceMemoryAllocator* allocator_; // may be null }; diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.cc b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.cc index e4ae839e1dd4cb3a744a3f6a3329cabdaeb3f38d..10b4c3de89989c52cfea5273c3d5b0beef76abd2 100644 --- a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.cc +++ b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.cc @@ -22,8 +22,6 @@ namespace xla { namespace gpu { namespace { -namespace se = ::perftools::gputools; - using se::DeviceMemory; using se::DeviceMemoryBase; using se::Stream; @@ -215,14 +213,12 @@ string 
CudnnConvKindToString(CudnnConvKind kind) { Status RunCudnnConvolution( CudnnConvKind kind, const Shape& input_shape, const Shape& filter_shape, - const Shape& output_shape, perftools::gputools::DeviceMemoryBase input_buf, - perftools::gputools::DeviceMemoryBase filter_buf, - perftools::gputools::DeviceMemoryBase output_buf, - perftools::gputools::DeviceMemoryBase scratch_buf, const Window& window, + const Shape& output_shape, se::DeviceMemoryBase input_buf, + se::DeviceMemoryBase filter_buf, se::DeviceMemoryBase output_buf, + se::DeviceMemoryBase scratch_buf, const Window& window, const ConvolutionDimensionNumbers& dnums, - perftools::gputools::dnn::AlgorithmConfig algorithm, - perftools::gputools::Stream* stream, - perftools::gputools::dnn::ProfileResult* profile_result) { + se::dnn::AlgorithmConfig algorithm, se::Stream* stream, + se::dnn::ProfileResult* profile_result) { ScratchBufAllocator scratch_allocator(scratch_buf); return RunCudnnConvolution(kind, input_shape, filter_shape, output_shape, input_buf, filter_buf, output_buf, @@ -232,14 +228,12 @@ Status RunCudnnConvolution( Status RunCudnnConvolution( CudnnConvKind kind, const Shape& input_shape, const Shape& filter_shape, - const Shape& output_shape, perftools::gputools::DeviceMemoryBase input_buf, - perftools::gputools::DeviceMemoryBase filter_buf, - perftools::gputools::DeviceMemoryBase output_buf, - perftools::gputools::ScratchAllocator* scratch_allocator, - const Window& window, const ConvolutionDimensionNumbers& dnums, - perftools::gputools::dnn::AlgorithmConfig algorithm, - perftools::gputools::Stream* stream, - perftools::gputools::dnn::ProfileResult* profile_result) { + const Shape& output_shape, se::DeviceMemoryBase input_buf, + se::DeviceMemoryBase filter_buf, se::DeviceMemoryBase output_buf, + se::ScratchAllocator* scratch_allocator, const Window& window, + const ConvolutionDimensionNumbers& dnums, + se::dnn::AlgorithmConfig algorithm, se::Stream* stream, + se::dnn::ProfileResult* 
profile_result) { PrimitiveType output_primitive_type = output_shape.element_type(); CHECK(output_primitive_type == F32 || output_primitive_type == F16) << ShapeUtil::HumanString(output_shape); diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.h b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.h index 3dbfa2730da359d3c7937140508017c4a7b02d6c..944e4ac686d45408b08ff1faa321510c1c8920ba 100644 --- a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.h +++ b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.h @@ -72,25 +72,21 @@ string CudnnConvKindToString(CudnnConvKind kind); // that size, if you like. Status RunCudnnConvolution( CudnnConvKind kind, const Shape& input_shape, const Shape& filter_shape, - const Shape& output_shape, perftools::gputools::DeviceMemoryBase input_buf, - perftools::gputools::DeviceMemoryBase filter_buf, - perftools::gputools::DeviceMemoryBase output_buf, - perftools::gputools::DeviceMemoryBase scratch_buf, const Window& window, + const Shape& output_shape, se::DeviceMemoryBase input_buf, + se::DeviceMemoryBase filter_buf, se::DeviceMemoryBase output_buf, + se::DeviceMemoryBase scratch_buf, const Window& window, const ConvolutionDimensionNumbers& dnums, - perftools::gputools::dnn::AlgorithmConfig algorithm, - perftools::gputools::Stream* stream, - perftools::gputools::dnn::ProfileResult* profile_result = nullptr); + se::dnn::AlgorithmConfig algorithm, se::Stream* stream, + se::dnn::ProfileResult* profile_result = nullptr); Status RunCudnnConvolution( CudnnConvKind kind, const Shape& input_shape, const Shape& filter_shape, - const Shape& output_shape, perftools::gputools::DeviceMemoryBase input_buf, - perftools::gputools::DeviceMemoryBase filter_buf, - perftools::gputools::DeviceMemoryBase output_buf, - perftools::gputools::ScratchAllocator* scratch_allocator, - const Window& window, const ConvolutionDimensionNumbers& dnums, - perftools::gputools::dnn::AlgorithmConfig algorithm, - 
perftools::gputools::Stream* stream, - perftools::gputools::dnn::ProfileResult* profile_result = nullptr); + const Shape& output_shape, se::DeviceMemoryBase input_buf, + se::DeviceMemoryBase filter_buf, se::DeviceMemoryBase output_buf, + se::ScratchAllocator* scratch_allocator, const Window& window, + const ConvolutionDimensionNumbers& dnums, + se::dnn::AlgorithmConfig algorithm, se::Stream* stream, + se::dnn::ProfileResult* profile_result = nullptr); } // namespace gpu } // namespace xla diff --git a/tensorflow/compiler/xla/service/gpu/fft_thunk.cc b/tensorflow/compiler/xla/service/gpu/fft_thunk.cc index 66931bdc8b1030b2b2e7731ce6327c1e908d4ee6..cc747addbd152eb82b0b2ef92b8653fc861f97be 100644 --- a/tensorflow/compiler/xla/service/gpu/fft_thunk.cc +++ b/tensorflow/compiler/xla/service/gpu/fft_thunk.cc @@ -24,8 +24,6 @@ limitations under the License. #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/stream_executor_no_cuda.h" -namespace se = ::perftools::gputools; - namespace xla { namespace gpu { diff --git a/tensorflow/compiler/xla/service/gpu/fft_thunk.h b/tensorflow/compiler/xla/service/gpu/fft_thunk.h index 52fb8c376d7acea0f15aaa865c23fa2382717338..24b1dca99865fe21d0ca3af91e0d169f7b74a78a 100644 --- a/tensorflow/compiler/xla/service/gpu/fft_thunk.h +++ b/tensorflow/compiler/xla/service/gpu/fft_thunk.h @@ -34,24 +34,24 @@ namespace gpu { // released on destruction. // // Not thread-safe in that AllocateBytes, destructor are not locked. 
-class FftScratchAllocator : public perftools::gputools::ScratchAllocator { +class FftScratchAllocator : public se::ScratchAllocator { public: FftScratchAllocator(int device_ordinal, DeviceMemoryAllocator* memory_allocator); ~FftScratchAllocator() override; - int64 GetMemoryLimitInBytes(perftools::gputools::Stream* stream) override; + int64 GetMemoryLimitInBytes(se::Stream* stream) override; int64 TotalAllocatedBytes() { return total_allocated_bytes_; } - perftools::gputools::port::StatusOr> - AllocateBytes(perftools::gputools::Stream* stream, int64 byte_size) override; + se::port::StatusOr> AllocateBytes( + se::Stream* stream, int64 byte_size) override; private: const int device_ordinal_; DeviceMemoryAllocator* memory_allocator_; - std::vector allocated_buffers_; + std::vector allocated_buffers_; int64 total_allocated_bytes_ = 0; }; @@ -74,16 +74,15 @@ class FftThunk : public Thunk { // Does the FFT for the thunk on "stream". tensorflow::Status ExecuteOnStream( - const BufferAllocations& buffer_allocations, - perftools::gputools::Stream* stream) override; + const BufferAllocations& buffer_allocations, se::Stream* stream) override; private: - const perftools::gputools::fft::Type fft_type_; + const se::fft::Type fft_type_; const std::vector fft_length_; float scale_factor_; - std::unique_ptr fft_plan_; + std::unique_ptr fft_plan_; const BufferAllocation::Slice input_buffer_; const BufferAllocation::Slice output_buffer_; diff --git a/tensorflow/compiler/xla/service/gpu/for_thunk.cc b/tensorflow/compiler/xla/service/gpu/for_thunk.cc index 283d21ca222a236a69e4bab1b6504665d4d1cdd3..6e6966df3987eef29b2122c3ef8f11b7cd0bfe14 100644 --- a/tensorflow/compiler/xla/service/gpu/for_thunk.cc +++ b/tensorflow/compiler/xla/service/gpu/for_thunk.cc @@ -36,8 +36,7 @@ tensorflow::Status ForThunk::Initialize(const GpuExecutable& executable) { } tensorflow::Status ForThunk::ExecuteOnStream( - const BufferAllocations& buffer_allocations, - perftools::gputools::Stream* stream) { + const 
BufferAllocations& buffer_allocations, se::Stream* stream) { for (int64 i = 0; i < loop_limit_; ++i) { // Invoke loop body thunk sequence. TF_RETURN_IF_ERROR( diff --git a/tensorflow/compiler/xla/service/gpu/for_thunk.h b/tensorflow/compiler/xla/service/gpu/for_thunk.h index 832494d17e9c4e1d9e92e18ef331df1cf3689024..c78d1c50686297aea8235af928aba562697f49bc 100644 --- a/tensorflow/compiler/xla/service/gpu/for_thunk.h +++ b/tensorflow/compiler/xla/service/gpu/for_thunk.h @@ -38,8 +38,7 @@ class ForThunk : public Thunk { tensorflow::Status Initialize(const GpuExecutable& executable) override; tensorflow::Status ExecuteOnStream( - const BufferAllocations& buffer_allocations, - perftools::gputools::Stream* stream) override; + const BufferAllocations& buffer_allocations, se::Stream* stream) override; private: const int64 loop_limit_; diff --git a/tensorflow/compiler/xla/service/gpu/gemm_thunk.cc b/tensorflow/compiler/xla/service/gpu/gemm_thunk.cc index 38668ff455a44c7ef99b57b750f1a3b18a90bd2c..0ec12f52d8b398218ec370fc74bfdf6f97f43893 100644 --- a/tensorflow/compiler/xla/service/gpu/gemm_thunk.cc +++ b/tensorflow/compiler/xla/service/gpu/gemm_thunk.cc @@ -22,8 +22,6 @@ limitations under the License. #include "tensorflow/core/platform/stream_executor_no_cuda.h" #include "tensorflow/core/platform/types.h" -namespace se = ::perftools::gputools; - namespace xla { namespace gpu { diff --git a/tensorflow/compiler/xla/service/gpu/gemm_thunk.h b/tensorflow/compiler/xla/service/gpu/gemm_thunk.h index df3edcefef898d465cd5ddc53e5d06a966a31f88..a18f425bc38fd3fbbb345901514c4ac16dbe97ec 100644 --- a/tensorflow/compiler/xla/service/gpu/gemm_thunk.h +++ b/tensorflow/compiler/xla/service/gpu/gemm_thunk.h @@ -50,14 +50,12 @@ class GemmThunk : public Thunk { // Does the gemm operation for the thunk on "stream", which must be non-null. 
tensorflow::Status ExecuteOnStream( - const BufferAllocations& buffer_allocations, - perftools::gputools::Stream* stream) override; + const BufferAllocations& buffer_allocations, se::Stream* stream) override; // Returns true if we'll perform autotuning if run on the given stream. If // so, we want the GPU to be quiescent during autotuning, so as not to // introduce noise in our results. - bool ShouldHaltAllActivityBeforeRunning( - perftools::gputools::Stream* stream) override { + bool ShouldHaltAllActivityBeforeRunning(se::Stream* stream) override { return autotune_results_.count( stream->parent()->GetDeviceDescription().name()) != 0; } @@ -79,8 +77,7 @@ class GemmThunk : public Thunk { // results. The map's value is the best algorithm we've found for this thunk // on this device, or an error if none of the algorithms worked and we should // use the regular gemm without an algorithm. - std::unordered_map> + std::unordered_map> autotune_results_; }; diff --git a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc index 07be2a0cf90c326af6e41764e79950db546e43e4..30bfc9351a5273b4cf854e269c5f576f6dd1bef7 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc @@ -91,8 +91,6 @@ limitations under the License. 
#include "tensorflow/core/platform/tracing.h" #include "tensorflow/stream_executor/cuda/cuda_diagnostics.h" -namespace se = ::perftools::gputools; - namespace xla { namespace gpu { @@ -779,9 +777,9 @@ se::Platform::Id GpuCompiler::PlatformId() const { } // namespace xla static bool InitModule() { - xla::Compiler::RegisterCompilerFactory(se::cuda::kCudaPlatformId, []() { - return xla::MakeUnique(); - }); + xla::Compiler::RegisterCompilerFactory( + stream_executor::cuda::kCudaPlatformId, + []() { return xla::MakeUnique(); }); return true; } static bool module_initialized = InitModule(); diff --git a/tensorflow/compiler/xla/service/gpu/gpu_compiler.h b/tensorflow/compiler/xla/service/gpu/gpu_compiler.h index c352d4d8462fadb266c55ad437de998e86a6528e..f3b02ae5d8867bdf1d970e809bff95a15d9f54d2 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_compiler.h +++ b/tensorflow/compiler/xla/service/gpu/gpu_compiler.h @@ -45,25 +45,23 @@ class GpuCompiler : public LLVMCompiler { // Bring in // StatusOr>> Compile( // std::vector> modules, - // std::vector> + // std::vector> // stream_execs) using LLVMCompiler::Compile; StatusOr> RunHloPasses( - std::unique_ptr module, - perftools::gputools::StreamExecutor* stream_exec, + std::unique_ptr module, se::StreamExecutor* stream_exec, DeviceMemoryAllocator* device_allocator) override; StatusOr> RunBackend( - std::unique_ptr module, - perftools::gputools::StreamExecutor* stream_exec, + std::unique_ptr module, se::StreamExecutor* stream_exec, DeviceMemoryAllocator* device_allocator) override; StatusOr>> CompileAheadOfTime(std::vector> module, AotCompilationOptions const& options) override; - perftools::gputools::Platform::Id PlatformId() const override; + se::Platform::Id PlatformId() const override; HloCostAnalysis::ShapeSizeFunction ShapeSizeBytesFunction() const override { // Capture just the pointer size, not the entire GpuCompiler object. 
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_executable.cc b/tensorflow/compiler/xla/service/gpu/gpu_executable.cc index 28f93447953b90d8a7fa4386e2355066c0405aec..62ce15bc59d34e3bd1768313f2894871bf97ef21 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_executable.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_executable.cc @@ -34,8 +34,6 @@ limitations under the License. #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/types.h" -namespace se = ::perftools::gputools; - namespace xla { namespace gpu { namespace { @@ -252,7 +250,7 @@ Status GpuExecutable::ExecuteThunks( return Status::OK(); } -StatusOr> GpuExecutable::ExecuteOnStream( +StatusOr GpuExecutable::ExecuteOnStream( const ServiceExecutableRunOptions* run_options, tensorflow::gtl::ArraySlice arguments, HloExecutionProfile* hlo_execution_profile) { @@ -299,13 +297,13 @@ StatusOr> GpuExecutable::ExecuteOnStream( HloInstruction* root = hlo_module_->entry_computation()->root_instruction(); auto device_ordinal = executor->device_ordinal(); - auto shaped_buffer = MakeUnique( - root->shape(), root->shape(), executor->platform(), device_ordinal); + auto shaped_buffer = ShapedBuffer(root->shape(), root->shape(), + executor->platform(), device_ordinal); // Copy DeviceMemoryBase values which contain the array(s) of the result into // the respective location in ShapedBuffer. 
std::set buffers_in_result; - TF_RETURN_IF_ERROR(shaped_buffer->buffers().ForEachMutableElementWithStatus( + TF_RETURN_IF_ERROR(shaped_buffer.buffers().ForEachMutableElementWithStatus( [&buffer_allocations, &buffers_in_result, &shaped_buffer, this]( const ShapeIndex& index, se::DeviceMemoryBase* device_memory) { const auto& sources = this->GetRootPointsToSet().element(index); @@ -324,7 +322,7 @@ StatusOr> GpuExecutable::ExecuteOnStream( this->assignment_->GetUniqueSlice(src_hlo, sources[0]->index())); CHECK(!slice.allocation()->is_entry_computation_parameter()); - perftools::gputools::DeviceMemoryBase src_base = + se::DeviceMemoryBase src_base = buffer_allocations->GetDeviceAddress(slice.index()); CHECK(!src_base.is_null() || src_base.size() == 0); *device_memory = src_base; @@ -337,7 +335,7 @@ StatusOr> GpuExecutable::ExecuteOnStream( return std::move(shaped_buffer); } -StatusOr> GpuExecutable::ExecuteAsyncOnStream( +StatusOr GpuExecutable::ExecuteAsyncOnStream( const ServiceExecutableRunOptions* run_options, tensorflow::gtl::ArraySlice arguments) { // TODO(b/30671675): Implement asynchronous execution mode. diff --git a/tensorflow/compiler/xla/service/gpu/gpu_executable.h b/tensorflow/compiler/xla/service/gpu/gpu_executable.h index dcb3991f41a31db84d8e9e555ae7d13c3ac84b97..361bc30b2f367c23051e06a1169f9c15c19f4a6a 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_executable.h +++ b/tensorflow/compiler/xla/service/gpu/gpu_executable.h @@ -74,12 +74,12 @@ class GpuExecutable : public Executable { // ExecuteOnStream will fail if the compute capability of the stream doesn't // match the compute capability passed to this object's constructor. 
- StatusOr> ExecuteOnStream( + StatusOr ExecuteOnStream( const ServiceExecutableRunOptions* run_options, tensorflow::gtl::ArraySlice arguments, HloExecutionProfile* hlo_execution_profile) override; - StatusOr> ExecuteAsyncOnStream( + StatusOr ExecuteAsyncOnStream( const ServiceExecutableRunOptions* run_options, tensorflow::gtl::ArraySlice arguments) override; diff --git a/tensorflow/compiler/xla/service/gpu/gpu_transfer_manager.cc b/tensorflow/compiler/xla/service/gpu/gpu_transfer_manager.cc index af9897769fda371e47af06c19abce9a06015e094..f13727ca9b6954f6be9b9277018fcc64ee326954 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_transfer_manager.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_transfer_manager.cc @@ -33,8 +33,6 @@ limitations under the License. #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/stream_executor_no_cuda.h" -namespace se = ::perftools::gputools; - namespace xla { // TODO(b/30467474) Once GPU infeed implementation settles, consider @@ -153,8 +151,8 @@ static std::unique_ptr CreateGpuTransferManager() { } static bool InitModule() { - xla::TransferManager::RegisterTransferManager(se::cuda::kCudaPlatformId, - &CreateGpuTransferManager); + xla::TransferManager::RegisterTransferManager( + stream_executor::cuda::kCudaPlatformId, &CreateGpuTransferManager); return true; } static bool module_initialized = InitModule(); diff --git a/tensorflow/compiler/xla/service/gpu/gpu_transfer_manager.h b/tensorflow/compiler/xla/service/gpu/gpu_transfer_manager.h index 9aa369c668364079504ead3491903e2590a142cc..d040a99975230578c270deabdfe60c61649e778c 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_transfer_manager.h +++ b/tensorflow/compiler/xla/service/gpu/gpu_transfer_manager.h @@ -36,21 +36,20 @@ class GpuTransferManager : public GenericTransferManager { GpuTransferManager(); ~GpuTransferManager() override {} - Status TransferLiteralToInfeed(perftools::gputools::StreamExecutor* executor, + Status 
TransferLiteralToInfeed(se::StreamExecutor* executor, const Literal& literal) override; - Status TransferBufferToInfeed(perftools::gputools::StreamExecutor* executor, - int64 size, const void* source) override; + Status TransferBufferToInfeed(se::StreamExecutor* executor, int64 size, + const void* source) override; private: // Initiates the infeed data transfers. InfeedBuffer->Done() must be // called to clean up the memory allocated for InfeedBuffer. StatusOr TransferBufferToInfeedInternal( - perftools::gputools::StreamExecutor* executor, int64 size, - const void* source); + se::StreamExecutor* executor, int64 size, const void* source); // Enqueues infeed data buffers with the infeed manager after their // transfer completes. - Status EnqueueBuffersToInfeed(perftools::gputools::StreamExecutor* executor, + Status EnqueueBuffersToInfeed(se::StreamExecutor* executor, std::vector buffers); TF_DISALLOW_COPY_AND_ASSIGN(GpuTransferManager); diff --git a/tensorflow/compiler/xla/service/gpu/infeed_manager.cc b/tensorflow/compiler/xla/service/gpu/infeed_manager.cc index ee5b447c9cd0b1fde4d3a0943d5d4cb8cc5b3376..3ddc1c0789d746bf021256638342364aac63e0e3 100644 --- a/tensorflow/compiler/xla/service/gpu/infeed_manager.cc +++ b/tensorflow/compiler/xla/service/gpu/infeed_manager.cc @@ -19,8 +19,6 @@ limitations under the License. #include "tensorflow/compiler/xla/ptr_util.h" #include "tensorflow/core/platform/logging.h" -namespace se = ::perftools::gputools; - namespace xla { namespace gpu { diff --git a/tensorflow/compiler/xla/service/gpu/infeed_manager.h b/tensorflow/compiler/xla/service/gpu/infeed_manager.h index 73d5a5ce35497f156a181371bfb97fc37a8eb09e..d5f2216d460a45085536b15f9bf6e3bd3579f9c8 100644 --- a/tensorflow/compiler/xla/service/gpu/infeed_manager.h +++ b/tensorflow/compiler/xla/service/gpu/infeed_manager.h @@ -46,7 +46,7 @@ namespace gpu { // the client. The client manages the memory of the buffer. 
class InfeedBuffer { public: - InfeedBuffer(perftools::gputools::StreamExecutor* executor, int64 length) + InfeedBuffer(se::StreamExecutor* executor, int64 length) : executor_(executor), length_(length) { device_memory_ = executor_->AllocateArray(length); CHECK(!device_memory_.is_null()); @@ -60,14 +60,12 @@ class InfeedBuffer { // client to manage memory for the infeed buffers. void Done() { delete this; } - perftools::gputools::DeviceMemoryBase* device_memory() { - return &device_memory_; - } + se::DeviceMemoryBase* device_memory() { return &device_memory_; } private: - perftools::gputools::StreamExecutor* executor_; // Not owned. + se::StreamExecutor* executor_; // Not owned. const int64 length_; - perftools::gputools::DeviceMemoryBase device_memory_; + se::DeviceMemoryBase device_memory_; }; // Client-side class used to enqueue infeed buffers. @@ -100,8 +98,7 @@ class InfeedManager { // new stream on the first invocation. On subsequent invocations, if // the cached executor is not the same as the requested executor, // returns null. - perftools::gputools::Stream* GetStream( - perftools::gputools::StreamExecutor* executor); + se::Stream* GetStream(se::StreamExecutor* executor); private: // TODO(b/30467474): Revisit if this mutex becomes a point of @@ -121,10 +118,10 @@ class InfeedManager { tensorflow::gtl::FlatSet dequeued_buffer_; // Cached host to device stream for queuing infeed data. - std::unique_ptr host_to_device_stream_; + std::unique_ptr host_to_device_stream_; // Executor that the host_to_device_stream belongs to. Not owned. - perftools::gputools::StreamExecutor* host_to_device_executor_; + se::StreamExecutor* host_to_device_executor_; }; // Singleton creator-or-accessor: Returns the GPU infeed manager. 
diff --git a/tensorflow/compiler/xla/service/gpu/infeed_thunk.cc b/tensorflow/compiler/xla/service/gpu/infeed_thunk.cc index 2ac95ceb692447c7ac6dbbcd8b9a38876f7a77b6..ea34d5b30c91e8b809e3e17a904e27e589fd6b5f 100644 --- a/tensorflow/compiler/xla/service/gpu/infeed_thunk.cc +++ b/tensorflow/compiler/xla/service/gpu/infeed_thunk.cc @@ -31,10 +31,10 @@ InfeedThunk::InfeedThunk( destination_buffer_(destination_buffer) {} Status InfeedThunk::ExecuteOnStream(const BufferAllocations& buffer_allocations, - perftools::gputools::Stream* stream) { + se::Stream* stream) { VLOG(2) << "Infeeding to GPU "; - perftools::gputools::DeviceMemoryBase destination_address = + se::DeviceMemoryBase destination_address = buffer_allocations.GetDeviceAddress(destination_buffer_); InfeedManager* infeed_manager = GetOrCreateInfeedManager(); @@ -45,7 +45,7 @@ Status InfeedThunk::ExecuteOnStream(const BufferAllocations& buffer_allocations, std::vector tuple_element_addresses; for (BufferAllocation::Slice tuple_element_buffer : tuple_element_buffers_) { - perftools::gputools::DeviceMemoryBase tuple_element_address = + se::DeviceMemoryBase tuple_element_address = buffer_allocations.GetDeviceAddress(tuple_element_buffer); InfeedBuffer* buffer = infeed_manager->BlockingDequeueBuffer(); diff --git a/tensorflow/compiler/xla/service/gpu/infeed_thunk.h b/tensorflow/compiler/xla/service/gpu/infeed_thunk.h index 86918705fa0305217f11753e383200c7bd71474b..93713cb12defd95bdd69cb0aa7ad7b4e37fc8fae 100644 --- a/tensorflow/compiler/xla/service/gpu/infeed_thunk.h +++ b/tensorflow/compiler/xla/service/gpu/infeed_thunk.h @@ -44,7 +44,7 @@ class InfeedThunk : public Thunk { InfeedThunk& operator=(const InfeedThunk&) = delete; Status ExecuteOnStream(const BufferAllocations& buffer_allocations, - perftools::gputools::Stream* stream) override; + se::Stream* stream) override; private: const std::vector tuple_element_buffers_; diff --git a/tensorflow/compiler/xla/service/gpu/ir_emission_utils.cc 
b/tensorflow/compiler/xla/service/gpu/ir_emission_utils.cc index 32413f975a40c1abc334b16e81097bb44f56a44a..532d436ee82b985a4efe300f90223e1298e85765 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emission_utils.cc +++ b/tensorflow/compiler/xla/service/gpu/ir_emission_utils.cc @@ -160,14 +160,19 @@ static HloInstruction* CreateCudnnConv( Shape call_shape = ShapeUtil::MakeTupleShape({shape, ShapeUtil::MakeShape(U8, {0})}); - // Our CustomCall takes three arguments: The conv lhs and rhs, and the cudnn - // algorithm to use. It's up to a later pass to choose the algorithm, so to - // indicate that we haven't yet made a choice, we speicfy -1 for that arg. + // Our CustomCall takes four arguments: The conv lhs and rhs, the cudnn + // algorithm to use, and a boolean indicating whether to use tensor cores. + // + // It's up to a later pass to choose the algorithm and decide whether to use + // tensor cores, so to indicate that we haven't yet made a choice, we specify + // -1 and false for those args. 
HloInstruction* negative_one = computation->AddInstruction( HloInstruction::CreateConstant(Literal::CreateR0(-1))); + HloInstruction* false_constant = computation->AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR0(false))); HloInstruction* custom_call = computation->AddInstruction(HloInstruction::CreateCustomCall( - call_shape, {lhs, rhs, negative_one}, call_target)); + call_shape, {lhs, rhs, negative_one, false_constant}, call_target)); custom_call->set_window(window); custom_call->set_convolution_dimension_numbers(dnums); return custom_call; diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_context.h b/tensorflow/compiler/xla/service/gpu/ir_emitter_context.h index 3790ed313b9d0e167185a8b12c812132ee78811f..a78b4ff83075fd7ef330bb97ce217a198d450cf8 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter_context.h +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_context.h @@ -32,7 +32,7 @@ class IrEmitterContext { public: IrEmitterContext(const HloModule* hlo_module, const BufferAssignment* buffer_assignment, - const perftools::gputools::DeviceDescription* device_desc, + const se::DeviceDescription* device_desc, llvm::Module* llvm_module) : hlo_module_(hlo_module), buffer_assignment_(buffer_assignment), @@ -47,7 +47,7 @@ class IrEmitterContext { const BufferAssignment& buffer_assignment() const { return *buffer_assignment_; } - const perftools::gputools::DeviceDescription& device_description() const { + const se::DeviceDescription& device_description() const { return *device_desc_; } llvm::Module* llvm_module() { return llvm_module_; } @@ -56,7 +56,7 @@ class IrEmitterContext { private: const HloModule* hlo_module_; const BufferAssignment* buffer_assignment_; - const perftools::gputools::DeviceDescription* device_desc_; + const se::DeviceDescription* device_desc_; llvm::Module* llvm_module_; NameUniquer name_uniquer_; }; diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc 
b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc index d29cc21ab1c697f8481ed1e94846d4df5ec5c1dc..26e497762f2a6f23767c5b98f339eefdef0b7468 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc @@ -536,7 +536,27 @@ Status IrEmitterUnnested::HandleFusion(HloInstruction* fusion) { thunk_sequence_->emplace_back(BuildGemmThunk(fusion)); return Status::OK(); } - thunk_sequence_->emplace_back(BuildKernelThunk(fusion)); + + int max_unroll_factor = fusion->GetModule() + ->config() + .debug_options() + .xla_gpu_max_kernel_unroll_factor(); + + // Find the largest possible power of two to unroll by. + // TODO(kramerb): Make this smarter. + int unroll_factor = 1; + if (!fusion->IsMultiOutputFusion()) { + CHECK(fusion->fusion_kind() == HloInstruction::FusionKind::kLoop); + int64 num_elements = ShapeUtil::ElementsIn(fusion->shape()); + for (int i = max_unroll_factor; i > 1; i /= 2) { + if (num_elements % i == 0) { + unroll_factor = i; + break; + } + } + } + + thunk_sequence_->emplace_back(BuildKernelThunk(fusion, unroll_factor)); return IrEmitter::HandleFusion(fusion); } @@ -2021,7 +2041,7 @@ Status IrEmitterUnnested::HandleGather(HloInstruction* gather) { } std::unique_ptr IrEmitterUnnested::BuildKernelThunk( - const HloInstruction* inst) { + const HloInstruction* inst, int unroll_factor) { const BufferAssignment& buffer_assn = ir_emitter_context_->buffer_assignment(); @@ -2113,7 +2133,7 @@ std::unique_ptr IrEmitterUnnested::BuildKernelThunk( } return MakeUnique(buffers, llvm_ir::AsString(kernel->getName()), - inst); + inst, unroll_factor); } std::unique_ptr IrEmitterUnnested::BuildHostToDeviceCopyThunk( @@ -2485,21 +2505,28 @@ std::unique_ptr IrEmitterUnnested::BuildConditionalThunk( Status IrEmitterUnnested::EmitTargetElementLoopInThunk( const HloInstruction& hlo, const llvm_ir::ElementGenerator& element_generator, KernelThunk* thunk) { + int unroll_factor = thunk->unroll_factor(); 
VLOG(3) << bindings_.ToString(); const Shape& element_shape = hlo.IsMultiOutputFusion() ? ShapeUtil::GetSubshape(hlo.shape(), {0}) : hlo.shape(); + VLOG(3) << "EmitTargetElementLoopInThunk " + << ShapeUtil::HumanStringWithLayout(hlo.shape()) + << " for unroll_factor " << unroll_factor; LaunchDimensions launch_dimensions = CalculateLaunchDimensions( - element_shape, ir_emitter_context_->device_description()); + element_shape, ir_emitter_context_->device_description(), unroll_factor); UpdateLaunchDimensions(launch_dimensions, thunk, ir_emitter_context_->llvm_module()); if (!hlo.IsMultiOutputFusion()) { return ParallelLoopEmitter(element_generator, GetIrArray(hlo, hlo), - launch_dimensions, &ir_builder_) + launch_dimensions, &ir_builder_, unroll_factor) .EmitLoop(IrName(&hlo)); } + CHECK_EQ(unroll_factor, 1) + << "multi-output fusion does not support unrolling"; + // For multiple outputs fusion, we need to emit each operand and the root. std::vector output_arrays; for (int64 i = 0; i < ShapeUtil::TupleElementCount(hlo.shape()); ++i) { diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h index 66c62e2d2de3ed1668271a21943dc73ed3d77651..b842f480c6257c1a8bee8cdac55e29c5db6801a0 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h @@ -150,8 +150,10 @@ class IrEmitterUnnested : public IrEmitter { // Returns a KernelThunk that invokes the kernel emitted for `inst`. The // caller needs to make sure `inst` outlives the lifetime of the returned - // Thunk object. - std::unique_ptr BuildKernelThunk(const HloInstruction* inst); + // Thunk object. The kernel implementation will be unrolled if unroll_factor + // is greater than one. + std::unique_ptr BuildKernelThunk(const HloInstruction* inst, + int unroll_factor = 1); // Returns a FftThunk that calls cuFFT to implement `inst`. 
std::unique_ptr BuildFftThunk(const HloInstruction* inst); diff --git a/tensorflow/compiler/xla/service/gpu/kernel_thunk.cc b/tensorflow/compiler/xla/service/gpu/kernel_thunk.cc index c20a781a33fe89af4740ed31dd5bfb1a64473057..d376ef7a245eb9ed86939f44c611b6dde5606b23 100644 --- a/tensorflow/compiler/xla/service/gpu/kernel_thunk.cc +++ b/tensorflow/compiler/xla/service/gpu/kernel_thunk.cc @@ -23,17 +23,17 @@ limitations under the License. #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/stream_executor_no_cuda.h" -namespace se = ::perftools::gputools; - namespace xla { namespace gpu { KernelThunk::KernelThunk( tensorflow::gtl::ArraySlice args, - const string& kernel_name, const HloInstruction* hlo_instruction) + const string& kernel_name, const HloInstruction* hlo_instruction, + int unroll_factor) : Thunk(Kind::kKernel, hlo_instruction), args_(args.begin(), args.end()), - kernel_name_(kernel_name) {} + kernel_name_(kernel_name), + unroll_factor_(unroll_factor) {} tensorflow::Status KernelThunk::Initialize(const GpuExecutable& executable) { tensorflow::mutex_lock lock(mutex_); diff --git a/tensorflow/compiler/xla/service/gpu/kernel_thunk.h b/tensorflow/compiler/xla/service/gpu/kernel_thunk.h index 9ae455e2fcc253a7a08ff95764721048a16b0bf7..b556befe66b6bebba1a958f553f0a9b2c4eebbe4 100644 --- a/tensorflow/compiler/xla/service/gpu/kernel_thunk.h +++ b/tensorflow/compiler/xla/service/gpu/kernel_thunk.h @@ -47,20 +47,21 @@ class KernelThunk : public Thunk { // // `hlo_instruction` is as in Thunk. Other arguments are as the class members. 
KernelThunk(tensorflow::gtl::ArraySlice args, - const string& kernel_name, const HloInstruction* hlo_instruction); + const string& kernel_name, const HloInstruction* hlo_instruction, + int unroll_factor); KernelThunk(const KernelThunk&) = delete; KernelThunk& operator=(const KernelThunk&) = delete; ~KernelThunk() override = default; const string& kernel_name() const { return kernel_name_; } + int unroll_factor() const { return unroll_factor_; } void SetLaunchDimensions(const LaunchDimensions& launch_dims); tensorflow::Status Initialize(const GpuExecutable& executable) override; // Executes the kernel for the thunk on "stream", which must be non-null. tensorflow::Status ExecuteOnStream( - const BufferAllocations& buffer_allocations, - perftools::gputools::Stream* stream) override; + const BufferAllocations& buffer_allocations, se::Stream* stream) override; private: // Buffers passed to the kernel as arguments. @@ -69,6 +70,10 @@ class KernelThunk : public Thunk { // Entry kernel name for the computation. const string kernel_name_; + // The number of times this kernel should be unrolled. This works as a + // multiplier on the number of elements produced by a GPU thread. + const int unroll_factor_; + // The thread and block dimension used to launch the kernel. // Will be set by IrEmitterUnnested. LaunchDimensions launch_dimensions_; @@ -76,13 +81,11 @@ class KernelThunk : public Thunk { // Describes how to load this kernel. ExecuteOnStream reuses this loader // specification for all executions. 
mutable tensorflow::mutex mutex_; - std::unique_ptr loader_spec_ - GUARDED_BY(mutex_); + std::unique_ptr loader_spec_ GUARDED_BY(mutex_); // Loaded kernels for each `StreamExecutor` - std::unordered_map - kernel_cache_ GUARDED_BY(mutex_); + std::unordered_map kernel_cache_ + GUARDED_BY(mutex_); }; } // namespace gpu diff --git a/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc b/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc index defd281d74bd38f7da3f268e0f55970fc1af8263..df9d9be889ce839ee665cd4820b169c124d9fcde 100644 --- a/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc +++ b/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc @@ -34,7 +34,7 @@ limitations under the License. #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Bitcode/BitcodeReader.h" #include "llvm/Bitcode/BitcodeWriter.h" -#include "llvm/CodeGen/CommandFlags.def" +#include "llvm/CodeGen/CommandFlags.inc" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Module.h" diff --git a/tensorflow/compiler/xla/service/gpu/memset_thunk.cc b/tensorflow/compiler/xla/service/gpu/memset_thunk.cc index 18e673542c5b47cb90d31a8eff62a5e4adb78d1d..d4100a898b5bb9eec382c34932c2db104c9e985b 100644 --- a/tensorflow/compiler/xla/service/gpu/memset_thunk.cc +++ b/tensorflow/compiler/xla/service/gpu/memset_thunk.cc @@ -19,8 +19,6 @@ limitations under the License. 
namespace xla { namespace gpu { -namespace se = ::perftools::gputools; - Status MemzeroThunk::ExecuteOnStream( const BufferAllocations& buffer_allocations, se::Stream* stream) { se::DeviceMemoryBase dest_data = buffer_allocations.GetDeviceAddress(dest_); diff --git a/tensorflow/compiler/xla/service/gpu/memset_thunk.h b/tensorflow/compiler/xla/service/gpu/memset_thunk.h index b4bb74d1dd6dc9d09c5e4d439d57dfe8b57c2ed9..51c332d287d139335b356fc66411b5ffaa448b5a 100644 --- a/tensorflow/compiler/xla/service/gpu/memset_thunk.h +++ b/tensorflow/compiler/xla/service/gpu/memset_thunk.h @@ -36,7 +36,7 @@ class MemzeroThunk : public Thunk { : Thunk(Kind::kMemzero, hlo), dest_(dest) {} Status ExecuteOnStream(const BufferAllocations& buffer_allocations, - perftools::gputools::Stream* stream) override; + se::Stream* stream) override; private: const BufferAllocation::Slice dest_; @@ -52,7 +52,7 @@ class Memset32BitValueThunk : public Thunk { : Thunk(Kind::kMemset32BitValue, hlo), value_(value), dest_(dest) {} Status ExecuteOnStream(const BufferAllocations& buffer_allocations, - perftools::gputools::Stream* stream) override; + se::Stream* stream) override; private: uint32 value_; diff --git a/tensorflow/compiler/xla/service/gpu/parallel_loop_emitter.cc b/tensorflow/compiler/xla/service/gpu/parallel_loop_emitter.cc index 388dcc008b07a76ff9ed07df04181e49a8734f51..d8c07dc3119fb81a3ef22822acb11b7c4d5bbca5 100644 --- a/tensorflow/compiler/xla/service/gpu/parallel_loop_emitter.cc +++ b/tensorflow/compiler/xla/service/gpu/parallel_loop_emitter.cc @@ -32,25 +32,32 @@ namespace gpu { ParallelLoopEmitter::ParallelLoopEmitter( BodyEmitter body_emitter, const Shape& shape, - const LaunchDimensions& launch_dimensions, llvm::IRBuilder<>* ir_builder) + const LaunchDimensions& launch_dimensions, llvm::IRBuilder<>* ir_builder, + int unroll_factor) : LoopEmitter(body_emitter, shape, ir_builder), - launch_dimensions_(launch_dimensions) {} + launch_dimensions_(launch_dimensions), + 
unroll_factor_(unroll_factor) {} ParallelLoopEmitter::ParallelLoopEmitter( const llvm_ir::ElementGenerator& target_element_generator, tensorflow::gtl::ArraySlice target_arrays, - const LaunchDimensions& launch_dimensions, llvm::IRBuilder<>* ir_builder) + const LaunchDimensions& launch_dimensions, llvm::IRBuilder<>* ir_builder, + int unroll_factor) : LoopEmitter(target_element_generator, target_arrays, ir_builder), - launch_dimensions_(launch_dimensions) {} + launch_dimensions_(launch_dimensions), + unroll_factor_(unroll_factor) {} ParallelLoopEmitter::ParallelLoopEmitter( const llvm_ir::ElementGenerator& target_element_generator, const llvm_ir::IrArray& target_array, - const LaunchDimensions& launch_dimensions, llvm::IRBuilder<>* ir_builder) + const LaunchDimensions& launch_dimensions, llvm::IRBuilder<>* ir_builder, + int unroll_factor) : LoopEmitter(target_element_generator, target_array, ir_builder), - launch_dimensions_(launch_dimensions) {} + launch_dimensions_(launch_dimensions), + unroll_factor_(unroll_factor) {} -llvm_ir::IrArray::Index ParallelLoopEmitter::EmitIndexAndSetExitBasicBlock( +std::vector +ParallelLoopEmitter::EmitIndexAndSetExitBasicBlock( tensorflow::StringPiece loop_name) { // Emit the following code in LLVM IR: // linear_index = blockIdx.x * blockDim.x + threadIdx.x; @@ -63,6 +70,9 @@ llvm_ir::IrArray::Index ParallelLoopEmitter::EmitIndexAndSetExitBasicBlock( // "It is guaranteed that [...] 0 <= %ctaid.x < %nctaid.x" // // %nctaid.x is currently specified as 2147483647. 
+ VLOG(3) << "EmitIndexAndSetExitBasicBlock unroll_factor " << unroll_factor_; + std::vector array_indices; + llvm::Value* block_id = llvm_ir::EmitCallToIntrinsic( llvm::Intrinsic::nvvm_read_ptx_sreg_ctaid_x, {}, {}, ir_builder_); llvm_ir::AddRangeMetadata(0, launch_dimensions_.block_count(), @@ -81,7 +91,7 @@ llvm_ir::IrArray::Index ParallelLoopEmitter::EmitIndexAndSetExitBasicBlock( thread_id = ir_builder_->CreateZExt(thread_id, ir_builder_->getInt64Ty(), "thread_id"); - llvm::Value* linear_index = ir_builder_->CreateAdd( + llvm::Value* linear_index_base = ir_builder_->CreateAdd( ir_builder_->CreateMul( block_id, ir_builder_->getInt64(launch_dimensions_.threads_per_block()), "", @@ -99,15 +109,30 @@ llvm_ir::IrArray::Index ParallelLoopEmitter::EmitIndexAndSetExitBasicBlock( llvm_ir::EmitCallToIntrinsic( llvm::Intrinsic::assume, {ir_builder_->CreateICmpULT( - linear_index, + linear_index_base, ir_builder_->getInt64(launch_dimensions_.threads_per_block() * launch_dimensions_.block_count()), "linear_index_in_range")}, {}, ir_builder_); + if (unroll_factor_ > 1) { + linear_index_base = ir_builder_->CreateMul( + linear_index_base, ir_builder_->getInt64(unroll_factor_), + "linear_index_base", /*HasNUW=*/true, /*HasNSW=*/true); + } + + array_indices.emplace_back(linear_index_base, shape_, ir_builder_); + for (int i = 1; i < unroll_factor_; ++i) { + llvm::Value* linear_index = ir_builder_->CreateAdd( + linear_index_base, ir_builder_->getInt64(i), "linear_index", + /*HasNUW=*/true, /*HasNSW=*/true); + array_indices.emplace_back(linear_index, shape_, ir_builder_); + } + auto if_in_bounds = llvm_ir::EmitIfThenElse( ir_builder_->CreateICmpULT( - linear_index, ir_builder_->getInt64(ShapeUtil::ElementsIn(shape_))), + linear_index_base, + ir_builder_->getInt64(ShapeUtil::ElementsIn(shape_))), llvm_ir::IrName(loop_name, "in_bounds"), ir_builder_, false); // Set exit_bb_ to the exit block of the if structure. 
@@ -116,7 +141,8 @@ llvm_ir::IrArray::Index ParallelLoopEmitter::EmitIndexAndSetExitBasicBlock( // Set IR builder insertion point to the body of the if structure. llvm_ir::SetToFirstInsertPoint(if_in_bounds.true_block, ir_builder_); - return llvm_ir::IrArray::Index(linear_index, shape_, ir_builder_); + + return array_indices; } } // namespace gpu diff --git a/tensorflow/compiler/xla/service/gpu/parallel_loop_emitter.h b/tensorflow/compiler/xla/service/gpu/parallel_loop_emitter.h index 8ed63a854a74fc06c3c389f40fe1f5970885deac..25318b3bed8bf4a2dfe3a4a974269d0405c3bfec 100644 --- a/tensorflow/compiler/xla/service/gpu/parallel_loop_emitter.h +++ b/tensorflow/compiler/xla/service/gpu/parallel_loop_emitter.h @@ -34,13 +34,13 @@ class ParallelLoopEmitter : public llvm_ir::LoopEmitter { // The meanings of other parameters are the same as LoopEmitter. ParallelLoopEmitter(BodyEmitter body_emitter, const Shape& shape, const LaunchDimensions& launch_dimensions, - llvm::IRBuilder<>* ir_builder); + llvm::IRBuilder<>* ir_builder, int unroll_factor = 1); // Constructs a ParallelLoopEmitter from an element generator that generates // each element of the given target array. ParallelLoopEmitter(const llvm_ir::ElementGenerator& target_element_generator, const llvm_ir::IrArray& target_array, const LaunchDimensions& launch_dimensions, - llvm::IRBuilder<>* ir_builder); + llvm::IRBuilder<>* ir_builder, int unroll_factor = 1); // Constructs a loop emitter for a loop that generates on element of each of N // arrays on each iteration. 
@@ -50,18 +50,20 @@ class ParallelLoopEmitter : public llvm_ir::LoopEmitter { ParallelLoopEmitter( const llvm_ir::ElementGenerator& target_element_generator, tensorflow::gtl::ArraySlice target_arrays, - const LaunchDimensions& launch_dimensions, llvm::IRBuilder<>* ir_builder); + const LaunchDimensions& launch_dimensions, llvm::IRBuilder<>* ir_builder, + int unroll_factor = 1); ParallelLoopEmitter(const ParallelLoopEmitter&) = delete; ParallelLoopEmitter& operator=(const ParallelLoopEmitter&) = delete; ~ParallelLoopEmitter() override = default; - llvm_ir::IrArray::Index EmitIndexAndSetExitBasicBlock( + std::vector EmitIndexAndSetExitBasicBlock( tensorflow::StringPiece loop_name) override; private: // The thread and block dimension to parallelize the loop on. const LaunchDimensions launch_dimensions_; + const int unroll_factor_; }; } // namespace gpu diff --git a/tensorflow/compiler/xla/service/gpu/partition_assignment.cc b/tensorflow/compiler/xla/service/gpu/partition_assignment.cc index 6cf280df05496716a0780d61ded92efd9982734c..d3fd0544fb68809125e9b9f7a5e5b7eff8c6ef43 100644 --- a/tensorflow/compiler/xla/service/gpu/partition_assignment.cc +++ b/tensorflow/compiler/xla/service/gpu/partition_assignment.cc @@ -29,8 +29,6 @@ limitations under the License. #include "tensorflow/core/lib/strings/stringprintf.h" #include "tensorflow/core/platform/logging.h" -namespace se = ::perftools::gputools; - namespace xla { namespace gpu { @@ -44,12 +42,16 @@ std::ostream& operator<<(std::ostream& out, // Calculates the launch dimensions used to invoke `hlo`. 
LaunchDimensions CalculateLaunchDimensions( - const Shape& shape, const se::DeviceDescription& device_desc) { + const Shape& shape, const se::DeviceDescription& device_desc, + int unroll_factor) { int64 num_elements = ShapeUtil::ElementsIn(shape); if (num_elements <= 1) { return LaunchDimensions(); } + CHECK_EQ(num_elements % unroll_factor, 0); + num_elements = num_elements / unroll_factor; + // Since we don't do any inter-warp communication, we're free to choose any // block size we want, subject to hardware constraints. We choose the // smallest block size that allows the GPU to reach full occupancy (assuming diff --git a/tensorflow/compiler/xla/service/gpu/partition_assignment.h b/tensorflow/compiler/xla/service/gpu/partition_assignment.h index 0bf463a6ef95d5a32784838c08ad239752fd1acf..c125474edb1036090a926020f2b1e7fcf64c751a 100644 --- a/tensorflow/compiler/xla/service/gpu/partition_assignment.h +++ b/tensorflow/compiler/xla/service/gpu/partition_assignment.h @@ -57,8 +57,8 @@ std::ostream& operator<<(std::ostream& out, const LaunchDimensions& launch_dims); LaunchDimensions CalculateLaunchDimensions( - const Shape& shape, - const perftools::gputools::DeviceDescription& device_desc); + const Shape& shape, const se::DeviceDescription& device_desc, + int unroll_factor = 1); } // namespace gpu } // namespace xla diff --git a/tensorflow/compiler/xla/service/gpu/sequential_thunk.cc b/tensorflow/compiler/xla/service/gpu/sequential_thunk.cc index d8a43091d4037a0edd125a4a1b6cb5ad7c7065f0..c8510808f10a731af90154447bd3e1e037db6348 100644 --- a/tensorflow/compiler/xla/service/gpu/sequential_thunk.cc +++ b/tensorflow/compiler/xla/service/gpu/sequential_thunk.cc @@ -33,8 +33,7 @@ tensorflow::Status SequentialThunk::Initialize( } tensorflow::Status SequentialThunk::ExecuteOnStream( - const BufferAllocations& buffer_allocations, - perftools::gputools::Stream* stream) { + const BufferAllocations& buffer_allocations, se::Stream* stream) { for (const auto& thunk : thunks_) { 
TF_RETURN_IF_ERROR(thunk->ExecuteOnStream(buffer_allocations, stream)); } diff --git a/tensorflow/compiler/xla/service/gpu/sequential_thunk.h b/tensorflow/compiler/xla/service/gpu/sequential_thunk.h index 32c5b748aba14239d6795d14e442c1c3b43d010e..df17b8d67b80321c7088243eae46e7a723b4ede9 100644 --- a/tensorflow/compiler/xla/service/gpu/sequential_thunk.h +++ b/tensorflow/compiler/xla/service/gpu/sequential_thunk.h @@ -40,8 +40,7 @@ class SequentialThunk : public Thunk { tensorflow::Status Initialize(const GpuExecutable& executable) override; tensorflow::Status ExecuteOnStream( - const BufferAllocations& buffer_allocations, - perftools::gputools::Stream* stream) override; + const BufferAllocations& buffer_allocations, se::Stream* stream) override; private: // The list of sub-thunks. diff --git a/tensorflow/compiler/xla/service/gpu/thunk.h b/tensorflow/compiler/xla/service/gpu/thunk.h index 9eea958d1214b131d49cb4e28f1944860408d3a8..a0c785ed913109e987d058124c8ef49019c98500 100644 --- a/tensorflow/compiler/xla/service/gpu/thunk.h +++ b/tensorflow/compiler/xla/service/gpu/thunk.h @@ -85,8 +85,7 @@ class Thunk { // This value is not required to be constant for a given Thunk. For example, // a Thunk that performs autotuning may return true for its first run and // false thereafter. - virtual bool ShouldHaltAllActivityBeforeRunning( - perftools::gputools::Stream* /*stream*/) { + virtual bool ShouldHaltAllActivityBeforeRunning(se::Stream* /*stream*/) { return false; } @@ -104,8 +103,7 @@ class Thunk { // called after Initialize and can be called multiple times over Thunk's // lifetime. Stream argument must be non-null. 
virtual tensorflow::Status ExecuteOnStream( - const BufferAllocations& buffer_allocations, - perftools::gputools::Stream* stream) = 0; + const BufferAllocations& buffer_allocations, se::Stream* stream) = 0; private: Kind kind_; diff --git a/tensorflow/compiler/xla/service/gpu/tuple_thunk.cc b/tensorflow/compiler/xla/service/gpu/tuple_thunk.cc index bd65e72393a59e72671ff0cc32c37eaa48856255..ecb54857ccc40ead21e5a18d79a37b545680021d 100644 --- a/tensorflow/compiler/xla/service/gpu/tuple_thunk.cc +++ b/tensorflow/compiler/xla/service/gpu/tuple_thunk.cc @@ -17,8 +17,6 @@ limitations under the License. #include "tensorflow/compiler/xla/util.h" -namespace se = ::perftools::gputools; - namespace xla { namespace gpu { diff --git a/tensorflow/compiler/xla/service/gpu/tuple_thunk.h b/tensorflow/compiler/xla/service/gpu/tuple_thunk.h index 3b1a496328540ae69a449e7080903d31284885d1..8b459c29a136a6e7853e68a1bead7d12c0d08ad0 100644 --- a/tensorflow/compiler/xla/service/gpu/tuple_thunk.h +++ b/tensorflow/compiler/xla/service/gpu/tuple_thunk.h @@ -46,8 +46,7 @@ class TupleThunk : public Thunk { TupleThunk& operator=(const TupleThunk&) = delete; tensorflow::Status ExecuteOnStream( - const BufferAllocations& buffer_allocations, - perftools::gputools::Stream* stream) override; + const BufferAllocations& buffer_allocations, se::Stream* stream) override; private: const std::vector tuple_element_buffers_; diff --git a/tensorflow/compiler/xla/service/gpu/while_thunk.cc b/tensorflow/compiler/xla/service/gpu/while_thunk.cc index c21559af6d2e5dfb5aaf62afcdcaed514e0914c9..a9f3d619a3ffd6d849572355e2902375e43508fa 100644 --- a/tensorflow/compiler/xla/service/gpu/while_thunk.cc +++ b/tensorflow/compiler/xla/service/gpu/while_thunk.cc @@ -41,8 +41,8 @@ Status WhileThunk::Initialize(const GpuExecutable& executable) { } Status WhileThunk::ExecuteOnStream(const BufferAllocations& buffer_allocations, - perftools::gputools::Stream* stream) { - perftools::gputools::DeviceMemoryBase condition_result_data 
= + se::Stream* stream) { + se::DeviceMemoryBase condition_result_data = buffer_allocations.GetDeviceAddress(condition_result_buffer_index_); while (true) { diff --git a/tensorflow/compiler/xla/service/gpu/while_thunk.h b/tensorflow/compiler/xla/service/gpu/while_thunk.h index 4c9f45de9e42494df58706d0a4a3eb0c4220b8b8..e589ca78a7ea00e7592d6e09ead9c270f902702f 100644 --- a/tensorflow/compiler/xla/service/gpu/while_thunk.h +++ b/tensorflow/compiler/xla/service/gpu/while_thunk.h @@ -47,7 +47,7 @@ class WhileThunk : public Thunk { Status Initialize(const GpuExecutable& executable) override; Status ExecuteOnStream(const BufferAllocations& buffer_allocations, - perftools::gputools::Stream* stream) override; + se::Stream* stream) override; private: const BufferAllocation::Slice condition_result_buffer_index_; diff --git a/tensorflow/compiler/xla/service/hlo.proto b/tensorflow/compiler/xla/service/hlo.proto index 0b446c654779db410ebbd91ef9a5bab14d08a278..0c3eb7dcb44f1203ae10e238f194e41e9ecaea74 100644 --- a/tensorflow/compiler/xla/service/hlo.proto +++ b/tensorflow/compiler/xla/service/hlo.proto @@ -135,6 +135,10 @@ message HloInstructionProto { xla.GatherDimensionNumbers gather_dimension_numbers = 33; repeated int64 gather_window_bounds = 34; + // Compute Host. + string channel_name = 41; + int64 cost_estimate_ns = 42; + // The id of this instruction. int64 id = 35; @@ -292,3 +296,20 @@ message HloProto { HloOrderingProto hlo_ordering = 2; BufferAssignmentProto buffer_assignment = 3; } + +// Encapsulates HloProto together with the arguments, result, and +// execution_platform. This message is used for purposes such as +// analysis/replay/file-storage. +message HloSession { + // The hlo graph. + HloProto hlo = 1; + + // The arguments passed to the graph. + repeated LiteralProto arguments = 2; + + // The result of the graph. + LiteralProto result = 3; + + // The name of the platform used to run the graph. 
+ string execution_platform = 4; +} diff --git a/tensorflow/compiler/xla/service/hlo_computation.cc b/tensorflow/compiler/xla/service/hlo_computation.cc index 6f983d0b950435d43fe3a1e0fe84902b51bfe249..594413e88fb26e86b198d08b2e4db77fad671348 100644 --- a/tensorflow/compiler/xla/service/hlo_computation.cc +++ b/tensorflow/compiler/xla/service/hlo_computation.cc @@ -304,19 +304,15 @@ void ComputeComputationPostOrder( HloComputation* computation, tensorflow::gtl::FlatSet* visited, std::list* post_order) { - if (visited->count(computation) > 0) { - return; - } - - for (auto* instruction : computation->instructions()) { - for (HloComputation* called_computation : - instruction->called_computations()) { - ComputeComputationPostOrder(called_computation, visited, post_order); + if (visited->insert(computation).second) { + for (auto* instruction : computation->instructions()) { + for (HloComputation* called_computation : + instruction->called_computations()) { + ComputeComputationPostOrder(called_computation, visited, post_order); + } } + post_order->push_back(computation); } - - visited->insert(computation); - post_order->push_back(computation); } } // namespace diff --git a/tensorflow/compiler/xla/service/hlo_creation_utils.cc b/tensorflow/compiler/xla/service/hlo_creation_utils.cc index b186767ce792cd89ae77fe9a03b3a2ecf296b804..9a89888480b8c79dfb1f79a50e9686bf45aa49b3 100644 --- a/tensorflow/compiler/xla/service/hlo_creation_utils.cc +++ b/tensorflow/compiler/xla/service/hlo_creation_utils.cc @@ -163,6 +163,8 @@ StatusOr MakeConcatHlo(ArraySlice operands, } StatusOr CollapseFirstNDims(HloInstruction* operand, int64 n) { + CHECK_GT(n, 0); + const Shape& operand_shape = operand->shape(); CHECK_GE(operand_shape.dimensions_size(), n); int64 new_shape_leading_bound = 1; @@ -184,6 +186,17 @@ StatusOr CollapseFirstNDims(HloInstruction* operand, int64 n) { return MakeReshapeHlo(output_shape, operand); } +StatusOr PrependDegenerateDims(HloInstruction* operand, + int64 n) { + 
CHECK_GT(n, 0); + std::vector new_shape_dims; + const Shape& operand_shape = operand->shape(); + new_shape_dims.reserve(n + operand_shape.dimensions_size()); + new_shape_dims.insert(new_shape_dims.begin(), n, 1); + c_copy(operand_shape.dimensions(), std::back_inserter(new_shape_dims)); + return MakeReshapeHlo(new_shape_dims, operand); +} + StatusOr ExpandFirstDimIntoNDims( HloInstruction* operand, ArraySlice expanded_dims) { CHECK_GT(operand->shape().dimensions_size(), 0); diff --git a/tensorflow/compiler/xla/service/hlo_creation_utils.h b/tensorflow/compiler/xla/service/hlo_creation_utils.h index d99e32a737e6aaa2ff746cf6c00d4300cf62f4e1..c9a7361a6af0c2a0839c59a0ea695ec1b9a98bd4 100644 --- a/tensorflow/compiler/xla/service/hlo_creation_utils.h +++ b/tensorflow/compiler/xla/service/hlo_creation_utils.h @@ -103,12 +103,22 @@ StatusOr MakeConcatHlo( // their operand(s). // Collapses (via reshape) the first N (logical) dimensions of `operand` into a -// single leading dimension. `operand` must have rank > n. +// single leading dimension. `operand` must have rank > `n` and `n` must not be +// 0. // // For instance if `operand` has shape f32[7,8,9] and n is 2 then the output is // the `operand` reshaped to [56,9]. StatusOr CollapseFirstNDims(HloInstruction* operand, int64 n); +// Prepends `n` degenerate dimensions (dimensions with bound = 1) to `operand` +// using a reshape. +// +// For instance if operand has shape f32[3,4,5] then this returns the operand +// reshaped to f32[1,3,4,5]. If the operand is a f32 scalar (i.e. has shape +// f32[]) then this returns the operand reshaped to f32[1]. +StatusOr PrependDegenerateDims(HloInstruction* operand, + int64 n); + // Expands (via reshape) the first (logical) dimension of `operand` into a // sequence of `expanded_dims` dimensions. 
`operand` must at least be of rank 1 // and the number of elements in its first dimension must be equal to the diff --git a/tensorflow/compiler/xla/service/hlo_creation_utils_test.cc b/tensorflow/compiler/xla/service/hlo_creation_utils_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..6b681a5bf6f34b724bed52d60df59c2ac1068b52 --- /dev/null +++ b/tensorflow/compiler/xla/service/hlo_creation_utils_test.cc @@ -0,0 +1,234 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/compiler/xla/service/hlo_creation_utils.h" +#include "tensorflow/compiler/xla/ptr_util.h" +#include "tensorflow/compiler/xla/service/hlo_evaluator.h" +#include "tensorflow/compiler/xla/service/hlo_module.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/test.h" +#include "tensorflow/core/platform/test.h" + +namespace xla { +namespace { +using tensorflow::gtl::ArraySlice; + +std::unique_ptr CreateModuleWithProgramShape( + PrimitiveType primitive_type, ArraySlice input_shape_dims, + ArraySlice output_shape_dims, HloInstruction** param, + HloComputation** entry_computation) { + Shape input_shape = ShapeUtil::MakeShape(primitive_type, input_shape_dims); + Shape output_shape = ShapeUtil::MakeShape(primitive_type, output_shape_dims); + std::unique_ptr module = MakeUnique("test"); + *entry_computation = module->AddEntryComputation( + CreateComputationWithSignature({&input_shape}, output_shape, "entry") + .ValueOrDie()); + *param = (*entry_computation)->parameter_instruction(0); + return module; +} + +TEST(HloCreationUtilsTest, CollapseFirst1Dim) { + HloInstruction* param; + HloComputation* entry_computation; + + std::unique_ptr module = CreateModuleWithProgramShape( + S32, + /*input_shape_dims=*/{2}, /*output_shape_dims=*/{2}, ¶m, + &entry_computation); + + TF_ASSERT_OK_AND_ASSIGN(HloInstruction * first_1_dims_collapsed, + CollapseFirstNDims(param, 1)); + entry_computation->set_root_instruction(first_1_dims_collapsed); + + HloEvaluator evaluator; + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr result_literal, + evaluator.Evaluate>( + *module, {Literal::CreateR1({3, 4})})); + CHECK_EQ(*result_literal, *Literal::CreateR1({3, 4})); +} + +TEST(HloCreationUtilsTest, CollapseFirst2Dims) { + HloInstruction* param; + HloComputation* entry_computation; + + std::unique_ptr module = CreateModuleWithProgramShape( + S32, + /*input_shape_dims=*/{2, 3, 
2}, /*output_shape_dims=*/{6, 2}, ¶m, + &entry_computation); + + TF_ASSERT_OK_AND_ASSIGN(HloInstruction * first_2_dims_collapsed, + CollapseFirstNDims(param, 2)); + entry_computation->set_root_instruction(first_2_dims_collapsed); + + HloEvaluator evaluator; + TF_ASSERT_OK_AND_ASSIGN( + std::unique_ptr result_literal, + evaluator.Evaluate>( + *module, + {Literal::CreateR3( + {{{1, 2}, {3, 4}, {5, 6}}, {{-1, -2}, {-3, -4}, {-5, -6}}})})); + CHECK_EQ(*result_literal, + *Literal::CreateR2( + {{1, 2}, {3, 4}, {5, 6}, {-1, -2}, {-3, -4}, {-5, -6}})); +} + +TEST(HloCreationUtilsTest, Prepend1DegenerateDim) { + HloInstruction* param; + HloComputation* entry_computation; + + std::unique_ptr module = CreateModuleWithProgramShape( + S32, + /*input_shape_dims=*/{2}, /*output_shape_dims=*/{1, 2}, ¶m, + &entry_computation); + + TF_ASSERT_OK_AND_ASSIGN(HloInstruction * with_1_degenerate_dim_prepended, + PrependDegenerateDims(param, 1)); + entry_computation->set_root_instruction(with_1_degenerate_dim_prepended); + + HloEvaluator evaluator; + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr result_literal, + evaluator.Evaluate>( + *module, {Literal::CreateR1({9, 10})})); + CHECK_EQ(*result_literal, *Literal::CreateR2({{9, 10}})); +} + +TEST(HloCreationUtilsTest, Prepend2DegenerateDims) { + HloInstruction* param; + HloComputation* entry_computation; + + std::unique_ptr module = CreateModuleWithProgramShape( + S32, + /*input_shape_dims=*/{2}, /*output_shape_dims=*/{1, 1, 2}, ¶m, + &entry_computation); + + TF_ASSERT_OK_AND_ASSIGN(HloInstruction * with_2_degenerate_dims_prepended, + PrependDegenerateDims(param, 2)); + entry_computation->set_root_instruction(with_2_degenerate_dims_prepended); + + HloEvaluator evaluator; + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr result_literal, + evaluator.Evaluate>( + *module, {Literal::CreateR1({9, 10})})); + CHECK_EQ(*result_literal, *Literal::CreateR3({{{9, 10}}})); +} + +TEST(HloCreationUtilsTest, Prepend2DegenerateDimsToScalar) { + HloInstruction* param; + 
HloComputation* entry_computation; + + std::unique_ptr module = CreateModuleWithProgramShape( + S32, + /*input_shape_dims=*/{}, /*output_shape_dims=*/{1, 1}, ¶m, + &entry_computation); + + TF_ASSERT_OK_AND_ASSIGN(HloInstruction * with_2_degenerate_dims_prepended, + PrependDegenerateDims(param, 2)); + entry_computation->set_root_instruction(with_2_degenerate_dims_prepended); + + HloEvaluator evaluator; + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr result_literal, + evaluator.Evaluate>( + *module, {Literal::CreateR0(9)})); + CHECK_EQ(*result_literal, *Literal::CreateR2({{9}})); +} + +TEST(HloCreationUtilsTest, ExpandFirstDimInto3Dims) { + HloInstruction* param; + HloComputation* entry_computation; + + std::unique_ptr module = CreateModuleWithProgramShape( + S32, + /*input_shape_dims=*/{6}, /*output_shape_dims=*/{3, 1, 2}, ¶m, + &entry_computation); + + TF_ASSERT_OK_AND_ASSIGN(HloInstruction * first_dim_expanded, + ExpandFirstDimIntoNDims(param, {3, 1, 2})); + entry_computation->set_root_instruction(first_dim_expanded); + + HloEvaluator evaluator; + TF_ASSERT_OK_AND_ASSIGN( + std::unique_ptr result_literal, + evaluator.Evaluate>( + *module, {Literal::CreateR1({1, 2, 3, 4, 5, 6})})); + CHECK_EQ(*result_literal, + *Literal::CreateR3({{{1, 2}}, {{3, 4}}, {{5, 6}}})); +} + +TEST(HloCreationUtilsTest, PadVectorWithZeros) { + HloInstruction* param; + HloComputation* entry_computation; + + std::unique_ptr module = CreateModuleWithProgramShape( + S32, + /*input_shape_dims=*/{2}, /*output_shape_dims=*/{6}, ¶m, + &entry_computation); + + TF_ASSERT_OK_AND_ASSIGN( + HloInstruction * zero_padded_param, + PadVectorWithZeros(param, /*zeros_to_prepend=*/3, /*zeros_to_append=*/1)); + entry_computation->set_root_instruction(zero_padded_param); + + HloEvaluator evaluator; + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr result_literal, + evaluator.Evaluate>( + *module, {Literal::CreateR1({3, 4})})); + CHECK_EQ(*result_literal, *Literal::CreateR1({0, 0, 0, 3, 4, 0})); +} + 
+TEST(HloCreationUtilsTest, BroadcastZeros_S32) { + HloInstruction* param; + HloComputation* entry_computation; + + std::unique_ptr module = CreateModuleWithProgramShape( + S32, + /*input_shape_dims=*/{}, /*output_shape_dims=*/{2, 2}, ¶m, + &entry_computation); + + TF_ASSERT_OK_AND_ASSIGN( + HloInstruction * zeros, + BroadcastZeros(module->entry_computation(), S32, {2, 2})); + entry_computation->set_root_instruction(zeros); + + HloEvaluator evaluator; + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr result_literal, + evaluator.Evaluate>( + *module, {Literal::CreateR0(0)})); + CHECK_EQ(*result_literal, *Literal::CreateR2({{0, 0}, {0, 0}})); +} + +TEST(HloCreationUtilsTest, BroadcastZeros_F32) { + HloInstruction* param; + HloComputation* entry_computation; + + std::unique_ptr module = CreateModuleWithProgramShape( + F32, + /*input_shape_dims=*/{}, /*output_shape_dims=*/{2, 2}, ¶m, + &entry_computation); + + TF_ASSERT_OK_AND_ASSIGN( + HloInstruction * zeros, + BroadcastZeros(module->entry_computation(), F32, {2, 2})); + entry_computation->set_root_instruction(zeros); + + HloEvaluator evaluator; + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr result_literal, + evaluator.Evaluate>( + *module, {Literal::CreateR0(0.0f)})); + CHECK_EQ(*result_literal, + *Literal::CreateR2({{0.0f, 0.0f}, {0.0f, 0.0f}})); +} + +} // namespace +} // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_element_type_converter.cc b/tensorflow/compiler/xla/service/hlo_element_type_converter.cc index c782d1b0add17c70e0f54826917df251d5a613e2..d236f83aeb9254b9c6e6d04629758ac2c8fd0da3 100644 --- a/tensorflow/compiler/xla/service/hlo_element_type_converter.cc +++ b/tensorflow/compiler/xla/service/hlo_element_type_converter.cc @@ -178,24 +178,37 @@ StatusOr HloElementTypeConverter::Run(HloModule* module) { if (hlo->shape().element_type() == eliminate_type_) { Shape shape = ShapeUtil::ChangeElementType(hlo->shape(), replace_with_type_); + new_hlo = computation->AddInstruction( 
hlo->CloneWithNewOperands(shape, new_operands, hlo->GetModule())); + TF_RETURN_IF_ERROR(new_hlo->CopyAllControlDepsFrom(hlo)); + new_hlo = ToElementType(new_hlo, eliminate_type_); } else if (ShapeUtil::IsTuple(hlo->shape())) { Shape old_shape = hlo->shape(); Shape new_shape = GetConvertedTupleShape(hlo->shape(), eliminate_type_, replace_with_type_); + new_hlo = computation->AddInstruction(hlo->CloneWithNewOperands( new_shape, new_operands, hlo->GetModule())); + TF_RETURN_IF_ERROR(new_hlo->CopyAllControlDepsFrom(hlo)); + // Convert the elements of the result of `new_hlo` to produce a new // tuple with shape `old_shape`. new_hlo = ConvertTupleElements(new_hlo, old_shape); } else { new_hlo = computation->AddInstruction(hlo->CloneWithNewOperands( hlo->shape(), new_operands, hlo->GetModule())); + TF_RETURN_IF_ERROR(new_hlo->CopyAllControlDepsFrom(hlo)); } - TF_RETURN_IF_ERROR(computation->ReplaceInstruction(hlo, new_hlo)); + TF_RETURN_IF_ERROR(hlo->ReplaceAllUsesWith(new_hlo)); + TF_RETURN_IF_ERROR(hlo->DropAllControlDeps()); + + // NB! We want to replace and remove side effecting instructions like Rng + // as well so we can't rely HloComputation::ReplaceInstruction to reliably + // remove the replaced instruction. 
+ TF_RETURN_IF_ERROR(computation->RemoveInstruction(hlo)); changed = true; } } diff --git a/tensorflow/compiler/xla/service/hlo_element_type_converter_test.cc b/tensorflow/compiler/xla/service/hlo_element_type_converter_test.cc index cb94d9f19b825d1321263a4737b66a6bf198a772..5c5a059e0fd895f03bc26a975609b57333237faf 100644 --- a/tensorflow/compiler/xla/service/hlo_element_type_converter_test.cc +++ b/tensorflow/compiler/xla/service/hlo_element_type_converter_test.cc @@ -22,6 +22,12 @@ namespace { namespace op = xla::testing::opcode_matchers; +using ::testing::Contains; +using ::testing::ElementsAre; +using ::testing::Eq; +using ::testing::Not; +using ::testing::ResultOf; + class HloElementTypeConverterTest : public HloTestBase { public: std::unique_ptr CreateModuleFromHloString( @@ -117,5 +123,65 @@ TEST_F(HloElementTypeConverterTest, BatchNormGradBF16Converted) { op::Convert(op::GetTupleElement(batch_norm, 2)))); } +TEST_F(HloElementTypeConverterTest, RngIsRemoved) { + const string& hlo_string = R"( +HloModule RngIsRemoved + +ENTRY main { + constant.3 = bf16[] constant(0) + constant.4 = bf16[] constant(1) + ROOT rng = bf16[1,1000,20]{2,1,0} rng(constant.3, constant.4), distribution=rng_uniform +} + )"; + auto module = CreateModuleFromHloString(hlo_string); + HloElementTypeConverter type_converter(BF16, F32); + TF_ASSERT_OK_AND_ASSIGN(bool converted, type_converter.Run(module.get())); + EXPECT_TRUE(converted); + + std::function is_bf16_rng = + [](const HloInstruction* inst) { + return inst->shape().element_type() == BF16 && + inst->opcode() == HloOpcode::kRng; + }; + + EXPECT_THAT(module->entry_computation()->instructions(), + Not(Contains(ResultOf(is_bf16_rng, Eq(true))))); +} + +TEST_F(HloElementTypeConverterTest, RngCtrlDep) { + const string& hlo_string = R"( +HloModule RngIsRemoved + +ENTRY main { + constant.3 = bf16[] constant(0) + constant.4 = bf16[] constant(1) + rng0 = bf16[1,2000,20]{2,1,0} rng(constant.3, constant.4), distribution=rng_uniform + ROOT rng1 = 
bf16[1,1000,20]{2,1,0} rng(constant.3, constant.4), control-predecessors={%rng0}, distribution=rng_uniform +} + )"; + auto module = CreateModuleFromHloString(hlo_string); + + HloElementTypeConverter type_converter(BF16, F32); + TF_ASSERT_OK_AND_ASSIGN(bool converted, type_converter.Run(module.get())); + EXPECT_TRUE(converted); + + HloInstruction *rng0, *rng1; + for (auto* inst : module->entry_computation()->instructions()) { + if (inst->opcode() == HloOpcode::kRng) { + const Shape& shape = inst->shape(); + ASSERT_EQ(shape.dimensions_size(), 3); + ASSERT_TRUE(shape.dimensions(1) == 2000 || shape.dimensions(1) == 1000); + if (shape.dimensions(1) == 2000) { + rng0 = inst; + } else { + rng1 = inst; + } + } + } + + EXPECT_THAT(rng0->control_successors(), ElementsAre(rng1)); + EXPECT_THAT(rng1->control_predecessors(), ElementsAre(rng0)); +} + } // namespace } // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc index 693004d364114b1a25ce6b6791092665c861d13f..c5e30148345fec2029bf533fcfa9deb89662ec83 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator.cc +++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc @@ -202,6 +202,25 @@ void IterateThroughWindow( } while (IndexUtil::BumpIndices(window_shape, &window_index)); } +// Creates a vector of multipliers which can be used to create a linear index +// into shape. +// +// Given the multidimensional index {i1, ..., iN} and +// M = MakeDimMultipliers(shape), the corresponding linear index LI is simply +// +// LI = i1 * M[1] + i2 * M[2] + ... + iN * M[N]. +// +// This lets you calculate LI given the multidimensional indices in any order. 
+DimensionVector MakeDimMultipliers(const Shape& shape) { + DimensionVector v(ShapeUtil::Rank(shape)); + int64 scale = 1; + for (auto dim : LayoutUtil::MinorToMajor(shape)) { + v[dim] = scale; + scale *= shape.dimensions(dim); + } + return v; +} + } // namespace template @@ -380,6 +399,22 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { return Status::OK(); } + Status HandleBitcastConvert(HloInstruction* convert) override { + const HloInstruction* operand = convert->operand(0); + TF_RET_CHECK(ShapeUtil::SameDimensions(operand->shape(), convert->shape())); + TF_ASSIGN_OR_RETURN(std::unique_ptr result, + parent_->GetEvaluatedLiteralFor(operand).BitcastConvert( + convert->shape().element_type())); + + if (LayoutUtil::LayoutsInShapesEqual(result->shape(), convert->shape())) { + parent_->evaluated_[convert] = std::move(result); + } else { + parent_->evaluated_[convert] = + result->Relayout(convert->shape().layout()); + } + return Status::OK(); + } + Status HandleExp(HloInstruction* exp) override { TF_ASSIGN_OR_RETURN(parent_->evaluated_[exp], ElementWiseUnaryOp(exp, [](ElementwiseT elem_operand) { @@ -979,18 +1014,6 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { const Literal& lhs_literal = parent_->GetEvaluatedLiteralFor(lhs); const Literal& rhs_literal = parent_->GetEvaluatedLiteralFor(rhs); - // Dimension number applicable for input (lhs). - const int64 input_batch_dim = dnums.input_batch_dimension(); - const int64 input_z_dim = dnums.input_feature_dimension(); - // Dimension number applicable for kernel (rhs). - const int64 kernel_input_z_dim = dnums.kernel_input_feature_dimension(); - const int64 kernel_output_z_dim = dnums.kernel_output_feature_dimension(); - // Dimension number applicable for output. 
- const int64 output_batch_dim = dnums.output_batch_dimension(); - const int64 output_z_dim = dnums.output_feature_dimension(); - - const int64 z_size = ShapeUtil::GetDimension(lhs_shape, input_z_dim); - std::vector window_dimension_sizes; for (auto i : dnums.kernel_spatial_dimensions()) { window_dimension_sizes.push_back(ShapeUtil::GetDimension(rhs_shape, i)); @@ -999,25 +1022,43 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { const Shape& window_shape = ShapeUtil::MakeShape(rhs_shape.element_type(), window_dimension_sizes); - DimensionVector lhs_index(lhs_rank); - DimensionVector rhs_index(rhs_rank); - DimensionVector rhs_spatial_index(dnums.kernel_spatial_dimensions_size()); + DimensionVector lhs_dim_multipliers = MakeDimMultipliers(lhs_shape); + DimensionVector rhs_dim_multipliers = MakeDimMultipliers(rhs_shape); - auto func = [&](ArraySlice out_index) { - ElementwiseT result_val = static_cast(0); + auto lhs_literal_data = lhs_literal.data(); + auto rhs_literal_data = rhs_literal.data(); - std::fill(lhs_index.begin(), lhs_index.end(), 0); - std::fill(rhs_index.begin(), rhs_index.end(), 0); - std::fill(rhs_spatial_index.begin(), rhs_spatial_index.end(), 0); + auto func = [&window_shape, &dnums, &lhs_shape, &rhs_shape, &window, + &lhs_dim_multipliers, &rhs_dim_multipliers, lhs_literal_data, + rhs_literal_data](ArraySlice out_index) { + // Dimension number applicable for input (lhs). + const int64 input_batch_dim = dnums.input_batch_dimension(); + const int64 input_z_dim = dnums.input_feature_dimension(); + // Dimension number applicable for kernel (rhs). + const int64 kernel_input_z_dim = dnums.kernel_input_feature_dimension(); + const int64 kernel_output_z_dim = dnums.kernel_output_feature_dimension(); + // Dimension number applicable for output. 
+ const int64 output_batch_dim = dnums.output_batch_dimension(); + const int64 output_z_dim = dnums.output_feature_dimension(); - lhs_index[input_batch_dim] = out_index[output_batch_dim]; - rhs_index[kernel_output_z_dim] = out_index[output_z_dim]; + const int64 z_size = ShapeUtil::GetDimension(lhs_shape, input_z_dim); + + ElementwiseT result_val = static_cast(0); + DimensionVector rhs_spatial_index(dnums.kernel_spatial_dimensions_size(), + 0); // Convolve input feature with kernel. do { for (int64 iz = 0; iz < z_size; ++iz) { - lhs_index[input_z_dim] = iz; - rhs_index[kernel_input_z_dim] = iz; + int64 lhs_linear_index = 0; + lhs_linear_index += out_index[output_batch_dim] * + lhs_dim_multipliers[input_batch_dim]; + lhs_linear_index += iz * lhs_dim_multipliers[input_z_dim]; + + int64 rhs_linear_index = 0; + rhs_linear_index += out_index[output_z_dim] * + rhs_dim_multipliers[kernel_output_z_dim]; + rhs_linear_index += iz * rhs_dim_multipliers[kernel_input_z_dim]; // Find corresponding spatial dimension index for input (lhs). for (int64 ki = 0; ki < rhs_spatial_index.size(); ++ki) { @@ -1042,29 +1083,32 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { // Calculate the actual lhs (input) index after dilation. As an // optimization, skip this integer divide if there's no dilation. + int64 lhs_spatial_index; if (window_dim.base_dilation() > 1) { - lhs_index[input_spatial_dim] = - undilated_index / window_dim.base_dilation(); + lhs_spatial_index = undilated_index / window_dim.base_dilation(); } else { - lhs_index[input_spatial_dim] = undilated_index; + lhs_spatial_index = undilated_index; } + lhs_linear_index += + lhs_spatial_index * lhs_dim_multipliers[input_spatial_dim]; - // Skip if input index is not in bound. - if (!(lhs_index[input_spatial_dim] >= 0 && - lhs_index[input_spatial_dim] < + // Skip if input index is not in bounds. 
+ if (!(lhs_spatial_index >= 0 && + lhs_spatial_index < lhs_shape.dimensions(input_spatial_dim))) { goto cnt; } - rhs_index[dnums.kernel_spatial_dimensions(ki)] = - window_dim.window_reversal() - ? ((window_dim.size() - 1) - rhs_spatial_index[ki]) - : rhs_spatial_index[ki]; + rhs_linear_index += + (window_dim.window_reversal() + ? ((window_dim.size() - 1) - rhs_spatial_index[ki]) + : rhs_spatial_index[ki]) * + rhs_dim_multipliers[dnums.kernel_spatial_dimensions(ki)]; } result_val += - static_cast(lhs_literal.Get(lhs_index)) * - static_cast(rhs_literal.Get(rhs_index)); + static_cast(lhs_literal_data[lhs_linear_index]) * + static_cast(rhs_literal_data[rhs_linear_index]); } cnt : {} } while (IndexUtil::BumpIndices(window_shape, &rhs_spatial_index)); @@ -1073,7 +1117,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { }; auto result = Literal::CreateFromShape(result_shape); - TF_RETURN_IF_ERROR(result->Populate(func)); + TF_RETURN_IF_ERROR(result->PopulateParallel(func)); parent_->evaluated_[conv] = std::move(result); return Status::OK(); @@ -1520,14 +1564,12 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { arg_dim_counts[dim] = arg_dimensions[dim]; } - // Create mapping from result index to arg index. - const int64 result_rank = ShapeUtil::Rank(result->shape()); - int64 result_dim = 0; - std::vector result_to_arg_index(result_rank); + // Map each dimension in the result to a dimension in arg that isn't + // being reduced. + std::vector result_to_arg_index; for (int64 i = 0; i < arg_dimensions.size(); ++i) { if (arg_dim_steps[i] == 0) { - result_to_arg_index[result_dim] = i; - ++result_dim; + result_to_arg_index.push_back(i); } } @@ -1542,31 +1584,43 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { base[result_to_arg_index[i]] = multi_index[i]; } + // When the reduction is addition of floats, accumulate in a double + // for better precision. 
Also, avoid creating Literals for the + // intermediate results; it's much faster. + if (ShapeUtil::ElementIsFloating(init_literal.shape()) && + IsScalarAdd(function)) { + double computed_result = 0; + auto func = [&](ArraySlice input_index) { + computed_result += arg_literal.Get(input_index); + return true; + }; + ShapeUtil::ForEachIndex(arg_literal.shape(), base, arg_dim_counts, + arg_dim_steps, func); + return static_cast(computed_result); + } auto func = [&](ArraySlice input_index) { auto curr_val = arg_literal.Get(input_index); // Evaluate computation with specified literal operands. auto curr_val_literal = Literal::CreateR0(curr_val); auto result_val_literal = Literal::CreateR0(result_val); - std::vector args = {curr_val_literal.get(), - result_val_literal.get()}; + std::vector args = {result_val_literal.get(), + curr_val_literal.get()}; std::unique_ptr computed_result = embedded_evaluator.Evaluate(*function, args) .ConsumeValueOrDie(); - // Clear visit states so that the we can use the evaluate again on + // Clear visit states so that we can use the evaluator again on // the same computation. embedded_evaluator.ResetVisitStates(); - // Assign computed result to result_val. result_val = computed_result->Get({}); - return true; }; - + // Computes one element of the result, reducing all dimensions that + // contribute to that element. 
ShapeUtil::ForEachIndex(arg_literal.shape(), base, arg_dim_counts, arg_dim_steps, func); - return result_val; })); @@ -1574,6 +1628,20 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { return Status::OK(); } + bool IsScalarAdd(HloComputation* computation) { + HloInstruction* instruction = computation->root_instruction(); + if (instruction->opcode() == HloOpcode::kAdd && + computation->num_parameters() == 2) { + const HloInstruction* lhs = instruction->operand(0); + const HloInstruction* rhs = instruction->operand(1); + return lhs->opcode() == HloOpcode::kParameter && + ShapeUtil::IsScalar(lhs->shape()) && + rhs->opcode() == HloOpcode::kParameter && + ShapeUtil::IsScalar(rhs->shape()) && lhs != rhs; + } + return false; + } + Status HandleSelectAndScatter(HloInstruction* select_and_scatter) override { auto operand = select_and_scatter->operand(0); auto source = select_and_scatter->operand(1); @@ -1736,7 +1804,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { const auto result_val_literal = Literal::CreateR0(result_val); const std::vector args = { - curr_val_literal.get(), result_val_literal.get()}; + result_val_literal.get(), curr_val_literal.get()}; std::unique_ptr computed_result = embedded_evaluator.Evaluate(*function, args) .ConsumeValueOrDie(); @@ -1785,6 +1853,34 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { return Status::OK(); } + // Enable CLZ only for int32 and uint32. 
+ template < + typename NativeT, + typename std::enable_if< + (std::is_floating_point::value || + std::is_integral::value || is_complex_t::value) && + !(std::is_same::value || + std::is_same::value)>::type* = nullptr> + Status HandleClz(HloInstruction* clz) { + return InvalidArgument("Unsupported type for Clz"); + } + + template ::value || + std::is_same::value>::type* = nullptr> + Status HandleClz(HloInstruction* clz) { + TF_ASSIGN_OR_RETURN(parent_->evaluated_[clz], + ElementWiseUnaryOp(clz, [](ElementwiseT elem_operand) { + return 31 - tensorflow::Log2Floor(elem_operand); + })); + return Status::OK(); + } + + Status HandleClz(HloInstruction* clz) override { + return HandleClz(clz); + } + template ::value>::type* = nullptr> Status HandleSin(HloInstruction* sin) { diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_test.cc b/tensorflow/compiler/xla/service/hlo_evaluator_test.cc index 685cacd7f74c00789296dee16f0a6a94c35a4393..dd14dd38537a83d0ee16cff9e3c22a38f544e208 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator_test.cc +++ b/tensorflow/compiler/xla/service/hlo_evaluator_test.cc @@ -40,6 +40,7 @@ limitations under the License. #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/lib/core/status_test_util.h" #include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/test_benchmark.h" #include "tensorflow/core/platform/types.h" namespace xla { @@ -1205,6 +1206,80 @@ TEST_P(HloEvaluatorTest, LiteralTestUtil::ExpectEqual(*expected, *result); } +class HloEvaluatorPreciseReduceTest : public HloVerifiedTestBase {}; + +// Tests that Reduce doesn't lose precision when adding many numbers (because +// it accumulates its result in a double). 
+TEST_F(HloEvaluatorPreciseReduceTest, AddReductionPrecisionTest) { + HloComputation::Builder b(TestName()); + + constexpr int kNumElements = 1 << 25; // float += 1 saturates at 1<<24 + std::vector v(kNumElements, 1.0f); + HloInstruction* arg_instruction = b.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR1(v))); + HloInstruction* init_value = b.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR0(0.f))); + + HloComputation::Builder add_computation("add"); + Shape scalar_shape = ShapeUtil::MakeShape(F32, {}); + auto param_lhs = add_computation.AddInstruction( + HloInstruction::CreateParameter(0, scalar_shape, "lhs")); + auto param_rhs = add_computation.AddInstruction( + HloInstruction::CreateParameter(1, scalar_shape, "rhs")); + add_computation.AddInstruction(HloInstruction::CreateBinary( + scalar_shape, HloOpcode::kAdd, param_lhs, param_rhs)); + auto add_func = module().AddEmbeddedComputation(add_computation.Build()); + + HloInstruction* reduce_instruction = b.AddInstruction( + HloInstruction::CreateReduce(scalar_shape, arg_instruction, init_value, + /*dimensions_to_reduce=*/{0}, add_func)); + module().AddEntryComputation(b.Build()); + + HloEvaluator hlo_eval; + std::unique_ptr result = + hlo_eval.Evaluate(reduce_instruction).ConsumeValueOrDie(); + LiteralTestUtil::ExpectR0Equal(kNumElements, *result); +} + +// Reducing many numbers should be fast because it doesn't create +// intermediate Literals; the microbenchmark should finish in < 1 msec. 
+void BM_ReducePrecisely(int num_iters) { + tensorflow::testing::StopTiming(); + HloComputation::Builder b("BM_ReducePrecisely"); + HloModuleConfig config; + config.set_debug_options(legacy_flags::GetDebugOptionsFromFlags()); + HloModule module("BM_ReducePrecisely", VersionedComputationHandle(), config); + + constexpr int kNumElements = 1 << 25; // float += 1 saturates at 1<<24 + std::vector v(kNumElements, 1.0f); + HloInstruction* arg_instruction = b.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR1(v))); + auto init_value = b.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR0(0.f))); + + HloComputation::Builder add_computation("add"); + Shape scalar_shape = ShapeUtil::MakeShape(F32, {}); + auto param_lhs = add_computation.AddInstruction( + HloInstruction::CreateParameter(0, scalar_shape, "lhs")); + auto param_rhs = add_computation.AddInstruction( + HloInstruction::CreateParameter(1, scalar_shape, "rhs")); + add_computation.AddInstruction(HloInstruction::CreateBinary( + scalar_shape, HloOpcode::kAdd, param_lhs, param_rhs)); + auto add_func = module.AddEmbeddedComputation(add_computation.Build()); + + HloInstruction* reduce_instruction = b.AddInstruction( + HloInstruction::CreateReduce(scalar_shape, arg_instruction, init_value, + /*dimensions_to_reduce=*/{0}, add_func)); + module.AddEntryComputation(b.Build()); + + HloEvaluator hlo_eval; + tensorflow::testing::StartTiming(); + hlo_eval.Evaluate(reduce_instruction).ConsumeValueOrDie(); + tensorflow::testing::StopTiming(); +} + +BENCHMARK(BM_ReducePrecisely); + TEST_P(HloEvaluatorTest, ReduceAdd) { HloComputation::Builder b(TestName()); diff --git a/tensorflow/compiler/xla/service/hlo_execution_profile.h b/tensorflow/compiler/xla/service/hlo_execution_profile.h index 6fb91b9bef9d1df82b8806ce79cc147823edeb3d..be989846ef5cd2645da88ac9bbfea9534dd47821 100644 --- a/tensorflow/compiler/xla/service/hlo_execution_profile.h +++ b/tensorflow/compiler/xla/service/hlo_execution_profile.h @@ 
-88,7 +88,7 @@ std::unique_ptr CreateHloProfilePrinterData( // down how much time each HLO took. class HloExecutionProfile { public: - using DeviceDescription = perftools::gputools::DeviceDescription; + using DeviceDescription = se::DeviceDescription; HloExecutionProfile(const HloProfilePrinterData* hlo_profile_printer_data, const HloProfileIndexMap* hlo_profile_index_map); diff --git a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc index 25702dc65ea1ebd9d91b3382dcb909e606628202..516e14b4642ae6665a2d15c91715dc9b057ab41a 100644 --- a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc +++ b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc @@ -909,6 +909,7 @@ ColorScheme HloDotDumper::GetInstructionColor(const HloInstruction* instr) { case HloOpcode::kBitcastConvert: case HloOpcode::kCeil: case HloOpcode::kClamp: + case HloOpcode::kClz: case HloOpcode::kComplex: case HloOpcode::kConvert: case HloOpcode::kCos: diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index a2a2c1e615a7f2b226c712a75b1240b980fc8d3c..a714d0e114245021c28da26beae444dbd3d99bb5 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -98,6 +98,13 @@ StatusOr> HloInstruction::CreateFromProto( } } + if (instruction->opcode() == HloOpcode::kTrace) { + TF_RET_CHECK(instruction->operands().size() == 1) + << "Trace instruction should have 1 operand but sees " + << instruction->operands().size(); + instruction->mutable_operand(0)->set_tracing(instruction.get()); + } + TF_RET_CHECK(!proto.name().empty()); instruction->name_ = proto.name(); @@ -152,6 +159,23 @@ StatusOr> HloInstruction::CreateFromProto( instruction->fft_length_.push_back(fft_len); } + if (proto.has_sharding()) { + TF_ASSIGN_OR_RETURN(const auto& sharding, + HloSharding::FromProto(proto.sharding())); + instruction->set_sharding(sharding); + } + + if 
(proto.has_gather_dimension_numbers()) { + instruction->gather_dimension_numbers_ = + MakeUnique(proto.gather_dimension_numbers()); + } + for (int64 bound : proto.gather_window_bounds()) { + instruction->gather_window_bounds_.push_back(bound); + } + + instruction->channel_name_ = proto.channel_name(); + instruction->cost_estimate_ns_ = proto.cost_estimate_ns(); + return std::move(instruction); } @@ -170,6 +194,7 @@ StatusOr> HloInstruction::CreateFromProto( WrapUnique(new HloInstruction(HloOpcode::kTrace, ShapeUtil::MakeNil())); instruction->operands_.push_back(operand); instruction->literal_ = Literal::CreateR1U8(tag); + operand->set_tracing(instruction.get()); return instruction; } @@ -229,6 +254,7 @@ HloInstruction::CreateGetTupleElement(const Shape& shape, case HloOpcode::kCeil: case HloOpcode::kCopy: case HloOpcode::kCos: + case HloOpcode::kClz: case HloOpcode::kExp: case HloOpcode::kFloor: case HloOpcode::kImag: @@ -804,6 +830,16 @@ static string FusionNodeName(HloInstruction::FusionKind fusion_kind) { return instruction; } +void HloInstruction::SetupDerivedInstruction( + HloInstruction* derived_instruction) const { + if (sharding_ != nullptr) { + derived_instruction->set_sharding(*sharding_); + } else { + derived_instruction->clear_sharding(); + } + derived_instruction->set_metadata(metadata_); +} + HloInstruction* HloInstruction::AddFusionOperand(HloInstruction* new_operand) { CHECK_EQ(opcode(), HloOpcode::kFusion); CHECK_EQ(operand_count(), @@ -1213,6 +1249,7 @@ std::unique_ptr HloInstruction::CloneWithNewOperands( case HloOpcode::kRoundNearestAfz: case HloOpcode::kBitcast: case HloOpcode::kCeil: + case HloOpcode::kClz: case HloOpcode::kCopy: case HloOpcode::kCos: case HloOpcode::kExp: @@ -1442,10 +1479,7 @@ std::unique_ptr HloInstruction::CloneWithNewOperands( case HloOpcode::kTrace: LOG(FATAL) << "Not yet implemented, clone: " << HloOpcodeString(opcode_); } - clone->set_metadata(metadata_); - if (has_sharding()) { - clone->set_sharding(sharding()); - } + 
SetupDerivedInstruction(clone.get()); clone->set_parent(parent_); return clone; } @@ -1644,14 +1678,35 @@ Status HloInstruction::AddControlDependencyTo(HloInstruction* instruction) { } Status HloInstruction::RemoveControlDependencyTo(HloInstruction* instruction) { - auto succ_it = std::find(control_successors_.begin(), - control_successors_.end(), instruction); - TF_RET_CHECK(succ_it != control_successors_.end()); - control_successors_.erase(succ_it); - auto pred_it = std::find(instruction->control_predecessors_.begin(), - instruction->control_predecessors_.end(), this); - TF_RET_CHECK(pred_it != instruction->control_predecessors_.end()); - instruction->control_predecessors_.erase(pred_it); + TF_RET_CHECK(instruction->parent() == parent()); + TF_RETURN_IF_ERROR(EraseElementFromVector(&control_successors_, instruction)); + TF_RETURN_IF_ERROR( + EraseElementFromVector(&instruction->control_predecessors_, this)); + return Status::OK(); +} + +Status HloInstruction::DropAllControlDeps() { + for (auto* ctrl_succ : control_successors_) { + TF_RETURN_IF_ERROR( + EraseElementFromVector(&ctrl_succ->control_predecessors_, this)); + } + for (auto* ctrl_pred : control_predecessors_) { + TF_RETURN_IF_ERROR( + EraseElementFromVector(&ctrl_pred->control_successors_, this)); + } + control_successors_.clear(); + control_predecessors_.clear(); + return Status::OK(); +} + +Status HloInstruction::CopyAllControlDepsFrom(const HloInstruction* inst) { + for (auto* ctrl_pred : inst->control_predecessors()) { + TF_RETURN_IF_ERROR(ctrl_pred->AddControlDependencyTo(this)); + } + + for (auto* ctrl_succ : inst->control_successors()) { + TF_RETURN_IF_ERROR(this->AddControlDependencyTo(ctrl_succ)); + } return Status::OK(); } @@ -1696,6 +1751,7 @@ bool HloInstruction::IdenticalSlowPath( case HloOpcode::kAdd: case HloOpcode::kCeil: case HloOpcode::kClamp: + case HloOpcode::kClz: case HloOpcode::kComplex: case HloOpcode::kCopy: case HloOpcode::kCos: @@ -2395,6 +2451,9 @@ HloInstructionProto 
HloInstruction::ToProto() const { proto.add_fft_length(fft_len); } + proto.set_channel_name(channel_name_); + proto.set_cost_estimate_ns(cost_estimate_ns_); + return proto; } @@ -2618,6 +2677,8 @@ Status HloInstruction::Visit(DfsHloVisitorBase* visitor) { return visitor->HandleFloor(this); case HloOpcode::kCeil: return visitor->HandleCeil(this); + case HloOpcode::kClz: + return visitor->HandleClz(this); case HloOpcode::kLog: return visitor->HandleLog(this); case HloOpcode::kTanh: @@ -2959,6 +3020,7 @@ bool HloInstruction::IsElementwise() const { case HloOpcode::kAbs: case HloOpcode::kRoundNearestAfz: case HloOpcode::kCeil: + case HloOpcode::kClz: case HloOpcode::kConvert: case HloOpcode::kBitcastConvert: case HloOpcode::kCopy: diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h index a94ba145df792ade9bb7ce3e9a31b56b2f460cd2..f3da3fc256efe8dc9cc85bcbc6cc0c064c770d5c 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.h +++ b/tensorflow/compiler/xla/service/hlo_instruction.h @@ -557,6 +557,18 @@ class HloInstruction { // 'instruction'. Status RemoveControlDependencyTo(HloInstruction* instruction); + // Drops all control predecessors and successors from this HLO instruction. + Status DropAllControlDeps(); + + // Copies the control predecessors and successors on this HLO instruction to + // `inst`. Does not do a deep copy so this makes sense only if `inst` and + // this HLO are in the same module. + // + // Depending on the use cases we see in practice, in the future we may + // consider folding the logic here into Clone, CloneWithNewOperands and + // ReplaceAllUsesWith by treating control dependencies like data dependencies. + Status CopyAllControlDepsFrom(const HloInstruction* inst); + // Returns the set of control predecessors (successors) of this // instruction. Control predecessors (successors) must execute before (after) // the current instruction. 
@@ -928,6 +940,13 @@ class HloInstruction { const HloSharding& sharding_or_default(const HloSharding& default_) const { return sharding_ ? *sharding_ : default_; } + // Returns the sharding unique device, if any. + tensorflow::gtl::optional sharding_unique_device() const { + if (sharding_ == nullptr || !sharding_->HasUniqueDevice()) { + return tensorflow::gtl::optional(); + } + return sharding_->UniqueDevice().ValueOrDie(); + } // Sets the sharding of this operator. Should only be called by HloModule or // HloComputation methods. void set_sharding(const HloSharding& sharding) { @@ -937,6 +956,21 @@ class HloInstruction { void clear_sharding() { sharding_ = nullptr; } // Return true if this operator has a sharding assigned. bool has_sharding() const { return sharding_ != nullptr; } + // Checks whether the instruction has compatible sharding with the other + // instruction. + bool has_compatible_sharding(const HloInstruction* other) const { + if (!has_sharding()) { + return !other->has_sharding(); + } + return other->has_sharding() ? sharding() == other->sharding() : false; + } + + // When creating a new instruction which either replaces, or shifts up (kCopy + // insertion case), another instruction, we need to make sure the certain + // properties of the new instruction are copied into the derived one. As of + // today, the metadata and sharding will be propagated to the derived + // instruction. + void SetupDerivedInstruction(HloInstruction* derived_instruction) const; // Adds a new operand the fusion instruction. HloInstruction* AddFusionOperand(HloInstruction* new_operand); @@ -1134,17 +1168,17 @@ class HloInstruction { // Clones the HLO instruction. The clone will have the same opcode, shape, and // operands. After creation the clone has no uses. "this" (the instruction // cloned from) is not changed. Suffix is the string to append to the name of - // the instruction to form the name of the cloned instruction. 
- // If the module pointer is not nullptr, it will be the module where - // the cloned computations will be added to (in order to support deep - // cloning). + // the instruction to form the name of the cloned instruction. If the module + // pointer is not nullptr, it will be the module where the cloned computations + // will be added to (in order to support deep cloning). Ignores the control + // predecessors and successors of this HLO instruction. std::unique_ptr Clone(const string& suffix = "clone", HloModule* module = nullptr) const; - // Clones the HLO instruction as above but with new shape and operands. - // If the module pointer is not nullptr, it will be the module where - // the cloned computations will be added to (in order to support deep - // cloning). + // Clones the HLO instruction as above but with new shape and operands. If + // the module pointer is not nullptr, it will be the module where the cloned + // computations will be added to (in order to support deep cloning). Ignores + // the control predecessors and successors of this HLO instruction. std::unique_ptr CloneWithNewOperands( const Shape& shape, tensorflow::gtl::ArraySlice operands, HloModule* module = nullptr) const; @@ -1435,7 +1469,7 @@ class HloInstruction { string channel_name_; // Estimate of the duration of a host computation in nanoseconds. - int64 cost_estimate_ns_; + int64 cost_estimate_ns_ = 0; // Computations called by this instruction. 
std::vector called_computations_; diff --git a/tensorflow/compiler/xla/service/hlo_module_group_metadata.cc b/tensorflow/compiler/xla/service/hlo_module_group_metadata.cc index fa5dcb0b369d17c70c64c67b9f11640c93fb4278..54c34ce116651608e6d91cdcba9c708ca3a5f75e 100644 --- a/tensorflow/compiler/xla/service/hlo_module_group_metadata.cc +++ b/tensorflow/compiler/xla/service/hlo_module_group_metadata.cc @@ -313,6 +313,27 @@ Status HloModuleGroupMetadata::VerifyChannelInstructions() { if (!ShapeUtil::Compatible(send_shape, recv_shape)) { return FailedPrecondition("send/recv shapes do not match"); } + const HloModule* send_module = channel.send->parent()->parent(); + const HloModule* send_done_module = channel.send_done->parent()->parent(); + if (send_module != send_done_module) { + return FailedPrecondition( + "send and send-done (channel=%lld) must be on the same device: %lld " + "vs. %lld", + channel.id, GetModuleId(send_module), GetModuleId(send_done_module)); + } + const HloModule* recv_module = channel.recv->parent()->parent(); + const HloModule* recv_done_module = channel.recv_done->parent()->parent(); + if (recv_module != recv_done_module) { + return FailedPrecondition( + "recv and recv-done (channel=%lld) must be on the same device: %lld " + "vs. %lld", + channel.id, GetModuleId(recv_module), GetModuleId(recv_done_module)); + } + if (send_module == recv_module) { + return FailedPrecondition( + "send and recv (channel=%lld) must be on different devices: %lld", + channel.id, GetModuleId(send_module)); + } } // Check if channel instructions are used only in allowed computations. 
diff --git a/tensorflow/compiler/xla/service/hlo_opcode.h b/tensorflow/compiler/xla/service/hlo_opcode.h index af24604c39b554f146793594958f373999844b4c..ca763076a16af1150a8623fb7dbf22c46a5ca263 100644 --- a/tensorflow/compiler/xla/service/hlo_opcode.h +++ b/tensorflow/compiler/xla/service/hlo_opcode.h @@ -57,6 +57,7 @@ namespace xla { V(kCall, "call", kHloOpcodeIsVariadic) \ V(kCeil, "ceil") \ V(kClamp, "clamp") \ + V(kClz, "count-leading-zeros") \ V(kComplex, "complex") \ V(kConcatenate, "concatenate", kHloOpcodeIsVariadic) \ V(kConditional, "conditional") \ diff --git a/tensorflow/compiler/xla/service/hlo_runner.cc b/tensorflow/compiler/xla/service/hlo_runner.cc index ec7d8210a70ad7498f77fe807abd53544d4b0487..df5ffd0b7d6f0f4575c5f3d8aaa7bff4a1b1e771 100644 --- a/tensorflow/compiler/xla/service/hlo_runner.cc +++ b/tensorflow/compiler/xla/service/hlo_runner.cc @@ -16,27 +16,20 @@ limitations under the License. #include "tensorflow/compiler/xla/service/hlo_runner.h" -#include #include #include +#include "absl/memory/memory.h" #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/compiler/xla/layout_util.h" #include "tensorflow/compiler/xla/ptr_util.h" -#include "tensorflow/compiler/xla/service/backend.h" -#include "tensorflow/compiler/xla/service/executable.h" -#include "tensorflow/compiler/xla/service/hlo_computation.h" #include "tensorflow/compiler/xla/service/transfer_manager.h" #include "tensorflow/compiler/xla/shape_util.h" -#include "tensorflow/compiler/xla/statusor.h" #include "tensorflow/compiler/xla/tools/parser/hlo_parser.h" -#include "tensorflow/compiler/xla/types.h" #include "tensorflow/core/common_runtime/eigen_thread_pool.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/types.h" -namespace se = ::perftools::gputools; - namespace xla { /*static*/ StatusOr> @@ -91,15 +84,6 @@ HloRunner::ReadModuleFromHloTextFile(const std::string& filename, return tools::Parse(hlo_string, config); } -// 
Define this in .cc file to avoid having to include eigen or forward declare -// these types in the header. -struct HloRunner::EigenThreadPoolWrapper { - std::unique_ptr pool; - std::unique_ptr device; -}; - -HloRunner::HloRunner() {} - HloRunner::HloRunner(se::Platform* platform) { BackendOptions backend_options; backend_options.set_platform(platform); @@ -113,61 +97,45 @@ StatusOr> HloRunner::Execute( std::unique_ptr module, const tensorflow::gtl::ArraySlice arguments, bool run_hlo_passes) { - if (run_hlo_passes) { - TF_ASSIGN_OR_RETURN( - module, backend().compiler()->RunHloPasses( - std::move(module), backend().default_stream_executor(), - /*device_allocator=*/nullptr)); - } - TF_ASSIGN_OR_RETURN( - std::unique_ptr executable, - backend().compiler()->RunBackend(std::move(module), - backend().default_stream_executor(), - /*device_allocator=*/nullptr)); - + TF_ASSIGN_OR_RETURN(std::unique_ptr executable, + CreateExecutable(std::move(module), run_hlo_passes)); se::Stream stream(backend().default_stream_executor()); stream.Init(); - ExecutableRunOptions run_options; - run_options.set_device_ordinal(backend().default_device_ordinal()); - run_options.set_stream(&stream); - run_options.set_allocator(backend().memory_allocator()); - run_options.set_inter_op_thread_pool(backend().inter_op_thread_pool()); - run_options.set_intra_op_thread_pool( - backend().eigen_intra_op_thread_pool_device()); - - ServiceExecutableRunOptions service_run_options( - run_options, backend().StreamBorrower(), - backend().inter_op_thread_pool()); + ServiceExecutableRunOptions service_run_options(GetServiceRunOptionsForDevice( + backend().default_device_ordinal(), &stream, nullptr)); + const ExecutableRunOptions& run_options = service_run_options.run_options(); // Copy arguments to device. 
- std::vector> argument_buffers; - std::vector argument_buffer_ptrs; + std::vector argument_buffers; for (Literal* argument : arguments) { TF_ASSIGN_OR_RETURN( - std::unique_ptr argument_buffer, + ScopedShapedBuffer argument_buffer, backend().transfer_manager()->AllocateScopedShapedBuffer( argument->shape(), run_options.allocator(), run_options.device_ordinal())); TF_RETURN_IF_ERROR(backend().transfer_manager()->TransferLiteralToDevice( - stream.parent(), *argument, *argument_buffer)); + stream.parent(), *argument, argument_buffer)); argument_buffers.push_back(std::move(argument_buffer)); - argument_buffer_ptrs.push_back(argument_buffers.back().get()); + } + + std::vector argument_buffer_ptrs; + argument_buffer_ptrs.reserve(argument_buffers.size()); + for (const auto& buf : argument_buffers) { + argument_buffer_ptrs.push_back(&buf); } TF_ASSIGN_OR_RETURN( - std::unique_ptr result, + ShapedBuffer result, executable->ExecuteOnStreamWrapper( &service_run_options, /*profile=*/nullptr, argument_buffer_ptrs)); // Create a ScopedShapedBuffer of the result to manage deallocation. This will // deallocate all the device memory when it goes out of scope. 
- TF_ASSIGN_OR_RETURN( - std::unique_ptr scoped_result, - ScopedShapedBuffer::MakeScoped(result.get(), run_options.allocator())); + ScopedShapedBuffer scoped_result(std::move(result), run_options.allocator()); auto result_literal = backend().transfer_manager()->TransferLiteralFromDevice( - stream.parent(), *scoped_result); + stream.parent(), scoped_result); if (result_literal.ok()) { VLOG(4) << "Executed binary and got result: " << result_literal.ValueOrDie()->ToString(); @@ -178,10 +146,158 @@ StatusOr> HloRunner::Execute( return result_literal; } +StatusOr>> HloRunner::ExecuteReplicated( + std::unique_ptr module, + const ReplicatedExecuteOptions& options) { + TF_ASSIGN_OR_RETURN( + std::unique_ptr executable, + CreateExecutable(std::move(module), options.run_hlo_passes)); + TF_ASSIGN_OR_RETURN( + DeviceAssignment device_assignment, + backend().computation_placer()->AssignDevices(options.num_replicas, 1)); + std::vector> streams; + std::vector service_run_options; + + std::vector argument_buffers; + // This reserve() call is necessary for correctness, because + // argument_buffer_ptrs contains pointers into the elements of + // argument_buffers. + argument_buffers.reserve(options.num_replicas * options.arguments.size()); + + // Plus one so we can safely get &argument_buffer_ptrs[0] in case there are + // no arguments. + std::vector argument_buffer_ptrs( + options.num_replicas * options.arguments.size() + 1); + std::vector> + argument_buffer_slices; + int64 index = 0; + for (int64 i = 0; i < options.num_replicas; ++i) { + int64 device = device_assignment(i, 0); + TF_ASSIGN_OR_RETURN(se::StreamExecutor * executor, + backend().stream_executor(device)); + streams.push_back(absl::make_unique(executor)); + streams.back()->Init(); + service_run_options.emplace_back(GetServiceRunOptionsForDevice( + device, streams.back().get(), &device_assignment)); + + // Copy arguments to device. 
+ for (const Literal* argument : options.arguments) { + TF_ASSIGN_OR_RETURN( + ScopedShapedBuffer argument_buffer, + backend().transfer_manager()->AllocateScopedShapedBuffer( + argument->shape(), backend().memory_allocator(), device)); + TF_RETURN_IF_ERROR(backend().transfer_manager()->TransferLiteralToDevice( + executor, *argument, argument_buffer)); + argument_buffers.push_back(std::move(argument_buffer)); + argument_buffer_ptrs[index++] = &argument_buffers.back(); + } + argument_buffer_slices.emplace_back( + &argument_buffer_ptrs[index - options.arguments.size()], + options.arguments.size()); + } + + std::unique_ptr pool; + int64 num_threads = (options.infeed != nullptr) ? options.num_replicas : 0; + if (ShapeUtil::IsInitialized(options.outfeed_shape)) { + num_threads += options.num_replicas; + } + if (num_threads > 0) { + pool = absl::make_unique( + tensorflow::Env::Default(), "infeed_outfeed", + /*num_threads=*/num_threads); + } + if (options.infeed != nullptr) { + for (int64 i = 0; i < options.num_replicas; ++i) { + int64 device = device_assignment(i, 0); + pool->Schedule([this, device, &options]() { + se::StreamExecutor* executor = + backend().stream_executor(device).ValueOrDie(); + VLOG(1) << "Starting infeed on device " << device; + for (int64 step = 1; + options.infeed_steps < 0 || step <= options.infeed_steps; ++step) { + TF_CHECK_OK(backend().transfer_manager()->TransferLiteralToInfeed( + executor, *options.infeed)); + if (step % 100 == 0) { + VLOG(1) << "Infeed step " << step; + } + } + }); + } + } + if (ShapeUtil::IsInitialized(options.outfeed_shape)) { + for (int64 i = 0; i < options.num_replicas; ++i) { + int64 device = device_assignment(i, 0); + pool->Schedule([this, device, &options]() { + se::StreamExecutor* executor = + backend().stream_executor(device).ValueOrDie(); + VLOG(1) << "Starting outfeed on device " << device; + for (int64 step = 1; + options.infeed_steps < 0 || step <= options.infeed_steps; ++step) { + auto literal = 
absl::make_unique(); + TF_CHECK_OK(backend().transfer_manager()->TransferLiteralFromOutfeed( + executor, options.outfeed_shape, literal.get())); + if (options.outfeed_values != nullptr) { + options.outfeed_values->push_back(std::move(literal)); + } + if (step % 100 == 0) { + VLOG(1) << "Outfeed step " << step; + } + } + }); + } + } + + LOG(INFO) << "Replicated execution started"; + TF_ASSIGN_OR_RETURN(std::vector results, + executable->ExecuteOnStreams(service_run_options, + argument_buffer_slices)); + LOG(INFO) << "Replicated execution terminated"; + + std::vector> exec_results; + for (int64 i = 0; i < options.num_replicas; ++i) { + ScopedShapedBuffer result(std::move(results[i]), + backend().memory_allocator()); + TF_ASSIGN_OR_RETURN(std::unique_ptr literal, + backend().transfer_manager()->TransferLiteralFromDevice( + streams[i]->parent(), result)); + exec_results.push_back(std::move(literal)); + } + return std::move(exec_results); +} + +StatusOr> HloRunner::CreateExecutable( + std::unique_ptr module, bool run_hlo_passes) { + if (run_hlo_passes) { + TF_ASSIGN_OR_RETURN( + module, backend().compiler()->RunHloPasses( + std::move(module), backend().default_stream_executor(), + backend().memory_allocator())); + } + return backend().compiler()->RunBackend(std::move(module), + backend().default_stream_executor(), + backend().memory_allocator()); +} + +ServiceExecutableRunOptions HloRunner::GetServiceRunOptionsForDevice( + int64 device, se::Stream* stream, DeviceAssignment* device_assignment) { + ExecutableRunOptions run_options; + run_options.set_device_ordinal(device); + run_options.set_stream(stream); + run_options.set_allocator(backend().memory_allocator()); + run_options.set_inter_op_thread_pool(backend().inter_op_thread_pool()); + run_options.set_intra_op_thread_pool( + backend().eigen_intra_op_thread_pool_device()); + if (device_assignment != nullptr) { + run_options.set_device_assignment(device_assignment); + } + return ServiceExecutableRunOptions(run_options, 
backend().StreamBorrower(), + backend().inter_op_thread_pool()); +} + Backend& HloRunner::backend() { if (!backend_) { backend_ = Backend::CreateDefaultBackend().ConsumeValueOrDie(); - VLOG(1) << "executing on platform " << backend().platform()->Name(); + VLOG(1) << "Executing on platform " << backend().platform()->Name(); } return *backend_; } diff --git a/tensorflow/compiler/xla/service/hlo_runner.h b/tensorflow/compiler/xla/service/hlo_runner.h index 06ce22a5b9fc7b3d6c10857c84196094c0eed303..53f7c6fe4a09111c5ee24f2290f0f4aeed0a4401 100644 --- a/tensorflow/compiler/xla/service/hlo_runner.h +++ b/tensorflow/compiler/xla/service/hlo_runner.h @@ -16,12 +16,16 @@ limitations under the License. #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_HLO_RUNNER_H_ #define TENSORFLOW_COMPILER_XLA_SERVICE_HLO_RUNNER_H_ +#include #include +#include #include #include #include "tensorflow/compiler/xla/service/backend.h" #include "tensorflow/compiler/xla/service/compiler.h" +#include "tensorflow/compiler/xla/service/computation_placer.h" +#include "tensorflow/compiler/xla/service/executable.h" #include "tensorflow/compiler/xla/service/hlo_computation.h" #include "tensorflow/compiler/xla/service/hlo_module.h" #include "tensorflow/compiler/xla/status_macros.h" @@ -40,9 +44,43 @@ namespace xla { // file), or parsed from a hlo textual IR string. class HloRunner { public: - HloRunner(); - - HloRunner(::perftools::gputools::Platform* platform); + // The options used to configure a ExecuteReplicated() call. + struct ReplicatedExecuteOptions { + // The number of devices the HLO module should be replicated onto. + int64 num_replicas = 1; + + // The arguments to be fed to each replica. Since this is used for a + // replicated execution, all the arguments are the same for all replicas. + std::vector arguments; + + // If the HLO module being run has an infeed instruction, this will be the + // data which will be fed to it, for as many as infeed_steps steps. 
+ const Literal* infeed = nullptr; + + // The number of times the infeed literal should be fed to the HLO module. + // For a clean exit, this should match the iterations-per-loop parameter + // used when generating the HLO module proto (that is usually the main + // while boundary counter). A value higher than iterations-per-loop would + // lead to infeed threads feeding to a gone computation, while a lower + // value would trigger a stuck ExecuteReplicated() call (the computation + // will be trying to infeed data which will never come). + int64 infeed_steps = -1; + + // The shape of the outfeed operation. If empty, the HLO module does not + // generate any outfeed. + Shape outfeed_shape; + + // A pointer to a vector where the outfeed values will be stored. If + // nullptr, the values will be read and discarded. + std::vector>* outfeed_values = nullptr; + + // Whether the HLO passes should be run on the input module. Usually + // saved modules are coming from after the HLO pass pipeline, so triggering + // another run will likely cause errors. + bool run_hlo_passes = false; + }; + + explicit HloRunner(se::Platform* platform); ~HloRunner(); @@ -86,6 +124,13 @@ class HloRunner { return Execute(std::move(module), argument_pointers, run_hlo_passes); } + // Executes a given HLO module into a set of replicas, and returns a map + // with the replica number as key, and the corresponding returned literal as + // value. + StatusOr>> ExecuteReplicated( + std::unique_ptr module, + const ReplicatedExecuteOptions& options); + // If backend is not created in the constructor, creates and returns the // default backend. If creation fails, crashes the program. // @@ -94,9 +139,17 @@ class HloRunner { Backend& backend(); private: - struct EigenThreadPoolWrapper; - - std::unique_ptr thread_pool_wrapper_; + // Creates an executable object given an HLO module. If run_hlo_passes is + // true, the HLO passes will be run before. 
+ StatusOr> CreateExecutable( + std::unique_ptr module, bool run_hlo_passes); + + // Creates a ServiceExecutableRunOptions object to configure a run on device, + // using the provided stream object. If device_assignment is not nullptr, it + // will be used to configure the replication parameters. Replicated executions + // should pass the device_assignment parameter. + ServiceExecutableRunOptions GetServiceRunOptionsForDevice( + int64 device, se::Stream* stream, DeviceAssignment* device_assignment); std::unique_ptr backend_; }; diff --git a/tensorflow/compiler/xla/service/hlo_sharding.cc b/tensorflow/compiler/xla/service/hlo_sharding.cc index e8e45f1ee968992901988e8b85d4e9ae28f2abe9..1b42349b0b3ad9634bb910b3843affed6a0ca334 100644 --- a/tensorflow/compiler/xla/service/hlo_sharding.cc +++ b/tensorflow/compiler/xla/service/hlo_sharding.cc @@ -376,6 +376,16 @@ HloSharding HloSharding::TransformShardedTileShape( return HloSharding::Tile(new_tile_shape, tile_assignment()); } +HloSharding HloSharding::GetSubSharding(const Shape& shape, + const ShapeIndex& index) const { + CHECK(IsTuple()); + + ShapeTree sub_shape_tree(ShapeUtil::GetSubshape(shape, index), + Replicate()); + sub_shape_tree.CopySubtreeFrom(GetAsShapeTree(shape), index, {}); + return Tuple(sub_shape_tree); +} + std::ostream& operator<<(std::ostream& out, const HloSharding& sharding) { out << sharding.ToString(); return out; diff --git a/tensorflow/compiler/xla/service/hlo_sharding.h b/tensorflow/compiler/xla/service/hlo_sharding.h index 18d406f3700da6dfdfcd16fb76bf9c1d2bc63141..2b8e757f42991f697df37d3d34bfdff6a36bc509 100644 --- a/tensorflow/compiler/xla/service/hlo_sharding.h +++ b/tensorflow/compiler/xla/service/hlo_sharding.h @@ -94,6 +94,10 @@ class HloSharding { // Create a new sharding from a protobuf OpSharding. static StatusOr FromProto(const OpSharding& proto); + // Checks whether device is a reserved device number. 
A reserved device number + // has usually a special meaning, with dedicated handling logic. + static bool IsReservedDevice(int64 device) { return device < 0; } + OpSharding ToProto() const; string ToString() const; @@ -171,6 +175,10 @@ class HloSharding { } } + // Retrieves the sub sharding at a given index, out of a tuple sharding. + // REQUIRES: IsTuple() + HloSharding GetSubSharding(const Shape& shape, const ShapeIndex& index) const; + bool operator==(const HloSharding& other) const { return replicated_ == other.replicated_ && maximal_ == other.maximal_ && ShapeUtil::Compatible(tile_shape_, other.tile_shape_) && diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc index 8c875698eb1992719d504d272ca338b05b60e36b..8a30cbf9cd622ffb64d345ddaf0dc88f34850bfc 100644 --- a/tensorflow/compiler/xla/service/hlo_verifier.cc +++ b/tensorflow/compiler/xla/service/hlo_verifier.cc @@ -15,6 +15,7 @@ limitations under the License. #include +#include "tensorflow/compiler/xla/service/hlo_opcode.h" #include "tensorflow/compiler/xla/service/hlo_verifier.h" #include "tensorflow/compiler/xla/status_macros.h" #include "tensorflow/core/lib/core/errors.h" @@ -731,6 +732,73 @@ Status HloVerifier::CheckFusionInstruction(HloInstruction* fusion) const { return tensorflow::Status::OK(); } +Status HloVerifier::CheckWhileInstruction(HloInstruction* instruction) { + auto* while_cond = instruction->while_condition(); + auto* while_body = instruction->while_body(); + if (while_cond->num_parameters() != 1) { + return FailedPrecondition( + "While condition must have exactly 1 parameter; had %lld : %s", + while_cond->num_parameters(), while_cond->ToString().c_str()); + } + if (while_body->num_parameters() != 1) { + return FailedPrecondition( + "While body must have exactly 1 parameter; had %lld : %s", + while_body->num_parameters(), while_body->ToString().c_str()); + } + if (instruction->operand_count() != 1) { + return FailedPrecondition( + 
"While loop must have exactly one operand; had %lld : %s", + instruction->operand_count(), instruction->ToString().c_str()); + } + auto* init = instruction->operand(0); + auto* cond_param = while_cond->parameter_instruction(0); + if (!ShapeUtil::Compatible(init->shape(), cond_param->shape())) { + return FailedPrecondition( + "While condition's parameter must have the same shape as the " + "loop's 'init'. init: %s, param: %s", + init->ToString().c_str(), cond_param->ToString().c_str()); + } + auto* cond_root = while_cond->root_instruction(); + if (!ShapeUtil::Compatible(cond_root->shape(), + ShapeUtil::MakeShape(PRED, {}))) { + return FailedPrecondition("While condition should have shape PRED: %s", + cond_root->ToString().c_str()); + } + auto* body_param = while_body->parameter_instruction(0); + if (!ShapeUtil::Compatible(init->shape(), body_param->shape())) { + return FailedPrecondition( + "While body's parameter must have the same shape as the loop's" + " 'init'. init: %s, param: %s", + init->ToString().c_str(), body_param->ToString().c_str()); + } + auto* body_root = while_body->root_instruction(); + if (!ShapeUtil::Compatible(init->shape(), body_root->shape())) { + return FailedPrecondition( + "While body should have same shape as the loop's 'init'." + "init: %s, body: %s", + init->ToString().c_str(), body_root->ToString().c_str()); + } + return tensorflow::Status::OK(); +} + +Status HloVerifier::CheckElementwiseInstruction(HloInstruction* instruction) { + const Shape& out_shape = instruction->shape(); + for (HloInstruction* operand : instruction->operands()) { + const Shape& operand_shape = operand->shape(); + if (!ShapeUtil::IsScalar(operand_shape) && + !ShapeUtil::CompatibleIgnoringElementType(operand_shape, out_shape)) { + return FailedPrecondition( + "Implicit broadcast is not allowed in HLO." 
+ "Found non-compatible shapes for instruction %s.\n" + "output: %s\noperand: %s\n", + HloOpcodeString(instruction->opcode()).c_str(), + ShapeUtil::HumanString(out_shape).c_str(), + ShapeUtil::HumanString(operand_shape).c_str()); + } + } + return tensorflow::Status::OK(); +} + StatusOr HloVerifier::Run(HloModule* module) { TF_RETURN_IF_ERROR(VerifyHloStructure(module)); @@ -771,39 +839,9 @@ StatusOr HloVerifier::Run(HloModule* module) { << instruction->dimensions().size() << " != " << ShapeUtil::Rank(instruction->operand(0)->shape()); } else if (instruction->opcode() == HloOpcode::kWhile) { - auto* while_cond = instruction->while_condition(); - auto* while_body = instruction->while_body(); - TF_RET_CHECK(while_cond->num_parameters() == 1) - << "While condition must have exactly 1 parameter; had " - << while_cond->num_parameters() << ": " << while_cond->ToString(); - TF_RET_CHECK(while_body->num_parameters() == 1) - << "While body must have exactly 1 parameter; had " - << while_body->num_parameters() << ": " << while_body->ToString(); - TF_RET_CHECK(instruction->operand_count() == 1) - << "While loop must have exactly one operand; had " - << instruction->operand_count() << ": " << instruction->ToString(); - - auto* init = instruction->operand(0); - auto* cond_param = while_cond->parameter_instruction(0); - TF_RET_CHECK(ShapeUtil::Compatible(init->shape(), cond_param->shape())) - << "While condition's parameter must have the same shape as the " - "loop's 'init'. init: " - << init->ToString() << ", param: " << cond_param->ToString(); - auto* cond_root = while_cond->root_instruction(); - TF_RET_CHECK(ShapeUtil::Compatible(cond_root->shape(), - ShapeUtil::MakeShape(PRED, {}))) - << "While condition should have shape PRED: " - << cond_root->ToString(); - - auto* body_param = while_body->parameter_instruction(0); - TF_RET_CHECK(ShapeUtil::Compatible(init->shape(), body_param->shape())) - << "While body's parameter must have the same shape as the loop's " - "'init'. 
init: " - << init->ToString() << ", param: " << body_param->ToString(); - auto* body_root = while_body->root_instruction(); - TF_RET_CHECK(ShapeUtil::Compatible(init->shape(), body_root->shape())) - << "While body should have same shape as the loop's 'init'. init: " - << init->ToString() << ", body: " << body_root->ToString(); + TF_RETURN_IF_ERROR(CheckWhileInstruction(instruction)); + } else if (instruction->IsElementwise()) { + TF_RETURN_IF_ERROR(CheckElementwiseInstruction(instruction)); } auto previous = instructions.find(instruction->name()); diff --git a/tensorflow/compiler/xla/service/hlo_verifier.h b/tensorflow/compiler/xla/service/hlo_verifier.h index 1dd7ec3c51e18dcfe89bd478de87798ba3858119..6208887547a14d22b512ef308dd2668af2f4468d 100644 --- a/tensorflow/compiler/xla/service/hlo_verifier.h +++ b/tensorflow/compiler/xla/service/hlo_verifier.h @@ -102,7 +102,7 @@ class ShapeVerifier : public DfsHloVisitor { Status CheckTernaryShape(const HloInstruction* instruction); Status CheckVariadicShape(const HloInstruction* instruction); - // Checks if the given two instructions shares the same channel id. + // Checks if the given two instructions share the same channel id. Status CheckSameChannel(const HloInstruction* instr1, const HloInstruction* instr2); @@ -144,9 +144,15 @@ class HloVerifier : public HloPassInterface { // CHECKs various invariants of a fusion instruction. Status CheckFusionInstruction(HloInstruction* fusion) const; + Status CheckWhileInstruction(HloInstruction* instruction); + + // Checks that the non-scalar operand shapes are compatible to the output + // shape, i.e., that there are no implicit broadcasts of size-one dimensions. + Status CheckElementwiseInstruction(HloInstruction* instruction); + // Creates a ShapeVerifier that checks that shapes match inferred - // expectations. This is a factory function because ShapeVerifier, Note that - // ShapeVerifier, being a DfsHloVisitor, is stateful. We want a clean object + // expectations. 
This is a factory function because ShapeVerifier, + // being a DfsHloVisitor, is stateful. We want a clean object // for each run of the verifier. ShapeVerifierFactory shape_verifier_factory_; }; diff --git a/tensorflow/compiler/xla/service/instruction_fusion.cc b/tensorflow/compiler/xla/service/instruction_fusion.cc index d69ad80bdb4d2eab2d34228be026d7bc0b76efc0..b9ccfeddb565b7b44f5b38281a49df6cd0fdc766 100644 --- a/tensorflow/compiler/xla/service/instruction_fusion.cc +++ b/tensorflow/compiler/xla/service/instruction_fusion.cc @@ -39,6 +39,7 @@ namespace xla { case HloOpcode::kBroadcast: case HloOpcode::kCeil: case HloOpcode::kClamp: + case HloOpcode::kClz: case HloOpcode::kComplex: case HloOpcode::kConcatenate: case HloOpcode::kConstant: diff --git a/tensorflow/compiler/xla/service/interpreter/compiler.cc b/tensorflow/compiler/xla/service/interpreter/compiler.cc index 9171e859c6f84ceef9664aa1eb90a07c87dfab40..76b3ecad26fe92e910fd3fe0e405c726da7e14b7 100644 --- a/tensorflow/compiler/xla/service/interpreter/compiler.cc +++ b/tensorflow/compiler/xla/service/interpreter/compiler.cc @@ -41,9 +41,6 @@ limitations under the License. 
namespace xla { namespace interpreter { -namespace se = ::perftools::gputools; -namespace sep = ::perftools::gputools::interpreter; - Status InterpreterCompiler::RunHloOptimization(HloModule* hlo_module) { HloPassPipeline pipeline("Interpreter"); @@ -96,7 +93,7 @@ InterpreterCompiler::CompileAheadOfTime( } se::Platform::Id InterpreterCompiler::PlatformId() const { - return sep::kInterpreterPlatformId; + return se::interpreter::kXlaInterpreterPlatformId; } HloCostAnalysis::ShapeSizeFunction InterpreterCompiler::ShapeSizeBytesFunction() @@ -109,11 +106,12 @@ static std::unique_ptr CreateComputationPlacer() { } static bool InitModule() { - xla::Compiler::RegisterCompilerFactory(sep::kInterpreterPlatformId, []() { - return xla::MakeUnique(); - }); - xla::ComputationPlacer::RegisterComputationPlacer(sep::kInterpreterPlatformId, - &CreateComputationPlacer); + xla::Compiler::RegisterCompilerFactory( + se::interpreter::kXlaInterpreterPlatformId, []() { + return xla::MakeUnique(); + }); + xla::ComputationPlacer::RegisterComputationPlacer( + se::interpreter::kXlaInterpreterPlatformId, &CreateComputationPlacer); return true; } diff --git a/tensorflow/compiler/xla/service/interpreter/compiler.h b/tensorflow/compiler/xla/service/interpreter/compiler.h index c8660c04d86a82e7dfcfd1658310c2a0e4fa0083..e90ae3e818522e6e4fd9d9f5acb846800bc899ca 100644 --- a/tensorflow/compiler/xla/service/interpreter/compiler.h +++ b/tensorflow/compiler/xla/service/interpreter/compiler.h @@ -44,19 +44,16 @@ class InterpreterCompiler : public Compiler { ~InterpreterCompiler() override {} StatusOr> RunHloPasses( - std::unique_ptr hlo_module, - perftools::gputools::StreamExecutor* stream_exec, + std::unique_ptr hlo_module, se::StreamExecutor* stream_exec, DeviceMemoryAllocator* device_allocator) override; StatusOr> RunBackend( - std::unique_ptr hlo_module, - perftools::gputools::StreamExecutor* stream_exec, + std::unique_ptr hlo_module, se::StreamExecutor* stream_exec, DeviceMemoryAllocator* 
device_allocator) override; StatusOr>> Compile( std::vector> hlo_modules, - std::vector> - stream_exec, + std::vector> stream_exec, DeviceMemoryAllocator* device_allocator) override; StatusOr>> @@ -65,7 +62,7 @@ class InterpreterCompiler : public Compiler { HloCostAnalysis::ShapeSizeFunction ShapeSizeBytesFunction() const override; - perftools::gputools::Platform::Id PlatformId() const override; + se::Platform::Id PlatformId() const override; private: Status RunHloOptimization(HloModule* hlo_module); diff --git a/tensorflow/compiler/xla/service/interpreter/executable.cc b/tensorflow/compiler/xla/service/interpreter/executable.cc index 883063d0f075f5b0d79edc01bcd27a7c579272f4..6553000336b072d56ea01cad1f2f66c7bd339e1a 100644 --- a/tensorflow/compiler/xla/service/interpreter/executable.cc +++ b/tensorflow/compiler/xla/service/interpreter/executable.cc @@ -38,8 +38,6 @@ limitations under the License. namespace xla { namespace interpreter { -namespace se = ::perftools::gputools; - InterpreterExecutable::InterpreterExecutable( std::unique_ptr hlo_module) : Executable(std::move(hlo_module), /*hlo_profile_printer=*/nullptr, @@ -47,7 +45,7 @@ InterpreterExecutable::InterpreterExecutable( InterpreterExecutable::~InterpreterExecutable() {} -StatusOr> InterpreterExecutable::ExecuteOnStream( +StatusOr InterpreterExecutable::ExecuteOnStream( const ServiceExecutableRunOptions* run_options, tensorflow::gtl::ArraySlice arguments, HloExecutionProfile* hlo_execution_profile) { @@ -90,12 +88,12 @@ StatusOr> InterpreterExecutable::ExecuteOnStream( evaluator.Evaluate>(*computation, arg_literals)); // Transform the result literal back into a ShapedBuffer. 
- TF_ASSIGN_OR_RETURN(std::unique_ptr result, + TF_ASSIGN_OR_RETURN(ShapedBuffer result, transfer_manager->AllocateShapedBuffer( result_literal->shape(), run_options->allocator(), executor->device_ordinal())); TF_RETURN_IF_ERROR(transfer_manager->TransferLiteralToDevice( - executor, *result_literal, *result)); + executor, *result_literal, result)); uint64 end_micros = tensorflow::Env::Default()->NowMicros(); @@ -108,8 +106,7 @@ StatusOr> InterpreterExecutable::ExecuteOnStream( return std::move(result); } -StatusOr> -InterpreterExecutable::ExecuteAsyncOnStream( +StatusOr InterpreterExecutable::ExecuteAsyncOnStream( const ServiceExecutableRunOptions* run_options, tensorflow::gtl::ArraySlice arguments) { return tensorflow::errors::Unimplemented( diff --git a/tensorflow/compiler/xla/service/interpreter/executable.h b/tensorflow/compiler/xla/service/interpreter/executable.h index 410110a1adf04c83001c38ed03f5d60dd203dc7e..c825a9a368d43b5ce1cdc4701e3bbf57374d81f3 100644 --- a/tensorflow/compiler/xla/service/interpreter/executable.h +++ b/tensorflow/compiler/xla/service/interpreter/executable.h @@ -43,12 +43,12 @@ class InterpreterExecutable : public Executable { InterpreterExecutable(std::unique_ptr hlo_module); ~InterpreterExecutable() override; - StatusOr> ExecuteOnStream( + StatusOr ExecuteOnStream( const ServiceExecutableRunOptions* run_options, tensorflow::gtl::ArraySlice arguments, HloExecutionProfile* hlo_execution_profile) override; - StatusOr> ExecuteAsyncOnStream( + StatusOr ExecuteAsyncOnStream( const ServiceExecutableRunOptions* run_options, tensorflow::gtl::ArraySlice arguments) override; diff --git a/tensorflow/compiler/xla/service/interpreter/executor.cc b/tensorflow/compiler/xla/service/interpreter/executor.cc index 68371910d76f42c0b6d4b1adad9d6a83bdb858e6..97e9fa2c8e8ecd918ffe3df2fd4e731f3b91e6db 100644 --- a/tensorflow/compiler/xla/service/interpreter/executor.cc +++ b/tensorflow/compiler/xla/service/interpreter/executor.cc @@ -19,8 +19,7 @@ limitations 
under the License. #include "tensorflow/compiler/xla/status_macros.h" -namespace perftools { -namespace gputools { +namespace stream_executor { namespace interpreter { host::HostStream *AsExecutorStream(Stream *stream) { @@ -28,84 +27,85 @@ host::HostStream *AsExecutorStream(Stream *stream) { return dynamic_cast(stream->implementation()); } -InterpreterExecutor::InterpreterExecutor(const PluginConfig &plugin_config) +XlaInterpreterExecutor::XlaInterpreterExecutor( + const PluginConfig &plugin_config) : plugin_config_(plugin_config) {} -InterpreterExecutor::~InterpreterExecutor() {} +XlaInterpreterExecutor::~XlaInterpreterExecutor() {} -void *InterpreterExecutor::Allocate(uint64 size) { return new char[size]; } +void *XlaInterpreterExecutor::Allocate(uint64 size) { return new char[size]; } -void *InterpreterExecutor::AllocateSubBuffer(DeviceMemoryBase *parent, - uint64 offset_bytes, - uint64 /*size_bytes*/) { +void *XlaInterpreterExecutor::AllocateSubBuffer(DeviceMemoryBase *parent, + uint64 offset_bytes, + uint64 /*size_bytes*/) { return parent + offset_bytes; } -void InterpreterExecutor::Deallocate(DeviceMemoryBase *mem) { +void XlaInterpreterExecutor::Deallocate(DeviceMemoryBase *mem) { if (!mem->is_sub_buffer()) { delete[] static_cast(mem->opaque()); } } -bool InterpreterExecutor::Memcpy(Stream *stream, void *host_dst, - const DeviceMemoryBase &dev_src, uint64 size) { +bool XlaInterpreterExecutor::Memcpy(Stream *stream, void *host_dst, + const DeviceMemoryBase &dev_src, + uint64 size) { AsExecutorStream(stream)->EnqueueTask([this, host_dst, dev_src, size]() { port::Status ok = SynchronousMemcpy(host_dst, dev_src, size); }); return true; } -bool InterpreterExecutor::Memcpy(Stream *stream, DeviceMemoryBase *dev_dst, - const void *host_src, uint64 size) { +bool XlaInterpreterExecutor::Memcpy(Stream *stream, DeviceMemoryBase *dev_dst, + const void *host_src, uint64 size) { AsExecutorStream(stream)->EnqueueTask([this, dev_dst, host_src, size]() { port::Status ok = 
SynchronousMemcpy(dev_dst, host_src, size); }); return true; } -port::Status InterpreterExecutor::SynchronousMemcpy(DeviceMemoryBase *dev_dst, - const void *host_src, - uint64 size) { +port::Status XlaInterpreterExecutor::SynchronousMemcpy( + DeviceMemoryBase *dev_dst, const void *host_src, uint64 size) { memcpy(dev_dst->opaque(), host_src, size); return port::Status::OK(); } -port::Status InterpreterExecutor::SynchronousMemcpy( +port::Status XlaInterpreterExecutor::SynchronousMemcpy( void *host_dst, const DeviceMemoryBase &dev_src, uint64 size) { memcpy(host_dst, dev_src.opaque(), size); return port::Status::OK(); } -bool InterpreterExecutor::HostCallback(Stream *stream, - std::function callback) { +bool XlaInterpreterExecutor::HostCallback(Stream *stream, + std::function callback) { AsExecutorStream(stream)->EnqueueTask(callback); return true; } -bool InterpreterExecutor::CreateStreamDependency(Stream *dependent, - Stream *other) { +bool XlaInterpreterExecutor::CreateStreamDependency(Stream *dependent, + Stream *other) { AsExecutorStream(dependent)->EnqueueTask( [other]() { SE_CHECK_OK(other->BlockHostUntilDone()); }); AsExecutorStream(dependent)->BlockUntilDone(); return true; } -bool InterpreterExecutor::StartTimer(Stream *stream, Timer *timer) { +bool XlaInterpreterExecutor::StartTimer(Stream *stream, Timer *timer) { dynamic_cast(timer->implementation())->Start(stream); return true; } -bool InterpreterExecutor::StopTimer(Stream *stream, Timer *timer) { +bool XlaInterpreterExecutor::StopTimer(Stream *stream, Timer *timer) { dynamic_cast(timer->implementation())->Stop(stream); return true; } -port::Status InterpreterExecutor::BlockHostUntilDone(Stream *stream) { +port::Status XlaInterpreterExecutor::BlockHostUntilDone(Stream *stream) { AsExecutorStream(stream)->BlockUntilDone(); return port::Status::OK(); } -DeviceDescription *InterpreterExecutor::PopulateDeviceDescription() const { +DeviceDescription *XlaInterpreterExecutor::PopulateDeviceDescription() const { 
internal::DeviceDescriptionBuilder builder; builder.set_device_address_bits(64); @@ -118,5 +118,4 @@ DeviceDescription *InterpreterExecutor::PopulateDeviceDescription() const { } } // namespace interpreter -} // namespace gputools -} // namespace perftools +} // namespace stream_executor diff --git a/tensorflow/compiler/xla/service/interpreter/executor.h b/tensorflow/compiler/xla/service/interpreter/executor.h index c5d07e906dafb033905c50c604069e80e1ce80cd..9b109022fbfc698f7dadc678ef837da270a5e74a 100644 --- a/tensorflow/compiler/xla/service/interpreter/executor.h +++ b/tensorflow/compiler/xla/service/interpreter/executor.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -// Declares the InterpreterExecutor class, which is a CPU-only implementation of -// the StreamExecutor interface. For now, this is used for testing and to +// Declares the XlaInterpreterExecutor class, which is a CPU-only implementation +// of the StreamExecutor interface. For now, this is used for testing and to // examine the performance of host-based StreamExecutor code. #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_INTERPRETER_EXECUTOR_H_ #define TENSORFLOW_COMPILER_XLA_SERVICE_INTERPRETER_EXECUTOR_H_ @@ -44,16 +44,15 @@ limitations under the License. 
#include "tensorflow/stream_executor/stream_executor_internal.h" #include "tensorflow/stream_executor/timer.h" -namespace perftools { -namespace gputools { +namespace stream_executor { namespace interpreter { using Args = tensorflow::gtl::ArraySlice; -class InterpreterExecutor : public internal::StreamExecutorInterface { +class XlaInterpreterExecutor : public internal::StreamExecutorInterface { public: - explicit InterpreterExecutor(const PluginConfig &plugin_config); - ~InterpreterExecutor() override; + explicit XlaInterpreterExecutor(const PluginConfig &plugin_config); + ~XlaInterpreterExecutor() override; port::Status Init(int device_ordinal, DeviceOptions device_options) override { return port::Status::OK(); @@ -213,7 +212,6 @@ class InterpreterExecutor : public internal::StreamExecutorInterface { }; } // namespace interpreter -} // namespace gputools -} // namespace perftools +} // namespace stream_executor #endif // TENSORFLOW_COMPILER_XLA_SERVICE_INTERPRETER_EXECUTOR_H_ diff --git a/tensorflow/compiler/xla/service/interpreter/interpreter_transfer_manager.cc b/tensorflow/compiler/xla/service/interpreter/interpreter_transfer_manager.cc index cf98ecd7749d61261bf072cdb1882c7603f39556..d27cd7502f10a1f615fc5b0d610acafdf55e3e43 100644 --- a/tensorflow/compiler/xla/service/interpreter/interpreter_transfer_manager.cc +++ b/tensorflow/compiler/xla/service/interpreter/interpreter_transfer_manager.cc @@ -21,12 +21,10 @@ limitations under the License. 
#include "tensorflow/compiler/xla/service/interpreter/platform_id.h" #include "tensorflow/compiler/xla/service/transfer_manager.h" -namespace sei = ::perftools::gputools::interpreter; - namespace xla { InterpreterTransferManager::InterpreterTransferManager() - : GenericTransferManager(sei::kInterpreterPlatformId, + : GenericTransferManager(se::interpreter::kXlaInterpreterPlatformId, /*pointer_size=*/sizeof(void*)) {} } // namespace xla @@ -38,7 +36,8 @@ CreateInterpreterTransferManager() { static bool InitModule() { xla::TransferManager::RegisterTransferManager( - sei::kInterpreterPlatformId, &CreateInterpreterTransferManager); + stream_executor::interpreter::kXlaInterpreterPlatformId, + &CreateInterpreterTransferManager); return true; } diff --git a/tensorflow/compiler/xla/service/interpreter/platform.cc b/tensorflow/compiler/xla/service/interpreter/platform.cc index a60e7fc59f7c5f0b1b24e026b34e195ca0fe5ebb..ce2f4d378c02592da01be12c0647da47d00b01ba 100644 --- a/tensorflow/compiler/xla/service/interpreter/platform.cc +++ b/tensorflow/compiler/xla/service/interpreter/platform.cc @@ -28,24 +28,22 @@ limitations under the License. 
#include "tensorflow/stream_executor/multi_platform_manager.h" #include "tensorflow/stream_executor/platform.h" -namespace se = ::perftools::gputools; -namespace sep = ::perftools::gputools::interpreter; - -namespace perftools { -namespace gputools { +namespace stream_executor { namespace interpreter { -InterpreterPlatform::InterpreterPlatform() : name_("Interpreter") {} +XlaInterpreterPlatform::XlaInterpreterPlatform() : name_("Interpreter") {} -InterpreterPlatform::~InterpreterPlatform() {} +XlaInterpreterPlatform::~XlaInterpreterPlatform() {} -Platform::Id InterpreterPlatform::id() const { return kInterpreterPlatformId; } +Platform::Id XlaInterpreterPlatform::id() const { + return kXlaInterpreterPlatformId; +} -int InterpreterPlatform::VisibleDeviceCount() const { return 1; } +int XlaInterpreterPlatform::VisibleDeviceCount() const { return 1; } -const string& InterpreterPlatform::Name() const { return name_; } +const string& XlaInterpreterPlatform::Name() const { return name_; } -port::StatusOr InterpreterPlatform::ExecutorForDevice( +port::StatusOr XlaInterpreterPlatform::ExecutorForDevice( int ordinal) { StreamExecutorConfig config; config.ordinal = ordinal; @@ -55,7 +53,7 @@ port::StatusOr InterpreterPlatform::ExecutorForDevice( } port::StatusOr -InterpreterPlatform::ExecutorForDeviceWithPluginConfig( +XlaInterpreterPlatform::ExecutorForDeviceWithPluginConfig( int device_ordinal, const PluginConfig& plugin_config) { StreamExecutorConfig config; config.ordinal = device_ordinal; @@ -64,16 +62,17 @@ InterpreterPlatform::ExecutorForDeviceWithPluginConfig( return GetExecutor(config); } -port::StatusOr InterpreterPlatform::GetExecutor( +port::StatusOr XlaInterpreterPlatform::GetExecutor( const StreamExecutorConfig& config) { return executor_cache_.GetOrCreate( config, [&]() { return GetUncachedExecutor(config); }); } port::StatusOr> -InterpreterPlatform::GetUncachedExecutor(const StreamExecutorConfig& config) { +XlaInterpreterPlatform::GetUncachedExecutor( + const 
StreamExecutorConfig& config) { auto executor = port::MakeUnique( - this, port::MakeUnique(config.plugin_config)); + this, port::MakeUnique(config.plugin_config)); auto init_status = executor->Init(config.ordinal, config.device_options); if (!init_status.ok()) { return port::Status{ @@ -86,26 +85,26 @@ InterpreterPlatform::GetUncachedExecutor(const StreamExecutorConfig& config) { return std::move(executor); } -void InterpreterPlatform::RegisterTraceListener( +void XlaInterpreterPlatform::RegisterTraceListener( std::unique_ptr listener) { LOG(FATAL) << "not yet implemented: register executor trace listener"; } -void InterpreterPlatform::UnregisterTraceListener(TraceListener* listener) { +void XlaInterpreterPlatform::UnregisterTraceListener(TraceListener* listener) { LOG(FATAL) << "not yet implemented: unregister executor trace listener"; } -static void InitializeInterpreterPlatform() { - std::unique_ptr platform(new sep::InterpreterPlatform); - SE_CHECK_OK(se::MultiPlatformManager::RegisterPlatform(std::move(platform))); +static void InitializeXlaInterpreterPlatform() { + std::unique_ptr platform(new XlaInterpreterPlatform); + SE_CHECK_OK(MultiPlatformManager::RegisterPlatform(std::move(platform))); } } // namespace interpreter -} // namespace gputools -} // namespace perftools +} // namespace stream_executor -REGISTER_MODULE_INITIALIZER(interpreter_platform, - sep::InitializeInterpreterPlatform()); +REGISTER_MODULE_INITIALIZER( + interpreter_platform, + stream_executor::interpreter::InitializeXlaInterpreterPlatform()); DECLARE_MODULE_INITIALIZER(multi_platform_manager); diff --git a/tensorflow/compiler/xla/service/interpreter/platform.h b/tensorflow/compiler/xla/service/interpreter/platform.h index c66ddb907d1c5a8e99d3178a202a77a72a646ce5..d68c5aa20dda7ac246ed4aa667851e385a604c04 100644 --- a/tensorflow/compiler/xla/service/interpreter/platform.h +++ b/tensorflow/compiler/xla/service/interpreter/platform.h @@ -23,14 +23,13 @@ limitations under the License. 
#include "tensorflow/stream_executor/stream_executor.h" #include "tensorflow/stream_executor/trace_listener.h" -namespace perftools { -namespace gputools { +namespace stream_executor { namespace interpreter { -class InterpreterPlatform : public Platform { +class XlaInterpreterPlatform : public Platform { public: - InterpreterPlatform(); - ~InterpreterPlatform() override; + XlaInterpreterPlatform(); + ~XlaInterpreterPlatform() override; Platform::Id id() const override; @@ -60,11 +59,10 @@ class InterpreterPlatform : public Platform { // Cache of created StreamExecutors. ExecutorCache executor_cache_; - SE_DISALLOW_COPY_AND_ASSIGN(InterpreterPlatform); + SE_DISALLOW_COPY_AND_ASSIGN(XlaInterpreterPlatform); }; } // namespace interpreter -} // namespace gputools -} // namespace perftools +} // namespace stream_executor #endif // TENSORFLOW_COMPILER_XLA_SERVICE_INTERPRETER_PLATFORM_H_ diff --git a/tensorflow/compiler/xla/service/interpreter/platform_id.cc b/tensorflow/compiler/xla/service/interpreter/platform_id.cc index 1a0373cf86e26b564e0e732e8de1a0a5d868bfa6..3272396ce5045129a7689a160ec859d11fbbe9fa 100644 --- a/tensorflow/compiler/xla/service/interpreter/platform_id.cc +++ b/tensorflow/compiler/xla/service/interpreter/platform_id.cc @@ -14,12 +14,10 @@ limitations under the License. 
==============================================================================*/ #include "tensorflow/compiler/xla/service/interpreter/platform_id.h" -namespace perftools { -namespace gputools { +namespace stream_executor { namespace interpreter { -PLATFORM_DEFINE_ID(kInterpreterPlatformId); +PLATFORM_DEFINE_ID(kXlaInterpreterPlatformId); } // namespace interpreter -} // namespace gputools -} // namespace perftools +} // namespace stream_executor diff --git a/tensorflow/compiler/xla/service/interpreter/platform_id.h b/tensorflow/compiler/xla/service/interpreter/platform_id.h index 905efef1690d3bd32461353fe32dd394eb6bca9e..a6cc10bcc1eb756a3146d4a834efa4cd3ceb2d27 100644 --- a/tensorflow/compiler/xla/service/interpreter/platform_id.h +++ b/tensorflow/compiler/xla/service/interpreter/platform_id.h @@ -18,14 +18,12 @@ limitations under the License. #include "tensorflow/stream_executor/platform.h" -namespace perftools { -namespace gputools { +namespace stream_executor { namespace interpreter { -extern const Platform::Id kInterpreterPlatformId; +extern const Platform::Id kXlaInterpreterPlatformId; } // namespace interpreter -} // namespace gputools -} // namespace perftools +} // namespace stream_executor #endif // TENSORFLOW_COMPILER_XLA_SERVICE_INTERPRETER_PLATFORM_ID_H_ diff --git a/tensorflow/compiler/xla/service/layout_assignment.cc b/tensorflow/compiler/xla/service/layout_assignment.cc index 39f9120e552f014dd2759bff2892157402d9c47a..7067b6f86a0fb24fb946ad236bca9bbd48d53722 100644 --- a/tensorflow/compiler/xla/service/layout_assignment.cc +++ b/tensorflow/compiler/xla/service/layout_assignment.cc @@ -31,10 +31,12 @@ limitations under the License. 
#include "tensorflow/compiler/xla/ptr_util.h" #include "tensorflow/compiler/xla/service/computation_layout.h" #include "tensorflow/compiler/xla/service/hlo_computation.h" +#include "tensorflow/compiler/xla/service/hlo_dce.h" #include "tensorflow/compiler/xla/service/hlo_graph_dumper.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" #include "tensorflow/compiler/xla/service/hlo_opcode.h" #include "tensorflow/compiler/xla/service/logical_buffer.h" +#include "tensorflow/compiler/xla/service/tuple_simplifier.h" #include "tensorflow/compiler/xla/shape_layout.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/status_macros.h" @@ -57,76 +59,6 @@ namespace xla { // anonymous namespace, instead of three or four spread all over this file. namespace { -// Creates and returns a copy of the given instruction with a different -// layout. Tuple-shaped instructions will be deep-copied, and the last Tuple -// instruction producing the copy is returned. -StatusOr CreateCopyWithNewLayout( - const Shape& shape_with_layout, HloInstruction* instruction) { - TF_RET_CHECK(LayoutUtil::HasLayout(shape_with_layout)); - DCHECK(ShapeUtil::Compatible(shape_with_layout, instruction->shape())) - << ShapeUtil::HumanString(shape_with_layout) << " " - << ShapeUtil::HumanString(instruction->shape()) - << " instruction: " << instruction->ToString(); - - if (ShapeUtil::IsTuple(instruction->shape())) { - // Deep-copy tuples. - std::vector element_copies; - for (int64 i = 0; i < ShapeUtil::TupleElementCount(instruction->shape()); - ++i) { - HloInstruction* gte = instruction->parent()->AddInstruction( - HloInstruction::CreateGetTupleElement( - ShapeUtil::GetSubshape(instruction->shape(), {i}), instruction, - i)); - - // Recurse to copy each elements. 
- TF_ASSIGN_OR_RETURN( - HloInstruction * element_copy, - CreateCopyWithNewLayout( - ShapeUtil::GetSubshape(shape_with_layout, {i}), gte)); - element_copies.push_back(element_copy); - } - // Gather element copies into a tuple with a new Tuple instruction. - HloInstruction* tuple_copy = instruction->parent()->AddInstruction( - HloInstruction::CreateTuple(element_copies)); - LayoutUtil::ClearLayout(tuple_copy->mutable_shape()); - TF_RETURN_IF_ERROR(LayoutUtil::CopyLayoutBetweenShapes( - shape_with_layout, tuple_copy->mutable_shape())); - return tuple_copy; - } else if (ShapeUtil::IsArray(instruction->shape())) { - HloInstruction* copy = - instruction->parent()->AddInstruction(HloInstruction::CreateUnary( - instruction->shape(), HloOpcode::kCopy, instruction)); - LayoutUtil::ClearLayout(copy->mutable_shape()); - TF_RETURN_IF_ERROR(LayoutUtil::CopyLayoutBetweenShapes( - shape_with_layout, copy->mutable_shape())); - - return copy; - } else { - return FailedPrecondition( - "Can only copy array and tuple shaped instructions"); - } -} - -// Creates a copy of the given operand if the operand's layout does not match -// the given layout. This copy replaces the use in the given instruction. Tuple -// operands will be deep-copied. -Status CopyOperandIfLayoutsDiffer(const ShapeLayout& operand_layout, - HloInstruction* instruction, - int64 operand_no) { - HloInstruction* operand = instruction->mutable_operand(operand_no); - TF_RET_CHECK(operand_layout.LayoutIsSet()); - TF_RET_CHECK(LayoutUtil::HasLayout(operand->shape())); - - if (ShapeUtil::Equal(operand_layout.shape(), operand->shape())) { - // Operand layout already matches our constraint. Nothing to do. 
- return Status::OK(); - } - - TF_ASSIGN_OR_RETURN(HloInstruction * operand_copy, - CreateCopyWithNewLayout(operand_layout.shape(), operand)); - - return instruction->ReplaceOperandWith(operand_no, operand_copy); -} } // namespace @@ -470,9 +402,9 @@ string LayoutConstraints::ToString() const { } Status LayoutAssignment::AddMandatoryConstraints( - const ComputationLayout& computation_layout, - const ChannelLayoutConstraints* channel_constraints, - HloComputation* computation, LayoutConstraints* constraints) { + const ComputationLayout* computation_layout, + ChannelLayoutConstraints* channel_constraints, HloComputation* computation, + LayoutConstraints* constraints) { VLOG(3) << "Adding mandatory layout constraints to computation " << computation->name(); @@ -494,11 +426,16 @@ Status LayoutAssignment::AddMandatoryConstraints( TF_RETURN_IF_ERROR(constraints->SetOperandLayout( instruction->outfeed_shape(), instruction, 0)); } else if (instruction->opcode() == HloOpcode::kParameter) { - // Parameter layouts must match the respective layout in - // ComputationLayout. - shape_with_layout = - &computation_layout.parameter_layout(instruction->parameter_number()) - .shape(); + if (computation_layout != nullptr) { + const ShapeLayout& parameter_layout = + computation_layout->parameter_layout( + instruction->parameter_number()); + if (parameter_layout.LayoutIsSet()) { + // Parameter layouts must match the respective layout in + // ComputationLayout, if there is one. 
+ shape_with_layout = ¶meter_layout.shape(); + } + } } if (shape_with_layout != nullptr) { TF_RETURN_IF_ERROR( @@ -563,9 +500,8 @@ Status LayoutAssignment::AddMandatoryConstraints( HloComputation* body = instruction->while_body(); HloComputation* condition = instruction->while_condition(); const HloInstruction* init = instruction->operand(0); - const ComputationLayout& body_layout = - FindOrDie(computation_layouts_, body); - const ComputationLayout& condition_layout = + ComputationLayout& body_layout = FindOrDie(computation_layouts_, body); + ComputationLayout& condition_layout = FindOrDie(computation_layouts_, condition); // Check a few invariants irrespective of layout. @@ -578,26 +514,19 @@ Status LayoutAssignment::AddMandatoryConstraints( condition_layout.parameter_shape(0))); DCHECK(ShapeUtil::Compatible(body_layout.result_shape(), init->shape())); - // Return error if earlier layout assignment of the embedded computations - // has produced conflicting layouts. - if (!ShapeUtil::Equal(body_layout.result_shape(), - body_layout.parameter_shape(0))) { - return InternalError( - "Parameter and result of body computation %s of while instruction " - "%s have different layouts: %s vs %s", - body->name().c_str(), instruction->name().c_str(), - ShapeUtil::HumanString(body_layout.result_shape()).c_str(), - ShapeUtil::HumanString(body_layout.parameter_shape(0)).c_str()); + if (body_layout.result_layout() != body_layout.parameter_layout(0)) { + VLOG(2) << "Reset %while body parameter layout: body=" << body->name() + << " while=" << instruction->name() + << " shape=" << body_layout.result_layout().ToString(); + *body_layout.mutable_parameter_layout(0) = body_layout.result_layout(); } - if (!ShapeUtil::Equal(body->root_instruction()->shape(), - condition->parameter_instruction(0)->shape())) { - return InternalError( - "Parameter of condition computation %s of while instruction " - "%s does not match body computation %s result: %s vs %s", - condition->name().c_str(), 
instruction->name().c_str(), - body->name().c_str(), - ShapeUtil::HumanString(condition_layout.parameter_shape(0)).c_str(), - ShapeUtil::HumanString(body_layout.result_shape()).c_str()); + if (condition_layout.parameter_layout(0) != + body_layout.parameter_layout(0)) { + VLOG(2) << "Reset %while condition parameter layout: cond=" + << condition->name() << " while=" << instruction->name() + << " shape=" << body_layout.parameter_layout(0).ToString(); + *condition_layout.mutable_parameter_layout(0) = + body_layout.parameter_layout(0); } // Constrain the output and the operand of the while instruction to match @@ -627,7 +556,20 @@ Status LayoutAssignment::AddMandatoryConstraints( true_computation_layout.parameter_shape(0))); DCHECK(ShapeUtil::Compatible( false_operand->shape(), false_computation_layout.parameter_shape(0))); - + if (true_computation_layout.result_layout() != + false_computation_layout.result_layout()) { + // We assign layouts in DFS fashion, so the true and false computations + // might have negotiated a different layout. But for the conditional + // instruction POV the layout must match, so we run again on the false + // computation, this time with proper computation layout. + VLOG(2) << "Reset %conditional false computation result layout: " + "false_computation=" + << false_computation->name() + << " conditional=" << instruction->name() << " shape=" + << true_computation_layout.result_layout().ToString(); + *false_computation_layout.mutable_result_layout() = + true_computation_layout.result_layout(); + } TF_RETURN_IF_ERROR(constraints->SetInstructionLayout( true_computation_layout.result_shape(), instruction)); TF_RETURN_IF_ERROR(constraints->SetOperandLayout( @@ -663,10 +605,14 @@ Status LayoutAssignment::AddMandatoryConstraints( } } } - - // Finally set the result layout to match ComputationLayout. 
- return constraints->SetResultLayout( - computation_layout.result_layout().shape()); + // Finally set the result layout to match ComputationLayout, if there is one. + if (computation_layout != nullptr) { + const ShapeLayout& result_layout = computation_layout->result_layout(); + if (result_layout.LayoutIsSet()) { + TF_RETURN_IF_ERROR(constraints->SetResultLayout(result_layout.shape())); + } + } + return Status::OK(); } namespace { @@ -793,6 +739,106 @@ Status CheckConstantLayout(HloInstruction* constant) { } // namespace +StatusOr LayoutAssignment::CreateCopyWithNewLayout( + const Shape& shape_with_layout, HloInstruction* instruction) { + TF_RET_CHECK(LayoutUtil::HasLayout(shape_with_layout)); + DCHECK(ShapeUtil::Compatible(shape_with_layout, instruction->shape())) + << ShapeUtil::HumanString(shape_with_layout) << " " + << ShapeUtil::HumanString(instruction->shape()) + << " instruction: " << instruction->ToString(); + + if (ShapeUtil::IsTuple(instruction->shape())) { + // Deep-copy tuples. + std::vector element_copies; + for (int64 i = 0; i < ShapeUtil::TupleElementCount(instruction->shape()); + ++i) { + HloInstruction* gte = instruction->parent()->AddInstruction( + HloInstruction::CreateGetTupleElement( + ShapeUtil::GetSubshape(instruction->shape(), {i}), instruction, + i)); + SetupCopiedInstruction(*instruction, gte, {i}); + // Recurse to copy each elements. + TF_ASSIGN_OR_RETURN( + HloInstruction * element_copy, + CreateCopyWithNewLayout( + ShapeUtil::GetSubshape(shape_with_layout, {i}), gte)); + element_copies.push_back(element_copy); + } + // Gather element copies into a tuple with a new Tuple instruction. 
+ HloInstruction* tuple_copy = instruction->parent()->AddInstruction( + HloInstruction::CreateTuple(element_copies)); + SetupCopiedInstruction(*instruction, tuple_copy, {}); + LayoutUtil::ClearLayout(tuple_copy->mutable_shape()); + TF_RETURN_IF_ERROR(LayoutUtil::CopyLayoutBetweenShapes( + shape_with_layout, tuple_copy->mutable_shape())); + return tuple_copy; + } else if (ShapeUtil::IsArray(instruction->shape())) { + HloInstruction* copy = + instruction->parent()->AddInstruction(HloInstruction::CreateUnary( + instruction->shape(), HloOpcode::kCopy, instruction)); + RegisterAddedCopy(copy); + SetupCopiedInstruction(*instruction, copy, {}); + LayoutUtil::ClearLayout(copy->mutable_shape()); + TF_RETURN_IF_ERROR(LayoutUtil::CopyLayoutBetweenShapes( + shape_with_layout, copy->mutable_shape())); + + return copy; + } else { + return FailedPrecondition( + "Can only copy array and tuple shaped instructions"); + } +} + +// Creates a copy of the given operand if the operand's layout does not match +// the given layout. This copy replaces the use in the given instruction. Tuple +// operands will be deep-copied. +Status LayoutAssignment::CopyOperandIfLayoutsDiffer( + const ShapeLayout& operand_layout, HloInstruction* instruction, + int64 operand_no) { + HloInstruction* operand = instruction->mutable_operand(operand_no); + TF_RET_CHECK(operand_layout.LayoutIsSet()); + TF_RET_CHECK(LayoutUtil::HasLayout(operand->shape())); + + if (ShapeUtil::Equal(operand_layout.shape(), operand->shape())) { + VLOG(5) << "Operand " << operand->ToString() << " layout matches in " + << instruction->ToString(); + // Operand layout already matches our constraint. Nothing to do. 
+ return Status::OK(); + } + VLOG(4) << "Operand " << operand->ToString() << " layout does not match " + << operand_layout.ToString() << " in " << instruction->ToString(); + + TF_ASSIGN_OR_RETURN(HloInstruction * operand_copy, + CreateCopyWithNewLayout(operand_layout.shape(), operand)); + + VLOG(4) << "New copy of " << operand->ToString() << " is " + << operand_copy->ToString(); + return instruction->ReplaceOperandWith(operand_no, operand_copy); +} + +void LayoutAssignment::SetupCopiedInstruction(const HloInstruction& instruction, + HloInstruction* copy, + const ShapeIndex& index) { + if (instruction.has_sharding()) { + // If the index is empty, we want to copy the whole sharding, in case the + // sharding is a tuple sharding. + HloSharding sharding = + !index.empty() && instruction.sharding().IsTuple() + ? instruction.sharding().GetSubSharding(instruction.shape(), index) + : instruction.sharding(); + // We propagate the sharding to the copied instruction only if it is a + // special sharding, like tiled ones, or special devices like the + // HostCompute module. + // Otherwise it is preferable to leave the new instruction without device, + // and let the automatic device placer to choose the best location. + if (!sharding.HasUniqueDevice() || + HloSharding::IsReservedDevice(sharding.UniqueDevice().ValueOrDie())) { + copy->set_sharding(sharding); + } + } + copy->set_metadata(instruction.metadata()); +} + Status LayoutAssignment::CheckLayouts(HloModule* module) { TF_ASSIGN_OR_RETURN(auto points_to_analysis, TuplePointsToAnalysis::Run(module)); @@ -873,15 +919,16 @@ Status LayoutAssignment::CheckLayouts(HloModule* module) { } } } - - // Finally verify the result layout matches the layout of the entry + // Finally verify the result layout, if set, matches the layout of the entry // computation root. 
- TF_RET_CHECK(ShapeUtil::Equal( - module->entry_computation()->root_instruction()->shape(), + const ShapeLayout& result_layout = FindOrDie(computation_layouts_, module->entry_computation()) - .result_layout() - .shape())); - + .result_layout(); + if (result_layout.LayoutIsSet()) { + TF_RET_CHECK(ShapeUtil::Equal( + module->entry_computation()->root_instruction()->shape(), + result_layout.shape())); + } return Status::OK(); } @@ -890,18 +937,13 @@ LayoutAssignment::LayoutAssignment( ChannelLayoutConstraints* channel_constraints) : entry_computation_layout_(entry_computation_layout), channel_layout_constraints_(channel_constraints) { - VLOG(1) << "entry computation layout given to layout assignment: " + VLOG(1) << "Entry computation layout given to layout assignment: " << entry_computation_layout_->ToString(); // Layouts of all parameter instructions must be set. for (const ShapeLayout& parameter_layout : entry_computation_layout_->parameter_layouts()) { CHECK(parameter_layout.LayoutIsSet()); } - // If the result layout is not set, then choose the default. - // TODO(b/29118294): Choose a better layout in this case. - if (!entry_computation_layout_->result_layout().LayoutIsSet()) { - entry_computation_layout_->mutable_result_layout()->SetToDefaultLayout(); - } } std::unique_ptr LayoutAssignment::ChooseOperandLayoutFromOutputLayout( @@ -1461,16 +1503,60 @@ Status LayoutAssignment::AssignLayouts(const LayoutConstraints& constraints, return Status::OK(); } +Status LayoutAssignment::CalculateComputationLayout( + HloComputation* computation) { + ComputationLayout computation_layout(computation->ComputeProgramShape(), + /*ignore_layouts=*/false); + InsertOrDie(&computation_layouts_, computation, computation_layout); + VLOG(2) << " Calculated ComputationLayout = " + << computation_layout.ToString(); + return Status::OK(); +} + +Status LayoutAssignment::ClearComputationLayouts(HloComputation* computation) { + // Clear existing layouts of the instructions. 
All layouts must be assigned + // by the LayoutAssignment pass, except for those on infeeds, parameters, + // and the computation result. The latter two are specified in + // computation_layout, so we only need to keep the existing layouts for + // infeeds. Clearing the layouts here avoids hiding potential bugs in the + // layout assignment pass that may accidently use the existing layout. + for (HloInstruction* instruction : computation->instructions()) { + if (instruction->opcode() == HloOpcode::kBitcast) { + // bitcasts are inherently layout sensitive and so a bitcast instruction + // present in the IR before layout assignment is a bug. + return InternalError( + "Unexpected bitcast operation seen during layout assignment: %s.", + instruction->ToString().c_str()); + } + if (instruction->opcode() != HloOpcode::kInfeed) { + LayoutUtil::ClearLayout(instruction->mutable_shape()); + } + } + return Status::OK(); +} + Status LayoutAssignment::RunOnComputation( - const ComputationLayout& computation_layout, + ComputationLayout* computation_layout, const TuplePointsToAnalysis& points_to_analysis, HloComputation* computation, ChannelLayoutConstraints* channel_constraints) { - DCHECK(computation_layout.LayoutIsSet()); - InsertOrDie(&computation_layouts_, computation, computation_layout); VLOG(2) << "LayoutAssignment::RunOnComputation(" << computation->name() << ")"; - VLOG(2) << " ComputationLayout = " << computation_layout.ToString(); + TF_RETURN_IF_ERROR(ClearComputationLayouts(computation)); + if (computation_layout != nullptr) { + auto it = computation_layouts_.find(computation); + if (it == computation_layouts_.end()) { + VLOG(2) << " New ComputationLayout = " << computation_layout->ToString(); + computation_layouts_.emplace(computation, *computation_layout); + } else { + TF_RET_CHECK(computation_layout == &it->second || + computation_layout == entry_computation_layout_); + VLOG(2) << " Existing ComputationLayout = " + << computation_layout->ToString(); + } + } else { 
+ VLOG(2) << " No ComputationLayout specified (will be calculated)"; + } // Construct LayoutConstraints with all layout constraints of the computation. LayoutConstraints constraints(points_to_analysis, computation); @@ -1513,12 +1599,19 @@ Status LayoutAssignment::RunOnComputation( CHECK_LT(constraints.unconstrained_buffer_ids().size(), unconstrained_count); } - // All logical buffers should have constraints at this point. All that // remains is assign the constraints to the buffers and infer layouts for // aliased buffers. TF_RETURN_IF_ERROR(AssignLayouts(constraints, computation)); + // If the computation layout wasn't specified, now it is the time to compute + // it according to the parameters and root instruction layouts. + // This allows the first pass through this API to record the best flowing + // layout to parameters and root instruction. + if (computation_layout == nullptr) { + TF_RETURN_IF_ERROR(CalculateComputationLayout(computation)); + } + // Record the layouts assigned for any communication ops in // channel_constraints so that they are constrained for future modules. 
for (HloInstruction* instruction : computation->instructions()) { @@ -1533,6 +1626,34 @@ Status LayoutAssignment::RunOnComputation( return Status::OK(); } +Status LayoutAssignment::PropagateComputationLayouts( + HloComputation* computation, ComputationLayout* computation_layout) { + ComputationLayout computed_computation_layout( + computation->ComputeProgramShape(), + /*ignore_layouts=*/false); + for (int64 i = 0; i < computed_computation_layout.parameter_count(); ++i) { + ShapeLayout* param_layout = computation_layout->mutable_parameter_layout(i); + if (!param_layout->LayoutIsSet()) { + VLOG(4) << "Assigning layout to parameter " << i << " of computation " + << computation->name() << ": " + << computed_computation_layout.parameter_layout(i).ToString(); + *param_layout = computed_computation_layout.parameter_layout(i); + } else { + TF_RET_CHECK(computed_computation_layout.parameter_layout(i) == + *param_layout); + } + } + ShapeLayout* result_layout = computation_layout->mutable_result_layout(); + if (!result_layout->LayoutIsSet()) { + VLOG(4) << "Assigning result layout of computation " << computation->name() + << ": " << computed_computation_layout.result_layout().ToString(); + *result_layout = computed_computation_layout.result_layout(); + } else { + TF_RET_CHECK(computed_computation_layout.result_layout() == *result_layout); + } + return Status::OK(); +} + StatusOr LayoutAssignment::Run(HloModule* module) { VLOG(2) << "Running layout assignment on module " << module->name(); XLA_VLOG_LINES(3, module->ToString()); @@ -1541,52 +1662,45 @@ StatusOr LayoutAssignment::Run(HloModule* module) { "before layout assignment", module->config().debug_options()); } - - TF_ASSIGN_OR_RETURN(auto points_to_analysis, - TuplePointsToAnalysis::Run(module)); - - // Assign layouts to computations in an order such that a callee computation - // is handled before its caller computation. This ensures that the layout of - // all callers of a computation will agree. 
- std::list computation_post_order = - module->MakeComputationPostOrder(); - for (auto* computation : module->MakeComputationPostOrder()) { - if (computation->IsFusionComputation()) { - continue; - } - // Clear existing layouts of the instructions. All layouts must be assigned - // by the LayoutAssignment pass, except for those on infeeds, parameters, - // and the computation result. The latter two are specified in - // computation_layout, so we only need to keep the existing layouts for - // infeeds. Clearing the layouts here avoids hiding potential bugs in the - // layout assignment pass that may accidently use the existing layout. - for (HloInstruction* instruction : computation->instructions()) { - if (instruction->opcode() == HloOpcode::kBitcast) { - // bitcasts are inherently layout sensitive and so a bitcast instruction - // present in the IR before layout assignment is a bug. - return InternalError( - "Unexpected bitcast operation seen during layout assignment: %s.", - instruction->ToString().c_str()); + TF_RETURN_IF_ERROR(Init()); + + // We do two passes. The first one we pass a nullptr ComputationLayout to + // the RunOnComputation() calls (for non entry computations), and we register + // the ComputationLayout which are naturally flowing in DFS fashion to the + // parameters and root instruction. + // Walking in DFS mode though, means that we can end up with incorrect layouts + // when seen from an outer instruction, which has across-computation + // constraints to impose. + // For example, the kWhile instruction needs to enforce the same layouts for + // the parameters and root of the bosy, as well as the condition parameters. + // Similarly, the kConditional instruction needs to enforce the same layouts + // for the root of the true and false computations. 
+ // So in the first pass, while allowing the layouts to flow to parameters and + // root, we also fix up the eventually inconsistent ComputationLayout, which + // will be then made mandatory by the second pass. + for (int64 i = 0; i < 2; ++i) { + TF_RETURN_IF_ERROR(ClearPreviousPassSideEffects(module)); + TF_ASSIGN_OR_RETURN(auto points_to_analysis, + TuplePointsToAnalysis::Run(module)); + for (auto* computation : module->MakeComputationPostOrder()) { + if (computation->IsFusionComputation()) { + continue; } - if (instruction->opcode() != HloOpcode::kInfeed) { - LayoutUtil::ClearLayout(instruction->mutable_shape()); + if (computation == module->entry_computation()) { + TF_RETURN_IF_ERROR(RunOnComputation( + entry_computation_layout_, *points_to_analysis, + module->entry_computation(), channel_layout_constraints_)); + } else { + ComputationLayout* computation_layout = + (i == 0) ? nullptr : &FindOrDie(computation_layouts_, computation); + TF_RETURN_IF_ERROR(RunOnComputation(computation_layout, + *points_to_analysis, computation, + channel_layout_constraints_)); } } - if (computation == module->entry_computation()) { - TF_RETURN_IF_ERROR(RunOnComputation( - *entry_computation_layout_, *points_to_analysis, - module->entry_computation(), channel_layout_constraints_)); - } else { - ComputationLayout computation_layout(computation->ComputeProgramShape()); - // Setting all embedded computations to the default layout is potentially - // suboptimal. 
- computation_layout.SetToDefaultLayout(); - TF_RETURN_IF_ERROR(RunOnComputation(computation_layout, - *points_to_analysis, computation, - channel_layout_constraints_)); - } } - + TF_RETURN_IF_ERROR(PropagateComputationLayouts(module->entry_computation(), + entry_computation_layout_)); TF_RETURN_IF_ERROR(CheckLayouts(module)); VLOG(3) << "After layout assignment:"; @@ -1596,9 +1710,54 @@ StatusOr LayoutAssignment::Run(HloModule* module) { "after layout assignment", module->config().debug_options()); } - // All layouts are reset then reassigned by this pass. return true; } +Status LayoutAssignment::Init() { + computation_layouts_.clear(); + return Status::OK(); +} + +Status LayoutAssignment::ClearPreviousPassSideEffects(HloModule* module) { + // Clear all the copies which have been added, and all the related + // instructions (like GTE and tuples). + int64 removed_copies = 0; + for (HloComputation* computation : module->computations()) { + for (HloInstruction* instruction : + computation->MakeInstructionPostOrder()) { + if (instruction->opcode() == HloOpcode::kCopy && + added_copies_.count(instruction) > 0) { + VLOG(5) << "Removing added copy: " << instruction->ToString(); + TF_RETURN_IF_ERROR( + instruction->ReplaceAllUsesWith(instruction->mutable_operand(0))); + TF_RETURN_IF_ERROR(computation->RemoveInstruction(instruction)); + ++removed_copies; + } + } + } + added_copies_.clear(); + if (removed_copies > 0) { + TupleSimplifier tuple_simplifier; + HloDCE dce; + TF_RETURN_IF_ERROR(tuple_simplifier.Run(module).status()); + TF_RETURN_IF_ERROR(dce.Run(module).status()); + } + return Status::OK(); +} + +Status LayoutAssignment::AddCopyForOperand(HloInstruction* instruction, + int64 operand_number) { + HloInstruction* operand = instruction->mutable_operand(operand_number); + if (operand->opcode() != HloOpcode::kCopy || operand->user_count() > 1) { + HloInstruction* copy = + instruction->parent()->AddInstruction(HloInstruction::CreateUnary( + operand->shape(), 
HloOpcode::kCopy, operand)); + SetupCopiedInstruction(*operand, copy, {}); + LayoutUtil::ClearLayout(copy->mutable_shape()); + TF_RETURN_IF_ERROR(instruction->ReplaceOperandWith(operand_number, copy)); + } + return Status::OK(); +} + } // namespace xla diff --git a/tensorflow/compiler/xla/service/layout_assignment.h b/tensorflow/compiler/xla/service/layout_assignment.h index 680f88048a1f0cd5ede7991640003ef407d4facf..8b4e07995afffa196f689ffc45985b00bbeeb77d 100644 --- a/tensorflow/compiler/xla/service/layout_assignment.h +++ b/tensorflow/compiler/xla/service/layout_assignment.h @@ -39,6 +39,7 @@ limitations under the License. #include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/lib/gtl/flatmap.h" +#include "tensorflow/core/lib/gtl/flatset.h" #include "tensorflow/core/platform/types.h" namespace xla { @@ -362,12 +363,15 @@ class LayoutAssignment : public HloPassInterface { int64 operand_no); private: + // Initializes the layout assignment object for a new Run() call. + Status Init(); + // Adds constraints which must be satisfied for correctness on all // backends. Called once prior to propagating constraints. - Status AddMandatoryConstraints( - const ComputationLayout& computation_layout, - const ChannelLayoutConstraints* channel_constraints, - HloComputation* computation, LayoutConstraints* constraints); + Status AddMandatoryConstraints(const ComputationLayout* computation_layout, + ChannelLayoutConstraints* channel_constraints, + HloComputation* computation, + LayoutConstraints* constraints); // This method can be overridden to add backend-specific constraints to the // layout of the instructions of a computation. This method is called after @@ -378,10 +382,12 @@ class LayoutAssignment : public HloPassInterface { } // Construct contraints and assign layouts to all instructions in the - // computation satisfying the given ComputationLayout. 
Layouts constraints are - // added, then propagated until all LogicalBuffers in the computation are - // constrained. - Status RunOnComputation(const ComputationLayout& computation_layout, + // computation satisfying the given ComputationLayout, if not nullptr. + // Otherwise the ComputationLayout will be calculated by propagating the + // computation instruction constraints. + // Layout constraints are added, then propagated until all LogicalBuffers in + // the computation are constrained. + Status RunOnComputation(ComputationLayout* computation_layout, const TuplePointsToAnalysis& points_to_analysis, HloComputation* computation, ChannelLayoutConstraints* channel_constraints); @@ -402,14 +408,72 @@ class LayoutAssignment : public HloPassInterface { // necessary conditions. Status CheckLayouts(HloModule* module); + // Computes the ComputationLayout of the given computation based on the + // layouts assigned to parameters and root instruction, and inserts it into the + // computation_layouts_ map. + Status CalculateComputationLayout(HloComputation* computation); + + // Clears all the layouts which can be cleared within a computation. + Status ClearComputationLayouts(HloComputation* computation); + + // Clears the side effects of a previous pass, like added copy instructions. + Status ClearPreviousPassSideEffects(HloModule* module); + + // Propagates the layouts computed by the layout assignment pass on the given + // computation, to the computation layout passed in to this API. + // This API propagates missing layouts, and also checks that the caller + // specified layouts have been respected, by comparing those with the + // parameters and root computation instruction. + Status PropagateComputationLayouts(HloComputation* computation, + ComputationLayout* computation_layout); + ComputationLayout* entry_computation_layout_; protected: + // Sets up the copy instruction according to the characteristics (sharding, + // metadata, ...) of the reference instruction.
The index argument is used + // when the instruction is a tuple, and in such a case the index represents + // the location from which the copy instruction was created. + // If the index is empty, the whole sharding will be propagated, even in case + // the instruction has a tuple sharding. + static void SetupCopiedInstruction(const HloInstruction& instruction, + HloInstruction* copy, + const ShapeIndex& index); + + // Creates and returns a copy of the given instruction with a different + // layout. Tuple-shaped instructions will be deep-copied, and the last Tuple + // instruction producing the copy is returned. + StatusOr CreateCopyWithNewLayout( + const Shape& shape_with_layout, HloInstruction* instruction); + + // Creates a copy of the given operand if the operand's layout does not match + // the given layout. This copy replaces the use in the given instruction. + // Tuple operands will be deep-copied. + Status CopyOperandIfLayoutsDiffer(const ShapeLayout& operand_layout, + HloInstruction* instruction, + int64 operand_no); + + // Registers a copy instruction added by the layout assignment pass. + void RegisterAddedCopy(HloInstruction* copy) { + CHECK_EQ(copy->opcode(), HloOpcode::kCopy); + added_copies_.insert(copy); + } + + // Adds a copy for the operand of an instruction, unless such operand is + // already a copy, and has a single user (which is necessarily the instruction + // itself). + Status AddCopyForOperand(HloInstruction* instruction, int64 operand_number); + // Map containing the layouts of all computations assigned so // far. Computations are handled in a topological sort where computations are // handled before their caller instructions so the layouts of caller // instructions can be set to match the computation. std::map computation_layouts_; + + // Every copy added to the module by the layout assignment pass is registered + // here.
+ tensorflow::gtl::FlatSet added_copies_; + ChannelLayoutConstraints* channel_layout_constraints_; }; diff --git a/tensorflow/compiler/xla/service/llvm_compiler.cc b/tensorflow/compiler/xla/service/llvm_compiler.cc index 911b243fe28a5baf8a4b8ed752b892265f5388ac..b17c9d504501a907e27d5152e0082799e87443c7 100644 --- a/tensorflow/compiler/xla/service/llvm_compiler.cc +++ b/tensorflow/compiler/xla/service/llvm_compiler.cc @@ -23,7 +23,7 @@ limitations under the License. namespace xla { StatusOr>> LLVMCompiler::Compile( std::vector> modules, - std::vector> stream_execs, + std::vector> stream_execs, DeviceMemoryAllocator* device_allocator) { // Tensorflow tries to enable the following behaviors in all its threads: // diff --git a/tensorflow/compiler/xla/service/llvm_compiler.h b/tensorflow/compiler/xla/service/llvm_compiler.h index d74e81bb7f622ac5e89203a3d02ca5ad839da07e..f1c623508c5307f2b1c036d3ec6823b75c7eda13 100644 --- a/tensorflow/compiler/xla/service/llvm_compiler.h +++ b/tensorflow/compiler/xla/service/llvm_compiler.h @@ -60,19 +60,18 @@ class LLVMCompiler : public Compiler { // Bring in // StatusOr> RunBackend( // std::unique_ptr module, - // perftools::gputools::StreamExecutor* stream_exec, + // se::StreamExecutor* stream_exec, // DeviceMemoryAllocator* device_allocator) // StatusOr> RunHloPasses( // std::unique_ptr module, - // perftools::gputools::StreamExecutor* stream_exec, + // se::StreamExecutor* stream_exec, // DeviceMemoryAllocator* device_allocator) using Compiler::RunBackend; using Compiler::RunHloPasses; StatusOr>> Compile( std::vector> modules, - std::vector> - stream_execs, + std::vector> stream_execs, DeviceMemoryAllocator* device_allocator) override; protected: diff --git a/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc b/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc index 2a282f3be79f847a6569416794d1a2a3fcd69148..ec04239b4f9112134ba876fdfbb3905a3baf1f72 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc +++ 
b/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc @@ -34,6 +34,7 @@ limitations under the License. #include "tensorflow/core/lib/core/casts.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/io/path.h" +#include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/env.h" #include "tensorflow/core/platform/logging.h" @@ -762,7 +763,7 @@ void InitializeLLVMCommandLineOptions(const HloModuleConfig& config) { fake_argv_storage.push_back(""); for (const auto& it : options) { // Skip options the XLA backend itself consumes. - if (!tensorflow::StringPiece(it.first).starts_with("xla_")) { + if (!tensorflow::str_util::StartsWith(it.first, "xla_")) { if (it.second.empty()) { fake_argv_storage.push_back(it.first); } else { diff --git a/tensorflow/compiler/xla/service/llvm_ir/loop_emitter.cc b/tensorflow/compiler/xla/service/llvm_ir/loop_emitter.cc index b6b918ec78a27b90325f72eea14b97f9aee43c54..3978acc132f34b8b195d3772ccf71d0d467984db 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/loop_emitter.cc +++ b/tensorflow/compiler/xla/service/llvm_ir/loop_emitter.cc @@ -88,12 +88,12 @@ LoopEmitter::LoopEmitter(const ElementGenerator& target_element_generator, } } -IrArray::Index LoopEmitter::EmitIndexAndSetExitBasicBlock( +std::vector LoopEmitter::EmitIndexAndSetExitBasicBlock( tensorflow::StringPiece loop_name) { if (ShapeUtil::IsScalar(shape_)) { // No loop needed, so set exit_bb_ to nullptr. exit_bb_ = nullptr; - return IrArray::Index(); + return {IrArray::Index()}; } // Create loop nest with one for-loop for each dimension of the target shape. 
@@ -121,12 +121,14 @@ IrArray::Index LoopEmitter::EmitIndexAndSetExitBasicBlock( exit_bb_ = loop_nest.GetOuterLoopExitBasicBlock(); CHECK_NOTNULL(exit_bb_); - return array_index; + return {array_index}; } tensorflow::Status LoopEmitter::EmitLoop(tensorflow::StringPiece loop_name) { - IrArray::Index array_index = EmitIndexAndSetExitBasicBlock(loop_name); - TF_RETURN_IF_ERROR(body_emitter_(array_index)); + for (const IrArray::Index& array_index : + EmitIndexAndSetExitBasicBlock(loop_name)) { + TF_RETURN_IF_ERROR(body_emitter_(array_index)); + } // Set the insertion point of ir_builder_ to the loop exit, so that // code emitted for later instructions will be correctly placed. diff --git a/tensorflow/compiler/xla/service/llvm_ir/loop_emitter.h b/tensorflow/compiler/xla/service/llvm_ir/loop_emitter.h index 0fc528439a0d5bf8382dfcf2d8b3051f8900bf1d..9ff497aecd0bc964c929205c7fd410cca87d9b77 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/loop_emitter.h +++ b/tensorflow/compiler/xla/service/llvm_ir/loop_emitter.h @@ -63,11 +63,12 @@ class LoopEmitter { // Emits a loop nest (with a yet-to-be-filled loop body) that iterates through // every element in the given shape. Returns the multi-dimensional index that - // specifies the element. - IrArray::Index EmitIndexAndSetExitBasicBlock() { + // specifies the element, will return multiple indices if the loop is + // unrolled. + std::vector EmitIndexAndSetExitBasicBlock() { return EmitIndexAndSetExitBasicBlock(/*loop_name=*/""); } - virtual IrArray::Index EmitIndexAndSetExitBasicBlock( + virtual std::vector EmitIndexAndSetExitBasicBlock( tensorflow::StringPiece loop_name); // Emits a complete loop nest for every element in the given shape. 
diff --git a/tensorflow/compiler/xla/service/local_service.cc b/tensorflow/compiler/xla/service/local_service.cc index 499f280211aacd00e79b3ca0ddb3413f933b02da..0fa4061738612df76c72a18a9353f16bf6a42677 100644 --- a/tensorflow/compiler/xla/service/local_service.cc +++ b/tensorflow/compiler/xla/service/local_service.cc @@ -43,13 +43,11 @@ limitations under the License. #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/stream_executor_no_cuda.h" -namespace se = ::perftools::gputools; - namespace xla { /* static */ StatusOr> LocalService::NewService( const ServiceOptions& options) { - perftools::gputools::Platform* platform = options.platform(); + se::Platform* platform = options.platform(); if (platform == nullptr) { TF_ASSIGN_OR_RETURN(platform, PlatformUtil::GetDefaultPlatform()); } diff --git a/tensorflow/compiler/xla/service/name_uniquer.cc b/tensorflow/compiler/xla/service/name_uniquer.cc index 7d8c05fffa4ab11d7dbf9956d2cb7ebd5bcdd3c4..f74bcb0b79355c8e69890487266cbc5f2a4500be 100644 --- a/tensorflow/compiler/xla/service/name_uniquer.cc +++ b/tensorflow/compiler/xla/service/name_uniquer.cc @@ -53,17 +53,18 @@ NameUniquer::NameUniquer(const string& separator) { } string NameUniquer::GetUniqueName(tensorflow::StringPiece prefix) { - string root = prefix.empty() ? "name" : prefix.ToString(); - root = GetSanitizedName(root); + string root = GetSanitizedName(prefix.empty() ? "name" : prefix.ToString()); // Strip away numeric suffix (if any). Only recognize separator if it is in // the middle of the name. + bool has_numeric_suffix = false; + int64 numeric_suffix = 0; size_t separator_index = root.rfind(separator_); if (separator_index != string::npos && (separator_index > 0) && (separator_index < root.size() - 1)) { string after_suffix = root.substr(separator_index + 1); - int64 numeric_suffix; if (tensorflow::strings::safe_strto64(after_suffix, &numeric_suffix)) { + has_numeric_suffix = true; // Remove numeric suffix from root. 
root = root.substr(0, separator_index); // Update count to at least the numeric suffix value to avoid future @@ -71,11 +72,11 @@ string NameUniquer::GetUniqueName(tensorflow::StringPiece prefix) { generated_names_[root] = std::max(generated_names_[root], numeric_suffix); } } - int64* count = &(generated_names_[root]); if (*count == 0) { *count = 1; - return root; + return has_numeric_suffix ? tensorflow::strings::StrCat(root, separator_, 0) + : root; } else { tensorflow::strings::StrAppend(&root, separator_, *count); // Increment lookup under old 'root' name. diff --git a/tensorflow/compiler/xla/service/name_uniquer_test.cc b/tensorflow/compiler/xla/service/name_uniquer_test.cc index 4258cf16876ab46dce6df062ab701b1b1a4a7580..2ec255558c4ed3695ec6c824458cbedac44dc297 100644 --- a/tensorflow/compiler/xla/service/name_uniquer_test.cc +++ b/tensorflow/compiler/xla/service/name_uniquer_test.cc @@ -57,11 +57,18 @@ TEST_F(NameUniquerTest, NumericSuffixes) { EXPECT_EQ("foo.55", uniquer.GetUniqueName("foo")); EXPECT_EQ("foo.55.1", uniquer.GetUniqueName("foo.55.1")); EXPECT_EQ("foo.55.2", uniquer.GetUniqueName("foo.55.1")); - EXPECT_EQ("bar", uniquer.GetUniqueName("bar.-1000")); + EXPECT_EQ("bar.0", uniquer.GetUniqueName("bar.-1000")); EXPECT_EQ("bar.1", uniquer.GetUniqueName("bar.-2000")); EXPECT_EQ("bar.2", uniquer.GetUniqueName("bar.1")); } +TEST_F(NameUniquerTest, PrefixHasSuffix) { + NameUniquer uniquer("."); + + EXPECT_EQ("foo.11.0", uniquer.GetUniqueName("foo.11.0")); + EXPECT_EQ("foo.11", uniquer.GetUniqueName("foo.11")); +} + TEST_F(NameUniquerTest, Sanitize) { NameUniquer uniquer("_"); @@ -73,7 +80,7 @@ TEST_F(NameUniquerTest, Sanitize) { EXPECT_EQ("foo_55", uniquer.GetUniqueName("foo")); // Invalid characters will be replaced with '_'. 
- EXPECT_EQ("bar", uniquer.GetUniqueName("bar<-1000")); + EXPECT_EQ("bar_0", uniquer.GetUniqueName("bar<-1000")); EXPECT_EQ("bar_1", uniquer.GetUniqueName("bar<-2000")); EXPECT_EQ("bar_2", uniquer.GetUniqueName("bar_1")); diff --git a/tensorflow/compiler/xla/service/pattern_matcher.h b/tensorflow/compiler/xla/service/pattern_matcher.h new file mode 100644 index 0000000000000000000000000000000000000000..586f6ef7a9c4f17f69340e77be17aec2f677a791 --- /dev/null +++ b/tensorflow/compiler/xla/service/pattern_matcher.h @@ -0,0 +1,1013 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_PATTERN_MATCHER_H_ +#define TENSORFLOW_COMPILER_XLA_SERVICE_PATTERN_MATCHER_H_ + +#include "tensorflow/compiler/xla/layout_util.h" +#include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/service/hlo_opcode.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/core/lib/core/stringpiece.h" + +namespace xla { + +// A pattern matcher for HloInstructions, Shapes, and Layouts. +// +// The Match function's first argument must be HloInstruction*, Shape*, or +// Layout*. The second argument is a pattern that will be matched against the +// first argument, as described below. +// +// Patterns are constructed using the match::Op, match::Shape, or match::Layout +// functions. 
By default, the returned patterns will match any HloInstruction, +// Shape, or Layout, respectively. However the match can be made more specific +// by using the pattern's modifier methods, for example: +// +// match::Op().WithOpcode(HloOpcode::kAdd).WithOperand( +// 0, match::Op().WithOpcode(HloOpcode::kConstant)) +// +// This pattern will match Add instructions whose first operand is a constant. +// +// Each pattern type has the following modifiers: +// +// Op(): +// - WithName: match operations with the given name +// - WithOpcode: match operations with the given opcode +// - WithShape: match operations whose shape matches the given pattern +// - WithOperand: match operations whose operand matches the given pattern +// +// Shape(): +// - EqualTo: matches shapes that are equal to the argument +// - CompatibleTo: matches shapes that are compatible to the argument +// - IsScalar/IsArray/IsTuple: matches scalar/array/tuple shapes +// - IsDenseArray/IsSparseArray: matches arrays with dense/sparse format +// - WithLayout: match shapes whose layout matches the given pattern +// - WithLayoutEqualTo: matches shapes whose layouts equal the argument +// - WithSubshape: matches tuple shapes whose subshape matches the given +// pattern +// - WithSubshapeEqualTo: matches shapes with a subshape equal the argument +// - WithElementType: matches array/scalar shapes with the given element +// type +// - WithRank: matches array/scalar types with the given rank +// +// Layout(): +// - EqualTo: matches layouts that are equal to the argument +// - WithDenseFormat/WithSparseFormat: matches layouts with dense/sparse +// format +// +// Op(), Shape(), and Layout() may be passed an argument of type +// HloInstruction**, Shape**, or Layout**, respectively, or const versions of +// these pointers. If the pattern is matched, the address of the matched value +// will be "captured" and stored at this location. 
+// +// For example: +// HloInstruction* foo = ...; +// HloInstruction* matched_operand; +// CHECK(Match(foo, +// match::Op().WithOperand(0, match::Op(&matched_operand)))); +// +// Helpers are provided for common nullary, unary, binary, and ternary +// instructions. These helpers can be called with no arguments, in which case +// they will match any instruction matching the opcode. They may also be called +// with matches for the operands and with an optional capture. (The capture must +// be the first argument.) Some examples of these helpers and their equivalents +// are provided below. +// +// Example nullary instruction: +// Recv() == Op().WithOpcode(HloOpcode::kRecv) +// Recv(&a) == Op(&a).WithOpcode(HloOpcode::kRecv) +// +// Example unary instruction: +// Abs() == Op().WithOpcode(HloOpcode::kAbs) +// Abs(Op(&a)) == Op().WithOpcode(HloOpcode::kAbs) +// .WithOperand(0, Op(&a))) +// Abs(&a, Op(&b)) == Op(&a).WithOpcode(HloOpcode::kAbs) +// .WithOperand(0, Op(&b)) +// +// Example binary instruction: +// Add() == Op().WithOpcode(HloOpcode::kAdd) +// Add(Op(&a), Op(&b)) == Op().WithOpcode(HloOpcode::kAdd) +// .WithOperand(0, Op(&a)) +// .WithOperand(1, Op(&b)) +// Add(&a, Op(&b), Op(&c)) == Op(&a).WithOpcode(HloOpcode::kAdd) +// .WithOperand(0, Op(&b)) +// .WithOperand(1, Op(&c)) +// +// Example ternary instruction: +// Clamp() == Op().WithOpcode(HloOpcode::kClamp) +// Clamp(Op(&a), Op(&b), Op(&c)) == Op().WithOpcode(HloOpcode::kClamp) +// .WithOperand(0, Op(&a)) +// .WithOperand(1, Op(&b)) +// .WithOperand(2, Op(&c)) +// Clamp(&a, Op(&b), Op(&c), Op(&d)) == Op(&a).WithOpcode(HloOpcode::kClamp) +// .WithOperand(0, Op(&b)) +// .WithOperand(1, Op(&c)) +// .WithOperand(2, Op(&d)) +// +template +bool Match(Value* value, const Pattern& pattern) { + return pattern.Match(value); +} + +namespace match { + +namespace detail { + +template +class LayoutPattern; + +// The base LayoutPattern implementation. Matches only if the layout is not +// nullptr. 
+class LayoutPatternBaseImpl { + public: + bool Match(const ::xla::Layout* layout) const { return layout != nullptr; } +}; + +// A LayoutPattern implementation that matches only if the layout equals a +// Layout proto. +template +class LayoutPatternEqualImpl { + public: + explicit constexpr LayoutPatternEqualImpl(const Previous& previous, + const ::xla::Layout* layout) + : previous_(previous), layout_(layout) {} + + bool Match(const ::xla::Layout* layout) const { + return previous_.Match(layout) && LayoutUtil::Equal(*layout_, *layout); + } + + private: + Previous previous_; + const ::xla::Layout* layout_; +}; + +// A LayoutPattern implementation that matches only if the layout has a given +// format. +template +class LayoutPatternFormatImpl { + public: + explicit constexpr LayoutPatternFormatImpl(const Previous& previous, + Format format) + : previous_(previous), format_(format) {} + + bool Match(const ::xla::Layout* layout) const { + return previous_.Match(layout) && layout->format() == format_; + } + + private: + Previous previous_; + Format format_; +}; + +// A pattern that matches Layouts. +template +class LayoutPattern { + public: + explicit constexpr LayoutPattern(const Impl& impl, + LayoutType** matched_layout) + : impl_(impl), matched_layout_(matched_layout) {} + + // Returns true and captures the layout iff it matches the pattern. + bool Match(const ::xla::Layout* layout) const { + if (impl_.Match(layout)) { + if (matched_layout_) { + *matched_layout_ = layout; + } + return true; + } + return false; + } + + // Returns true and captures the layout iff it matches the pattern. + bool Match(::xla::Layout* layout) const { + if (impl_.Match(layout)) { + if (matched_layout_) { + *matched_layout_ = layout; + } + return true; + } + return false; + } + + // Modifies the pattern to match only if the layout equals the given proto. + // The layout must outlive the returned pattern. 
+ constexpr LayoutPattern> EqualTo( + const Layout* layout) const { + return LayoutPattern>( + LayoutPatternEqualImpl(impl_, layout), matched_layout_); + } + + // Modifies the pattern to match only if the layout has a dense format. + constexpr LayoutPattern> + WithDenseFormat() const { + return LayoutPattern>( + LayoutPatternFormatImpl(impl_, DENSE), matched_layout_); + } + + // Modifies the pattern to match only if the layout has a sparse format. + constexpr LayoutPattern> + WithSparseFormat() const { + return LayoutPattern>( + LayoutPatternFormatImpl(impl_, SPARSE), matched_layout_); + } + + private: + Impl impl_; + LayoutType** matched_layout_; +}; + +} // namespace detail + +// Creates a layout pattern that will capture the matched layout in the +// argument. +inline constexpr detail::LayoutPattern +Layout(const ::xla::Layout** matched_layout = nullptr) { + return detail::LayoutPattern( + detail::LayoutPatternBaseImpl(), matched_layout); +} + +// Creates a layout pattern that will capture the matched layout in the +// argument. +inline constexpr detail::LayoutPattern<::xla::Layout, + detail::LayoutPatternBaseImpl> +Layout(::xla::Layout** matched_layout) { + return detail::LayoutPattern<::xla::Layout, detail::LayoutPatternBaseImpl>( + detail::LayoutPatternBaseImpl(), matched_layout); +} + +namespace detail { + +template +class ShapePattern; + +// The base ShapePattern implementation. Matches only if the shape is not +// nullptr. +class ShapePatternBaseImpl { + public: + bool Match(const ::xla::Shape* shape) const { return shape != nullptr; } +}; + +// A ShapePattern implementation that matches only if the shape equals a Shape +// proto. 
+template +class ShapePatternEqualImpl { + public: + explicit constexpr ShapePatternEqualImpl(const Previous& previous, + const ::xla::Shape* shape) + : previous_(previous), shape_(shape) {} + + bool Match(const ::xla::Shape* shape) const { + return previous_.Match(shape) && ShapeUtil::Equal(*shape_, *shape); + } + + private: + Previous previous_; + const ::xla::Shape* shape_; +}; + +// A ShapePattern implementation that matches only if the shape is compatible to +// a Shape proto. +template +class ShapePatternCompatibleImpl { + public: + explicit constexpr ShapePatternCompatibleImpl(const Previous& previous, + const ::xla::Shape* shape) + : previous_(previous), shape_(shape) {} + + bool Match(const ::xla::Shape* shape) const { + return previous_.Match(shape) && ShapeUtil::Compatible(*shape_, *shape); + } + + private: + Previous previous_; + const ::xla::Shape* shape_; +}; + +// A ShapePattern implementation that matches only if the shape has a given +// element type. +template +class ShapePatternElementTypeImpl { + public: + explicit constexpr ShapePatternElementTypeImpl(const Previous& previous, + PrimitiveType element_type) + : previous_(previous), element_type_(element_type) {} + + bool Match(const ::xla::Shape* shape) const { + return previous_.Match(shape) && shape->element_type() == element_type_; + } + + private: + Previous previous_; + PrimitiveType element_type_; +}; + +// A ShapePattern implementation that matches only if the shape is scalar. 
+template +class ShapePatternIsScalarImpl { + public: + explicit constexpr ShapePatternIsScalarImpl(const Previous& previous) + : previous_(previous) {} + + bool Match(const ::xla::Shape* shape) const { + return previous_.Match(shape) && ShapeUtil::IsScalar(*shape); + } + + private: + Previous previous_; +}; + +// A ShapePattern implementation that matches only if the shape is an array +template +class ShapePatternIsArrayImpl { + public: + explicit constexpr ShapePatternIsArrayImpl(const Previous& previous) + : previous_(previous) {} + + bool Match(const ::xla::Shape* shape) const { + return previous_.Match(shape) && ShapeUtil::IsArray(*shape); + } + + private: + Previous previous_; +}; + +// A ShapePattern implementation that matches only if the shape is a tuple. +template +class ShapePatternIsTupleImpl { + public: + explicit constexpr ShapePatternIsTupleImpl(const Previous& previous) + : previous_(previous) {} + + bool Match(const ::xla::Shape* shape) const { + return previous_.Match(shape) && ShapeUtil::IsTuple(*shape); + } + + private: + Previous previous_; +}; + +// A ShapePattern implementation that matches only if the shape has a given +// rank. +template +class ShapePatternRankImpl { + public: + explicit constexpr ShapePatternRankImpl(const Previous& previous, int64 rank) + : previous_(previous), rank_(rank) {} + + bool Match(const ::xla::Shape* shape) const { + return previous_.Match(shape) && ShapeUtil::Rank(*shape) == rank_; + } + + private: + Previous previous_; + int64 rank_; +}; + +// A ShapePattern implementation that matches only if the shape has a layout +// that matches a given pattern. 
+template +class ShapePatternLayoutImpl { + public: + explicit constexpr ShapePatternLayoutImpl( + const Previous& previous, + const LayoutPattern& layout) + : previous_(previous), layout_(layout) {} + + bool Match(const ::xla::Shape* shape) const { + return previous_.Match(shape) && LayoutUtil::HasLayout(*shape) && + layout_.Match(&shape->layout()); + } + + bool Match(Shape* shape) const { + return previous_.Match(shape) && LayoutUtil::HasLayout(*shape) && + layout_.Match(shape->mutable_layout()); + } + + private: + Previous previous_; + LayoutPattern layout_; +}; + +// A ShapePattern implementation that matches only if the shape has a subshape +// that matches a given pattern. +template +class ShapePatternSubshapeImpl { + public: + explicit ShapePatternSubshapeImpl( + const Previous& previous, ShapeIndexView index, + const ShapePattern& subshape) + : previous_(previous), index_(index), subshape_(subshape) {} + + bool Match(const ::xla::Shape* shape) const { + return previous_.Match(shape) && ShapeUtil::IndexIsValid(*shape, index_) && + subshape_.Match(&ShapeUtil::GetSubshape(*shape, index_)); + } + + bool Match(::xla::Shape* shape) const { + return previous_.Match(shape) && ShapeUtil::IndexIsValid(*shape, index_) && + subshape_.Match(ShapeUtil::GetMutableSubshape(shape, index_)); + } + + private: + Previous previous_; + ShapeIndexView index_; + ShapePattern subshape_; +}; + +// A pattern that matches Shapes. +template +class ShapePattern { + public: + explicit constexpr ShapePattern(const Impl& impl, ShapeType** matched_shape) + : impl_(impl), matched_shape_(matched_shape) {} + + // Returns true and captures the shape iff it matches the pattern. + bool Match(const ::xla::Shape* shape) const { + if (impl_.Match(shape)) { + if (matched_shape_) { + *matched_shape_ = shape; + } + return true; + } + return false; + } + + // Returns true and captures the shape iff it matches the pattern. 
+ bool Match(::xla::Shape* shape) const { + if (impl_.Match(shape)) { + if (matched_shape_) { + *matched_shape_ = shape; + } + return true; + } + return false; + } + + // Modifies the pattern to match only if the shape equals the given proto. + // The layout must outlive the returned pattern. + constexpr ShapePattern> EqualTo( + const ::xla::Shape* shape) const { + return ShapePattern>( + ShapePatternEqualImpl(impl_, shape), matched_shape_); + } + + // Modifies the pattern to match only if the shape is compatible to the given + // proto. The layout must outlive the returned pattern. + constexpr ShapePattern> + CompatibleTo(const ::xla::Shape* shape) const { + return ShapePattern>( + ShapePatternCompatibleImpl(impl_, shape), matched_shape_); + } + + // Modifies the pattern to match only if the shape has the given element type. + constexpr ShapePattern> + WithElementType(PrimitiveType element_type) const { + return ShapePattern>( + ShapePatternElementTypeImpl(impl_, element_type), matched_shape_); + } + + // Modifies the pattern to match only if the shape is scalar. + constexpr ShapePattern> IsScalar() + const { + return ShapePattern>( + ShapePatternIsScalarImpl(impl_), matched_shape_); + } + + // Modifies the pattern to match only if the shape is an array. + constexpr ShapePattern> IsArray() + const { + return ShapePattern>( + ShapePatternIsArrayImpl(impl_), matched_shape_); + } + + // Modifies the pattern to match only if the shape is a tuple. + constexpr ShapePattern> IsTuple() + const { + return ShapePattern>( + ShapePatternIsTupleImpl(impl_), matched_shape_); + } + + // Modifies the pattern to match only if the shape has the given rank. + constexpr ShapePattern> WithRank( + int64 rank) const { + return ShapePattern>( + ShapePatternRankImpl(impl_, rank), matched_shape_); + } + + // Modifies the pattern to match only if the shape has a layout that matches + // the given pattern. 
+ template + constexpr ShapePattern> + WithLayout(const LayoutPattern& layout) const { + return ShapePattern>( + ShapePatternLayoutImpl(impl_, layout), + matched_shape_); + } + + constexpr ShapePattern< + ShapeType, + ShapePatternLayoutImpl>> + WithLayoutEqualTo(const ::xla::Layout* layout) const { + return WithLayout(Layout().EqualTo(layout)); + } + + constexpr ShapePattern< + ShapeType, + ShapePatternLayoutImpl>> + IsDenseArray() const { + return WithLayout(Layout().WithDenseFormat()); + } + + constexpr ShapePattern< + ShapeType, + ShapePatternLayoutImpl>> + IsSparseArray() const { + return WithLayout(Layout().WithSparseFormat()); + } + + // Modifies the pattern to match only if the shape has a subshape that matches + // the given pattern. + template + ShapePattern> + WithSubshape(ShapeIndexView index, + const ShapePattern& subshape) const { + return ShapePattern< + ShapeType, ShapePatternSubshapeImpl>( + ShapePatternSubshapeImpl(impl_, index, + subshape), + matched_shape_); + } + + ShapePattern>> + WithSubshapeEqualTo(ShapeIndexView index, const ::xla::Shape* shape) const { + return WithSubshape(index, + ShapePattern( + ShapePatternBaseImpl(), nullptr) + .EqualTo(shape)); + } + + ShapePattern>> + WithSubshapeCompatibleTo(ShapeIndexView index, + const ::xla::Shape* shape) const { + return WithSubshape(index, + ShapePattern( + ShapePatternBaseImpl(), nullptr) + .CompatibleTo(shape)); + } + + private: + Impl impl_; + ShapeType** matched_shape_; +}; + +} // namespace detail + +// Creates a shape pattern that will capture the matched layout in the argument. +inline constexpr detail::ShapePattern +Shape(const ::xla::Shape** matched_shape = nullptr) { + return detail::ShapePattern( + detail::ShapePatternBaseImpl(), matched_shape); +} + +// Creates a shape pattern that will capture the matched layout in the argument. 
+inline constexpr detail::ShapePattern<::xla::Shape, + detail::ShapePatternBaseImpl> +Shape(::xla::Shape** matched_shape) { + return detail::ShapePattern<::xla::Shape, detail::ShapePatternBaseImpl>( + detail::ShapePatternBaseImpl(), matched_shape); +} + +namespace detail { + +template +class HloInstructionPattern; + +// The base HloInstructionPattern implementation. Matches only if the +// instruction is not nullptr. +class HloInstructionPatternBaseImpl { + public: + bool Match(const ::xla::HloInstruction* inst) const { + return inst != nullptr; + } +}; + +// An HloInstructionPattern implementation that matches only if the instruction +// has a given name. +template +class HloInstructionPatternNameImpl { + public: + explicit HloInstructionPatternNameImpl(const Previous& previous, + tensorflow::StringPiece name) + : previous_(previous), name_(name) {} + + bool Match(const ::xla::HloInstruction* inst) const { + return previous_.Match(inst) && inst->name() == name_; + } + + private: + Previous previous_; + tensorflow::StringPiece name_; +}; + +// An HloInstructionPattern implementation that matches only if the instruction +// has a given opcode. +template +class HloInstructionPatternOpcodeImpl { + public: + explicit constexpr HloInstructionPatternOpcodeImpl(const Previous& previous, + HloOpcode opcode, + bool invert) + : previous_(previous), opcode_(opcode), invert_(invert) {} + + bool Match(const ::xla::HloInstruction* inst) const { + return previous_.Match(inst) && (invert_ ^ (inst->opcode() == opcode_)); + } + + private: + Previous previous_; + HloOpcode opcode_; + bool invert_; +}; + +// An HloInstructionPattern implementation that matches only if the instruction +// has a shape that matches a given pattern. 
+template +class HloInstructionPatternShapeImpl { + public: + explicit constexpr HloInstructionPatternShapeImpl( + const Previous& previous, const ShapePattern& shape) + : previous_(previous), shape_(shape) {} + + bool Match(const ::xla::HloInstruction* inst) const { + return previous_.Match(inst) && shape_.Match(&inst->shape()); + } + + bool Match(::xla::HloInstruction* inst) const { + return previous_.Match(inst) && shape_.Match(inst->mutable_shape()); + } + + private: + Previous previous_; + ShapePattern shape_; +}; + +// An HloInstructionPattern implementation that matches only if the instruction +// has an operand that matches a given pattern. +template +class HloInstructionPatternOperandImpl { + public: + explicit constexpr HloInstructionPatternOperandImpl( + const Previous& previous, int64 operand_index, + const HloInstructionPattern& operand) + : previous_(previous), operand_index_(operand_index), operand_(operand) {} + + bool Match(const ::xla::HloInstruction* inst) const { + return previous_.Match(inst) && operand_index_ < inst->operand_count() && + operand_.Match(inst->operand(operand_index_)); + } + + bool Match(::xla::HloInstruction* inst) const { + return previous_.Match(inst) && operand_index_ < inst->operand_count() && + operand_.Match(inst->mutable_operand(operand_index_)); + } + + private: + Previous previous_; + int64 operand_index_; + HloInstructionPattern operand_; +}; + +// A pattern that matches HloInstructions. +template +class HloInstructionPattern { + public: + explicit constexpr HloInstructionPattern(const Impl& impl, + HloInstructionType** matched_inst) + : impl_(impl), matched_inst_(matched_inst) {} + + // Returns true and captures the instruction iff it matches the pattern. + bool Match(const ::xla::HloInstruction* inst) const { + if (impl_.Match(inst)) { + if (matched_inst_) { + *matched_inst_ = inst; + } + return true; + } + return false; + } + + // Returns true and captures the instruction iff it matches the pattern. 
+ bool Match(::xla::HloInstruction* inst) const { + if (impl_.Match(inst)) { + if (matched_inst_) { + *matched_inst_ = inst; + } + return true; + } + return false; + } + + // Modifies the pattern to match only if the instruction has the given name. + HloInstructionPattern> + WithName(tensorflow::StringPiece name) const { + return HloInstructionPattern>( + HloInstructionPatternNameImpl(impl_, name), matched_inst_); + } + + // Modifies the pattern to match only if the instruction has the given opcode. + constexpr HloInstructionPattern> + WithOpcode(HloOpcode opcode) const { + return HloInstructionPattern>( + HloInstructionPatternOpcodeImpl(impl_, opcode, false), + matched_inst_); + } + + // Modifies the pattern to match only if the instruction does not have the + // given opcode. + constexpr HloInstructionPattern> + WithoutOpcode(HloOpcode opcode) const { + return HloInstructionPattern>( + HloInstructionPatternOpcodeImpl(impl_, opcode, true), + matched_inst_); + } + + // Modifies the pattern to match only if the instruction is a constant. + constexpr HloInstructionPattern> + IsConstant() const { + return WithOpcode(HloOpcode::kConstant); + } + + // Modifies the pattern to match only if the instruction is not a constant. + constexpr HloInstructionPattern> + IsNonConstant() const { + return WithoutOpcode(HloOpcode::kConstant); + } + + // Modifies the pattern to match only if the instruction has a shape that + // matches the given pattern. + template + constexpr HloInstructionPattern< + HloInstructionType, + HloInstructionPatternShapeImpl> + WithShape(const ShapePattern& shape) const { + return HloInstructionPattern< + HloInstructionType, + HloInstructionPatternShapeImpl>( + HloInstructionPatternShapeImpl(impl_, + shape), + matched_inst_); + } + + // Modifies the pattern to match only if the instruction has an operand that + // matches the given pattern. 
+ template + constexpr HloInstructionPattern< + HloInstructionType, + HloInstructionPatternOperandImpl> + WithOperand( + int64 operand_index, + const HloInstructionPattern& operand) const { + return HloInstructionPattern< + HloInstructionType, + HloInstructionPatternOperandImpl>( + HloInstructionPatternOperandImpl( + impl_, operand_index, operand), + matched_inst_); + } + + private: + Impl impl_; + HloInstructionType** matched_inst_; +}; + +} // namespace detail + +// Creates an instruction pattern that will capture the matched instruction in +// the argument. +inline constexpr detail::HloInstructionPattern< + const ::xla::HloInstruction, detail::HloInstructionPatternBaseImpl> +Op(const ::xla::HloInstruction** matched_inst = nullptr) { + return detail::HloInstructionPattern( + detail::HloInstructionPatternBaseImpl(), matched_inst); +} + +// Creates an instruction pattern that will capture the matched instruction in +// the argument. +inline constexpr detail::HloInstructionPattern< + ::xla::HloInstruction, detail::HloInstructionPatternBaseImpl> +Op(::xla::HloInstruction** matched_inst) { + return detail::HloInstructionPattern<::xla::HloInstruction, + detail::HloInstructionPatternBaseImpl>( + detail::HloInstructionPatternBaseImpl(), matched_inst); +} + +// Helpers for nullary instructions. +#define XLA_NULLOP_PATTERN(NAME) \ + inline auto NAME()->decltype(Op().WithOpcode(HloOpcode::k##NAME)) { \ + return Op().WithOpcode(HloOpcode::k##NAME); \ + } \ + \ + template \ + inline auto NAME(HloInstructionType** matched_inst) \ + ->decltype(Op(matched_inst).WithOpcode(HloOpcode::k##NAME)) { \ + return Op(matched_inst).WithOpcode(HloOpcode::k##NAME); \ + } +XLA_NULLOP_PATTERN(Constant) +XLA_NULLOP_PATTERN(Infeed) +XLA_NULLOP_PATTERN(Parameter) +XLA_NULLOP_PATTERN(Recv) +#undef XLA_NULLOP_PATTERN + +// Helpers for unary instructions. 
+#define XLA_UNOP_PATTERN(NAME) \ + inline auto NAME()->decltype(Op().WithOpcode(HloOpcode::k##NAME)) { \ + return Op().WithOpcode(HloOpcode::k##NAME); \ + } \ + \ + template \ + inline auto NAME(Arg&& arg)->decltype( \ + Op().WithOpcode(HloOpcode::k##NAME) \ + .WithOperand(0, std::forward(arg))) { \ + return Op() \ + .WithOpcode(HloOpcode::k##NAME) \ + .WithOperand(0, std::forward(arg)); \ + } \ + \ + template \ + inline auto NAME(HloInstructionType** matched_inst, Arg&& arg) \ + ->decltype(Op(matched_inst) \ + .WithOpcode(HloOpcode::k##NAME) \ + .WithOperand(0, std::forward(arg))) { \ + return Op(matched_inst) \ + .WithOpcode(HloOpcode::k##NAME) \ + .WithOperand(0, std::forward(arg)); \ + } +XLA_UNOP_PATTERN(Abs) +XLA_UNOP_PATTERN(RoundNearestAfz) +XLA_UNOP_PATTERN(Bitcast) +XLA_UNOP_PATTERN(Broadcast) +XLA_UNOP_PATTERN(Ceil) +XLA_UNOP_PATTERN(Copy) +XLA_UNOP_PATTERN(Cos) +XLA_UNOP_PATTERN(Exp) +XLA_UNOP_PATTERN(Fft) +XLA_UNOP_PATTERN(Floor) +XLA_UNOP_PATTERN(Imag) +XLA_UNOP_PATTERN(IsFinite) +XLA_UNOP_PATTERN(Log) +XLA_UNOP_PATTERN(Not) +XLA_UNOP_PATTERN(Negate) +XLA_UNOP_PATTERN(Outfeed) +XLA_UNOP_PATTERN(Real) +XLA_UNOP_PATTERN(Reduce) +XLA_UNOP_PATTERN(ReducePrecision) +XLA_UNOP_PATTERN(Reshape) +XLA_UNOP_PATTERN(Reverse) +XLA_UNOP_PATTERN(Send) +XLA_UNOP_PATTERN(Sign) +XLA_UNOP_PATTERN(Sin) +XLA_UNOP_PATTERN(Sort) +XLA_UNOP_PATTERN(Tanh) +XLA_UNOP_PATTERN(Transpose) +#undef XLA_UNOP_PATTERN + +// Helpers for binary instructions. 
+#define XLA_BINOP_PATTERN(NAME) \ + inline auto NAME()->decltype(Op().WithOpcode(HloOpcode::k##NAME)) { \ + return Op().WithOpcode(HloOpcode::k##NAME); \ + } \ + \ + template \ + inline auto NAME(Lhs&& lhs, Rhs&& rhs) \ + ->decltype(Op().WithOpcode(HloOpcode::k##NAME) \ + .WithOperand(0, std::forward(lhs)) \ + .WithOperand(1, std::forward(rhs))) { \ + return Op() \ + .WithOpcode(HloOpcode::k##NAME) \ + .WithOperand(0, std::forward(lhs)) \ + .WithOperand(1, std::forward(rhs)); \ + } \ + \ + template \ + inline auto NAME(HloInstructionType** matched_inst, Lhs&& lhs, Rhs&& rhs) \ + ->decltype(Op(matched_inst) \ + .WithOpcode(HloOpcode::k##NAME) \ + .WithOperand(0, std::forward(lhs)) \ + .WithOperand(1, std::forward(rhs))) { \ + return Op(matched_inst) \ + .WithOpcode(HloOpcode::k##NAME) \ + .WithOperand(0, std::forward(lhs)) \ + .WithOperand(1, std::forward(rhs)); \ + } +XLA_BINOP_PATTERN(Add) +XLA_BINOP_PATTERN(Atan2) +XLA_BINOP_PATTERN(Divide) +XLA_BINOP_PATTERN(Complex) +XLA_BINOP_PATTERN(Dot) +XLA_BINOP_PATTERN(Eq) +XLA_BINOP_PATTERN(Gather) +XLA_BINOP_PATTERN(Ge) +XLA_BINOP_PATTERN(Gt) +XLA_BINOP_PATTERN(Le) +XLA_BINOP_PATTERN(Lt) +XLA_BINOP_PATTERN(Maximum) +XLA_BINOP_PATTERN(Minimum) +XLA_BINOP_PATTERN(Multiply) +XLA_BINOP_PATTERN(Ne) +XLA_BINOP_PATTERN(Power) +XLA_BINOP_PATTERN(Remainder) +XLA_BINOP_PATTERN(Subtract) +XLA_BINOP_PATTERN(And) +XLA_BINOP_PATTERN(Or) +XLA_BINOP_PATTERN(ShiftLeft) +XLA_BINOP_PATTERN(ShiftRightArithmetic) +XLA_BINOP_PATTERN(ShiftRightLogical) +#undef XLA_BINOP_PATTERN + +// Helpers for ternary instructions. 
+#define XLA_TERNOP_PATTERN(NAME) \ + inline auto NAME()->decltype(Op().WithOpcode(HloOpcode::k##NAME)) { \ + return Op().WithOpcode(HloOpcode::k##NAME); \ + } \ + \ + template \ + inline auto NAME(Arg0&& arg0, Arg1&& arg1, Arg2&& arg2) \ + ->decltype(Op().WithOpcode(HloOpcode::k##NAME) \ + .WithOperand(0, std::forward(arg0)) \ + .WithOperand(1, std::forward(arg1)) \ + .WithOperand(2, std::forward(arg2))) { \ + return Op() \ + .WithOpcode(HloOpcode::k##NAME) \ + .WithOperand(0, std::forward(arg0)) \ + .WithOperand(1, std::forward(arg1)) \ + .WithOperand(2, std::forward(arg2)); \ + } \ + \ + template \ + inline auto NAME(HloInstructionType** matched_inst, Arg0&& arg0, \ + Arg1&& arg1, Arg2&& arg2) \ + ->decltype(Op(matched_inst) \ + .WithOpcode(HloOpcode::k##NAME) \ + .WithOperand(0, std::forward(arg0)) \ + .WithOperand(1, std::forward(arg1)) \ + .WithOperand(2, std::forward(arg2))) { \ + return Op(matched_inst) \ + .WithOpcode(HloOpcode::k##NAME) \ + .WithOperand(0, std::forward(arg0)) \ + .WithOperand(1, std::forward(arg1)) \ + .WithOperand(2, std::forward(arg2)); \ + } +XLA_TERNOP_PATTERN(Clamp); +XLA_TERNOP_PATTERN(Select); +#undef XLA_TERNOP_PATTERN + +// Helpers for matching non-constant instructions. +inline auto NonConstant() -> decltype(Op().IsNonConstant()) { + return Op().IsNonConstant(); +} + +template +inline auto NonConstant(HloInstructionType** matched_inst) + -> decltype(Op(matched_inst).IsNonConstant()) { + return Op(matched_inst).IsNonConstant(); +} + +} // namespace match + +} // namespace xla + +#endif // TENSORFLOW_COMPILER_XLA_SERVICE_PATTERN_MATCHER_H_ diff --git a/tensorflow/compiler/xla/service/pattern_matcher_test.cc b/tensorflow/compiler/xla/service/pattern_matcher_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..c88157c312524fb273e6df368d2ef61d679d1d8b --- /dev/null +++ b/tensorflow/compiler/xla/service/pattern_matcher_test.cc @@ -0,0 +1,174 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/service/pattern_matcher.h" +#include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/service/hlo_opcode.h" +#include "tensorflow/compiler/xla/tools/parser/hlo_parser.h" +#include "tensorflow/core/platform/test.h" + +namespace xla { +namespace { + +TEST(PatternMatcherTest, AddOp) { + constexpr char kModuleStr[] = R"(HloModule two_plus_two_module + ENTRY %two_plus_two_computation () -> f32[] { + %two = f32[] constant(2) + ROOT %two_plus_two = f32[] add(f32[] %two, f32[] %two) + } + )"; + TF_ASSERT_OK_AND_ASSIGN(auto hlo_module, tools::Parse(kModuleStr)); + + const HloInstruction* matched_inst; + HloInstruction* matched_operand; + Shape* matched_shape; + Layout* matched_layout; + + ASSERT_TRUE(Match( + hlo_module->entry_computation()->root_instruction(), + match::Op(&matched_inst) + .WithName("two_plus_two") + .WithOpcode(HloOpcode::kAdd) + .WithShape( + match::Shape(&matched_shape) + .WithLayout(match::Layout(&matched_layout).WithDenseFormat())) + .WithOperand( + 0, + match::Op(&matched_operand).WithOpcode(HloOpcode::kConstant)))); + ASSERT_NE(matched_inst, nullptr); + EXPECT_EQ(matched_inst->name(), "two_plus_two"); + EXPECT_EQ(matched_inst->opcode(), HloOpcode::kAdd); + + EXPECT_TRUE(Match(hlo_module->entry_computation()->root_instruction(), + match::Add(match::Constant(), 
match::Constant()))); + + EXPECT_FALSE(Match(hlo_module->entry_computation()->root_instruction(), + match::Op().WithName("bad_name"))); + matched_inst = nullptr; + EXPECT_FALSE(Match(hlo_module->entry_computation()->root_instruction(), + match::Multiply(&matched_inst, match::Op(), match::Op()))); +} + +TEST(PatternMatcherTest, ScalarShape) { + auto scalar_shape = ShapeUtil::MakeShape(F32, {}); + Shape* matched_shape; + EXPECT_TRUE(Match(&scalar_shape, match::Shape(&matched_shape).IsScalar())); + EXPECT_EQ(matched_shape, &scalar_shape); + EXPECT_TRUE(Match(&scalar_shape, match::Shape().IsArray())); + EXPECT_TRUE(Match(&scalar_shape, match::Shape().IsDenseArray())); + EXPECT_FALSE(Match(&scalar_shape, match::Shape().IsTuple())); + EXPECT_TRUE(Match(&scalar_shape, match::Shape().WithElementType(F32))); + EXPECT_TRUE(Match(&scalar_shape, match::Shape().WithRank(0))); + EXPECT_FALSE(Match( + &scalar_shape, + match::Shape().WithSubshape({0}, match::Shape()).WithElementType(F32))); +} + +TEST(PatternMatcherTest, DenseArrayShape) { + auto array_shape = ShapeUtil::MakeShape(F32, {2, 3, 4}); + Shape* matched_shape; + EXPECT_TRUE(Match(&array_shape, match::Shape(&matched_shape).IsArray())); + EXPECT_EQ(matched_shape, &array_shape); + EXPECT_TRUE(Match(&array_shape, match::Shape().IsDenseArray())); + EXPECT_FALSE(Match(&array_shape, match::Shape().IsSparseArray())); + EXPECT_FALSE(Match(&array_shape, match::Shape().IsScalar())); + EXPECT_FALSE(Match(&array_shape, match::Shape().IsTuple())); + EXPECT_TRUE(Match(&array_shape, match::Shape().WithElementType(F32))); + EXPECT_TRUE(Match(&array_shape, match::Shape().WithRank(3))); + EXPECT_FALSE( + Match(&array_shape, match::Shape().WithSubshape({0}, match::Shape()))); + Layout* matched_layout; + EXPECT_FALSE(Match(&array_shape, + match::Shape().WithLayout( + match::Layout(&matched_layout).WithSparseFormat()))); + EXPECT_TRUE(Match(&array_shape, + match::Shape().WithLayout( + match::Layout(&matched_layout).WithDenseFormat()))); + 
EXPECT_EQ(matched_layout, &array_shape.layout()); +} + +TEST(PatternMatcherTest, SparseArrayShape) { + auto array_shape = ShapeUtil::MakeShapeWithSparseLayout(F32, {2, 3, 4}, 10); + Shape* matched_shape; + EXPECT_TRUE(Match(&array_shape, match::Shape(&matched_shape).IsArray())); + EXPECT_EQ(matched_shape, &array_shape); + EXPECT_FALSE(Match(&array_shape, match::Shape().IsDenseArray())); + EXPECT_TRUE(Match(&array_shape, match::Shape().IsSparseArray())); + EXPECT_FALSE(Match(&array_shape, match::Shape().IsScalar())); + EXPECT_FALSE(Match(&array_shape, match::Shape().IsTuple())); + EXPECT_TRUE(Match(&array_shape, match::Shape().WithElementType(F32))); + EXPECT_TRUE(Match(&array_shape, match::Shape().WithRank(3))); + EXPECT_FALSE( + Match(&array_shape, match::Shape().WithSubshape({0}, match::Shape()))); + Layout* matched_layout; + EXPECT_FALSE(Match(&array_shape, + match::Shape().WithLayout( + match::Layout(&matched_layout).WithDenseFormat()))); + EXPECT_TRUE(Match(&array_shape, + match::Shape().WithLayout( + match::Layout(&matched_layout).WithSparseFormat()))); + EXPECT_EQ(matched_layout, &array_shape.layout()); +} + +TEST(PatternMatcherTest, TupleShape) { + auto tuple_shape = ShapeUtil::MakeTupleShape({ + ShapeUtil::MakeShape(F32, {1, 2, 3}), + ShapeUtil::MakeShape(S32, {4, 5}), + }); + EXPECT_TRUE(Match(&tuple_shape, match::Shape().IsTuple())); + EXPECT_FALSE(Match(&tuple_shape, match::Shape().IsArray())); + EXPECT_FALSE(Match(&tuple_shape, match::Shape().IsScalar())); + + Shape* subshape; + ASSERT_TRUE(Match( + &tuple_shape, + match::Shape().WithSubshape( + {0}, match::Shape(&subshape).WithElementType(F32).WithRank(3)))); + ASSERT_NE(subshape, nullptr); + EXPECT_TRUE( + ShapeUtil::Equal(*subshape, ShapeUtil::GetSubshape(tuple_shape, {0}))); + EXPECT_TRUE(Match(&tuple_shape, + match::Shape().WithSubshape( + {0}, match::Shape().EqualTo( + &ShapeUtil::GetSubshape(tuple_shape, {0}))))); + EXPECT_FALSE(Match(&tuple_shape, + match::Shape().WithSubshape( + {0}, 
match::Shape().EqualTo( + &ShapeUtil::GetSubshape(tuple_shape, {1}))))); + + ASSERT_TRUE(Match( + &tuple_shape, + match::Shape().WithSubshape( + {1}, match::Shape(&subshape).WithElementType(S32).WithRank(2)))); + ASSERT_NE(subshape, nullptr); + EXPECT_TRUE( + ShapeUtil::Equal(*subshape, ShapeUtil::GetSubshape(tuple_shape, {1}))); + EXPECT_TRUE(Match(&tuple_shape, + match::Shape().WithSubshape( + {1}, match::Shape().EqualTo( + &ShapeUtil::GetSubshape(tuple_shape, {1}))))); + EXPECT_FALSE(Match(&tuple_shape, + match::Shape().WithSubshape( + {1}, match::Shape().EqualTo( + &ShapeUtil::GetSubshape(tuple_shape, {0}))))); + + EXPECT_FALSE( + Match(&tuple_shape, match::Shape().WithSubshape({2}, match::Shape()))); + EXPECT_FALSE( + Match(&tuple_shape, match::Shape().WithSubshape({0, 0}, match::Shape()))); +} + +} // namespace +} // namespace xla diff --git a/tensorflow/compiler/xla/service/platform_util.cc b/tensorflow/compiler/xla/service/platform_util.cc index aa974ee61a27de9c19e97d8a6eb48f9261ce4bd9..7c63c0acc7764d558b2151190f0fa79fac355cbf 100644 --- a/tensorflow/compiler/xla/service/platform_util.cc +++ b/tensorflow/compiler/xla/service/platform_util.cc @@ -29,8 +29,6 @@ limitations under the License. #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/stream_executor_no_cuda.h" -namespace se = ::perftools::gputools; - namespace xla { using tensorflow::str_util::Lowercase; diff --git a/tensorflow/compiler/xla/service/platform_util.h b/tensorflow/compiler/xla/service/platform_util.h index 69188820a70707d9c9be10b20fb7de92ad4d9873..571451ba43a81d19b70e4954e45d3447f15dcedc 100644 --- a/tensorflow/compiler/xla/service/platform_util.h +++ b/tensorflow/compiler/xla/service/platform_util.h @@ -34,29 +34,27 @@ class PlatformUtil { // // Note that, even if a platform is present with zero devices, if we *do* have // compilation support for it, it will be returned in this sequence. 
- static StatusOr> - GetSupportedPlatforms(); + static StatusOr> GetSupportedPlatforms(); // Convenience function which returns the default supported platform for // tests. If exactly one supported platform is present, then this platform is // the default platform. If exactly two platforms are present and one of them // is the interpreter platform, then the other platform is the default // platform. Otherwise returns an error. - static StatusOr GetDefaultPlatform(); + static StatusOr GetDefaultPlatform(); // Convenience function which returns the sole supported platform. If // exactly one supported platform is present, then this platform is the // default platform. Otherwise returns an error. - static StatusOr GetSolePlatform(); + static StatusOr GetSolePlatform(); // Returns the platform according to the given name. Returns error if there is // no such platform. - static StatusOr GetPlatform( - const string& platform_name); + static StatusOr GetPlatform(const string& platform_name); // Returns exactly one platform that does not have given name. Returns error // if there is no such platform, or there are multiple such platforms. - static StatusOr GetPlatformExceptFor( + static StatusOr GetPlatformExceptFor( const string& platform_name); // Returns a vector of StreamExecutors for the given platform. The vector is @@ -64,8 +62,8 @@ class PlatformUtil { // element is nullptr, then the device is present by not supported by XLA. // // If the platform has no visible devices, a not-found error is returned. 
- static StatusOr> - GetStreamExecutors(perftools::gputools::Platform* platform); + static StatusOr> GetStreamExecutors( + se::Platform* platform); private: TF_DISALLOW_COPY_AND_ASSIGN(PlatformUtil); diff --git a/tensorflow/compiler/xla/service/reshape_mover.cc b/tensorflow/compiler/xla/service/reshape_mover.cc index f15117f45c689f2d717fbfe6191b510586449bc4..0f26a025bf125f70199637894741540f89eae7e5 100644 --- a/tensorflow/compiler/xla/service/reshape_mover.cc +++ b/tensorflow/compiler/xla/service/reshape_mover.cc @@ -53,16 +53,8 @@ bool IsReshapeOrTranspose(const HloInstruction* instruction) { instruction->opcode() == HloOpcode::kTranspose; } -// Returns true if `a` is a broadcast instruction to target shape `shape` and -// its operand is a scalar. -bool IsBroadcastScalarToShape(const HloInstruction* a, const Shape& shape) { - return a->opcode() == HloOpcode::kBroadcast && - ShapeUtil::SameDimensions(a->shape(), shape) && - ShapeUtil::IsScalar(a->operand(0)->shape()); -} - -// Returns true iff `instruction` can change its shape simply by adjusting -// metadata. +// Returns true if `instruction` can change its shape simply by adjusting +// metadata or if `instruction` is a broadcast of a scalar value. bool CanTriviallyChangeShape(const HloInstruction* instruction) { // NOTE: Technically a sequence of reshape(reshape(constant)) is also // trivially reshapable, so we might be tempted to simply recurse if @@ -97,19 +89,30 @@ bool CanTriviallyChangeShape(const HloInstruction* instruction) { return true; } + // A broadcase of scalar can trivially change its shape. + if (instruction->opcode() == HloOpcode::kBroadcast && + ShapeUtil::IsScalar(instruction->operand(0)->shape())) { + return true; + } + return false; } -// Finds the first non-scalar operand of an instruction that is a non-trivial -// reshape or transpose. Returns the operand if it is found or nullptr if not -// found. 
+// Returns true iff `instruction` is a reshape/transpose instruction for which +// a shape change is nontrivial. +bool IsNontrivialReshape(const HloInstruction* instruction) { + return !ShapeUtil::IsScalar(instruction->shape()) && + IsReshapeOrTranspose(instruction) && + !CanTriviallyChangeShape(instruction->operand(0)); +} + +// Finds the first operand of an instruction that is a non-trivial reshape or +// transpose. Returns such an operand or nullptr if not found. HloInstruction* FirstNonScalarAndNonTrivialReshapeOperand( const HloInstruction* hlo) { for (HloInstruction* operand : hlo->operands()) { - if (!ShapeUtil::IsScalar(operand->shape()) && - IsReshapeOrTranspose(operand) && - !CanTriviallyChangeShape(operand->operand(0))) { - VLOG(5) << "Found first non-scalar and non-trivial reshape operand of " + if (IsNontrivialReshape(operand)) { + VLOG(5) << "Found first non-trivial reshape operand of " << hlo->ToString(HloPrintOptions().set_print_metadata(false)) << ":\n\t" << operand->ToString(HloPrintOptions().set_print_metadata(false)); @@ -119,7 +122,7 @@ HloInstruction* FirstNonScalarAndNonTrivialReshapeOperand( return nullptr; } -// Returns whether `a` and `b` are equivalent for the purposes of this pass. +// Returns whether `a` and `b` are equivalent reshapes/transposes. bool AreEquivalentReshapes(const HloInstruction* a, const HloInstruction* b) { if (a->opcode() != b->opcode() || !ShapeUtil::SameDimensions(a->shape(), b->shape())) { @@ -136,85 +139,14 @@ bool AreEquivalentReshapes(const HloInstruction* a, const HloInstruction* b) { } } -// Returns true if all operands of `instruction` can easily change shape. -// Operands can easily change shape if they are all reshapes/transposes to and -// from the same shape. Additionally, operands like constant, rng, and any -// scalar change shape with only an adjustment of metadata. 
-bool AllOperandsHaveEasyShapeChanges( - const HloInstruction* instruction, - const HloInstruction* first_reshape_operand) { - auto print_no_metadata = HloPrintOptions().set_print_metadata(false); - VLOG(3) << "** Checking whether all operands have easy shape changes: " - << instruction->ToString(print_no_metadata); - // Check whether all operands: - // 0. Have the same dimensions as the output -- if not, it may be - // implicitly broadcast, which can confound the movement's - // correctness. - // - // And one of the following: - // 1. Are reshapes or transposes that have the same input and - // output shapes as all other reshaped or transposed operands. - // or - // 2. Are one of kConstant, kRng, and scalars that can change shape - // trivially, - // or - // 3. Are broadcast with a scalar operand. - for (const HloInstruction* operand : instruction->operands()) { - if (!ShapeUtil::SameDimensions(operand->shape(), instruction->shape())) { - VLOG(5) << "Operand shape differs from output shape; may be " - "implicitly broadcast, so preventing " - "movement\n\toperand: " - << operand->ToString(print_no_metadata) << "\n\tinstruction: " - << instruction->ToString(print_no_metadata); - return false; - } - - // Skip the rest checks if the current operand is first_reshape_operand - // itself. 
- if (first_reshape_operand == operand) { - continue; - } - - if (AreEquivalentReshapes(first_reshape_operand, operand)) { - VLOG(5) << "Are equivalent reshapes:\n\tfirst_reshape_operand: " - << first_reshape_operand->ToString(print_no_metadata) - << "\n\toperand: " << operand->ToString(print_no_metadata); - continue; - } - - if (CanTriviallyChangeShape(operand)) { - VLOG(5) << "Operand can trivially change shape: " - << operand->ToString(print_no_metadata); - continue; - } - - if (IsBroadcastScalarToShape(operand, first_reshape_operand->shape())) { - VLOG(5) << "Broadcast scalar to shape: " - << operand->ToString(print_no_metadata); - continue; - } - - // TODO(someone): Look into supporting general ops for the operands as - // well. - VLOG(5) << "Operand is neither equalivant to the first Reshape operand" - "nor can trivially change shape: " - << operand->ToString(print_no_metadata); - return false; - } - - VLOG(3) << "All operands have easy shape changes: " - << instruction->ToString(print_no_metadata); - return true; -} - // This function is called once we've decided to sink reshape/transpose operands // across an instruction. It returns an updated `operand` with a shape that // plays nicely with `new_operand_shape`; either it has the same shape (of the // correct type), or it is a scalar that may be implicitly broadcast. 
-HloInstruction* UpdateOperand(HloComputation* computation, - const HloInstruction* first_reshape_operand, +HloInstruction* UpdateOperand(const HloInstruction* first_reshape_operand, const Shape& new_operand_shape, HloInstruction* operand) { + HloComputation* computation = operand->parent(); const PrimitiveType element_type = operand->shape().element_type(); const Shape new_shape = ShapeUtil::ChangeElementType(new_operand_shape, element_type); @@ -223,15 +155,20 @@ HloInstruction* UpdateOperand(HloComputation* computation, case HloOpcode::kConstant: { if (first_reshape_operand->opcode() == HloOpcode::kReshape) { VLOG(5) << "Adding reshape to kConstant operand"; - return computation->AddInstruction( + HloInstruction* reshape = computation->AddInstruction( HloInstruction::CreateReshape(new_shape, operand)); + operand->SetupDerivedInstruction(reshape); + return reshape; } else { CHECK(first_reshape_operand->opcode() == HloOpcode::kTranspose); VLOG(5) << "Adding transpose to kConstant operand"; std::vector inverse_permutation = InversePermutation(first_reshape_operand->dimensions()); - return computation->AddInstruction(HloInstruction::CreateTranspose( - new_shape, operand, inverse_permutation)); + HloInstruction* transpose = + computation->AddInstruction(HloInstruction::CreateTranspose( + new_shape, operand, inverse_permutation)); + operand->SetupDerivedInstruction(transpose); + return transpose; } } case HloOpcode::kRng: { @@ -245,42 +182,24 @@ HloInstruction* UpdateOperand(HloComputation* computation, VLOG(5) << "Using existing operand of kReshape or kTranspose"; return operand->mutable_operand(0); } - case HloOpcode::kBroadcast: - CHECK(IsBroadcastScalarToShape(operand, first_reshape_operand->shape())); - VLOG(5) << "Changing broadcast"; - return computation->AddInstruction( + case HloOpcode::kBroadcast: { + CHECK(ShapeUtil::IsScalar(operand->operand(0)->shape())); + HloInstruction* inst = computation->AddInstruction( operand->CloneWithNewOperands(new_shape, 
operand->operands())); + VLOG(5) << "Changing broadcast from " << operand->ToString() << " to " + << inst->ToString(); + return inst; + } default: LOG(FATAL) << "Unexpected operand opcode during update: " << operand; } } -// Try to sink any reshape or transpose operands of `instruction` across it. We -// do so if `instruction` is elementwise and all operands are either equivalent -// reshapes/transposes or are trivially reshapable. -StatusOr TrySinkReshapeOrTranspose(HloComputation* computation, - HloInstruction* instruction) { - // Only perform sinks for live elementwise instructions with operands. - const bool is_dead = instruction->user_count() == 0 && - instruction != computation->root_instruction(); - if (!instruction->IsElementwise() || instruction->operands().empty() || - is_dead) { - return false; - } - - // Only perform sinks if there are any nontrivial reshape/transpose operands. - const HloInstruction* first_reshape_operand = - FirstNonScalarAndNonTrivialReshapeOperand(instruction); - if (!first_reshape_operand) { - return false; - } - - // Only perform sinks if all operands can easily change shape. - if (!AllOperandsHaveEasyShapeChanges(instruction, first_reshape_operand)) { - return false; - } - +// Actually performs the reshape-move transformation -- that is, sinks the +// reshape or transpose operands of `instruction` across it. +StatusOr PerformSinkReshapeOrTranspose( + HloInstruction* instruction, const HloInstruction* first_reshape_operand) { auto print_no_metadata = HloPrintOptions().set_print_metadata(false); // At this point we've decided to sink reshape/transpose operands. 
const Shape& new_operand_shape = first_reshape_operand->operand(0)->shape(); @@ -301,8 +220,8 @@ StatusOr TrySinkReshapeOrTranspose(HloComputation* computation, } VLOG(3) << "Updating operand #" << i << ": " << operands[i]->ToString(print_no_metadata); - operands[i] = UpdateOperand(computation, first_reshape_operand, - new_operand_shape, operands[i]); + operands[i] = + UpdateOperand(first_reshape_operand, new_operand_shape, operands[i]); } if (HloOpcode::kFusion == instruction->opcode()) { // Here we already know `instruction` is elementwise, and no operand is @@ -314,6 +233,7 @@ StatusOr TrySinkReshapeOrTranspose(HloComputation* computation, *shape->mutable_layout() = new_operand_shape.layout(); } } + HloComputation* computation = instruction->parent(); HloInstruction* new_elementwise = computation->AddInstruction(instruction->CloneWithNewOperands( // `instruction` may change the element type, e.g., from @@ -348,6 +268,141 @@ StatusOr TrySinkReshapeOrTranspose(HloComputation* computation, return true; } +// Returns true if the instruction is a reshape-move candidate. +// +// An instruction is a reshape-move candidate if the instruction is elementwise, +// has at least one nontrivial reshape/transpose operand, and its operands are +// either trivially reshapable or are equivalent nontrivial reshapes/transposes. +bool IsReshapeMoveCandidate(HloInstruction* instruction) { + auto print_no_metadata = HloPrintOptions().set_print_metadata(false); + VLOG(5) << "** Checking instruction: " + << instruction->ToString(print_no_metadata); + + // Only perform reshape-move for live elementwise instructions with operands. + const bool is_dead = instruction->user_count() == 0 && + instruction != instruction->parent()->root_instruction(); + if (!instruction->IsElementwise() || instruction->operands().empty() || + is_dead) { + return false; + } + + // Check whether all operands: + // 0. 
Have the same dimensions as the output -- if not, they may be + // implicitly broadcast, which can confound the movement's + // correctness. + // + // And one of the following: + // 1. Are reshapes or transposes that have the same input and + // output shapes as all other reshaped or transposed operands. + // or + // 2. Are one of kConstant, kRng, broadcast of a scalar value, and scalars + // that can change shape trivially. + const HloInstruction* first_reshape_operand = nullptr; + for (const HloInstruction* operand : instruction->operands()) { + if (!ShapeUtil::SameDimensions(operand->shape(), instruction->shape())) { + VLOG(5) << "Operand shape differs from output shape; may be " + "implicitly broadcast, so preventing " + "movement\n\toperand: " + << operand->ToString(print_no_metadata) << "\n\tinstruction: " + << instruction->ToString(print_no_metadata); + return false; + } + + if (CanTriviallyChangeShape(operand)) { + VLOG(5) << "Operand can trivially change shape: " + << operand->ToString(print_no_metadata); + continue; + } + + if (!IsNontrivialReshape(operand)) { + VLOG(5) << "Operand can't trivially change shape: " + << operand->ToString(print_no_metadata); + return false; + } + + if (first_reshape_operand == nullptr) { + first_reshape_operand = operand; + VLOG(5) << "First reshape operand " + << operand->ToString(print_no_metadata); + } else if (AreEquivalentReshapes(first_reshape_operand, operand)) { + VLOG(5) + << "Operand is an equivalent reshape of the first reshape operand " + << operand->ToString(print_no_metadata); + } else { + // TODO(someone): Look into supporting general ops for the operands as + // well. 
+ VLOG(5) << "Operand is a reshape but is not equivalent to the first " + "Reshape operand" + << operand->ToString(print_no_metadata); + return false; + } + } + + if (first_reshape_operand) { + VLOG(5) << "All operands have easy shape changes: " + << instruction->ToString(print_no_metadata); + } + + return first_reshape_operand != nullptr; +} + +// Reshape-moves all qualifying instructions in reshape_candidates. Returns +// true if it makes changes. +// +// `reshape_candidates` is a set of HloInstructions with nontrivial reshape +// operands, and an instruction in the set can be reshape-moved iff all the users +// of its nontrivial reshape operands can also be reshape-moved. +// +// The algorithm here iteratively finds the nontrivial operands with users that +// are outside the set of `reshape_candidates`, and removes their users from +// `reshape_candidates`, until either `reshape_candidates` becomes empty or none +// of the remaining nontrivial operands have users outside `reshape_candidates`. +// In the latter case, all the remaining instructions in `reshape_candidates` +// are reshape-moved and the routine returns true. 
+StatusOr TryReshapeMoveOnCandidates( + HloInstructionSet* reshape_candidates) { + bool removed = true; + while (!reshape_candidates->empty() && removed) { + if (VLOG_IS_ON(5)) { + for (const HloInstruction* instruction : *reshape_candidates) { + VLOG(5) << "candidate " << instruction->ToString(); + } + } + ConstHloInstructionSet nontrivial_operands; + for (const HloInstruction* instruction : *reshape_candidates) { + for (const auto* operand : instruction->operands()) { + if (IsNontrivialReshape(operand)) { + nontrivial_operands.insert(operand); + } + } + } + + removed = false; + for (auto operand : nontrivial_operands) { + if (c_any_of(operand->users(), [&](HloInstruction* user) { + return !reshape_candidates->count(user); + })) { + for (auto* user : operand->users()) { + removed |= reshape_candidates->erase(user) > 0; + } + } + } + } + + if (reshape_candidates->empty()) { + return false; + } + for (HloInstruction* instruction : *reshape_candidates) { + const HloInstruction* first_reshape_operand = + FirstNonScalarAndNonTrivialReshapeOperand(instruction); + TF_ASSIGN_OR_RETURN( + bool did_change, + PerformSinkReshapeOrTranspose(instruction, first_reshape_operand)); + CHECK(did_change); + } + return true; +} + } // namespace StatusOr ReshapeMover::Run(HloModule* module) { @@ -355,11 +410,15 @@ StatusOr ReshapeMover::Run(HloModule* module) { VLOG(2) << "Pre ReshapeMover HLO:"; XLA_VLOG_LINES(2, module->ToString()); for (auto* comp : module->MakeNonfusionComputations()) { - for (HloInstruction* instruction : comp->MakeInstructionPostOrder()) { - TF_ASSIGN_OR_RETURN(bool did_change, - TrySinkReshapeOrTranspose(comp, instruction)); - changed |= did_change; + HloInstructionSet reshape_candidates; + for (HloInstruction* instruction : comp->instructions()) { + if (IsReshapeMoveCandidate(instruction)) { + reshape_candidates.insert(instruction); + } } + TF_ASSIGN_OR_RETURN(bool did_change, + TryReshapeMoveOnCandidates(&reshape_candidates)); + changed |= did_change; } 
VLOG(2) << "Post ReshapeMover HLO:"; XLA_VLOG_LINES(2, module->ToString()); diff --git a/tensorflow/compiler/xla/service/reshape_mover_test.cc b/tensorflow/compiler/xla/service/reshape_mover_test.cc index 4e0a0a8832379402edfc231ea84221448d70bac2..13e2d3258e3b92f52320201c382594962c0e3b2b 100644 --- a/tensorflow/compiler/xla/service/reshape_mover_test.cc +++ b/tensorflow/compiler/xla/service/reshape_mover_test.cc @@ -458,57 +458,6 @@ TEST_F(ReshapeMoverTest, ScalarReshapeNotMovedAcrossSelect) { EXPECT_EQ(select, computation->root_instruction()); } -// Tree looks like: -// -// param0 [1,128,1] -// | -// reshape [128,1] constant [128,1024] -// \ / -// multiply w/implicit broadcast [128,1024] -// -// The reshape mover would like to sink the reshape below the multiply. -// -// Previously we would attempt to insert a reshape of the constant to [1,128,1] -// (which is unsound, because it has a different number of elements) as -// preparation for sinking the reshape. -// -// To eliminate the unsoundness, we outlaw reshape sinking when one of the -// operands is implicitly broadcast in the elementwise consumer. -// -// TODO(b/37799338) However, it would be possible in this case to do a more -// in-depth analysis to get reshape movement to occur: -// -// 1. Note that the broadcast dimension (logical dimension 1) in the operands -// would map back to logical dimension 2 in the param0 node. -// 2. Match rank of the constant to the param0 node (by prepending a trivial 1 -// dimension). -// 3. Reshape to [128,1024] at the root. -// -// But this is not currently done. 
-TEST_F(ReshapeMoverTest, ImplicitlyBroadcastReshapeIsNotMovedBug37787999) { - HloComputation::Builder builder(TestName()); - auto param0 = builder.AddInstruction(HloInstruction::CreateParameter( - 0, ShapeUtil::MakeShape(F32, {1, 128, 1}), "param0")); - auto reshape = builder.AddInstruction(HloInstruction::CreateReshape( - ShapeUtil::MakeShape(F32, {128, 1}), param0)); - Array2D a(128, 1024); - auto literal = Literal::CreateR2FromArray2D(a); - auto constant = builder.AddInstruction( - HloInstruction::CreateConstant(std::move(literal))); - auto multiply = builder.AddInstruction(HloInstruction::CreateBinary( - constant->shape(), HloOpcode::kMultiply, constant, reshape)); - - auto computation = module().AddEntryComputation(builder.Build()); - EXPECT_THAT(computation->root_instruction(), - op::Multiply(op::Constant(), op::Reshape(param0))); - - EXPECT_FALSE(ReshapeMover().Run(&module()).ValueOrDie()); - - EXPECT_THAT(computation->root_instruction(), - op::Multiply(op::Constant(), op::Reshape(param0))); - EXPECT_EQ(multiply, computation->root_instruction()); -} - // Tree looks like this: // // add1 @@ -564,15 +513,15 @@ TEST_F(ReshapeMoverTest, SinkTransposeAcrossBroadcastScalar) { const string hlo_string = R"( HloModule TransposeMulInversedTransposeModule ENTRY TransposeMulInversedTranspose { - src0 = f32[1,20,8,32]{3,2,1,0} parameter(0) - transpose0 = f32[1,8,20,32]{3,2,1,0} transpose(src0), dimensions={0,2,1,3} + src0 = f32[20,8]{1,0} parameter(0) + transpose0 = f32[8,20]{1,0} transpose(src0), dimensions={1,0} src1 = f32[] parameter(1) - broadcast0 = f32[1,8,20,32]{3,2,1,0} broadcast(src1), dimensions={} - ROOT multiply0 = f32[1,8,20,32]{3,2,1,0} multiply(transpose0, broadcast0) + broadcast0 = f32[8,20]{1,0} broadcast(src1), dimensions={} + ROOT multiply0 = f32[8,20]{1,0} multiply(transpose0, broadcast0) } )"; - ParseAndVerifyModule(hlo_string.c_str()); + ParseAndVerifyModule(hlo_string); TF_ASSERT_OK_AND_ASSIGN(bool changed, ReshapeMover().Run(&module())); 
EXPECT_TRUE(changed); @@ -580,5 +529,75 @@ TEST_F(ReshapeMoverTest, SinkTransposeAcrossBroadcastScalar) { op::Transpose(op::Multiply())); } +TEST_F(ReshapeMoverTest, ReshapeWithUsersOutsideCandidatesNotSink) { + const string hlo_string = R"( + HloModule ReshapeWithUsersOutsideCandidates + ENTRY ReshapeWithMultipleUsers { + param0 = f32[20,8]{1,0} parameter(0) + reshape0 = f32[8,20]{1,0} reshape(param0) + param1 = f32[] parameter(1) + broadcast0 = f32[8,20]{1,0} broadcast(param1), dimensions={} + param2 = f32[20,8]{1,0} parameter(2) + reshape1 = f32[8,20]{1,0} reshape(param2) + param3 = f32[20,8]{1,0} parameter(3) + reshape2 = f32[8,20]{1,0} reshape(param3) + param4 = f32[8,20]{1,0} parameter(4) + add0 = f32[8,20]{1,0} add(reshape0, broadcast0) + add1 = f32[8,20]{1,0} add(reshape0, reshape1) + add2 = f32[8,20]{1,0} add(reshape1, param4) + ROOT tuple = (f32[8,20]{1,0},f32[8,20]{1,0}, + f32[8,20]{1,0}) tuple(add0, add1, add2) + } + )"; + + ParseAndVerifyModule(hlo_string); + TF_ASSERT_OK_AND_ASSIGN(bool changed, ReshapeMover().Run(&module())); + EXPECT_FALSE(changed); +} + +TEST_F(ReshapeMoverTest, ReshapeNoUsersOutsideCandidatesSink1) { + const string hlo_string = R"( + HloModule ReshapeNoUsersOutsideCandidates1 + ENTRY ReshapeWithMultipleUsers1 { + param0 = f32[20,8]{1,0} parameter(0) + reshape0 = f32[8,20]{1,0} reshape(param0) + param1 = f32[] parameter(1) + broadcast0 = f32[8,20]{1,0} broadcast(param1), dimensions={} + param2 = f32[20,8]{1,0} parameter(2) + reshape1 = f32[8,20]{1,0} reshape(param2) + param3 = f32[20,8]{1,0} parameter(3) + reshape2 = f32[8,20]{1,0} reshape(param3) + add0 = f32[8,20]{1,0} add(reshape0, broadcast0) + add1 = f32[8,20]{1,0} add(reshape0, reshape1) + add2 = f32[8,20]{1,0} add(reshape1, reshape2) + ROOT tuple = (f32[8,20]{1,0},f32[8,20]{1,0}, + f32[8,20]{1,0}) tuple(add0, add1, add2) + } + )"; + + ParseAndVerifyModule(hlo_string); + TF_ASSERT_OK_AND_ASSIGN(bool changed, ReshapeMover().Run(&module())); + EXPECT_TRUE(changed); + 
EXPECT_THAT(module().entry_computation()->root_instruction(), + op::Tuple(op::Reshape(), op::Reshape(), op::Reshape())); +} + +TEST_F(ReshapeMoverTest, ReshapeNoUsersOutsideCandidatesSink2) { + const string hlo_string = R"( + HloModule ReshapeNoUsersOutsideCandidates2 + ENTRY ReshapeWithMultipleUsers2 { + param0 = f32[20,8]{1,0} parameter(0) + reshape0 = f32[8,20]{1,0} reshape(param0) + ROOT add0 = f32[8,20]{1,0} add(reshape0, reshape0) + } + )"; + + ParseAndVerifyModule(hlo_string); + TF_ASSERT_OK_AND_ASSIGN(bool changed, ReshapeMover().Run(&module())); + EXPECT_TRUE(changed); + EXPECT_THAT(module().entry_computation()->root_instruction(), + op::Reshape(op::Add())); +} + } // namespace } // namespace xla diff --git a/tensorflow/compiler/xla/service/service.cc b/tensorflow/compiler/xla/service/service.cc index ca8071b7bbbca3c68df3278dde61403957022afc..a73118c68a7f7d4169c4e592fc250fec8108c0f3 100644 --- a/tensorflow/compiler/xla/service/service.cc +++ b/tensorflow/compiler/xla/service/service.cc @@ -54,8 +54,6 @@ limitations under the License. 
#include "tensorflow/core/platform/stream_executor_no_cuda.h" #include "tensorflow/core/platform/types.h" -namespace se = ::perftools::gputools; - using ::tensorflow::strings::Printf; using ::tensorflow::strings::StrCat; using ::xla::source_map_util::InvalidParameterArgument; @@ -95,15 +93,12 @@ tensorflow::Status RecordResult(const ShapedBuffer& result, } // namespace -ServiceOptions& ServiceOptions::set_platform( - perftools::gputools::Platform* platform) { +ServiceOptions& ServiceOptions::set_platform(se::Platform* platform) { platform_ = platform; return *this; } -perftools::gputools::Platform* ServiceOptions::platform() const { - return platform_; -} +se::Platform* ServiceOptions::platform() const { return platform_; } ServiceOptions& ServiceOptions::set_number_of_replicas(int number_of_replicas) { number_of_replicas_ = number_of_replicas; @@ -123,7 +118,7 @@ int ServiceOptions::intra_op_parallelism_threads() const { } /* static */ StatusOr> Service::NewService( - perftools::gputools::Platform* platform) { + se::Platform* platform) { ServiceOptions default_options; default_options.set_platform(platform); return NewService(default_options); @@ -131,7 +126,7 @@ int ServiceOptions::intra_op_parallelism_threads() const { /* static */ StatusOr> Service::NewService( const ServiceOptions& options) { - perftools::gputools::Platform* platform = options.platform(); + se::Platform* platform = options.platform(); std::unique_ptr execute_backend; if (platform == nullptr) { TF_ASSIGN_OR_RETURN(platform, PlatformUtil::GetDefaultPlatform()); @@ -235,8 +230,7 @@ tensorflow::Status Service::ValidateResultShapeWithLayout( StatusOr>> Service::ResolveAndValidateArguments( tensorflow::gtl::ArraySlice arguments, - tensorflow::gtl::ArraySlice - stream_executors) { + tensorflow::gtl::ArraySlice stream_executors) { CHECK_EQ(options_.number_of_replicas(), stream_executors.size()); std::vector> replicated_arguments; replicated_arguments.resize(options_.number_of_replicas()); @@ -314,7 
+308,10 @@ StatusOr> Service::CreateModuleConfig( computation_layout->mutable_result_layout()->CopyLayoutFromShape( shape_with_output_layout)); } else { - computation_layout->mutable_result_layout()->Clear(); + // TODO(b/78356948): We are forcing the default layout here. We should fix + // clients which expect a default layout, to be explicit about it, by + // passing the proper ExecutionOptions with shape_with_output_layout set. + computation_layout->mutable_result_layout()->SetToDefaultLayout(); } config->set_replica_count(options_.number_of_replicas()); @@ -349,8 +346,7 @@ StatusOr> Service::CreateModuleConfig( StatusOr>> Service::BuildExecutables( std::vector versioned_handles, std::vector> module_configs, - Backend* backend, - std::vector> executors, + Backend* backend, std::vector> executors, DeviceMemoryAllocator* device_allocator) { VLOG(1) << Printf("BuildExecutable on service %p", this); @@ -409,6 +405,36 @@ StatusOr>> Service::BuildExecutables( return std::move(executables); } +StatusOr>> Service::BuildExecutables( + const std::vector& module_protos, + std::vector> module_configs, + Backend* backend, std::vector> executors, + DeviceMemoryAllocator* device_allocator) { + VLOG(1) << Printf("BuildExecutable on service %p", this); + + VLOG(1) << "Computations:"; + for (const HloModuleProto* proto : module_protos) { + VLOG(1) << proto->name(); + } + + CHECK_EQ(module_protos.size(), module_configs.size()); + std::vector> modules; + for (int64 i = 0; i < module_protos.size(); ++i) { + const HloModuleProto* proto = module_protos[i]; + const HloModuleConfig& config = *module_configs[i]; + TF_ASSIGN_OR_RETURN(auto module, + HloModule::CreateFromProto(*proto, config)); + modules.push_back(std::move(module)); + } + + TF_ASSIGN_OR_RETURN( + std::vector> executables, + backend->compiler()->Compile(std::move(modules), std::move(executors), + device_allocator)); + + return std::move(executables); +} + StatusOr> Service::BuildExecutable( const VersionedComputationHandle& 
versioned_handle, std::unique_ptr module_config, Backend* backend, @@ -462,7 +488,7 @@ StatusOr> Service::BuildExecutable( StatusOr> Service::BuildAndCacheExecutable( const VersionedComputationHandle& versioned_handle, std::unique_ptr module_config, Backend* backend, - perftools::gputools::StreamExecutor* executor, ExecutionProfile* profile, + se::StreamExecutor* executor, ExecutionProfile* profile, DeviceMemoryAllocator* device_allocator) { std::shared_ptr executable = compilation_cache_.LookUp(versioned_handle, *module_config); @@ -510,7 +536,7 @@ Service::ExecuteParallelAndRegisterResult( // Streams where the computation are launched, so we can wait on the streams // to complete. std::vector::SmartPtr> streams; - std::vector> timers; + std::vector> timers; // Global data handles for the computation results, one for each computation. std::vector result_handles; @@ -527,15 +553,14 @@ Service::ExecuteParallelAndRegisterResult( // Stream executors for the replicas of the current computation. TF_ASSIGN_OR_RETURN(auto replicas, Replicas(*backend, device_handles[i])); CHECK_EQ(replicas.size(), arguments[i].size()); - std::vector> result_buffers; + std::vector result_buffers; for (int64 replica = 0; replica < replicas.size(); ++replica) { TF_ASSIGN_OR_RETURN(Pool::SmartPtr stream, backend->BorrowStream(replicas[replica])); streams.push_back(std::move(stream)); if (replica == 0 && profile != nullptr) { - timers.emplace_back( - new perftools::gputools::Timer(streams.back()->parent())); + timers.emplace_back(new se::Timer(streams.back()->parent())); streams.back() ->InitTimer(timers.back().get()) .ThenStartTimer(timers.back().get()); @@ -560,7 +585,7 @@ Service::ExecuteParallelAndRegisterResult( backend->StreamBorrower()); // Asynchronously launch the computation. 
- TF_ASSIGN_OR_RETURN(std::unique_ptr result, + TF_ASSIGN_OR_RETURN(ShapedBuffer result, executables[i]->ExecuteAsyncOnStream( &run_options, arguments[i][replica])); @@ -703,12 +728,53 @@ tensorflow::Status Service::SetReturnValue(const SetReturnValueRequest* arg, return computation->SetReturnValue(arg->operand()); } +StatusOr> Service::GetExecutors( + const ExecutionOptions& execution_options, int64 requests_size, + int64 request_index) const { + if (execution_options.device_handles().empty()) { + return FailedPrecondition( + "device handles must be given to execute parallel computations"); + } + if (requests_size > 1 && execution_options.device_handles_size() > 1) { + return InvalidArgument( + "Parallel requests with multiple device handles is not supported. " + "Found %lld parallel requests, with request %lld containing %d device " + "handles.", + requests_size, request_index, execution_options.device_handles_size()); + } + std::vector executors; + for (const auto& device_handle : execution_options.device_handles()) { + TF_ASSIGN_OR_RETURN(auto replicas, + Replicas(*execute_backend_, device_handle)); + se::StreamExecutor* executor = replicas[0]; + CHECK(executor != nullptr); + executors.push_back(executor); + } + return executors; +} + +StatusOr>> Service::GetArguments( + const ExecutionOptions& execution_options, + tensorflow::gtl::ArraySlice arguments) { + // Resolve the allocations for the arguments of the computation, and create + // a vector of device memory offsets for the arguments from the allocations. + // In the case of partitioned computations, assume all arguments go on the + // zeroth core. 
+ TF_ASSIGN_OR_RETURN( + auto replicas, + Replicas(*execute_backend_, execution_options.device_handles(0))); + TF_ASSIGN_OR_RETURN( + std::vector> replicated_arguments, + ResolveAndValidateArguments(arguments, replicas)); + return replicated_arguments; +} + tensorflow::Status Service::ExecuteParallel(const ExecuteParallelRequest* arg, ExecuteParallelResponse* result) { VLOG(1) << "running execute-parallel request: " << arg->ShortDebugString(); std::vector>> all_arguments; - std::vector> all_executors; + std::vector> all_executors; std::vector versioned_handles; std::vector> module_configs; std::vector computation_names; @@ -731,26 +797,10 @@ tensorflow::Status Service::ExecuteParallel(const ExecuteParallelRequest* arg, // is one of the executors to run the replicated computation. const ExecutionOptions& execution_options = arg->requests(i).execution_options(); - if (execution_options.device_handles().empty()) { - return FailedPrecondition( - "device handles must be given to execute parallel computations"); - } - if (arg->requests_size() > 1 && - execution_options.device_handles_size() > 1) { - return InvalidArgument( - "Parallel requests with multiple device handles is not supported. " - "Found %d parallel requests, with request %lld containing %d device " - "handles.", - arg->requests_size(), i, execution_options.device_handles_size()); - } - std::vector executors; - for (const auto& device_handle : execution_options.device_handles()) { - TF_ASSIGN_OR_RETURN(auto replicas, - Replicas(*execute_backend_, device_handle)); - se::StreamExecutor* executor = replicas[0]; - CHECK(executor != nullptr); - executors.push_back(executor); - } + + // Get the executors. + TF_ASSIGN_OR_RETURN(auto executors, GetExecutors(execution_options, + arg->requests_size(), i)); // Resolve the UserComputation object associated with the requested // computation and compute the program shape. 
@@ -767,16 +817,9 @@ tensorflow::Status Service::ExecuteParallel(const ExecuteParallelRequest* arg, std::shared_ptr program_shape, user_computation->ComputeProgramShape(versioned_handle.version)); - // Resolve the allocations for the arguments of the computation, and create - // a vector of device memory offsets for the arguments from the allocations. - // In the case of partitioned computations, assume all arguments go on the - // zeroth core. - TF_ASSIGN_OR_RETURN( - auto replicas, - Replicas(*execute_backend_, execution_options.device_handles(0))); - TF_ASSIGN_OR_RETURN( - std::vector> replicated_arguments, - ResolveAndValidateArguments(request.arguments(), replicas)); + // Get the replicated arguments. + TF_ASSIGN_OR_RETURN(auto replicated_arguments, + GetArguments(execution_options, request.arguments())); // Create an HloModuleConfig object for the computation, given the shape of // the program and the argument allocations. Here, we care only about the @@ -839,7 +882,103 @@ tensorflow::Status Service::ExecuteParallel(const ExecuteParallelRequest* arg, tensorflow::Status Service::ExecuteGraphParallel( const ExecuteGraphParallelRequest* arg, ExecuteParallelResponse* result) { - return Unimplemented("execute-graph-parallel is not yet implemented"); + VLOG(1) << "running execute-graph-parallel request"; + + std::vector>> all_arguments; + std::vector> all_executors; + std::vector module_protos; + std::vector> module_configs; + std::vector computation_names; + std::vector device_handles; + + int num_requested_devices = + std::accumulate(arg->requests().begin(), arg->requests().end(), 0, + [](int a, const ExecuteGraphRequest& r) -> int { + return a + r.execution_options().device_handles_size(); + }); + if (num_requested_devices * options_.number_of_replicas() > + execute_backend_->device_count()) { + return FailedPrecondition( + "there are not enough stream executors to execute %d computations", + num_requested_devices); + } + + for (int64 i = 0; i < 
arg->requests_size(); ++i) { + // Get the stream executor for the i'th computation. This stream executor + // is one of the executors to run the replicated computation. + const ExecutionOptions& execution_options = + arg->requests(i).execution_options(); + const ExecuteGraphRequest& request = arg->requests(i); + TF_RET_CHECK(request.has_computation()) << "computations may not be empty"; + TF_RET_CHECK(request.computation().has_program_shape()) + << "programe shape may not be empty"; + + // Get the executors. + TF_ASSIGN_OR_RETURN(auto executors, GetExecutors(execution_options, + arg->requests_size(), i)); + + // Get the replicated arguments. + TF_ASSIGN_OR_RETURN(auto replicated_arguments, + GetArguments(execution_options, request.arguments())); + + // Create an HloModuleConfig object for the computation, given the shape of + // the program and the argument allocations. Here, we care only about the + // shapes of the arguments, so, it is sufficient to use the arguments of + // replica 0. + TF_ASSIGN_OR_RETURN( + std::unique_ptr module_config, + CreateModuleConfig(request.computation().program_shape(), + replicated_arguments.front(), + request.execution_options(), + /*user_computation=*/nullptr)); + VLOG(3) + << "ExecuteGraphParallel created HloModuleConfig computation layout: " + << module_config->entry_computation_layout().ToString(); + + // Adds to the vectors to build and execute the computations after the loop. 
+ all_arguments.push_back(replicated_arguments); + all_arguments.insert(all_arguments.end(), executors.size() - 1, {{}}); + module_protos.push_back(&request.computation()); + module_configs.push_back(std::move(module_config)); + computation_names.insert(computation_names.end(), executors.size(), + request.computation().name()); + all_executors.push_back(executors); + device_handles.insert(device_handles.end(), + execution_options.device_handles().begin(), + execution_options.device_handles().end()); + } + + // Build the HloModules and compile to generate the executables. + // + // TODO(jlebar): There's currently no way to pass a device allocator to + // ExecuteGraphParallel, so we have to pass a null device_allocator below. + TF_ASSIGN_OR_RETURN(std::vector> executables, + BuildExecutables(module_protos, std::move(module_configs), + execute_backend_.get(), all_executors, + /*device_allocator=*/nullptr)); + std::vector executable_ptrs; + executable_ptrs.reserve(executables.size()); + for (const auto& executable : executables) { + executable_ptrs.push_back(executable.get()); + } + + // Execute the generated executables in parallel and return the device + // handles for each computation's output. 
+ ExecutionProfile profile; + TF_ASSIGN_OR_RETURN( + std::vector outputs, + ExecuteParallelAndRegisterResult(executable_ptrs, all_arguments, + execute_backend_.get(), device_handles, + computation_names, &profile)); + for (const GlobalDataHandle& output : outputs) { + ExecuteResponse response; + *response.mutable_output() = output; + *response.mutable_profile() = profile; + *result->add_responses() = response; + } + + VLOG(1) << "successfully completed 'execute-graph-parallel' request"; + return tensorflow::Status::OK(); } tensorflow::Status Service::GetDeviceHandles(const GetDeviceHandlesRequest* arg, @@ -872,6 +1011,20 @@ tensorflow::Status Service::ExecuteOneToN(const ExecuteRequest* arg, *parallel_arg.add_requests() = *arg; ExecuteParallelResponse parallel_result; TF_RETURN_IF_ERROR(ExecuteParallel(¶llel_arg, ¶llel_result)); + return PickParallelResponse(parallel_result, result); +} + +tensorflow::Status Service::ExecuteOneToN(const ExecuteGraphRequest* arg, + ExecuteResponse* result) { + ExecuteGraphParallelRequest parallel_arg; + *parallel_arg.add_requests() = *arg; + ExecuteParallelResponse parallel_result; + TF_RETURN_IF_ERROR(ExecuteGraphParallel(¶llel_arg, ¶llel_result)); + return PickParallelResponse(parallel_result, result); +} + +tensorflow::Status Service::PickParallelResponse( + const ExecuteParallelResponse& parallel_result, ExecuteResponse* result) { // The "result device" selection is a bit hacky, but better than assuming it // is device 0. We have b/76035356 for restructuring the client API to clean // up the current asymmetries and support more functionalities. @@ -999,8 +1152,14 @@ tensorflow::Status Service::ExecuteGraph(const ExecuteGraphRequest* arg, if (!arg->has_computation()) { return InvalidArgument("computations may not be empty"); } + if (!arg->computation().has_program_shape()) { + return InvalidArgument("programe shape may not be empty"); + } - // TODO(b/74197823): Handle partitioning. 
+ // If we received multiple device handles, we must partition the module. + if (arg->execution_options().device_handles_size() > 1) { + return ExecuteOneToN(arg, result); + } TF_ASSIGN_OR_RETURN(auto replicas, Replicas(*execute_backend_, SingleComputationDeviceHandle())); @@ -1078,7 +1237,7 @@ tensorflow::Status Service::ExecuteAsync(const ExecuteAsyncRequest* arg, streams.push_back(std::move(stream)); } - std::vector> result_buffers; + std::vector result_buffers; for (size_t i = 0; i < streams.size(); ++i) { const auto& stream = streams[i]; ExecutableRunOptions options; @@ -1091,7 +1250,7 @@ tensorflow::Status Service::ExecuteAsync(const ExecuteAsyncRequest* arg, ServiceExecutableRunOptions service_options( options, execute_backend_->StreamBorrower()); - TF_ASSIGN_OR_RETURN(std::unique_ptr this_result_buffer, + TF_ASSIGN_OR_RETURN(ShapedBuffer this_result_buffer, executable->ExecuteAsyncOnStream( &service_options, replicated_arguments[i])); @@ -1191,16 +1350,16 @@ tensorflow::Status Service::TransferToServer(const TransferToServerRequest* arg, } // Allocate memory in each replica and transfer the data to all replicas. - std::vector> replicated_buffers; + std::vector replicated_buffers; for (se::StreamExecutor* executor : replicas) { TF_ASSIGN_OR_RETURN( - std::unique_ptr shaped_buffer, + ShapedBuffer shaped_buffer, execute_backend_->transfer_manager()->AllocateShapedBuffer( shape, execute_backend_->memory_allocator(), executor->device_ordinal())); TF_RETURN_IF_ERROR( execute_backend_->transfer_manager()->TransferLiteralToDevice( - executor, *literal, *shaped_buffer)); + executor, *literal, shaped_buffer)); replicated_buffers.emplace_back(std::move(shaped_buffer)); } TF_ASSIGN_OR_RETURN(*result->mutable_data(), @@ -1379,6 +1538,50 @@ tensorflow::Status Service::ComputeConstant(const ComputeConstantRequest* arg, // Since the shape_with_output_layout option in ExecutionOption is // non-effective to the Evaluator results, explicit relayout here. 
+ // + // TODO(b/77824332): Make HloEvaluator take care of the re-layout. + if (arg->has_output_layout()) { + result_literal = result_literal->Relayout(arg->output_layout()); + } + *result->mutable_literal() = result_literal->ToProto(); + + return tensorflow::Status::OK(); +} + +tensorflow::Status Service::ComputeConstantGraph( + const ComputeConstantGraphRequest* arg, ComputeConstantResponse* result) { + if (!arg->has_computation()) { + return InvalidArgument("computations may not be empty"); + } + if (!arg->computation().has_program_shape()) { + return InvalidArgument("program shape may not be empty"); + } + if (arg->computation().program_shape().parameters_size() != 0) { + return InvalidArgument( + "constant computation may not depend on any parameters."); + } + + ProgramShape program_shape = arg->computation().program_shape(); + TF_DCHECK_OK(ShapeUtil::ValidateShape(program_shape.result())); + if (arg->has_output_layout()) { + TF_RETURN_IF_ERROR(LayoutUtil::ValidateLayoutForShape( + arg->output_layout(), program_shape.result())); + } + + HloModuleConfig config(program_shape); + + TF_ASSIGN_OR_RETURN(std::unique_ptr module, + HloModule::CreateFromProto(arg->computation(), config)); + + HloEvaluator evaluator; + TF_ASSIGN_OR_RETURN(auto result_literal, + evaluator.Evaluate>( + *module, /*arg_literals=*/{})); + + // Since the result layout is non-effective to the Evaluator results, explicit + // relayout here. + // + // TODO(b/77824332): Make HloEvaluator take care of the re-layout. 
if (arg->has_output_layout()) { result_literal = result_literal->Relayout(arg->output_layout()); } @@ -1452,7 +1655,14 @@ tensorflow::Status Service::GetComputationStats( tensorflow::Status Service::GetComputationGraphStats( const ComputationGraphStatsRequest* arg, ComputationStatsResponse* result) { - HloModuleConfig config; + if (!arg->has_computation()) { + return InvalidArgument("Computations may not be empty."); + } + if (!arg->computation().has_program_shape()) { + return InvalidArgument("Program shape may not be empty."); + } + + HloModuleConfig config(arg->computation().program_shape()); config.set_debug_options(arg->debug_options()); TF_ASSIGN_OR_RETURN(std::unique_ptr module, HloModule::CreateFromProto(arg->computation(), config)); @@ -1737,9 +1947,9 @@ DeviceHandle Service::SingleComputationDeviceHandle() const { return device_handle; } -StatusOr> Service::Replicas( +StatusOr> Service::Replicas( const Backend& backend, const DeviceHandle& device_handle) const { - std::vector replicas; + std::vector replicas; for (int replica = 0; replica < options_.number_of_replicas(); ++replica) { // From the computation placer, find out the device ids of the replicas for // the given device handle. diff --git a/tensorflow/compiler/xla/service/service.h b/tensorflow/compiler/xla/service/service.h index ebe4a2e04372bbb23488d13051a4be73abac5ff0..476bd0597de735a9f777be78f5ab01dac1188525 100644 --- a/tensorflow/compiler/xla/service/service.h +++ b/tensorflow/compiler/xla/service/service.h @@ -53,8 +53,8 @@ namespace xla { class ServiceOptions { public: // Set the platform backing the service, or nullptr for the default platform. - ServiceOptions& set_platform(perftools::gputools::Platform* platform); - perftools::gputools::Platform* platform() const; + ServiceOptions& set_platform(se::Platform* platform); + se::Platform* platform() const; // Set the number of replicas to use when compiling replicated // programs. 
@@ -66,7 +66,7 @@ class ServiceOptions { int intra_op_parallelism_threads() const; private: - perftools::gputools::Platform* platform_ = nullptr; + se::Platform* platform_ = nullptr; int number_of_replicas_ = 1; int intra_op_parallelism_threads_ = -1; }; @@ -79,7 +79,7 @@ class Service : public ServiceInterface { public: // Factory method for creating a new Service. static StatusOr> NewService( - perftools::gputools::Platform* platform = nullptr); + se::Platform* platform = nullptr); static StatusOr> NewService( const ServiceOptions& options); @@ -206,6 +206,9 @@ class Service : public ServiceInterface { // Computes the value of a constant expression. tensorflow::Status ComputeConstant(const ComputeConstantRequest* arg, ComputeConstantResponse* result) override; + tensorflow::Status ComputeConstantGraph( + const ComputeConstantGraphRequest* arg, + ComputeConstantResponse* result) override; // Returns the shape (with layout) of an array associated with a given data // handle. @@ -278,6 +281,20 @@ class Service : public ServiceInterface { const ExecutionOptions& execution_options, const UserComputation* user_computation = nullptr); + // Picks a parallel response and fills the result. + Status PickParallelResponse(const ExecuteParallelResponse& parallel_result, + ExecuteResponse* result); + + // Prepare the executors for executing parallel. + StatusOr> GetExecutors( + const ExecutionOptions& execution_options, int64 requests_size, + int64 request_index) const; + + // Prepare the arguments for executing parallel. 
+ StatusOr>> GetArguments( + const ExecutionOptions& execution_options, + tensorflow::gtl::ArraySlice arguments); + protected: friend class LocalExecutable; @@ -286,8 +303,6 @@ class Service : public ServiceInterface { Service(const ServiceOptions& options, std::unique_ptr execute_backend); - static StatusOr> CreateComputeConstantBackend(); - // Resolves the given argument handles in the allocation tracker and returns // the corresponding allocations for every replica. The function also verifies // that each allocation matches the execution platform and device ordinal of @@ -295,8 +310,7 @@ class Service : public ServiceInterface { StatusOr>> ResolveAndValidateArguments( tensorflow::gtl::ArraySlice arguments, - tensorflow::gtl::ArraySlice - stream_executors); + tensorflow::gtl::ArraySlice stream_executors); // Create a Hlo module config for the given program shape and arguments. // execution_options is optional; if not given a default is used. @@ -314,7 +328,7 @@ class Service : public ServiceInterface { StatusOr> BuildExecutable( const VersionedComputationHandle& versioned_handle, std::unique_ptr module_config, Backend* backend, - perftools::gputools::StreamExecutor* executor, + se::StreamExecutor* executor, DeviceMemoryAllocator* device_allocator = nullptr); // Builds an Executable for the given HLO module proto. 
@@ -323,7 +337,7 @@ class Service : public ServiceInterface { StatusOr> BuildExecutable( const HloModuleProto& module_proto, std::unique_ptr module_config, Backend* backend, - perftools::gputools::StreamExecutor* executor, + se::StreamExecutor* executor, DeviceMemoryAllocator* device_allocator = nullptr); // Same as BuildExecutable() above, but builds a list of Executables for the @@ -331,8 +345,12 @@ class Service : public ServiceInterface { StatusOr>> BuildExecutables( std::vector versioned_handles, std::vector> module_configs, - Backend* backend, - std::vector> executors, + Backend* backend, std::vector> executors, + DeviceMemoryAllocator* device_allocator); + StatusOr>> BuildExecutables( + const std::vector& module_protos, + std::vector> module_configs, + Backend* backend, std::vector> executors, DeviceMemoryAllocator* device_allocator); // Similar to BuildExecutable, but look in the compilation cache for the @@ -341,7 +359,7 @@ class Service : public ServiceInterface { StatusOr> BuildAndCacheExecutable( const VersionedComputationHandle& versioned_handle, std::unique_ptr module_config, Backend* backend, - perftools::gputools::StreamExecutor* executor, ExecutionProfile* profile, + se::StreamExecutor* executor, ExecutionProfile* profile, DeviceMemoryAllocator* device_allocator = nullptr); // Runs the given executable with the given arguments and register the result @@ -378,6 +396,8 @@ class Service : public ServiceInterface { // will be the result of this computation. tensorflow::Status ExecuteOneToN(const ExecuteRequest* arg, ExecuteResponse* result); + tensorflow::Status ExecuteOneToN(const ExecuteGraphRequest* arg, + ExecuteResponse* result); // Convenience function which checks whether the given shape_with_layout // (presumably passed by the client to set the result layout) is valid for the @@ -388,7 +408,7 @@ class Service : public ServiceInterface { // Returns the stream executors assigned to the replicas represented by the // given device handle. 
Each device_handle is a virtual replicated device that // represents a set of physical devices for the replicas. - StatusOr> Replicas( + StatusOr> Replicas( const Backend& backend, const DeviceHandle& device_handle) const; Status MaybeDumpHloModule(const HloModule& module) const; @@ -415,8 +435,6 @@ class Service : public ServiceInterface { CompilationCache compilation_cache_; // Backend to compile and execute computations on. - // - // TODO(b/28616830): Support multiple backends for execution. std::unique_ptr execute_backend_; TF_DISALLOW_COPY_AND_ASSIGN(Service); diff --git a/tensorflow/compiler/xla/service/service_executable_run_options.h b/tensorflow/compiler/xla/service/service_executable_run_options.h index 6c1f8feac7ed4423051cf2737be57dcfab508671..7f3910cdb0366078b97fb5f6a2dc498b37570926 100644 --- a/tensorflow/compiler/xla/service/service_executable_run_options.h +++ b/tensorflow/compiler/xla/service/service_executable_run_options.h @@ -28,7 +28,7 @@ namespace xla { class ServiceExecutableRunOptions { public: using StreamBorrower = - std::function::SmartPtr>(int)>; + std::function::SmartPtr>(int)>; ServiceExecutableRunOptions() : ServiceExecutableRunOptions(ExecutableRunOptions()) {} @@ -45,14 +45,13 @@ class ServiceExecutableRunOptions { ExecutableRunOptions* mutable_run_options() { return &run_options_; } // Delegate to `ExecutableRunOptions` member. - perftools::gputools::Stream* stream() const { return run_options_.stream(); } + se::Stream* stream() const { return run_options_.stream(); } DeviceMemoryAllocator* allocator() const { return run_options_.allocator(); } int device_ordinal() const { return run_options_.device_ordinal(); } // Borrows a stream and returns a smart pointer which returns the stream on // destruction. - StatusOr::SmartPtr> BorrowStream( - int device_ordinal) const { + StatusOr::SmartPtr> BorrowStream(int device_ordinal) const { return borrow_stream_ ? 
borrow_stream_(device_ordinal) : Status(tensorflow::error::UNIMPLEMENTED, "No stream cache"); diff --git a/tensorflow/compiler/xla/service/shape_inference.cc b/tensorflow/compiler/xla/service/shape_inference.cc index 77e12d36024dae56003ad4e59b54f9934dfc2c58..48b2922e77b78719e5d3469cbaa4fc15969de91b 100644 --- a/tensorflow/compiler/xla/service/shape_inference.cc +++ b/tensorflow/compiler/xla/service/shape_inference.cc @@ -52,6 +52,8 @@ UnaryOperation OpcodeToUnaryOperation(HloOpcode opcode) { return UNOP_ABS; case HloOpcode::kCeil: return UNOP_CEIL; + case HloOpcode::kClz: + return UNOP_CLZ; case HloOpcode::kCos: return UNOP_COS; case HloOpcode::kExp: @@ -360,6 +362,7 @@ StatusOr InferWindowOutputShape(const Shape& base_shape, arg, primitive_util::ComplexComponentType(arg.element_type())); } return arg; + case UNOP_CLZ: case UNOP_NEGATE: case UNOP_ROUND_NEAREST_AFZ: case UNOP_SIGN: diff --git a/tensorflow/compiler/xla/service/shaped_buffer.cc b/tensorflow/compiler/xla/service/shaped_buffer.cc index 6e9986165f7eaf71a964b42b734a5ae5db5e45d7..0b5a383f6fe5b01615c8d24ccf00fb667e0fe0c2 100644 --- a/tensorflow/compiler/xla/service/shaped_buffer.cc +++ b/tensorflow/compiler/xla/service/shaped_buffer.cc @@ -28,8 +28,6 @@ limitations under the License. #include "tensorflow/core/lib/strings/stringprintf.h" #include "tensorflow/core/platform/logging.h" -namespace se = ::perftools::gputools; - namespace xla { using ::tensorflow::strings::Appendf; @@ -68,6 +66,8 @@ ShapedBuffer& ShapedBuffer::operator=(ShapedBuffer&& s) { return *this; } +ShapedBuffer::~ShapedBuffer() {} + void ShapedBuffer::clear() { for (auto& pair : buffers_) { // A default constructed DeviceMemoryBase is a null pointer. 
@@ -104,18 +104,6 @@ std::ostream& operator<<(std::ostream& out, const ShapedBuffer& buffer) { return out; } -/* static */ -StatusOr> ScopedShapedBuffer::MakeScoped( - ShapedBuffer* shaped_buffer, DeviceMemoryAllocator* allocator) { - auto scoped_buffer = WrapUnique(new ScopedShapedBuffer( - shaped_buffer->on_host_shape(), shaped_buffer->on_device_shape(), - allocator, shaped_buffer->device_ordinal())); - scoped_buffer->buffers_ = shaped_buffer->buffers(); - shaped_buffer->clear(); - - return std::move(scoped_buffer); -} - ScopedShapedBuffer::ScopedShapedBuffer(const Shape& on_host_shape, const Shape& on_device_shape, DeviceMemoryAllocator* allocator, @@ -128,7 +116,25 @@ ScopedShapedBuffer::ScopedShapedBuffer(ShapedBuffer shaped_buffer, DeviceMemoryAllocator* allocator) : ShapedBuffer(std::move(shaped_buffer)), allocator_(allocator) {} +ScopedShapedBuffer::ScopedShapedBuffer(ScopedShapedBuffer&& s) + : ShapedBuffer(std::move(s)), allocator_(s.allocator_) { + // Null out s.allocator_ so it doesn't try to free anything in its destructor. + s.allocator_ = nullptr; +} + +ScopedShapedBuffer& ScopedShapedBuffer::operator=(ScopedShapedBuffer&& s) { + *static_cast(this) = std::move(static_cast(s)); + allocator_ = s.allocator_; + // Null out s.allocator_ so it doesn't try to free anything in its destructor. + s.allocator_ = nullptr; + return *this; +} + ScopedShapedBuffer::~ScopedShapedBuffer() { + // allocator_ will be null if we were moved-from. + if (allocator_ == nullptr) { + return; + } // Deallocate all non-null buffers. A buffer may appear in more than one spot // in the shape (eg, a tuple with a repeated element) so keep track of what // has been deallocated. 
@@ -144,9 +150,9 @@ ScopedShapedBuffer::~ScopedShapedBuffer() { } } -std::unique_ptr ScopedShapedBuffer::release() { - auto shaped_buffer = MakeUnique(std::move(*this)); - buffers_ = ShapeTree(); +ShapedBuffer ScopedShapedBuffer::release() { + ShapedBuffer shaped_buffer(std::move(*this)); + buffers_ = ShapeTree(); return shaped_buffer; } diff --git a/tensorflow/compiler/xla/service/shaped_buffer.h b/tensorflow/compiler/xla/service/shaped_buffer.h index b816df8385ef65b0b69ede1d6e65a1991b4bd7c6..f1b0527474cfe28fa7108fda829556d24c12b4d9 100644 --- a/tensorflow/compiler/xla/service/shaped_buffer.h +++ b/tensorflow/compiler/xla/service/shaped_buffer.h @@ -41,8 +41,15 @@ class ShapedBuffer { // determines the number of device allocations (DeviceMemoryBase) held by the // ShapedBuffer. ShapedBuffer(const Shape& on_host_shape, const Shape& on_device_shape, - const perftools::gputools::Platform* platform, - int device_ordinal); + const se::Platform* platform, int device_ordinal); + + // Movable, but not copyable. + ShapedBuffer(ShapedBuffer&& s); + ShapedBuffer& operator=(ShapedBuffer&&); + ShapedBuffer(const ShapedBuffer&) = delete; + ShapedBuffer& operator=(const ShapedBuffer&) = delete; + + virtual ~ShapedBuffer(); // Returns the shape of the on-host representation of the data held by this // ShapedBuffer. @@ -52,48 +59,36 @@ class ShapedBuffer { // ShapedBuffer. const Shape& on_device_shape() const { return on_device_shape_; } - const perftools::gputools::Platform* platform() const { return platform_; } + const se::Platform* platform() const { return platform_; } int device_ordinal() const { return device_ordinal_; } // Return the root buffer of the shape (shape index {}). - const perftools::gputools::DeviceMemoryBase& root_buffer() const { + const se::DeviceMemoryBase& root_buffer() const { return buffer(/*index=*/{}); } // Returns the buffer at the given shape index where index is defined as in // ShapeUtil::GetSubshape. 
- const perftools::gputools::DeviceMemoryBase& buffer( - const ShapeIndex& index) const { + const se::DeviceMemoryBase& buffer(const ShapeIndex& index) const { return buffers_.element(index); } // Sets the device memory buffer at the given index. - void set_buffer(const perftools::gputools::DeviceMemoryBase& buffer, - const ShapeIndex& index) { + void set_buffer(const se::DeviceMemoryBase& buffer, const ShapeIndex& index) { *buffers_.mutable_element(index) = buffer; } // Returns the underlying ShapeTree containing all the device addresses in the // ShapedBuffer. - const ShapeTree& buffers() const { - return buffers_; - } - ShapeTree& buffers() { - return buffers_; - } + const ShapeTree& buffers() const { return buffers_; } + ShapeTree& buffers() { return buffers_; } // Set all device memory pointers in the object to null. void clear(); string ToString() const; - ShapedBuffer(ShapedBuffer&& s); - ShapedBuffer& operator=(ShapedBuffer&&); - protected: - ShapedBuffer(const ShapedBuffer&) = delete; - ShapedBuffer& operator=(const ShapedBuffer&) = delete; - // The shape of the data when represented on the host. Shape on_host_shape_; @@ -101,13 +96,13 @@ class ShapedBuffer { Shape on_device_shape_; // The platform the memory is allocated on. - const perftools::gputools::Platform* platform_; + const se::Platform* platform_; // The device the memory is allocated on. int device_ordinal_; // The tree of device buffers. Its shape is on_device_shape(). - ShapeTree buffers_; + ShapeTree buffers_; }; std::ostream& operator<<(std::ostream& out, const ShapedBuffer& buffer); @@ -115,41 +110,45 @@ std::ostream& operator<<(std::ostream& out, const ShapedBuffer& buffer); // ShapedBuffer derived class which allocates all internal buffers on // construction and deallocates the memory when the object is // destructed. +// +// TODO(timshen): Remove inheritance between ScopedShapedBuffer and +// ShapedBuffer. 
There should never be a need to consider a ScopedShapedBuffer +// as a ShapedBuffer, because in that case we should just be able to pass around +// our ShapeTree. Inheritance only adds complexity. See +// discussion in cl/192849370. class ScopedShapedBuffer : public ShapedBuffer { public: - // Takes a ShapedBuffer and returns a ScopedShapedBuffer which manages the - // deallocation of the device memory held in the shaped buffer. All device - // memory pointers in the given ShapedBuffer are set to null. - static StatusOr> MakeScoped( - ShapedBuffer* shaped_buffer, DeviceMemoryAllocator* allocator); - - // Create a ScopedShapedBuffer with null DeviceMemoryBases at each index. - ScopedShapedBuffer(const Shape& on_host_shape, const Shape& on_device_shape, - DeviceMemoryAllocator* allocator, int device_ordinal); + // Creates a ScopedShapedBuffer with null DeviceMemoryBases at each index. + explicit ScopedShapedBuffer(const Shape& on_host_shape, + const Shape& on_device_shape, + DeviceMemoryAllocator* allocator, + int device_ordinal); // Create a ScopedShapedBuffer by taking over the memory from the incoming // ShapedBuffer. - ScopedShapedBuffer(ShapedBuffer shaped_buffer, - DeviceMemoryAllocator* allocator); + explicit ScopedShapedBuffer(ShapedBuffer shaped_buffer, + DeviceMemoryAllocator* allocator); + + // Movable, but not copyable. + ScopedShapedBuffer(ScopedShapedBuffer&& s); + ScopedShapedBuffer& operator=(ScopedShapedBuffer&&); + ScopedShapedBuffer(const ScopedShapedBuffer&) = delete; + ScopedShapedBuffer& operator=(const ScopedShapedBuffer&) = delete; + + // All buffers in the shape are deallocated on destruction. + ~ScopedShapedBuffer() override; // Return the allocator used to allocate the device memory held in this // ScopedShapedBuffer. DeviceMemoryAllocator* memory_allocator() const { return allocator_; } - // Release all device memory owned by this ScopedShapedBuffer and - // return the device memory pointers in the form of a - // ShapedBuffer. 
The returned ShapedBuffer takes over the memory - // from the ScopedShapedBuffer. The resulting ScopedShapedBuffer can - // only be destroyed. - std::unique_ptr release(); - - // All buffers in the shape are deallocated on destruction. - virtual ~ScopedShapedBuffer(); + // Releases all device memory owned by this ScopedShapedBuffer and returns the + // device memory pointers in the form of a ShapedBuffer. The returned + // ShapedBuffer takes over the memory from the ScopedShapedBuffer. The + // resulting ScopedShapedBuffer can only be destroyed. + ShapedBuffer release(); protected: - ScopedShapedBuffer(const ScopedShapedBuffer&) = delete; - void operator=(const ScopedShapedBuffer&) = delete; - DeviceMemoryAllocator* allocator_; }; diff --git a/tensorflow/compiler/xla/service/transfer_manager.cc b/tensorflow/compiler/xla/service/transfer_manager.cc index 2f36e2b16e0f2eed10aef811dd3cceeba6a5b8a9..98d0111d04d981dd3e26733e3311fd846b55d8ae 100644 --- a/tensorflow/compiler/xla/service/transfer_manager.cc +++ b/tensorflow/compiler/xla/service/transfer_manager.cc @@ -25,24 +25,20 @@ limitations under the License. 
#include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" -namespace se = ::perftools::gputools; - namespace xla { /* static */ tensorflow::mutex TransferManager::platform_transfer_manager_mutex_( tensorflow::LINKER_INITIALIZED); -/* static */ std::map* +/* static */ std::map* TransferManager::GetPlatformTransferManagers() { - static auto* r = - new std::map; + static auto* r = new std::map; return r; } Status TransferManager::TransferArrayToDevice( - perftools::gputools::StreamExecutor* executor, const Literal& literal, - const perftools::gputools::DeviceMemoryBase& dest) { + se::StreamExecutor* executor, const Literal& literal, + const se::DeviceMemoryBase& dest) { const Shape on_device_shape = HostShapeToDeviceShape(literal.shape()); TF_RET_CHECK(ShapeUtil::IsArray(on_device_shape)) << "On-device representation of " @@ -61,8 +57,8 @@ Status TransferManager::TransferArrayToDevice( } StatusOr> TransferManager::TransferArrayFromDevice( - perftools::gputools::StreamExecutor* executor, const Shape& shape, - const perftools::gputools::DeviceMemoryBase& source) { + se::StreamExecutor* executor, const Shape& shape, + const se::DeviceMemoryBase& source) { TF_RET_CHECK(ShapeUtil::Equal(HostShapeToDeviceShape(shape), shape)) << "Shape " << ShapeUtil::HumanString(shape) << " has a differently shaped representation on-device: " @@ -112,8 +108,7 @@ StatusOr> TransferManager::TransferArrayFromDevice( } Status TransferManager::WriteTupleIndexTables( - perftools::gputools::StreamExecutor* executor, - const ShapedBuffer& device_buffer) { + se::StreamExecutor* executor, const ShapedBuffer& device_buffer) { VLOG(2) << "Writing tuple index tables for " << device_buffer; TF_RET_CHECK(executor->device_ordinal() == device_buffer.device_ordinal()); @@ -180,7 +175,7 @@ Status TransferManager::TransferBufferToDevice( return Status::OK(); } -StatusOr> TransferManager::AllocateShapedBuffer( +StatusOr TransferManager::AllocateShapedBuffer( const Shape& 
on_host_shape, DeviceMemoryAllocator* allocator, int device_ordinal) { if (!LayoutUtil::HasLayout(on_host_shape)) { @@ -192,31 +187,30 @@ StatusOr> TransferManager::AllocateShapedBuffer( const Shape on_device_shape = HostShapeToDeviceShape(on_host_shape); TF_RET_CHECK(LayoutUtil::HasLayout(on_device_shape)); - auto shaped_buffer = WrapUnique(new ShapedBuffer( - on_host_shape, on_device_shape, allocator->platform(), device_ordinal)); + ShapedBuffer shaped_buffer(on_host_shape, on_device_shape, + allocator->platform(), device_ordinal); // Allocate an appropriate sized buffer for each element in the shape // including the tuple pointer arrays. - for (auto& pair : shaped_buffer->buffers()) { + for (auto& pair : shaped_buffer.buffers()) { const ShapeIndex& index = pair.first; se::DeviceMemoryBase& memory_base = pair.second; const Shape& subshape = ShapeUtil::GetSubshape(on_device_shape, index); TF_ASSIGN_OR_RETURN(memory_base, - allocator->Allocate(shaped_buffer->device_ordinal(), + allocator->Allocate(shaped_buffer.device_ordinal(), GetByteSizeRequirement(subshape))); } return std::move(shaped_buffer); } -StatusOr> -TransferManager::AllocateScopedShapedBuffer(const Shape& on_host_shape, - DeviceMemoryAllocator* allocator, - int device_ordinal) { +StatusOr TransferManager::AllocateScopedShapedBuffer( + const Shape& on_host_shape, DeviceMemoryAllocator* allocator, + int device_ordinal) { TF_ASSIGN_OR_RETURN( - std::unique_ptr unscoped_buffer, + ShapedBuffer unscoped_buffer, AllocateShapedBuffer(on_host_shape, allocator, device_ordinal)); - return ScopedShapedBuffer::MakeScoped(unscoped_buffer.get(), allocator); + return ScopedShapedBuffer(std::move(unscoped_buffer), allocator); } } // namespace xla diff --git a/tensorflow/compiler/xla/service/transfer_manager.h b/tensorflow/compiler/xla/service/transfer_manager.h index 9f2b5c4aecf0b52f610171e0c2755de577b2bd9e..a6451c4bb1104cc865c010463bbfd9107f6c4f9b 100644 --- a/tensorflow/compiler/xla/service/transfer_manager.h +++ 
b/tensorflow/compiler/xla/service/transfer_manager.h @@ -42,7 +42,7 @@ class TransferManager { virtual ~TransferManager() {} // Returns the ID of the platform that this transfer manager acts on. - virtual perftools::gputools::Platform::Id PlatformId() const = 0; + virtual se::Platform::Id PlatformId() const = 0; // Returns the shape of the on-device representation for the given shape on // the host. This is intended for use with ShapedBuffer where buffers are @@ -58,48 +58,45 @@ class TransferManager { // DeviceShape(literal_shape) must be compatible, but need not have the same // layout. virtual StatusOr> TransferLiteralFromDevice( - perftools::gputools::StreamExecutor* executor, - const ShapedBuffer& device_buffer) = 0; + se::StreamExecutor* executor, const ShapedBuffer& device_buffer) = 0; // Transfers the given literal into the previously allocated device memory // represented by the given ShapedBuffer using the given executor. The shape // of the ShapedBuffer and DeviceShape(literal.shape()) must be compatible, // but need not have the same layout - virtual Status TransferLiteralToDevice( - perftools::gputools::StreamExecutor* executor, const Literal& literal, - const ShapedBuffer& device_buffer) = 0; + virtual Status TransferLiteralToDevice(se::StreamExecutor* executor, + const Literal& literal, + const ShapedBuffer& device_buffer) = 0; // Convenience methods for transferring an array to or from the device at a // known address. This avoids having to construct a ShapedBuffer just to // transfer an array at a known address. 
- Status TransferArrayToDevice( - perftools::gputools::StreamExecutor* executor, const Literal& literal, - const perftools::gputools::DeviceMemoryBase& dest); + Status TransferArrayToDevice(se::StreamExecutor* executor, + const Literal& literal, + const se::DeviceMemoryBase& dest); StatusOr> TransferArrayFromDevice( - perftools::gputools::StreamExecutor* executor, const Shape& shape, - const perftools::gputools::DeviceMemoryBase& source); + se::StreamExecutor* executor, const Shape& shape, + const se::DeviceMemoryBase& source); // Transfers the given literal into the Infeed interface of the device, // using the given executor. - virtual Status TransferLiteralToInfeed( - perftools::gputools::StreamExecutor* executor, - const Literal& literal) = 0; + virtual Status TransferLiteralToInfeed(se::StreamExecutor* executor, + const Literal& literal) = 0; // Transfers the given literal from the Outfeed interface of the device, // using the given executor. - virtual Status TransferLiteralFromOutfeed( - perftools::gputools::StreamExecutor* executor, const Shape& literal_shape, - Literal* literal) = 0; + virtual Status TransferLiteralFromOutfeed(se::StreamExecutor* executor, + const Shape& literal_shape, + Literal* literal) = 0; // Resets the devices associated with this transfer manager. virtual Status ResetDevices( - tensorflow::gtl::ArraySlice - executor) = 0; + tensorflow::gtl::ArraySlice executor) = 0; // Given an allocated ShapedBuffer, constructs the tuple index table(s) in // each buffer of the given ShapedBuffer corresponding to tuple shapes. If the // ShapedBuffer is array-shaped this method does nothing. 
- Status WriteTupleIndexTables(perftools::gputools::StreamExecutor* executor, + Status WriteTupleIndexTables(se::StreamExecutor* executor, const ShapedBuffer& device_buffer); // Determines the byte size requirement for the given shape on the underlying @@ -110,10 +107,10 @@ class TransferManager { // Allocate a ShapedBuffer which can hold data with the given on-host // shape. The on-device shape may be different as indicated by // HostShapeToDeviceShape. - StatusOr> AllocateShapedBuffer( - const Shape& on_host_shape, DeviceMemoryAllocator* allocator, - int device_ordinal); - StatusOr> AllocateScopedShapedBuffer( + StatusOr AllocateShapedBuffer(const Shape& on_host_shape, + DeviceMemoryAllocator* allocator, + int device_ordinal); + StatusOr AllocateScopedShapedBuffer( const Shape& on_host_shape, DeviceMemoryAllocator* allocator, int device_ordinal); @@ -127,13 +124,13 @@ class TransferManager { // Precondition: a platform kind must not be registered more than once. typedef std::unique_ptr (*TransferManagerCreationFunction)(); static void RegisterTransferManager( - perftools::gputools::Platform::Id platform_id, + se::Platform::Id platform_id, TransferManagerCreationFunction transfer_manager); // Returns the transfer manager singleton pointer if it is available for the // given platform, or an error status if it is not. static StatusOr GetForPlatform( - const perftools::gputools::Platform* platform); + const se::Platform* platform); protected: // Transfer a memory block of the given size from 'source' buffer to the @@ -143,35 +140,32 @@ class TransferManager { // // source is the source data that must be in the target-dependent layout that // the Infeed HLO used in the computation expects. 
- virtual Status TransferBufferToInfeed( - perftools::gputools::StreamExecutor* executor, int64 size, - const void* source) = 0; + virtual Status TransferBufferToInfeed(se::StreamExecutor* executor, + int64 size, const void* source) = 0; // Transfer a memory block of the given size from the device source into the // 'destination' buffer. // // size is the size to transfer to destination in bytes. - virtual Status TransferBufferFromDevice( - perftools::gputools::StreamExecutor* executor, - const perftools::gputools::DeviceMemoryBase& source, int64 size, - void* destination); + virtual Status TransferBufferFromDevice(se::StreamExecutor* executor, + const se::DeviceMemoryBase& source, + int64 size, void* destination); // Transfer a memory block of the given size from 'source' buffer to the given // destination of the device. // // size is the size to transfer from source in bytes. - virtual Status TransferBufferToDevice( - perftools::gputools::StreamExecutor* executor, int64 size, - const void* source, perftools::gputools::DeviceMemoryBase* destination); + virtual Status TransferBufferToDevice(se::StreamExecutor* executor, + int64 size, const void* source, + se::DeviceMemoryBase* destination); // Writes the given device-memory pointers in 'elements' to the given region // to construct a tuple index table in the platform-specific tuple // representation. virtual Status WriteSingleTupleIndexTable( - perftools::gputools::StreamExecutor* executor, - tensorflow::gtl::ArraySlice - elements, - const Shape& shape, perftools::gputools::DeviceMemoryBase* region) = 0; + se::StreamExecutor* executor, + tensorflow::gtl::ArraySlice elements, + const Shape& shape, se::DeviceMemoryBase* region) = 0; private: // The mutex that guards the platform-to-transfer manager map. @@ -186,8 +180,7 @@ class TransferManager { }; // Map from platform kind to transfer manager singleton. 
- static std::map* - GetPlatformTransferManagers(); + static std::map* GetPlatformTransferManagers(); }; } // namespace xla diff --git a/tensorflow/compiler/xla/service/transpose_folding.cc b/tensorflow/compiler/xla/service/transpose_folding.cc index 83185ac49e9b7c386d10d1cbc4e20dcdfdfd6cae..3efd38ce0daa3e3f3398b32463019df6cd10a009 100644 --- a/tensorflow/compiler/xla/service/transpose_folding.cc +++ b/tensorflow/compiler/xla/service/transpose_folding.cc @@ -159,6 +159,7 @@ bool FoldTransposeIntoConvolution(InstructionOperandsPair pair) { auto new_conv = HloInstruction::CreateConvolve( convolution.shape(), new_lhs, new_rhs, convolution.window(), new_dnums); + convolution.SetupDerivedInstruction(new_conv.get()); TF_CHECK_OK(convolution.parent()->ReplaceWithNewInstruction( &convolution, std::move(new_conv))); diff --git a/tensorflow/compiler/xla/service/tuple_simplifier.cc b/tensorflow/compiler/xla/service/tuple_simplifier.cc index 113c2e2bd9f73a2b0c783103d7f2da9534bc97c3..d668855084a884518b338cdf396a9330b9f43a2b 100644 --- a/tensorflow/compiler/xla/service/tuple_simplifier.cc +++ b/tensorflow/compiler/xla/service/tuple_simplifier.cc @@ -69,6 +69,7 @@ StatusOr TupleSimplifier::Run(HloModule* module) { // Tuple // HloInstruction* top_tuple = nullptr; + HloInstruction* first_gte = nullptr; bool can_simplify = true; for (int64 operand_number = 0; operand_number < instruction->operand_count(); ++operand_number) { @@ -78,11 +79,17 @@ StatusOr TupleSimplifier::Run(HloModule* module) { can_simplify = false; break; } - + if (first_gte == nullptr) { + first_gte = operand; + } else if (!first_gte->has_compatible_sharding(operand)) { + can_simplify = false; + break; + } if (top_tuple == nullptr) { top_tuple = operand->mutable_operand(0); if (!ShapeUtil::Compatible(top_tuple->shape(), - instruction->shape())) { + instruction->shape()) || + !instruction->has_compatible_sharding(top_tuple)) { can_simplify = false; break; } @@ -108,15 +115,17 @@ StatusOr 
TupleSimplifier::Run(HloModule* module) { // | // GTE if (instruction->operand(0)->opcode() == HloOpcode::kTuple) { - changed = true; HloInstruction* element_source = instruction->mutable_operand(0)->mutable_operand( instruction->tuple_index()); - TF_RETURN_IF_ERROR(instruction->ReplaceAllUsesWith(element_source)); - for (HloInstruction* user : element_source->users()) { - if (user->opcode() == HloOpcode::kTuple || - user->opcode() == HloOpcode::kGetTupleElement) { - worklist.push(user); + if (instruction->has_compatible_sharding(element_source)) { + changed = true; + TF_RETURN_IF_ERROR(instruction->ReplaceAllUsesWith(element_source)); + for (HloInstruction* user : element_source->users()) { + if (user->opcode() == HloOpcode::kTuple || + user->opcode() == HloOpcode::kGetTupleElement) { + worklist.push(user); + } } } } diff --git a/tensorflow/compiler/xla/service/user_computation.cc b/tensorflow/compiler/xla/service/user_computation.cc index fcdb2e01fb0984e44a82a6991e2ac63249f4c073..0f16a592b68e20f5dbd1e4655ad5720ecce5a7bd 100644 --- a/tensorflow/compiler/xla/service/user_computation.cc +++ b/tensorflow/compiler/xla/service/user_computation.cc @@ -49,6 +49,8 @@ HloOpcode UnaryOperationToHloOpcode(UnaryOperation unop) { return HloOpcode::kAbs; case UNOP_CEIL: return HloOpcode::kCeil; + case UNOP_CLZ: + return HloOpcode::kClz; case UNOP_COS: return HloOpcode::kCos; case UNOP_EXP: @@ -3491,7 +3493,6 @@ void ComputationLowerer::Visit( HloInstruction* operand = lookup_instruction(trace_request.operand()); hlo_instruction = add_instruction( HloInstruction::CreateTrace(trace_request.tag(), operand)); - operand->set_tracing(hlo_instruction); break; } diff --git a/tensorflow/compiler/xla/service_interface.h b/tensorflow/compiler/xla/service_interface.h index 32aae64973dbd7ac2f8d403d8fbd155d432642f9..5b44c26b7c7b082556d9533cf3b3b1b98e5e4b09 100644 --- a/tensorflow/compiler/xla/service_interface.h +++ b/tensorflow/compiler/xla/service_interface.h @@ -112,6 +112,10 @@ class 
ServiceInterface { virtual tensorflow::Status ComputeConstant( const ComputeConstantRequest* arg, ComputeConstantResponse* result) = 0; + virtual tensorflow::Status ComputeConstantGraph( + const ComputeConstantGraphRequest* arg, + ComputeConstantResponse* result) = 0; + // Methods used by Computation. virtual tensorflow::Status SnapshotComputation( const SnapshotComputationRequest* ag, diff --git a/tensorflow/compiler/xla/shape_util.cc b/tensorflow/compiler/xla/shape_util.cc index 6825d2476587d037aace043230168f78f4e46344..ac7e201bfdceabdd0f11db61bbb3b460017401ca 100644 --- a/tensorflow/compiler/xla/shape_util.cc +++ b/tensorflow/compiler/xla/shape_util.cc @@ -824,6 +824,18 @@ StatusOr ParseShapeStringInternal(tensorflow::StringPiece* s) { return new_shape; } +/* static */ bool ShapeUtil::IndexIsValid(const Shape& shape, + ShapeIndexView index) { + const Shape* subshape = &shape; + for (auto i : index) { + if (!IsTuple(*subshape) || i >= subshape->tuple_shapes_size()) { + return false; + } + subshape = &subshape->tuple_shapes(i); + } + return true; +} + /* static */ const Shape& ShapeUtil::GetSubshape(const Shape& shape, ShapeIndexView index) { const Shape* return_shape = &shape; diff --git a/tensorflow/compiler/xla/shape_util.h b/tensorflow/compiler/xla/shape_util.h index 3e130a02e2ce853ee157e46afb9760f5ff5a5026..63da9154cfc1a5e7e8c0eeaa103d27096540fefe 100644 --- a/tensorflow/compiler/xla/shape_util.h +++ b/tensorflow/compiler/xla/shape_util.h @@ -28,8 +28,10 @@ limitations under the License. 
#include "tensorflow/compiler/xla/statusor.h" #include "tensorflow/compiler/xla/types.h" #include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/lib/core/threadpool.h" #include "tensorflow/core/lib/gtl/array_slice.h" #include "tensorflow/core/lib/gtl/optional.h" +#include "tensorflow/core/platform/env.h" #include "tensorflow/core/platform/macros.h" #include "tensorflow/core/platform/types.h" @@ -317,6 +319,11 @@ class ShapeUtil { // Returns an empty tuple shape. Can be used to indicate side-effects. static Shape MakeNil() { return MakeTupleShape({}); } + // Checks whether the shape is initialized. + static bool IsInitialized(const Shape& shape) { + return shape.element_type() != PRIMITIVE_TYPE_INVALID; + } + // Constructs a new shape with the given element type and sequence of // dimensions. static Shape MakeShape(PrimitiveType element_type, @@ -441,6 +448,9 @@ class ShapeUtil { static bool ShapeIs(const Shape& shape, PrimitiveType element_type, std::initializer_list dimensions); + // Returns true if the given shape has a subshape at the given index. + static bool IndexIsValid(const Shape& shape, ShapeIndexView index); + // GetSubshape and GetMutableSubshape return a particular nested Shape within // the given Shape argument. static const Shape& GetSubshape(const Shape& shape, ShapeIndexView index); @@ -583,34 +593,7 @@ class ShapeUtil { tensorflow::gtl::ArraySlice count, tensorflow::gtl::ArraySlice incr, const FnType& visitor_function) { - if (ShapeUtil::HasZeroElements(shape)) { - return Status::OK(); - } - CHECK_EQ(Rank(shape), base.size()); - CHECK_EQ(incr.size(), base.size()); - CHECK_EQ(count.size(), base.size()); - const int64 rank = LayoutUtil::MinorToMajor(shape).size(); - // Allows handling R0 arrays, such that the visitor function will be called - // once with the proper empty indexes. 
- int64 n = -1; - std::vector indexes(base.begin(), base.end()); - while (n < rank) { - TF_ASSIGN_OR_RETURN(bool should_continue, visitor_function(indexes)); - if (!should_continue) { - break; - } - // Increments dimensions in minor to major order. - for (n = 0; n < rank; ++n) { - int64 dim = LayoutUtil::Minor(shape.layout(), n); - indexes[dim] += incr[dim]; - if (indexes[dim] < base[dim] + count[dim]) { - break; - } - indexes[dim] = base[dim]; - } - } - - return Status::OK(); + return ForEachIndexInternal(shape, base, count, incr, visitor_function); } // Simple ergonomic wrapper around ShapeUtil::ForEachIndexWithStatus. @@ -642,11 +625,83 @@ class ShapeUtil { .IgnoreError(); } + // A parallel version of ForEachIndex(WithStatus). This can only be used if + // the visitor_function is thread-safe and the order of iteration does not + // matter. + // + // visitor_function must be a callable of type + // void(ArraySlice) or compatible. + template + static void ForEachIndexParallel(const Shape& shape, + tensorflow::gtl::ArraySlice base, + tensorflow::gtl::ArraySlice count, + tensorflow::gtl::ArraySlice incr, + const FnType& visitor_function) { + // The parallel version of ForEachIndexInternal can never fail. + CHECK(ForEachIndexInternal( + shape, base, count, incr, + [&visitor_function](tensorflow::gtl::ArraySlice indexes) + -> StatusOr { + visitor_function(indexes); + return true; + }, + /*parallel=*/true) + .ok()); + } + private: // Validates all of the non-layout properties of the shape -- this is a helper // used by both the layout-optional and layout-required public method. 
static Status ValidateShapeWithOptionalLayoutInternal(const Shape& shape); + template + static Status ForEachIndexInternal(const Shape& shape, + tensorflow::gtl::ArraySlice base, + tensorflow::gtl::ArraySlice count, + tensorflow::gtl::ArraySlice incr, + const FnType& visitor_function, + bool parallel = false) { + if (ShapeUtil::HasZeroElements(shape)) { + return Status::OK(); + } + CHECK_EQ(Rank(shape), base.size()); + CHECK_EQ(incr.size(), base.size()); + CHECK_EQ(count.size(), base.size()); + const int64 rank = LayoutUtil::MinorToMajor(shape).size(); + // Allows handling R0 arrays, such that the visitor function will be called + // once with the proper empty indexes. + int64 n = -1; + std::vector indexes(base.begin(), base.end()); + const int kNumThreads = tensorflow::port::NumSchedulableCPUs(); + tensorflow::gtl::optional pool; + if (parallel) { + pool.emplace(tensorflow::Env::Default(), "foreach", kNumThreads); + } + + while (n < rank) { + if (pool != tensorflow::gtl::nullopt) { + pool->Schedule( + [indexes, &visitor_function] { visitor_function(indexes); }); + } else { + TF_ASSIGN_OR_RETURN(bool should_continue, visitor_function(indexes)); + if (!should_continue) { + break; + } + } + // Increments dimensions in minor to major order. 
+ for (n = 0; n < rank; ++n) { + int64 dim = LayoutUtil::Minor(shape.layout(), n); + indexes[dim] += incr[dim]; + if (indexes[dim] < base[dim] + count[dim]) { + break; + } + indexes[dim] = base[dim]; + } + } + + return Status::OK(); + } + TF_DISALLOW_COPY_AND_ASSIGN(ShapeUtil); }; diff --git a/tensorflow/compiler/xla/shape_util_test.cc b/tensorflow/compiler/xla/shape_util_test.cc index 424cfe37ea44d64884e08695fd1f49ca1970ca62..13582a2a2678548dfc8e9c329dfb6def9d51fc9d 100644 --- a/tensorflow/compiler/xla/shape_util_test.cc +++ b/tensorflow/compiler/xla/shape_util_test.cc @@ -624,6 +624,24 @@ TEST(ShapeUtilTest, ForEachIndexWithStatus) { EXPECT_EQ(invocations, 5); } +TEST(ShapeUtilTest, ForEachIndexParallel) { + Shape shape = ShapeUtil::MakeShape(F32, {10, 10}); + int64 output[10][10]; + int init = 5; + auto set_func = [&](tensorflow::gtl::ArraySlice indexes) { + output[indexes[0]][indexes[1]] = init + indexes[0] + indexes[1]; + }; + + ShapeUtil::ForEachIndexParallel(shape, /*base=*/{0, 0}, /*count=*/{10, 10}, + /*incr=*/{1, 1}, set_func); + + for (int i = 0; i < 10; ++i) { + for (int j = 0; j < 10; ++j) { + EXPECT_EQ(output[i][j], init + i + j); + } + } +} + TEST(ShapeUtilTest, DimensionsUnmodifiedByReshape_1x1x1x1_to_1x1x1) { // All output dimensions should be unmodified. One of the input dimensions is // modified because the input rank is larger by one. 
diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD index e337669aeb6f9a3bac90319ee7fc7626a3fa9d2a..1f90a44d8ba725c1bc7d23b581161f8915ff74fd 100644 --- a/tensorflow/compiler/xla/tests/BUILD +++ b/tensorflow/compiler/xla/tests/BUILD @@ -191,6 +191,8 @@ cc_library( "//tensorflow/compiler/xla/client:global_data", "//tensorflow/compiler/xla/client:local_client", "//tensorflow/compiler/xla/client/xla_client:xla_builder", + "//tensorflow/compiler/xla/service:interpreter_plugin", # reference backend + "//tensorflow/compiler/xla/service:platform_util", "//tensorflow/compiler/xla/tests:literal_test_util", "//tensorflow/compiler/xla/tests:test_utils", "//tensorflow/core:lib", @@ -347,10 +349,10 @@ xla_test( "//tensorflow/compiler/xla:statusor", "//tensorflow/compiler/xla:xla_data_proto", "//tensorflow/compiler/xla/client:client_library", - "//tensorflow/compiler/xla/client:computation", - "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client:local_client", "//tensorflow/compiler/xla/client/lib:arithmetic", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_computation", "//tensorflow/compiler/xla/service:platform_util", "//tensorflow/compiler/xla/tests:client_library_test_base", "//tensorflow/compiler/xla/tests:literal_test_util", @@ -413,6 +415,8 @@ xla_test( "//tensorflow/compiler/xla/client:global_data", "//tensorflow/compiler/xla/client:local_client", "//tensorflow/compiler/xla/client/lib:arithmetic", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_computation", "//tensorflow/compiler/xla/tests:client_library_test_base", "//tensorflow/compiler/xla/tests:literal_test_util", "//tensorflow/compiler/xla/tests:test_utils", @@ -488,9 +492,10 @@ xla_test( tags = ["enable_for_xla_interpreter"], deps = [ "//tensorflow/compiler/xla:xla_data_proto", - 
"//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client:global_data", "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_computation", "//tensorflow/compiler/xla/tests:client_library_test_base", "//tensorflow/compiler/xla/tests:literal_test_util", "//tensorflow/compiler/xla/tests:xla_internal_test_main", @@ -639,9 +644,9 @@ xla_test( "//tensorflow/compiler/xla:test", "//tensorflow/compiler/xla:types", "//tensorflow/compiler/xla:xla_data_proto", - "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client:global_data", "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", "//tensorflow/compiler/xla/service:reduce_precision_insertion", "//tensorflow/compiler/xla/tests:client_library_test_base", "//tensorflow/compiler/xla/tests:literal_test_util", @@ -777,10 +782,10 @@ xla_test( "//tensorflow/compiler/xla:statusor", "//tensorflow/compiler/xla:util", "//tensorflow/compiler/xla:xla_data_proto", - "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client:global_data", "//tensorflow/compiler/xla/client:local_client", "//tensorflow/compiler/xla/client:padding", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", "//tensorflow/compiler/xla/tests:client_library_test_base", "//tensorflow/compiler/xla/tests:literal_test_util", "//tensorflow/compiler/xla/tests:xla_internal_test_main", @@ -856,11 +861,11 @@ xla_test( "//tensorflow/compiler/xla:test_helpers", "//tensorflow/compiler/xla:util", "//tensorflow/compiler/xla:xla_data_proto", - "//tensorflow/compiler/xla/client:computation", - "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client:global_data", "//tensorflow/compiler/xla/client:local_client", "//tensorflow/compiler/xla/client/lib:arithmetic", + 
"//tensorflow/compiler/xla/client/xla_client:xla_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_computation", "//tensorflow/compiler/xla/service:hlo", "//tensorflow/compiler/xla/tests:client_library_test_base", "//tensorflow/compiler/xla/tests:hlo_test_base", @@ -937,8 +942,8 @@ xla_test( deps = [ "//tensorflow/compiler/xla:array2d", "//tensorflow/compiler/xla:reference_util", - "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", "//tensorflow/compiler/xla/tests:client_library_test_base", "//tensorflow/compiler/xla/tests:literal_test_util", "//tensorflow/compiler/xla/tests:xla_internal_test_main", @@ -977,9 +982,8 @@ xla_test( "//tensorflow/compiler/xla:reference_util", "//tensorflow/compiler/xla:test_helpers", "//tensorflow/compiler/xla/client:client_library", - "//tensorflow/compiler/xla/client:computation", - "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", "//tensorflow/compiler/xla/service:computation_placer", "//tensorflow/compiler/xla/service:device_memory_allocator", "//tensorflow/compiler/xla/service:local_service", @@ -1064,6 +1068,8 @@ xla_test( "//tensorflow/compiler/xla/client:global_data", "//tensorflow/compiler/xla/client:local_client", "//tensorflow/compiler/xla/client/lib:arithmetic", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_computation", "//tensorflow/compiler/xla/tests:client_library_test_base", "//tensorflow/compiler/xla/tests:literal_test_util", "//tensorflow/compiler/xla/tests:xla_internal_test_main", @@ -1086,10 +1092,11 @@ xla_test_library( "//tensorflow/compiler/xla:reference_util", "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla:xla_data_proto", - "//tensorflow/compiler/xla/client:computation_builder", 
"//tensorflow/compiler/xla/client:local_client", "//tensorflow/compiler/xla/client:padding", "//tensorflow/compiler/xla/client/lib:arithmetic", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_computation", "//tensorflow/compiler/xla/tests:client_library_test_base", "//tensorflow/compiler/xla/tests:hlo_test_base", "//tensorflow/compiler/xla/tests:literal_test_util", @@ -1126,11 +1133,11 @@ xla_test( "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla:status_macros", "//tensorflow/compiler/xla:xla_data_proto", - "//tensorflow/compiler/xla/client:computation", - "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client:local_client", "//tensorflow/compiler/xla/client:padding", "//tensorflow/compiler/xla/client/lib:arithmetic", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_computation", "//tensorflow/compiler/xla/tests:client_library_test_base", "//tensorflow/compiler/xla/tests:literal_test_util", "//tensorflow/compiler/xla/tests:xla_internal_test_main", @@ -1267,9 +1274,9 @@ xla_test( "//tensorflow/compiler/xla:reference_util", "//tensorflow/compiler/xla:util", "//tensorflow/compiler/xla:xla_data_proto", - "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client:local_client", "//tensorflow/compiler/xla/client/lib:arithmetic", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", "//tensorflow/compiler/xla/tests:client_library_test_base", "//tensorflow/compiler/xla/tests:literal_test_util", "//tensorflow/compiler/xla/tests:xla_internal_test_main", @@ -1373,11 +1380,10 @@ xla_test( "//tensorflow/compiler/xla:test", "//tensorflow/compiler/xla:test_helpers", "//tensorflow/compiler/xla:xla_data_proto", - "//tensorflow/compiler/xla/client:computation", - "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client:global_data", 
"//tensorflow/compiler/xla/client:local_client", "//tensorflow/compiler/xla/client/xla_client:xla_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_computation", "//tensorflow/compiler/xla/tests:client_library_test_base", "//tensorflow/compiler/xla/tests:literal_test_util", "//tensorflow/compiler/xla/tests:xla_internal_test_main", @@ -1395,8 +1401,8 @@ xla_test( deps = [ "//tensorflow/compiler/xla:array2d", "//tensorflow/compiler/xla:array4d", - "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", "//tensorflow/compiler/xla/tests:client_library_test_base", "//tensorflow/compiler/xla/tests:literal_test_util", "//tensorflow/compiler/xla/tests:xla_internal_test_main", @@ -1444,9 +1450,9 @@ xla_test( "//tensorflow/compiler/xla:statusor", "//tensorflow/compiler/xla:test", "//tensorflow/compiler/xla:test_helpers", - "//tensorflow/compiler/xla/client:computation", - "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_computation", "//tensorflow/compiler/xla/tests:client_library_test_base", "//tensorflow/compiler/xla/tests:literal_test_util", "//tensorflow/compiler/xla/tests:xla_internal_test_main", @@ -1478,11 +1484,14 @@ xla_test( xla_test( name = "bitcast_convert_test", srcs = ["bitcast_convert_test.cc"], + tags = [ + "enable_for_xla_interpreter", + ], deps = [ "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla:xla_data_proto", - "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", "//tensorflow/compiler/xla/tests:client_library_test_base", "//tensorflow/compiler/xla/tests:literal_test_util", "//tensorflow/compiler/xla/tests:xla_internal_test_main", @@ -1546,6 
+1555,8 @@ xla_test( "//tensorflow/compiler/xla/client:computation", "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client:global_data", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_computation", "//tensorflow/compiler/xla/tests:literal_test_util", "//tensorflow/compiler/xla/tests:test_utils", "//tensorflow/compiler/xla/tests:xla_internal_test_main", @@ -1566,6 +1577,8 @@ xla_test( "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client:global_data", "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_computation", "//tensorflow/compiler/xla/tests:client_library_test_base", "//tensorflow/compiler/xla/tests:literal_test_util", "//tensorflow/compiler/xla/tests:test_utils", @@ -1960,3 +1973,16 @@ tf_cc_test( "//tensorflow/core:test", ], ) + +xla_test( + name = "test_utils_test", + srcs = ["test_utils_test.cc"], + deps = [ + ":local_client_test_base", + ":test_utils", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/compiler/xla/tests:xla_internal_test_main", + "//tensorflow/core:test", + ], +) diff --git a/tensorflow/compiler/xla/tests/array_elementwise_ops_test.cc b/tensorflow/compiler/xla/tests/array_elementwise_ops_test.cc index 03c91745b978f80801e0da5ac44d31959659b20c..4b4dc6dd9d3dddd105c63c7f3ce039bafca65dd7 100644 --- a/tensorflow/compiler/xla/tests/array_elementwise_ops_test.cc +++ b/tensorflow/compiler/xla/tests/array_elementwise_ops_test.cc @@ -2217,6 +2217,15 @@ XLA_TEST_F(ArrayElementwiseOpTest, LogF32sVector) { error_spec_); } +XLA_TEST_F(ArrayElementwiseOpTest, ClzU32s) { + XlaBuilder builder(TestName()); + auto a = builder.ConstantR1( + {0, 1, 0x10, 0x10000, 0x700000, 0x12345678, 0xF2345678}); + builder.Clz(a); + + ComputeAndCompareR1(&builder, 
{32, 31, 27, 15, 9, 3, 0}, {}); +} + XLA_TEST_F(ArrayElementwiseOpTest, AddChainFoldLeft) { // a ------ (add) --------- (add) // / / diff --git a/tensorflow/compiler/xla/tests/batch_normalization_test.cc b/tensorflow/compiler/xla/tests/batch_normalization_test.cc index af8af99c791e2a40cfcfa2291b786b33e5652267..f3dac75a44b948c4b45b80b93e7462073010979e 100644 --- a/tensorflow/compiler/xla/tests/batch_normalization_test.cc +++ b/tensorflow/compiler/xla/tests/batch_normalization_test.cc @@ -19,10 +19,10 @@ limitations under the License. #include "tensorflow/compiler/xla/array2d.h" #include "tensorflow/compiler/xla/array4d.h" -#include "tensorflow/compiler/xla/client/computation.h" -#include "tensorflow/compiler/xla/client/computation_builder.h" #include "tensorflow/compiler/xla/client/lib/arithmetic.h" #include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_computation.h" #include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/reference_util.h" #include "tensorflow/compiler/xla/service/hlo_computation.h" @@ -69,14 +69,12 @@ class BatchNormalizationTest CHECK_EQ(kY, input_array_.width()); } - ComputationDataHandle CheckShape(ComputationBuilder* b, - const ComputationDataHandle& operand, - const Shape& expected_shape) const { - std::unique_ptr actual_shape = - b->GetShape(operand).ConsumeValueOrDie(); - CHECK(ShapeUtil::Equal(expected_shape, *actual_shape)) + XlaOp CheckShape(XlaBuilder* b, const XlaOp& operand, + const Shape& expected_shape) const { + Shape actual_shape = b->GetShape(operand).ConsumeValueOrDie(); + CHECK(ShapeUtil::Equal(expected_shape, actual_shape)) << "want " << ShapeUtil::HumanString(expected_shape) << " got " - << ShapeUtil::HumanString(*actual_shape); + << ShapeUtil::HumanString(actual_shape); return operand; } @@ -102,7 +100,7 @@ INSTANTIATE_TEST_CASE_P(BatchNormalizationTestInstance, 
BatchNormalizationTest, #endif XLA_TEST_P(BatchNormalizationTest, SubtractInZ) { - ComputationBuilder builder(client_, "subtract_in_z_one_sample"); + XlaBuilder builder("subtract_in_z_one_sample"); auto x = builder.ConstantLiteral(input_literal_); auto y = builder.ConstantR1({3.14, 4.25}); builder.Sub(x, y, /*broadcast_dimensions=*/{1}); @@ -118,7 +116,7 @@ XLA_TEST_P(BatchNormalizationTest, SubtractInZ) { } XLA_TEST_P(BatchNormalizationTest, SquareTesseractElementwise) { - ComputationBuilder builder(client_, "square_tesseract_elementwise"); + XlaBuilder builder("square_tesseract_elementwise"); auto x = builder.ConstantLiteral(input_literal_); builder.SquareF32(x); @@ -135,9 +133,9 @@ XLA_TEST_P(BatchNormalizationTest, SquareTesseractElementwise) { } XLA_TEST_P(BatchNormalizationTest, SumToZ) { - ComputationBuilder builder(client_, "sum_to_z"); + XlaBuilder builder("sum_to_z"); auto input_activations = builder.ConstantLiteral(input_literal_); - Computation add = CreateScalarAddComputation(F32, &builder); + XlaComputation add = CreateScalarAddComputation(F32, &builder); // Reduce all but the Z dimension. 
builder.Reduce(input_activations, builder.ConstantR0(0.0f), add, {0, 2, 3}); @@ -147,24 +145,23 @@ XLA_TEST_P(BatchNormalizationTest, SumToZ) { } XLA_TEST_P(BatchNormalizationTest, SquareAndReduce) { - ComputationBuilder builder(client_, "square_and_reduce"); + XlaBuilder builder("square_and_reduce"); auto input_activations = builder.ConstantLiteral(input_literal_); auto set_means = builder.ConstantR1({2.f, 4.2f}); auto activation_deviations = builder.Sub(input_activations, set_means, /*broadcast_dimensions=*/{1}); - Computation add = CreateScalarAddComputation(F32, &builder); + XlaComputation add = CreateScalarAddComputation(F32, &builder); auto dev_squares = builder.SquareF32(activation_deviations); - auto sum_of_squares = builder.Reduce( - dev_squares, builder.ConstantR0(0.0f), add, {0, 2, 3}); + builder.Reduce(dev_squares, builder.ConstantR0(0.0f), add, {0, 2, 3}); std::vector expected = {18, 0.06}; ComputeAndCompareR1(&builder, expected, {}, error_spec_); } XLA_TEST_P(BatchNormalizationTest, VarianceToStddev) { - ComputationBuilder builder(client_, "variance_to_stddev"); + XlaBuilder builder("variance_to_stddev"); auto variance = builder.ConstantR1({6.f, .02f}); - auto sqrt = builder.SqrtF32(variance); + builder.SqrtF32(variance); std::vector expected = {2.44948974f, 0.14142136f}; ComputeAndCompareR1(&builder, expected, {}, error_spec_); @@ -173,13 +170,13 @@ XLA_TEST_P(BatchNormalizationTest, VarianceToStddev) { // Compare against a forward batch normalization example in the NN spec // reference. 
XLA_TEST_P(BatchNormalizationTest, SpecComparisonForward) { - ComputationBuilder builder(client_, "batch_normalize_per_spec"); + XlaBuilder builder("batch_normalize_per_spec"); auto input_activations = CheckShape(&builder, builder.ConstantLiteral(input_literal_), ShapeUtil::MakeShape(F32, {3, 2, 1, 1})); auto gamma = builder.ConstantR1({1.0, 1.0}); auto beta = builder.ConstantR1({0.0, 0.0}); - Computation add = CreateScalarAddComputation(F32, &builder); + XlaComputation add = CreateScalarAddComputation(F32, &builder); // Reduce all dimensions except dimension 1. Shape TwoElementVectorF32 = ShapeUtil::MakeShape(F32, {2}); auto sum = CheckShape( @@ -189,8 +186,8 @@ XLA_TEST_P(BatchNormalizationTest, SpecComparisonForward) { TwoElementVectorF32); auto input_shape = builder.GetShape(input_activations).ConsumeValueOrDie(); auto sum_shape = builder.GetShape(sum).ConsumeValueOrDie(); - auto count = builder.ConstantR0(ShapeUtil::ElementsIn(*input_shape) / - ShapeUtil::ElementsIn(*sum_shape)); + auto count = builder.ConstantR0(ShapeUtil::ElementsIn(input_shape) / + ShapeUtil::ElementsIn(sum_shape)); auto set_means = builder.Div(sum, count); const float kEpsilon = 1e-9f; @@ -233,7 +230,7 @@ XLA_TEST_P(BatchNormalizationTest, SpecComparisonForward) { XLA_TEST_P(BatchNormalizationTest, BasicTraining) { const int kFeatureIndex = 3; - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto operand = builder.ConstantR4FromArray4D( {{{{1.f, 2.f}}, {{3.f, 4.f}}}, {{{5.f, 6.f}}, {{7.f, 8.f}}}}); @@ -242,8 +239,8 @@ XLA_TEST_P(BatchNormalizationTest, BasicTraining) { auto offset = builder.ConstantR1({1.0f, 2.0f}); - auto tuple = builder.BatchNormTraining(operand, scale, offset, - /*epsilon=*/0.001, kFeatureIndex); + builder.BatchNormTraining(operand, scale, offset, + /*epsilon=*/0.001, kFeatureIndex); auto expected = Literal::MakeTuple( {Literal::CreateR4({{{{-1.6f, -2.0f}}, {{0.1f, 0.6f}}}, @@ -257,7 +254,7 @@ XLA_TEST_P(BatchNormalizationTest, 
BasicTraining) { XLA_TEST_P(BatchNormalizationTest, BasicTrainingOnSublane) { const int kFeatureIndex = 2; - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto operand = builder.ConstantR4FromArray4D( {{{{1.f}, {2.f}}, {{3.f}, {4.f}}}, {{{5.f}, {6.f}}, {{7.f}, {8.f}}}}); @@ -266,8 +263,8 @@ XLA_TEST_P(BatchNormalizationTest, BasicTrainingOnSublane) { auto offset = builder.ConstantR1({1.0f, 2.0f}); - auto tuple = builder.BatchNormTraining(operand, scale, offset, - /*epsilon=*/0.001, kFeatureIndex); + builder.BatchNormTraining(operand, scale, offset, + /*epsilon=*/0.001, kFeatureIndex); auto expected = Literal::MakeTuple( {Literal::CreateR4({{{{-1.6f}, {-2.0f}}, {{0.1f}, {0.6f}}}, @@ -282,23 +279,23 @@ XLA_TEST_P(BatchNormalizationTest, BasicTrainingOnSublane) { XLA_TEST_P(BatchNormalizationTest, TrainingWithFeatureOnLowDimension) { // Use 0 dimension as feature, tests layout analyzer. const int kFeatureIndex = 0; - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); - ComputationDataHandle h0; + XlaOp h0; auto operand = CreateR3Parameter(Array3D(260, 2, 2, 1.0f), /*parameter_number=*/0, "operand", &builder, &h0); - ComputationDataHandle h1; + XlaOp h1; auto scale = CreateR1Parameter(std::vector(260, 1.0f), /*parameter_number=*/1, "scale", &builder, &h1); - ComputationDataHandle h2; + XlaOp h2; auto offset = CreateR1Parameter(std::vector(260, 1.0f), /*parameter_number=*/2, "offset", &builder, &h2); - auto tuple = builder.BatchNormTraining(h0, h1, h2, - /*epsilon=*/1, kFeatureIndex); + builder.BatchNormTraining(h0, h1, h2, + /*epsilon=*/1, kFeatureIndex); auto expected = Literal::MakeTuple( {Literal::CreateR3FromArray3D(Array3D(260, 2, 2, 1.0f)) @@ -314,24 +311,24 @@ XLA_TEST_P(BatchNormalizationTest, TrainingWithFeatureOnLowDimension) { XLA_TEST_P(BatchNormalizationTest, LargeEpsilonTest) { // Test the correctness of choosing a large epsilon value. 
const int kFeatureIndex = 2; - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); - ComputationDataHandle h0; + XlaOp h0; auto operand = CreateR3Parameter({{{0.0f}, {10.0f}, {20.0f}, {30.0f}}}, /*parameter_number=*/0, "operand", &builder, &h0); - ComputationDataHandle h1; + XlaOp h1; auto scale = CreateR1Parameter(std::vector(1, 1.0f), /*parameter_number=*/1, "scale", &builder, &h1); - ComputationDataHandle h2; + XlaOp h2; auto offset = CreateR1Parameter(std::vector(1, 0.0f), /*parameter_number=*/2, "offset", &builder, &h2); // var = 125, mean = 15, epsilon = -100 - auto tuple = builder.BatchNormTraining(h0, h1, h2, - /*epsilon=*/-100, kFeatureIndex); + builder.BatchNormTraining(h0, h1, h2, + /*epsilon=*/-100, kFeatureIndex); auto expected = Literal::MakeTuple( {Literal::CreateR3FromArray3D({{{-3.0f}, {-1.0f}, {1.0f}, {3.0f}}}) @@ -346,7 +343,7 @@ XLA_TEST_P(BatchNormalizationTest, LargeEpsilonTest) { XLA_TEST_P(BatchNormalizationTest, BatchNormGradBasic) { const int kFeatureIndex = 2; - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto operand = builder.ConstantR4FromArray4D(Array4D(2, 2, 2, 1, 0.0f)); @@ -453,7 +450,7 @@ INSTANTIATE_TEST_CASE_P(BatchNormTest_Instantiation, BatchNormTestManySizes, XLA_TEST_P(BatchNormTestManySizes, RandomizedTrainingTests) { float epsilon = 0.001; - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); const std::vector& bounds = GetParam().bounds; Array4D input_array(bounds[0], bounds[1], bounds[2], bounds[3]); input_array.FillRandom(GetParam().random_value_var, @@ -553,7 +550,7 @@ XLA_TEST_P(BatchNormTestManySizes, RandomizedTrainingTests) { XLA_TEST_P(BatchNormTestManySizes, RandomizedInferencingTests) { float epsilon = 0.001; - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); const std::vector& bounds = GetParam().bounds; Array4D input_array(bounds[0], bounds[1], bounds[2], bounds[3]); 
input_array.FillRandom(GetParam().random_value_var, @@ -661,7 +658,7 @@ XLA_TEST_P(BatchNormTestManySizes, RandomizedInferencingTests) { XLA_TEST_P(BatchNormTestManySizes, RandomizedGradTests) { float epsilon = 0.001; - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); const std::vector& bounds = GetParam().bounds; Array4D input_array(bounds[0], bounds[1], bounds[2], bounds[3]); input_array.FillRandom(GetParam().random_value_var, @@ -828,9 +825,9 @@ XLA_TEST_P(BatchNormTestManySizes, RandomizedGradTests) { std::unique_ptr grad_output_data = client_->TransferToServer(*grad_output_literal).ConsumeValueOrDie(); - auto t = builder.BatchNormGrad(input_parameter, scale_parameter, - mean_parameter, var_parameter, - grad_output_parameter, epsilon, feature_index); + builder.BatchNormGrad(input_parameter, scale_parameter, mean_parameter, + var_parameter, grad_output_parameter, epsilon, + feature_index); auto expected = Literal::MakeTuple({expected_grad_activation.get(), diff --git a/tensorflow/compiler/xla/tests/bitcast_convert_test.cc b/tensorflow/compiler/xla/tests/bitcast_convert_test.cc index 0d94d65c1015fb54ada3fdfc95d0c31d0a0f158b..bff60f25ec8f15d372d251ac313200301a04f20f 100644 --- a/tensorflow/compiler/xla/tests/bitcast_convert_test.cc +++ b/tensorflow/compiler/xla/tests/bitcast_convert_test.cc @@ -18,8 +18,8 @@ limitations under the License. 
#include #include -#include "tensorflow/compiler/xla/client/computation_builder.h" #include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/tests/client_library_test_base.h" #include "tensorflow/compiler/xla/tests/literal_test_util.h" @@ -34,7 +34,7 @@ namespace { class BitcastConvertTest : public ClientLibraryTestBase { public: - explicit BitcastConvertTest(perftools::gputools::Platform* platform = nullptr) + explicit BitcastConvertTest(se::Platform* platform = nullptr) : ClientLibraryTestBase(platform) { mutable_debug_options()->add_xla_disable_hlo_passes("algsimp"); mutable_debug_options()->add_xla_disable_hlo_passes("inline"); @@ -42,7 +42,7 @@ class BitcastConvertTest : public ClientLibraryTestBase { }; TEST_F(BitcastConvertTest, ConvertR1S32ToR1S32) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1({42, 64}); builder.BitcastConvertType(a, S32); @@ -51,7 +51,7 @@ TEST_F(BitcastConvertTest, ConvertR1S32ToR1S32) { } TEST_F(BitcastConvertTest, ConvertR1F32ToR1F32) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1({42.0f, 64.0f}); builder.BitcastConvertType(a, F32); @@ -60,7 +60,7 @@ TEST_F(BitcastConvertTest, ConvertR1F32ToR1F32) { } TEST_F(BitcastConvertTest, BitcastR1S32ToR1F32) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1({0, static_cast(0x80000000), 0x3F800000, static_cast(0xBF800000), 0x3F000000, @@ -72,7 +72,7 @@ TEST_F(BitcastConvertTest, BitcastR1S32ToR1F32) { } XLA_TEST_F(BitcastConvertTest, ConvertR1S0S32ToR1S0F32) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1({}); builder.BitcastConvertType(a, F32); @@ -81,7 +81,7 @@ 
XLA_TEST_F(BitcastConvertTest, ConvertR1S0S32ToR1S0F32) { } TEST_F(BitcastConvertTest, ConvertR1F32ToR1S32) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1({42.6, 64.4}); builder.BitcastConvertType(a, S32); @@ -90,7 +90,7 @@ TEST_F(BitcastConvertTest, ConvertR1F32ToR1S32) { } TEST_F(BitcastConvertTest, ConvertS32Extremes) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1( {std::numeric_limits::min(), std::numeric_limits::max()}); builder.BitcastConvertType(a, F32); @@ -100,7 +100,7 @@ TEST_F(BitcastConvertTest, ConvertS32Extremes) { } TEST_F(BitcastConvertTest, ConvertMapToS32) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto b = builder.CreateSubBuilder("convert"); auto param = b->Parameter(0, ShapeUtil::MakeShape(F32, {}), "in"); b->BitcastConvertType(param, S32); @@ -112,7 +112,7 @@ TEST_F(BitcastConvertTest, ConvertMapToS32) { } TEST_F(BitcastConvertTest, ConvertMapToF32) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto b = builder.CreateSubBuilder("convert"); auto param = b->Parameter(0, ShapeUtil::MakeShape(S32, {}), "in"); b->BitcastConvertType(param, F32); @@ -129,7 +129,7 @@ TEST_F(BitcastConvertTest, ConvertMapToF32) { // input -> convert -> reshape // the new convert should have the same element type as the old convert. 
TEST_F(BitcastConvertTest, ConvertReshape) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto input = builder.ConstantR1({0x42280000}); auto reshape = builder.Reshape(input, /*dimensions=*/{0}, /*new_sizes=*/{}); builder.BitcastConvertType(reshape, F32); diff --git a/tensorflow/compiler/xla/tests/build_defs.bzl b/tensorflow/compiler/xla/tests/build_defs.bzl index 610302ac1256a57db6ed6e18016a4136973e3891..eac2eb286c3f7a1cd33aed03686e99ef753b773a 100644 --- a/tensorflow/compiler/xla/tests/build_defs.bzl +++ b/tensorflow/compiler/xla/tests/build_defs.bzl @@ -137,7 +137,8 @@ def xla_test(name, backend_deps += ["//tensorflow/compiler/xla/tests:test_macros_gpu"] this_backend_tags += ["requires-gpu-sm35"] elif backend in plugins: - backend_deps = plugins[backend]["deps"] + backend_deps = [] + backend_deps += plugins[backend]["deps"] this_backend_copts += plugins[backend]["copts"] this_backend_tags += plugins[backend]["tags"] this_backend_args += plugins[backend]["args"] diff --git a/tensorflow/compiler/xla/tests/client_library_test_base.cc b/tensorflow/compiler/xla/tests/client_library_test_base.cc index 4a9faef1dc202993bfddcf6e5591e4724d461e18..69389dae3f28a2a20eb8a26e72ab3541eb565305 100644 --- a/tensorflow/compiler/xla/tests/client_library_test_base.cc +++ b/tensorflow/compiler/xla/tests/client_library_test_base.cc @@ -23,6 +23,7 @@ limitations under the License. #include "tensorflow/compiler/xla/execution_options_util.h" #include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/ptr_util.h" +#include "tensorflow/compiler/xla/service/platform_util.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/status_macros.h" #include "tensorflow/compiler/xla/statusor.h" @@ -31,10 +32,12 @@ limitations under the License. 
#include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/types.h" -namespace se = ::perftools::gputools; - namespace xla { namespace { + +// Name of the interpreter backend. +constexpr char kInterpreter[] = "interpreter"; + // Wrapper function that creates a nicer error message (than a bare // ValueOrDie()) if the platform we intend to test is not available. Client* GetOrCreateLocalClientOrDie(const LocalClientOptions& client_options) { @@ -43,11 +46,18 @@ Client* GetOrCreateLocalClientOrDie(const LocalClientOptions& client_options) { TF_CHECK_OK(result.status()) << " could not create local client for testing"; return result.ValueOrDie(); } + +// Helper functions to get the reference platform. +se::Platform* GetReferencePlatform() { + auto result = PlatformUtil::GetPlatform(kInterpreter); + TF_CHECK_OK(result.status()) << "could not get interpreter platform"; + return result.ValueOrDie(); +} + } // namespace ClientLibraryTestBase::ClientLibraryTestBase( - perftools::gputools::Platform* platform, - const LocalClientOptions& client_options) + se::Platform* platform, const LocalClientOptions& client_options) : client_(GetOrCreateLocalClientOrDie(client_options)), execution_options_(CreateDefaultExecutionOptions()) { CHECK_EQ(platform, client_options.platform()); @@ -66,6 +76,11 @@ ClientLibraryTestBase::ClientLibraryTestBase(se::Platform* platform) LocalClientOptions default_options; default_options.set_platform(platform); client_ = GetOrCreateLocalClientOrDie(default_options); + + LocalClientOptions ref_options; + ref_options.set_platform(GetReferencePlatform()); + ref_client_ = GetOrCreateLocalClientOrDie(ref_options); + execution_options_.mutable_debug_options()->add_xla_disable_hlo_passes( "constant_folding"); } @@ -74,9 +89,9 @@ string ClientLibraryTestBase::TestName() const { return ::testing::UnitTest::GetInstance()->current_test_info()->name(); } +template StatusOr> ClientLibraryTestBase::Execute( - ComputationBuilder* builder, - 
tensorflow::gtl::ArraySlice arguments) { + BuilderT* builder, tensorflow::gtl::ArraySlice arguments) { // Build the computation, as a convenience. TF_ASSIGN_OR_RETURN(auto computation, builder->Build()); return client_->Execute(computation, arguments, &execution_options_); @@ -127,6 +142,20 @@ StatusOr> ClientLibraryTestBase::ExecuteAndTransfer( return ExecuteAndTransfer(computation, arguments, shape_with_output_layout); } +StatusOr> +ClientLibraryTestBase::ExecuteAndTransferReference( + const XlaComputation& computation, + tensorflow::gtl::ArraySlice arguments, + const Shape* shape_with_output_layout) { + ExecutionOptions execution_options = execution_options_; + if (shape_with_output_layout != nullptr) { + *execution_options.mutable_shape_with_output_layout() = + *shape_with_output_layout; + } + return ref_client_->ExecuteAndTransfer(computation, arguments, + &execution_options); +} + std::unique_ptr ClientLibraryTestBase::ExecuteOrDie( ComputationBuilder* builder, tensorflow::gtl::ArraySlice arguments) { @@ -521,6 +550,69 @@ ClientLibraryTestBase::ComputeValueAndReference( return std::make_pair(std::move(reference), std::move(result)); } +void ClientLibraryTestBase::ComputeAndCompare( + XlaBuilder* builder, tensorflow::gtl::ArraySlice arguments) { + auto status_or_data = ComputeValueAndReference(builder, arguments); + EXPECT_IS_OK(status_or_data); + if (!status_or_data.ok()) { + return; + } + std::unique_ptr reference, result; + std::tie(reference, result) = status_or_data.ConsumeValueOrDie(); + LiteralTestUtil::ExpectEqual(*reference, *result); +} + +void ClientLibraryTestBase::ComputeAndCompare( + XlaBuilder* builder, tensorflow::gtl::ArraySlice arguments, + ErrorSpec error) { + auto status_or_data = ComputeValueAndReference(builder, arguments); + EXPECT_IS_OK(status_or_data); + if (!status_or_data.ok()) { + return; + } + std::unique_ptr reference, result; + std::tie(reference, result) = status_or_data.ConsumeValueOrDie(); + 
LiteralTestUtil::ExpectNear(*reference, *result, error); +} + +StatusOr, std::unique_ptr>> +ClientLibraryTestBase::ComputeValueAndReference( + XlaBuilder* builder, tensorflow::gtl::ArraySlice arguments) { + // Transfer the arguments to the executor service. We put the unique_ptr's + // into a vector to keep the data alive on the service until the end of this + // function. + std::vector> argument_data; + std::vector> ref_argument_data; + for (const auto& arg : arguments) { + TF_ASSIGN_OR_RETURN(auto data, client_->TransferToServer(arg.Clone())); + TF_ASSIGN_OR_RETURN(auto ref_data, ref_client_->TransferToServer(arg)); + argument_data.push_back(std::move(data)); + ref_argument_data.push_back(std::move(ref_data)); + } + + // Create raw pointers to the GlobalData for the rest of the call stack. + std::vector argument_data_ptr; + std::transform( + argument_data.begin(), argument_data.end(), + std::back_inserter(argument_data_ptr), + [](const std::unique_ptr& data) { return data.get(); }); + std::vector ref_argument_data_ptr; + std::transform( + ref_argument_data.begin(), ref_argument_data.end(), + std::back_inserter(ref_argument_data_ptr), + [](const std::unique_ptr& data) { return data.get(); }); + + TF_ASSIGN_OR_RETURN(auto computation, builder->Build()); + + TF_ASSIGN_OR_RETURN(auto result, + ExecuteAndTransfer(computation, argument_data_ptr)); + + TF_ASSIGN_OR_RETURN(auto reference, ExecuteAndTransferReference( + computation, ref_argument_data_ptr)); + + return std::make_pair(std::move(reference), std::move(result)); +} + Computation ClientLibraryTestBase::CreateScalarRelu() { ComputationBuilder builder(client_, "relu"); auto shape = ShapeUtil::MakeShape(use_bfloat16_ ? 
BF16 : F32, {}); @@ -595,12 +687,26 @@ ComputationDataHandle ClientLibraryTestBase::AddParam( return data_handle; } +XlaOp ClientLibraryTestBase::AddParam(const Literal& argument, + XlaBuilder* builder) { + XlaOp data_handle; + arguments_.push_back(CreateParameterAndTransferLiteral( + arguments_.size(), argument, "", builder, &data_handle)); + return data_handle; +} + ComputationDataHandle ClientLibraryTestBase::CreateConstantFromLiteral( const Literal& literal, ComputationBuilder* builder) { return builder->ConstantLiteral( use_bfloat16_ ? *LiteralTestUtil::ConvertF32ToBF16(literal) : literal); } +XlaOp ClientLibraryTestBase::CreateConstantFromLiteral(const Literal& literal, + XlaBuilder* builder) { + return builder->ConstantLiteral( + use_bfloat16_ ? *LiteralTestUtil::ConvertF32ToBF16(literal) : literal); +} + template void ClientLibraryTestBase::ComputeAndCompareLiteral( ComputationBuilder* builder, const Literal& expected, tensorflow::gtl::ArraySlice arguments, @@ -637,4 +743,11 @@ template void ClientLibraryTestBase::ComputeAndCompareTuple( XlaBuilder* builder, const Literal& expected, tensorflow::gtl::ArraySlice arguments, ErrorSpec error); +template StatusOr> ClientLibraryTestBase::Execute( + ComputationBuilder* builder, + tensorflow::gtl::ArraySlice arguments); + +template StatusOr> ClientLibraryTestBase::Execute( + XlaBuilder* builder, tensorflow::gtl::ArraySlice arguments); + } // namespace xla diff --git a/tensorflow/compiler/xla/tests/client_library_test_base.h b/tensorflow/compiler/xla/tests/client_library_test_base.h index be90f14c8e7259ebf3ffb44d6e7118c8019878af..481d7c5c25a7e8ad9d40d11fa3f84e13d6922aa3 100644 --- a/tensorflow/compiler/xla/tests/client_library_test_base.h +++ b/tensorflow/compiler/xla/tests/client_library_test_base.h @@ -64,11 +64,10 @@ std::vector ExpandUseBfloat16( // A client library test establishes an in-process XLA client connection. 
class ClientLibraryTestBase : public ::testing::Test { protected: - explicit ClientLibraryTestBase( - perftools::gputools::Platform* platform = nullptr); + explicit ClientLibraryTestBase(se::Platform* platform = nullptr); // Creates a new ClientLibraryTestBase with custom client options. - ClientLibraryTestBase(perftools::gputools::Platform* platform, + ClientLibraryTestBase(se::Platform* platform, const LocalClientOptions& client_options); // Returns the name of the test currently being run. @@ -92,9 +91,9 @@ class ClientLibraryTestBase : public ::testing::Test { // Convenience methods for building and running a computation with the member // execution options. Modify execution_options_ in your test if you want to // customize the options. + template StatusOr> Execute( - ComputationBuilder* builder, - tensorflow::gtl::ArraySlice arguments); + BuilderT* builder, tensorflow::gtl::ArraySlice arguments); // TODO(b/74197823): Remove the template type 'BuilderT' in all methods once // the migration to XlaBuilder is complete. @@ -114,6 +113,14 @@ class ClientLibraryTestBase : public ::testing::Test { tensorflow::gtl::ArraySlice arguments, const Shape* shape_with_output_layout = nullptr); + // This executes the computation via the reference client (which connects a + // interpreter backend). The result is used as the expected values of the + // computation. + StatusOr> ExecuteAndTransferReference( + const XlaComputation& computation, + tensorflow::gtl::ArraySlice arguments, + const Shape* shape_with_output_layout = nullptr); + // Convenience OrDie variants of above methods. std::unique_ptr ExecuteOrDie( ComputationBuilder* builder, @@ -236,6 +243,14 @@ class ClientLibraryTestBase : public ::testing::Test { tensorflow::gtl::ArraySlice arguments, ErrorSpec error); + // Convenience method for running a built computation and comparing the result + // with the reference result. 
+ void ComputeAndCompare(XlaBuilder* builder, + tensorflow::gtl::ArraySlice arguments); + void ComputeAndCompare(XlaBuilder* builder, + tensorflow::gtl::ArraySlice arguments, + ErrorSpec error); + // Create scalar operations for use in reductions. Computation CreateScalarRelu(); Computation CreateScalarMax(); @@ -300,18 +315,24 @@ class ClientLibraryTestBase : public ::testing::Test { // set exactly once. The first added parameter gets index 0, then 1 and so on. ComputationDataHandle AddParam(const Literal& argument, ComputationBuilder* builder); + XlaOp AddParam(const Literal& argument, XlaBuilder* builder); template ComputationDataHandle AddParam(const Array& argument, ComputationBuilder* builder) { return AddParam(*Literal::CreateFromArray(argument), builder); } + template + XlaOp AddParam(const Array& argument, XlaBuilder* builder) { + return AddParam(*Literal::CreateFromArray(argument), builder); + } // Creates a constant instruction with the given literal. When the // use_bfloat16 flag is set but the literal has F32 elements, the elements // will be converted to BF16s. ComputationDataHandle CreateConstantFromLiteral(const Literal& literal, ComputationBuilder* builder); + XlaOp CreateConstantFromLiteral(const Literal& literal, XlaBuilder* builder); // Creates a constant instruction with the given array. When the use_bfloat16 // flag is set but the array has float elements, the elements will be @@ -322,6 +343,12 @@ class ClientLibraryTestBase : public ::testing::Test { return CreateConstantFromLiteral(*Literal::CreateFromArray(array), builder); } + template + XlaOp CreateConstantFromArray(const Array& array, + XlaBuilder* builder) { + return CreateConstantFromLiteral(*Literal::CreateFromArray(array), builder); + } + // Same as CreateConstantFromArray, but for scalars. 
template ComputationDataHandle CreateConstantFromScalar(NativeT value, @@ -330,6 +357,12 @@ class ClientLibraryTestBase : public ::testing::Test { builder); } + template + XlaOp CreateConstantFromScalar(NativeT value, XlaBuilder* builder) { + return CreateConstantFromLiteral(*Literal::CreateR0(value), + builder); + } + // Creates a parameter instruction that wraps a given value and then stores // into "data_handle" the global handle for that parameter. // @@ -395,6 +428,7 @@ class ClientLibraryTestBase : public ::testing::Test { PrimitiveType FloatType() const { return use_bfloat16_ ? BF16 : F32; } Client* client_; + Client* ref_client_; // To compute reference result. ExecutionOptions execution_options_; private: @@ -426,12 +460,19 @@ class ClientLibraryTestBase : public ::testing::Test { const Shape* output_with_layout = nullptr); // Executes the computation and calculates the expected reference value using - // the HloEvaluator. Returns two literal in the order of (expected, actual). + // the HloEvaluator. Returns two literals in the order of (expected, actual). StatusOr, std::unique_ptr>> ComputeValueAndReference(ComputationBuilder* builder, const ComputationDataHandle& operand, tensorflow::gtl::ArraySlice arguments); + // Executes the computation and calculates the expected reference value using + // the reference client. Returns two literals in the order of (expected, + // actual). + StatusOr, std::unique_ptr>> + ComputeValueAndReference(XlaBuilder* builder, + tensorflow::gtl::ArraySlice arguments); + // Whether to run tests with all float-type input/output converted to // bfloat16. bool use_bfloat16_ = false; diff --git a/tensorflow/compiler/xla/tests/client_test.cc b/tensorflow/compiler/xla/tests/client_test.cc index 045148cdd11da94ae4789a753efca95c6aaa1f27..32e2f2c0848407ec46a5ac52e2668ef27b92c426 100644 --- a/tensorflow/compiler/xla/tests/client_test.cc +++ b/tensorflow/compiler/xla/tests/client_test.cc @@ -19,6 +19,8 @@ limitations under the License. 
#include "tensorflow/compiler/xla/client/computation_builder.h" #include "tensorflow/compiler/xla/client/global_data.h" #include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_computation.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/status_macros.h" #include "tensorflow/compiler/xla/statusor.h" @@ -109,14 +111,14 @@ XLA_TEST_F(ClientTest, ExecuteWithTupleLayout) { XLA_TEST_F(ClientTest, DISABLED_ON_CPU_PARALLEL(DISABLED_ON_GPU(ExecuteParallel))) { - Computation add_with_one_arg, mul_with_two_args, dot_with_one_arg; + XlaComputation add_with_one_arg, mul_with_two_args, dot_with_one_arg; Shape shape = ShapeUtil::MakeShape(S32, {2, 2}); TF_ASSERT_OK_AND_ASSIGN( std::unique_ptr const_arg, client_->TransferToServer(*Literal::CreateR2({{5, 6}, {7, 8}}))); - ComputationBuilder b(client_, TestName() + ".add"); + XlaBuilder b(TestName() + ".add"); b.Add(b.Parameter(0, shape, "param_0"), b.ConstantR2({{1, 2}, {3, 4}})); TF_ASSERT_OK_AND_ASSIGN(add_with_one_arg, b.Build()); @@ -124,14 +126,14 @@ XLA_TEST_F(ClientTest, // We can't really test parallel execution on CPU since all of the cores in a // CPU are presented as a single device. So for now we test "parallel" // execution on a single device. 
- std::vector computation_instances; + std::vector computation_instances; TF_ASSERT_OK_AND_ASSIGN(std::vector devices, client_->GetDeviceHandles(1)); ASSERT_EQ(devices.size(), 1); ExecutionOptions options = execution_options_; *options.add_device_handles() = devices[0]; - computation_instances.push_back(Client::ComputationInstance( + computation_instances.push_back(Client::XlaComputationInstance( add_with_one_arg, {const_arg.get()}, options, nullptr)); TF_ASSERT_OK_AND_ASSIGN(auto results, diff --git a/tensorflow/compiler/xla/tests/compute_constant_test.cc b/tensorflow/compiler/xla/tests/compute_constant_test.cc index e5a03b49ad259a64b9cbbc88c31d8c6558289d1b..7ea82a791f72ea23be152ce9f548cc6719922236 100644 --- a/tensorflow/compiler/xla/tests/compute_constant_test.cc +++ b/tensorflow/compiler/xla/tests/compute_constant_test.cc @@ -21,6 +21,8 @@ limitations under the License. #include "tensorflow/compiler/xla/client/computation.h" #include "tensorflow/compiler/xla/client/computation_builder.h" #include "tensorflow/compiler/xla/client/global_data.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_computation.h" #include "tensorflow/compiler/xla/layout_util.h" #include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/shape_util.h" @@ -31,6 +33,7 @@ limitations under the License. 
#include "tensorflow/compiler/xla/tests/test_macros.h" #include "tensorflow/compiler/xla/tests/test_utils.h" #include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/lib/core/status_test_util.h" #include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/platform/types.h" @@ -44,16 +47,14 @@ ClientType client_types[] = {ClientType::kLocal, ClientType::kCompileOnly}; class ComputeConstantTest : public ::testing::Test { public: - explicit ComputeConstantTest( - perftools::gputools::Platform* platform = nullptr) + explicit ComputeConstantTest(se::Platform* platform = nullptr) : platform_(platform) {} string TestName() const { return ::testing::UnitTest::GetInstance()->current_test_info()->name(); } - Client* ClientOrDie(::perftools::gputools::Platform* platform, - ClientType client_type) { + Client* ClientOrDie(se::Platform* platform, ClientType client_type) { if (client_type == ClientType::kLocal) { StatusOr result = ClientLibrary::GetOrCreateLocalClient(platform); @@ -71,39 +72,46 @@ class ComputeConstantTest : public ::testing::Test { } StatusOr> ComputeConstantLiteral( - Client* client, const ComputationDataHandle& operand, - ComputationBuilder* builder, Layout* output_layout = nullptr, - tensorflow::gtl::ArraySlice parameters = {}) { - TF_ASSIGN_OR_RETURN(auto computed, builder->ComputeConstant( - operand, output_layout, parameters)); + Client* client, const XlaOp& operand, XlaBuilder* builder, + Layout* output_layout = nullptr) { + TF_ASSIGN_OR_RETURN(auto subgraph, builder->BuildConstantSubGraph(operand)); + TF_ASSIGN_OR_RETURN(auto computed, + client->ComputeConstant(subgraph, output_layout)); return std::move(computed); } + template + StatusOr ComputeConstantScalar(Client* client, const XlaOp& operand, + XlaBuilder* builder) { + TF_ASSIGN_OR_RETURN(auto literal, ComputeConstantLiteral(client, operand, + builder, nullptr)); + return literal->Get({}); + } + template StatusOr ComputeConstantScalar( Client* client, const 
ComputationDataHandle& operand, ComputationBuilder* builder, tensorflow::gtl::ArraySlice parameters = {}) { - TF_ASSIGN_OR_RETURN( - auto literal, - ComputeConstantLiteral(client, operand, builder, nullptr, parameters)); + TF_ASSIGN_OR_RETURN(auto literal, + builder->ComputeConstant( + operand, /*output_layout=*/nullptr, parameters)); return literal->Get({}); } - bool IsConstant(const ComputationDataHandle& operand, - ComputationBuilder* builder, int64 num_parameters = 0) { - StatusOr result = builder->IsConstant(operand, num_parameters); + bool IsConstant(const XlaOp& operand, XlaBuilder* builder) { + StatusOr result = builder->IsConstant(operand); EXPECT_TRUE(result.ok()) << result.status(); return result.ok() ? result.ValueOrDie() : false; } - perftools::gputools::Platform* platform_; + se::Platform* platform_; }; TEST_F(ComputeConstantTest, ScalarInt32Literal) { for (ClientType client_type : client_types) { Client* client = ClientOrDie(platform_, client_type); - ComputationBuilder b(client, TestName()); + XlaBuilder b(TestName()); auto computation = b.ConstantR0(42); EXPECT_TRUE(IsConstant(computation, &b)); @@ -116,7 +124,7 @@ TEST_F(ComputeConstantTest, ScalarInt32Literal) { TEST_F(ComputeConstantTest, ScalarFloatAdd) { for (ClientType client_type : client_types) { Client* client = ClientOrDie(platform_, client_type); - ComputationBuilder b(client, TestName()); + XlaBuilder b(TestName()); auto computation = b.Add(b.ConstantR0(42.5f), b.ConstantR0(1.5f)); EXPECT_TRUE(IsConstant(computation, &b)); @@ -130,7 +138,7 @@ TEST_F(ComputeConstantTest, ScalarFloatAdd) { TEST_F(ComputeConstantTest, ScalarRng) { for (ClientType client_type : client_types) { Client* client = ClientOrDie(platform_, client_type); - ComputationBuilder b(client, TestName()); + XlaBuilder b(TestName()); auto computation = b.RngUniform(b.ConstantR0(1.1f), b.ConstantR0(2.1f), ShapeUtil::MakeShape(F32, {})); @@ -151,19 +159,21 @@ TEST_F(ComputeConstantTest, Param) { std::vector arguments; 
arguments.push_back(std::move(*Literal::CreateR0(42.5f))); - EXPECT_TRUE(IsConstant(computation, &b, arguments.size())); - - auto value = - ComputeConstantScalar(client, computation, &b, arguments); - ASSERT_TRUE(value.ok()) << value.status(); - EXPECT_EQ(value.ValueOrDie(), 44.0f); + TF_ASSERT_OK_AND_ASSIGN(bool is_constant, + b.IsConstant(computation, arguments.size())); + EXPECT_TRUE(is_constant); + + TF_ASSERT_OK_AND_ASSIGN( + auto value, + ComputeConstantScalar(client, computation, &b, arguments)); + EXPECT_EQ(value, 44.0f); } } TEST_F(ComputeConstantTest, DirectParamMissing) { for (ClientType client_type : client_types) { Client* client = ClientOrDie(platform_, client_type); - ComputationBuilder b(client, TestName()); + XlaBuilder b(TestName()); auto computation = b.Parameter(0, ShapeUtil::MakeShape(F32, {}), "param"); EXPECT_FALSE(IsConstant(computation, &b)); @@ -177,7 +187,7 @@ TEST_F(ComputeConstantTest, DirectParamMissing) { TEST_F(ComputeConstantTest, IndirectParamMissing) { for (ClientType client_type : client_types) { Client* client = ClientOrDie(platform_, client_type); - ComputationBuilder b(client, TestName()); + XlaBuilder b(TestName()); auto computation = b.Add(b.ConstantR0(1.0f), b.Parameter(0, ShapeUtil::MakeShape(F32, {}), "param")); @@ -195,7 +205,7 @@ TEST_F(ComputeConstantTest, IndirectParamMissing) { TEST_F(ComputeConstantTest, UnrelatedParam) { for (ClientType client_type : client_types) { Client* client = ClientOrDie(platform_, client_type); - ComputationBuilder b(client, TestName()); + XlaBuilder b(TestName()); auto param_a = b.Parameter(10, ShapeUtil::MakeShape(F32, {}), "param0"); auto constant_4 = @@ -212,64 +222,64 @@ TEST_F(ComputeConstantTest, UnrelatedParam) { EXPECT_TRUE(IsConstant(constant_13, &b)); - auto value = ComputeConstantScalar(client, constant_13, &b); - ASSERT_TRUE(value.ok()) << value.status(); - EXPECT_EQ(value.ValueOrDie(), 13.0f); + TF_ASSERT_OK_AND_ASSIGN( + auto value, ComputeConstantScalar(client, constant_13, 
&b)); + EXPECT_EQ(value, 13.0f); } } TEST_F(ComputeConstantTest, NonScalarAdd) { for (ClientType client_type : client_types) { Client* client = ClientOrDie(platform_, client_type); - ComputationBuilder b(client, TestName()); + XlaBuilder b(TestName()); auto computation = b.Add(b.ConstantR1({1, 2}), b.ConstantR1({3, 4})); EXPECT_TRUE(IsConstant(computation, &b)); - auto computed = ComputeConstantLiteral(client, computation, &b); - ASSERT_TRUE(computed.ok()) << computed.status(); + TF_ASSERT_OK_AND_ASSIGN(auto computed, + ComputeConstantLiteral(client, computation, &b)); std::unique_ptr expected_literal = Literal::CreateR1({4, 6}); - LiteralTestUtil::ExpectEqual(*expected_literal, *computed.ValueOrDie()); + LiteralTestUtil::ExpectEqual(*expected_literal, *computed); } } TEST_F(ComputeConstantTest, IntegerDivide) { for (ClientType client_type : client_types) { Client* client = ClientOrDie(platform_, client_type); - ComputationBuilder b(client, TestName()); + XlaBuilder b(TestName()); auto computation = b.Div(b.ConstantR0(15), b.ConstantR0(3)); EXPECT_TRUE(IsConstant(computation, &b)); - auto computed = ComputeConstantLiteral(client, computation, &b); - ASSERT_TRUE(computed.ok()) << computed.status(); + TF_ASSERT_OK_AND_ASSIGN(auto computed, + ComputeConstantLiteral(client, computation, &b)); std::unique_ptr expected_literal = Literal::CreateR0(5); - LiteralTestUtil::ExpectEqual(*expected_literal, *computed.ValueOrDie()); + LiteralTestUtil::ExpectEqual(*expected_literal, *computed); } } XLA_TEST_F(ComputeConstantTest, Layout) { for (ClientType client_type : client_types) { Client* client = ClientOrDie(platform_, client_type); - ComputationBuilder b(client, TestName()); + XlaBuilder b(TestName()); std::vector> layouts = {{0, 1}, {1, 0}}; for (const std::vector& layout : layouts) { auto layout_proto = LayoutUtil::MakeLayout(layout); - auto computed = ComputeConstantLiteral( - client, - b.Add(b.ConstantR2({{1, 2}, {3, 4}}), - b.ConstantR2({{10, 20}, {30, 40}})), - &b, 
&layout_proto); - ASSERT_TRUE(computed.ok()) << computed.status(); + TF_ASSERT_OK_AND_ASSIGN( + auto computed, ComputeConstantLiteral( + client, + b.Add(b.ConstantR2({{1, 2}, {3, 4}}), + b.ConstantR2({{10, 20}, {30, 40}})), + &b, &layout_proto)); std::unique_ptr expected_literal = Literal::CreateR2WithLayout({{11, 22}, {33, 44}}, LayoutUtil::MakeLayout(layout)); - LiteralTestUtil::AssertEqualShapesAndLayouts( - expected_literal->shape(), computed.ValueOrDie()->shape()); - LiteralTestUtil::ExpectEqual(*expected_literal, *computed.ValueOrDie()); + LiteralTestUtil::AssertEqualShapesAndLayouts(expected_literal->shape(), + computed->shape()); + LiteralTestUtil::ExpectEqual(*expected_literal, *computed); } } } diff --git a/tensorflow/compiler/xla/tests/concat_test.cc b/tensorflow/compiler/xla/tests/concat_test.cc index fb0e9c724a69b61801e6e0c2d07ef75b63a00465..a4c8a83eb15f7cc279b6c8f1bf1394c0afb9f7cf 100644 --- a/tensorflow/compiler/xla/tests/concat_test.cc +++ b/tensorflow/compiler/xla/tests/concat_test.cc @@ -18,9 +18,9 @@ limitations under the License. #include "tensorflow/compiler/xla/array2d.h" #include "tensorflow/compiler/xla/array3d.h" -#include "tensorflow/compiler/xla/client/computation.h" -#include "tensorflow/compiler/xla/client/computation_builder.h" #include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_computation.h" #include "tensorflow/compiler/xla/reference_util.h" #include "tensorflow/compiler/xla/statusor.h" #include "tensorflow/compiler/xla/test.h" @@ -38,9 +38,9 @@ using ::testing::HasSubstr; // Concatenate expects at least one argument. 
XLA_TEST_F(ConcatTest, Concat_Nothing) { - ComputationBuilder builder(client_, TestName()); - auto concatenated = builder.ConcatInDim({}, 0); - StatusOr computation_status = builder.Build(); + XlaBuilder builder(TestName()); + builder.ConcatInDim({}, 0); + StatusOr computation_status = builder.Build(); ASSERT_FALSE(computation_status.ok()); EXPECT_THAT(computation_status.status().ToString(), HasSubstr("Concatenate expects at least one argument")); @@ -48,18 +48,18 @@ XLA_TEST_F(ConcatTest, Concat_Nothing) { // Concatenate with one argument works. XLA_TEST_F(ConcatTest, Concat_R1_With_Nothing) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1({42.0, 64.0}); - auto concatenated = builder.ConcatInDim({a}, 0); + builder.ConcatInDim({a}, 0); std::vector expected = {42, 64}; ComputeAndCompareR1(&builder, expected, {}, ErrorSpec(0.0001)); } XLA_TEST_F(ConcatTest, Concat_R1_L0_With_Nothing) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1({}); - auto concatenated = builder.ConcatInDim({a}, 0); + builder.ConcatInDim({a}, 0); std::vector expected = {}; ComputeAndCompareR1(&builder, expected, {}, ErrorSpec(0.0001)); @@ -68,51 +68,51 @@ XLA_TEST_F(ConcatTest, Concat_R1_L0_With_Nothing) { // Show that we can't concatenate R0 with R0 because we can't name the dimension // to concatenate on. 
XLA_TEST_F(ConcatTest, CannotConcatR0WithR0) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR0(42.0); auto b = builder.ConstantR0(64.0); - auto concatenated = builder.ConcatInDim({a, b}, 0); - StatusOr computation_status = builder.Build(); + builder.ConcatInDim({a, b}, 0); + StatusOr computation_status = builder.Build(); ASSERT_FALSE(computation_status.ok()); EXPECT_THAT(computation_status.status().ToString(), HasSubstr("out of bounds: 0")); } XLA_TEST_F(ConcatTest, Concat_R1_L0_With_R1_L0) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1({}); auto b = builder.ConstantR1({}); - auto concatenated = builder.ConcatInDim({a, b}, 0); + builder.ConcatInDim({a, b}, 0); std::vector expected = {}; ComputeAndCompareR1(&builder, expected, {}, ErrorSpec(0.0001)); } XLA_TEST_F(ConcatTest, Concat_R1_L0_With_R1_L1) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1({}); auto b = builder.ConstantR1({256.0}); - auto concatenated = builder.ConcatInDim({a, b}, 0); + builder.ConcatInDim({a, b}, 0); std::vector expected = {256}; ComputeAndCompareR1(&builder, expected, {}, ErrorSpec(0.0001)); } XLA_TEST_F(ConcatTest, Concat_R1_L2_With_R1_L0) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1({42.0, 64.0}); auto b = builder.ConstantR1({}); - auto concatenated = builder.ConcatInDim({a, b}, 0); + builder.ConcatInDim({a, b}, 0); std::vector expected = {42, 64}; ComputeAndCompareR1(&builder, expected, {}, ErrorSpec(0.0001)); } XLA_TEST_F(ConcatTest, Concat_R1_L2_With_R1_L1) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1({42.0, 64.0}); auto b = builder.ConstantR1({256.0}); - auto concatenated = builder.ConcatInDim({a, b}, 0); + builder.ConcatInDim({a, b}, 0); std::vector 
expected = {42, 64, 256}; ComputeAndCompareR1(&builder, expected, {}, ErrorSpec(0.0001)); @@ -129,20 +129,20 @@ XLA_TEST_F(ConcatTest, Concat_R1_L253_With_R1_L7) { expected[253 + i] = rhs[i] = 253 + i + 1; } - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1(lhs); auto b = builder.ConstantR1(rhs); - auto concatenated = builder.ConcatInDim({a, b}, 0); + builder.ConcatInDim({a, b}, 0); ComputeAndCompareR1(&builder, expected, {}, ErrorSpec(0.0001)); } XLA_TEST_F(ConcatTest, Concat_0x0_With_0x0) { for (int dim : {0, 1}) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR2FromArray2D(Array2D(0, 0)); auto b = builder.ConstantR2FromArray2D(Array2D(0, 0)); - auto concatenated = builder.ConcatInDim({a, b}, dim); + builder.ConcatInDim({a, b}, dim); ComputeAndCompareR2(&builder, Array2D(0, 0), {}, ErrorSpec(0.0001)); @@ -150,26 +150,27 @@ XLA_TEST_F(ConcatTest, Concat_0x0_With_0x0) { } XLA_TEST_F(ConcatTest, Concat_1x1_With_1x1_InDim0) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a_array = CreatePatternedMatrix(1, 1); auto b_array = CreatePatternedMatrix(1, 1, /*offset=*/64.0); auto a = builder.ConstantR2FromArray2D(*a_array); auto b = builder.ConstantR2FromArray2D(*b_array); - auto concatenated = builder.ConcatInDim({a, b}, 0); + builder.ConcatInDim({a, b}, 0); Array2D expected({ - {0}, {64}, + {0}, + {64}, }); ComputeAndCompareR2(&builder, expected, {}, ErrorSpec(0.0001)); } XLA_TEST_F(ConcatTest, Concat_1x1_With_1x1_InDim1) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a_array = CreatePatternedMatrix(1, 1); auto b_array = CreatePatternedMatrix(1, 1, /*offset=*/64.0); auto a = builder.ConstantR2FromArray2D(*a_array); auto b = builder.ConstantR2FromArray2D(*b_array); - auto concatenated = builder.ConcatInDim({a, b}, 1); + builder.ConcatInDim({a, b}, 1); Array2D 
expected({ {0, 64}, @@ -178,22 +179,22 @@ XLA_TEST_F(ConcatTest, Concat_1x1_With_1x1_InDim1) { } XLA_TEST_F(ConcatTest, Concat2x0With2x5) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto b_array = CreatePatternedMatrix(2, 5, /*offset=*/64.0); auto a = builder.ConstantR2FromArray2D(Array2D(2, 0)); auto b = builder.ConstantR2FromArray2D(*b_array); - auto concatenated = builder.ConcatInDim({a, b}, 1); + builder.ConcatInDim({a, b}, 1); ComputeAndCompareR2(&builder, *b_array, {}, ErrorSpec(0.0001)); } XLA_TEST_F(ConcatTest, Concat2x3With2x5) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a_array = CreatePatternedMatrix(2, 3); auto b_array = CreatePatternedMatrix(2, 5, /*offset=*/64.0); auto a = builder.ConstantR2FromArray2D(*a_array); auto b = builder.ConstantR2FromArray2D(*b_array); - auto concatenated = builder.ConcatInDim({a, b}, 1); + builder.ConcatInDim({a, b}, 1); Array2D expected({ {0, 1, 2, 64, 65, 66, 67, 68}, @@ -203,22 +204,22 @@ XLA_TEST_F(ConcatTest, Concat2x3With2x5) { } XLA_TEST_F(ConcatTest, Concat3x2With0x2) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a_array = CreatePatternedMatrix(3, 2); auto a = builder.ConstantR2FromArray2D(*a_array); auto b = builder.ConstantR2FromArray2D(Array2D(0, 2)); - auto concatenated = builder.ConcatInDim({a, b}, 0); + builder.ConcatInDim({a, b}, 0); ComputeAndCompareR2(&builder, *a_array, {}, ErrorSpec(0.0001)); } XLA_TEST_F(ConcatTest, Concat3x2With5x2) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a_array = CreatePatternedMatrix(3, 2); auto b_array = CreatePatternedMatrix(5, 2, /*offset=*/64.0); auto a = builder.ConstantR2FromArray2D(*a_array); auto b = builder.ConstantR2FromArray2D(*b_array); - auto concatenated = builder.ConcatInDim({a, b}, 0); + builder.ConcatInDim({a, b}, 0); Array2D expected({ {0, 1}, @@ -234,16 +235,16 @@ 
XLA_TEST_F(ConcatTest, Concat3x2With5x2) { } XLA_TEST_F(ConcatTest, Concat_R3_3x0x2_3x0x1) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR3FromArray3D(Array3D(3, 0, 2)); auto b = builder.ConstantR3FromArray3D(Array3D(3, 0, 1)); - auto concatenated = builder.ConcatInDim({a, b}, 2); + builder.ConcatInDim({a, b}, 2); ComputeAndCompareR3(&builder, Array3D(3, 0, 3), {}, ErrorSpec(0.0001)); } XLA_TEST_F(ConcatTest, Concat_R3_3x1x2_3x1x1) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); Array3D a_array({ // 3x1x2 {{0, 1}}, @@ -258,27 +259,29 @@ XLA_TEST_F(ConcatTest, Concat_R3_3x1x2_3x1x1) { }); auto a = builder.ConstantR3FromArray3D(a_array); auto b = builder.ConstantR3FromArray3D(b_array); - auto concatenated = builder.ConcatInDim({a, b}, 2); + builder.ConcatInDim({a, b}, 2); Array3D expected({ - {{0, 1, 6}}, {{2, 3, 7}}, {{4, 5, 8}}, + {{0, 1, 6}}, + {{2, 3, 7}}, + {{4, 5, 8}}, }); ComputeAndCompareR3(&builder, expected, {}, ErrorSpec(0.0001)); } XLA_TEST_F(ConcatTest, Concat_R1_1x1_1x1_1x1) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1({42.0}); auto b = builder.ConstantR1({64.0}); auto c = builder.ConstantR1({256.0}); - auto concatenated = builder.ConcatInDim({a, b, c}, 0); + builder.ConcatInDim({a, b, c}, 0); std::vector expected = {42, 64, 256}; ComputeAndCompareR1(&builder, expected, {}, ErrorSpec(0.0001)); } XLA_TEST_F(ConcatTest, Concat_R3_3x1x2_3x1x1_3x1x1) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); Array3D a_array({ // 3x1x2 {{0, 1}}, @@ -300,35 +303,35 @@ XLA_TEST_F(ConcatTest, Concat_R3_3x1x2_3x1x1_3x1x1) { auto a = builder.ConstantR3FromArray3D(a_array); auto b = builder.ConstantR3FromArray3D(b_array); auto c = builder.ConstantR3FromArray3D(c_array); - auto concatenated = builder.ConcatInDim({a, b, c}, 2); + builder.ConcatInDim({a, b, c}, 2); 
Array3D expected({ - {{0, 1, 2, 3}}, {{4, 5, 6, 7}}, {{8, 9, 10, 11}}, + {{0, 1, 2, 3}}, + {{4, 5, 6, 7}}, + {{8, 9, 10, 11}}, }); ComputeAndCompareR3(&builder, expected, {}, ErrorSpec(0.0001)); } XLA_TEST_F(ConcatTest, DoubleConcatLeftAssociative) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1({42.0}); auto b = builder.ConstantR1({64.0}); auto c = builder.ConstantR1({256.0}); // concatenated = (a concat b) concat c - auto concatenated = - builder.ConcatInDim({builder.ConcatInDim({a, b}, 0), c}, 0); + builder.ConcatInDim({builder.ConcatInDim({a, b}, 0), c}, 0); std::vector expected = {42, 64, 256}; ComputeAndCompareR1(&builder, expected, {}, ErrorSpec(0.0001)); } XLA_TEST_F(ConcatTest, DoubleConcatRightAssociative) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1({42.0}); auto b = builder.ConstantR1({64.0}); auto c = builder.ConstantR1({256.0}); // concatenated = a concat (b concat c) - auto concatenated = - builder.ConcatInDim({a, builder.ConcatInDim({b, c}, 0)}, 0); + builder.ConcatInDim({a, builder.ConcatInDim({b, c}, 0)}, 0); std::vector expected = {42, 64, 256}; ComputeAndCompareR1(&builder, expected, {}, ErrorSpec(0.0001)); @@ -342,7 +345,7 @@ XLA_TEST_F(ConcatTest, Concat_1x1024_With_1x1024_InDim0) { rhs(0, i) = i + 1024; } - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR2FromArray2D(lhs); auto b = builder.ConstantR2FromArray2D(rhs); builder.ConcatInDim({a, b}, 0); @@ -363,7 +366,7 @@ XLA_TEST_F(ConcatTest, Concat_1x1024_With_1x1024_InDim1) { rhs(0, i) = i + 1024; } - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR2FromArray2D(lhs); auto b = builder.ConstantR2FromArray2D(rhs); builder.ConcatInDim({a, b}, 1); @@ -388,7 +391,7 @@ XLA_TEST_F(ConcatTest, Concat_64x64_With_64x2) { } } - ComputationBuilder 
builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR2FromArray2D(lhs); auto b = builder.ConstantR2FromArray2D(rhs); builder.ConcatInDim({a, b}, 1); @@ -404,13 +407,13 @@ XLA_TEST_F(ConcatTest, Concat_64x64_With_64x2) { // Show that we can't concatenate with an opaques. XLA_TEST_F(ConcatTest, CannotConcatOpaques) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto opaque_shape = ShapeUtil::MakeOpaqueShape(); auto r1f32 = xla::ShapeUtil::MakeShape(xla::F32, {1}); auto x = builder.Parameter(0, r1f32, "x"); auto y = builder.Parameter(1, opaque_shape, "y"); - auto concatenated = builder.ConcatInDim({x, y}, 0); - StatusOr computation_status = builder.Build(); + builder.ConcatInDim({x, y}, 0); + StatusOr computation_status = builder.Build(); ASSERT_FALSE(computation_status.ok()); EXPECT_THAT( computation_status.status().ToString(), @@ -418,23 +421,23 @@ XLA_TEST_F(ConcatTest, CannotConcatOpaques) { } XLA_TEST_F(ConcatTest, ConcatSeveralBoxedPredicates) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto p0 = builder.ConstantR1({true}); auto p1 = builder.ConstantR1({false}); auto p2 = builder.ConstantR1({true}); - auto concatenated = builder.ConcatInDim({p0, p1, p2}, 0); + builder.ConcatInDim({p0, p1, p2}, 0); bool expected[] = {true, false, true}; ComputeAndCompareR1(&builder, expected, {}); } XLA_TEST_F(ConcatTest, ConcatSeveralR1S32s) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a0 = builder.ConstantR1({1}); auto a1 = builder.ConstantR1({2, 3}); auto a2 = builder.ConstantR1({4, 5, 6}); auto a3 = builder.ConstantR1({7, 8, 9, 10}); - auto concatenated = builder.ConcatInDim({a0, a1, a2, a3}, 0); + builder.ConcatInDim({a0, a1, a2, a3}, 0); std::vector expected(10); std::iota(expected.begin(), expected.end(), 1); @@ -442,7 +445,7 @@ XLA_TEST_F(ConcatTest, ConcatSeveralR1S32s) { } XLA_TEST_F(ConcatTest, 
ConcatR3WeirdDims) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); Array3D arr0(9, 17, 1); arr0.Fill(1); @@ -462,14 +465,14 @@ XLA_TEST_F(ConcatTest, ConcatR3WeirdDims) { } } - ComputationDataHandle h0; + XlaOp h0; auto p0 = CreateR3Parameter(arr0, /*parameter_number=*/0, "p0", &builder, &h0); - ComputationDataHandle h1; + XlaOp h1; auto p1 = CreateR3Parameter(arr1, /*parameter_number=*/1, "p1", &builder, &h1); - auto concatenated = builder.ConcatInDim({h0, h1}, 2); + builder.ConcatInDim({h0, h1}, 2); ComputeAndCompareR3(&builder, expected, {p0.get(), p1.get()}); } @@ -495,7 +498,7 @@ TEST_P(ConcatR2BinaryTest, DoIt) { Array2D rhs(spec.rhs_dim0, spec.rhs_dim1); rhs.FillUnique(1000); - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a0 = builder.ConstantR2FromArray2D(lhs); auto a1 = builder.ConstantR2FromArray2D(rhs); builder.ConcatInDim({a0, a1}, spec.concat_dimension); @@ -521,7 +524,7 @@ XLA_TEST_F(ConcatTest, ConcatOperandsOfSameOperand) { auto x_data = client_->TransferToServer(*x_literal).ConsumeValueOrDie(); auto y_data = client_->TransferToServer(*y_literal).ConsumeValueOrDie(); - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto x = builder.Parameter(0, f32_scalar, "x"); auto y = builder.Parameter(1, f32_scalar, "y"); auto mul = builder.Mul(x, y); @@ -545,7 +548,7 @@ XLA_TEST_F(ConcatTest, ConcatBroadcastArgument) { auto y_data = client_->TransferToServer(*y_literal).ConsumeValueOrDie(); auto z_data = client_->TransferToServer(*z_literal).ConsumeValueOrDie(); - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto x = builder.Parameter(0, x_literal->shape(), "x"); auto y = builder.Parameter(1, f32_scalar, "y"); auto z = builder.Parameter(2, f32_scalar, "z"); @@ -573,7 +576,7 @@ XLA_TEST_F(ConcatTest, ConcatBroadcastArgumentR3) { auto y_data = client_->TransferToServer(*y_literal).ConsumeValueOrDie(); auto 
z_data = client_->TransferToServer(*z_literal).ConsumeValueOrDie(); - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto x = builder.Parameter(0, x_literal->shape(), "x"); auto y = builder.Parameter(1, f32_scalar, "y"); auto z = builder.Parameter(2, f32_scalar, "y"); diff --git a/tensorflow/compiler/xla/tests/conditional_test.cc b/tensorflow/compiler/xla/tests/conditional_test.cc index b917dee77b5400db8f2c0a6a86258fee64723d71..7ff6706935740c7d76ee5cd03eae292386760397 100644 --- a/tensorflow/compiler/xla/tests/conditional_test.cc +++ b/tensorflow/compiler/xla/tests/conditional_test.cc @@ -13,7 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_computation.h" #include "tensorflow/compiler/xla/tests/client_library_test_base.h" #include "tensorflow/compiler/xla/tests/literal_test_util.h" #include "tensorflow/compiler/xla/tests/test_macros.h" @@ -23,8 +24,8 @@ namespace { class ConditionalOpTest : public ClientLibraryTestBase { protected: - Computation CreateR0ConstantComputation(float value) { - ComputationBuilder builder(client_, "Constant"); + XlaComputation CreateR0ConstantComputation(float value) { + XlaBuilder builder("Constant"); builder.Parameter(0, empty_tuple_, "tuple"); builder.ConstantR0(value); auto build_status = builder.Build(); @@ -32,16 +33,16 @@ class ConditionalOpTest : public ClientLibraryTestBase { return build_status.ConsumeValueOrDie(); } - Computation CreateR0IdentityComputation() { - ComputationBuilder builder(client_, "Identity"); + XlaComputation CreateR0IdentityComputation() { + XlaBuilder builder("Identity"); builder.Parameter(0, r0f32_, "x"); auto build_status = builder.Build(); 
EXPECT_IS_OK(build_status.status()); return build_status.ConsumeValueOrDie(); } - Computation CreateCeilComputation(const Shape& shape) { - ComputationBuilder builder(client_, "Ceil"); + XlaComputation CreateCeilComputation(const Shape& shape) { + XlaBuilder builder("Ceil"); auto param = builder.Parameter(0, shape, "param"); builder.Ceil(param); auto build_status = builder.Build(); @@ -49,16 +50,16 @@ class ConditionalOpTest : public ClientLibraryTestBase { return build_status.ConsumeValueOrDie(); } - Computation CreateR0CeilComputation() { + XlaComputation CreateR0CeilComputation() { return CreateCeilComputation(r0f32_); } - Computation CreateR1CeilComputation() { + XlaComputation CreateR1CeilComputation() { return CreateCeilComputation(r1s2f32_); } - Computation CreateFloorComputation(const Shape& shape) { - ComputationBuilder builder(client_, "Floor"); + XlaComputation CreateFloorComputation(const Shape& shape) { + XlaBuilder builder("Floor"); auto param = builder.Parameter(0, shape, "param"); builder.Floor(param); auto build_status = builder.Build(); @@ -66,17 +67,17 @@ class ConditionalOpTest : public ClientLibraryTestBase { return build_status.ConsumeValueOrDie(); } - Computation CreateR0FloorComputation() { + XlaComputation CreateR0FloorComputation() { return CreateFloorComputation(r0f32_); } - Computation CreateR1FloorComputation() { + XlaComputation CreateR1FloorComputation() { return CreateFloorComputation(r1s2f32_); } - Computation CreateTupleCeilComputation(const string& computation_name, - const Shape& tuple_shape) { - ComputationBuilder builder(client_, computation_name); + XlaComputation CreateTupleCeilComputation(const string& computation_name, + const Shape& tuple_shape) { + XlaBuilder builder(computation_name); auto tuple = builder.Parameter(0, tuple_shape, "tuple"); auto x = builder.GetTupleElement(tuple, 0); auto y = builder.GetTupleElement(tuple, 1); @@ -88,17 +89,17 @@ class ConditionalOpTest : public ClientLibraryTestBase { return 
build_status.ConsumeValueOrDie(); } - Computation CreateR0TupleCeilComputation() { + XlaComputation CreateR0TupleCeilComputation() { return CreateTupleCeilComputation("CeilR0", tuple_2_r0f32_); } - Computation CreateR1TupleCeilComputation() { + XlaComputation CreateR1TupleCeilComputation() { return CreateTupleCeilComputation("CeilR1", tuple_2_r1s2f32_); } - Computation CreateTupleFloorComputation(const string& computation_name, - const Shape& tuple_shape) { - ComputationBuilder builder(client_, computation_name); + XlaComputation CreateTupleFloorComputation(const string& computation_name, + const Shape& tuple_shape) { + XlaBuilder builder(computation_name); auto tuple = builder.Parameter(0, tuple_shape, "tuple"); auto x = builder.GetTupleElement(tuple, 0); auto y = builder.GetTupleElement(tuple, 1); @@ -110,17 +111,17 @@ class ConditionalOpTest : public ClientLibraryTestBase { return build_status.ConsumeValueOrDie(); } - Computation CreateR0TupleFloorComputation() { + XlaComputation CreateR0TupleFloorComputation() { return CreateTupleFloorComputation("FloorR0", tuple_2_r0f32_); } - Computation CreateR1TupleFloorComputation() { + XlaComputation CreateR1TupleFloorComputation() { return CreateTupleFloorComputation("FloorR1", tuple_2_r1s2f32_); } - Computation CreateTupleAddComputation(const string& computation_name, - const Shape& tuple_shape) { - ComputationBuilder builder(client_, computation_name); + XlaComputation CreateTupleAddComputation(const string& computation_name, + const Shape& tuple_shape) { + XlaBuilder builder(computation_name); auto tuple = builder.Parameter(0, tuple_shape, "tuple"); auto x = builder.GetTupleElement(tuple, 0); auto y = builder.GetTupleElement(tuple, 1); @@ -130,17 +131,17 @@ class ConditionalOpTest : public ClientLibraryTestBase { return build_status.ConsumeValueOrDie(); } - Computation CreateR0TupleAddComputation() { + XlaComputation CreateR0TupleAddComputation() { return CreateTupleAddComputation("AddR0", tuple_2_r0f32_); } - 
Computation CreateR1TupleAddComputation() { + XlaComputation CreateR1TupleAddComputation() { return CreateTupleAddComputation("AddR1", tuple_2_r1s2f32_); } - Computation CreateTupleSubComputation(const string& computation_name, - const Shape& tuple_shape) { - ComputationBuilder builder(client_, computation_name); + XlaComputation CreateTupleSubComputation(const string& computation_name, + const Shape& tuple_shape) { + XlaBuilder builder(computation_name); auto tuple = builder.Parameter(0, tuple_shape, "tuple"); auto x = builder.GetTupleElement(tuple, 0); auto y = builder.GetTupleElement(tuple, 1); @@ -150,11 +151,11 @@ class ConditionalOpTest : public ClientLibraryTestBase { return build_status.ConsumeValueOrDie(); } - Computation CreateR0TupleSubComputation() { + XlaComputation CreateR0TupleSubComputation() { return CreateTupleSubComputation("SubR0", tuple_2_r0f32_); } - Computation CreateR1TupleSubComputation() { + XlaComputation CreateR1TupleSubComputation() { return CreateTupleSubComputation("SubR1", tuple_2_r1s2f32_); } @@ -170,26 +171,25 @@ class ConditionalOpTest : public ClientLibraryTestBase { // Test true and false computations that do not take any parameters. XLA_TEST_F(ConditionalOpTest, Parameters0) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto pred = builder.ConstantR0(true); auto operands = builder.Tuple({}); auto true_computation = CreateR0ConstantComputation(56.0f); auto false_computation = CreateR0ConstantComputation(12.0f); - auto result = builder.Conditional(pred, operands, true_computation, operands, - false_computation); + builder.Conditional(pred, operands, true_computation, operands, + false_computation); ComputeAndCompareR0(&builder, 56.0f, {}, error_spec_); } // Test true and false computations that take in 1 parameter. 
XLA_TEST_F(ConditionalOpTest, Parameters1) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto pred = builder.ConstantR0(false); auto operand1 = builder.ConstantR0(56.0f); auto operand2 = builder.ConstantR0(12.0f); auto identity = CreateR0IdentityComputation(); - auto result = - builder.Conditional(pred, operand1, identity, operand2, identity); + builder.Conditional(pred, operand1, identity, operand2, identity); ComputeAndCompareR0(&builder, 12.0f, {}, error_spec_); } @@ -197,12 +197,12 @@ XLA_TEST_F(ConditionalOpTest, Parameters1) { // Test conditional with two different computations in the true and false cases // that take in different arguments. XLA_TEST_F(ConditionalOpTest, DiffComputationsDiffArgs) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto pred = builder.ConstantR0(false); auto operand1 = builder.ConstantR0(56.4f); auto operand2 = builder.ConstantR0(12.6f); - auto result = builder.Conditional(pred, operand1, CreateR0CeilComputation(), - operand2, CreateR0FloorComputation()); + builder.Conditional(pred, operand1, CreateR0CeilComputation(), operand2, + CreateR0FloorComputation()); ComputeAndCompareR0(&builder, 12.0f, {}, error_spec_); } @@ -210,11 +210,11 @@ XLA_TEST_F(ConditionalOpTest, DiffComputationsDiffArgs) { // Test conditional with two different computations in the true and false cases // that take in the same arguments. 
XLA_TEST_F(ConditionalOpTest, DiffComputationsSameArg) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto pred = builder.ConstantR0(false); auto operand = builder.ConstantR0(12.6f); - auto result = builder.Conditional(pred, operand, CreateR0CeilComputation(), - operand, CreateR0FloorComputation()); + builder.Conditional(pred, operand, CreateR0CeilComputation(), operand, + CreateR0FloorComputation()); ComputeAndCompareR0(&builder, 12.0f, {}, error_spec_); } @@ -222,12 +222,12 @@ XLA_TEST_F(ConditionalOpTest, DiffComputationsSameArg) { // Test conditional with the same computation in the true and false cases but // take in different arguments. XLA_TEST_F(ConditionalOpTest, SameComputationDiffArgs) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto pred = builder.ConstantR0(false); auto operand1 = builder.ConstantR0(56.4f); auto operand2 = builder.ConstantR0(12.6f); auto floor = CreateR0FloorComputation(); - auto result = builder.Conditional(pred, operand1, floor, operand2, floor); + builder.Conditional(pred, operand1, floor, operand2, floor); ComputeAndCompareR0(&builder, 12.0f, {}, error_spec_); } @@ -235,11 +235,11 @@ XLA_TEST_F(ConditionalOpTest, SameComputationDiffArgs) { // Test conditional with the same computation in the true and false cases that // take in the same arguments. 
XLA_TEST_F(ConditionalOpTest, SameComputationSameArg) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto pred = builder.ConstantR0(false); auto operand = builder.ConstantR0(12.6f); auto floor = CreateR0FloorComputation(); - auto result = builder.Conditional(pred, operand, floor, operand, floor); + builder.Conditional(pred, operand, floor, operand, floor); ComputeAndCompareR0(&builder, 12.0f, {}, error_spec_); } @@ -247,12 +247,12 @@ XLA_TEST_F(ConditionalOpTest, SameComputationSameArg) { // Test conditional with different instances of the same computation in the true // and false cases. XLA_TEST_F(ConditionalOpTest, SameComputationDiffInstances) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto pred = builder.ConstantR0(false); auto operand1 = builder.ConstantR0(56.4f); auto operand2 = builder.ConstantR0(12.6f); - auto result = builder.Conditional(pred, operand1, CreateR0FloorComputation(), - operand2, CreateR0FloorComputation()); + builder.Conditional(pred, operand1, CreateR0FloorComputation(), operand2, + CreateR0FloorComputation()); ComputeAndCompareR0(&builder, 12.0f, {}, error_spec_); } @@ -260,7 +260,7 @@ XLA_TEST_F(ConditionalOpTest, SameComputationDiffInstances) { // Test the case when a call invokes a computation that contains a conditional. 
XLA_TEST_F(ConditionalOpTest, ConditionalWithCall) { Shape r0bool = ShapeUtil::MakeShape(PRED, {}); - ComputationBuilder inner_builder(client_, TestName() + ".inner_conditional"); + XlaBuilder inner_builder(TestName() + ".inner_conditional"); auto pred_cond = inner_builder.Parameter(0, r0bool, "param0"); auto true_operand = inner_builder.Parameter(1, r0f32_, "param1"); auto false_operand = inner_builder.Parameter(2, r0f32_, "param2"); @@ -268,7 +268,7 @@ XLA_TEST_F(ConditionalOpTest, ConditionalWithCall) { false_operand, CreateR0FloorComputation()); auto inner_builder_result = inner_builder.Build(); - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto pred = builder.ConstantR0(false); auto operand1 = builder.ConstantR0(56.4f); auto operand2 = builder.ConstantR0(12.6f); @@ -281,14 +281,13 @@ XLA_TEST_F(ConditionalOpTest, ConditionalWithCall) { // Test true and false computations that take in 2 parameters and predicate is // true. XLA_TEST_F(ConditionalOpTest, Parameters2TrueBranch) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto pred = builder.ConstantR0(true); auto operand1 = builder.ConstantR0(56.0f); auto operand2 = builder.ConstantR0(12.0f); auto operands = builder.Tuple({operand1, operand2}); - auto result = - builder.Conditional(pred, operands, CreateR0TupleAddComputation(), - operands, CreateR0TupleSubComputation()); + builder.Conditional(pred, operands, CreateR0TupleAddComputation(), operands, + CreateR0TupleSubComputation()); ComputeAndCompareR0(&builder, 68.0f, {}, error_spec_); } @@ -296,14 +295,13 @@ XLA_TEST_F(ConditionalOpTest, Parameters2TrueBranch) { // Test true and false computations that take in 2 parameters and predicate is // false. 
XLA_TEST_F(ConditionalOpTest, Parameters2FalseBranch) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto pred = builder.ConstantR0(false); auto operand1 = builder.ConstantR0(56.0f); auto operand2 = builder.ConstantR0(12.0f); auto operands = builder.Tuple({operand1, operand2}); - auto result = - builder.Conditional(pred, operands, CreateR0TupleAddComputation(), - operands, CreateR0TupleSubComputation()); + builder.Conditional(pred, operands, CreateR0TupleAddComputation(), operands, + CreateR0TupleSubComputation()); ComputeAndCompareR0(&builder, 44.0f, {}, error_spec_); } @@ -311,14 +309,13 @@ XLA_TEST_F(ConditionalOpTest, Parameters2FalseBranch) { // Test true and false computations that take in 2 array parameters and // predicate is true. XLA_TEST_F(ConditionalOpTest, Parameters2ArrayTrueBranch) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto pred = builder.ConstantR0(true); auto operand1 = builder.ConstantR1({24.0f, 56.0f}); auto operand2 = builder.ConstantR1({10.0f, 11.0f}); auto operands = builder.Tuple({operand1, operand2}); - auto result = - builder.Conditional(pred, operands, CreateR1TupleAddComputation(), - operands, CreateR1TupleSubComputation()); + builder.Conditional(pred, operands, CreateR1TupleAddComputation(), operands, + CreateR1TupleSubComputation()); ComputeAndCompareR1(&builder, {34.0f, 67.0f}, {}, error_spec_); } @@ -326,21 +323,20 @@ XLA_TEST_F(ConditionalOpTest, Parameters2ArrayTrueBranch) { // Test true and false computations that take in 2 array parameters and // predicate is false. 
XLA_TEST_F(ConditionalOpTest, Parameters2ArrayFalseBranch) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto pred = builder.ConstantR0(false); auto operand1 = builder.ConstantR1({24.0f, 56.0f}); auto operand2 = builder.ConstantR1({10.0f, 11.0f}); auto operands = builder.Tuple({operand1, operand2}); - auto result = - builder.Conditional(pred, operands, CreateR1TupleAddComputation(), - operands, CreateR1TupleSubComputation()); + builder.Conditional(pred, operands, CreateR1TupleAddComputation(), operands, + CreateR1TupleSubComputation()); ComputeAndCompareR1(&builder, {14.0f, 45.0f}, {}, error_spec_); } // Test true and false computations that return a tuple of scalars. XLA_TEST_F(ConditionalOpTest, ReturnTupleOfScalars) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto pred = builder.ConstantR0(false); auto operands = builder.Tuple( {builder.ConstantR0(12.2f), builder.ConstantR0(25.6f)}); @@ -356,7 +352,7 @@ XLA_TEST_F(ConditionalOpTest, ReturnTupleOfScalars) { // Test true and false computations that return a tuple of arrays. XLA_TEST_F(ConditionalOpTest, ReturnTupleOfArrays) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto pred = builder.ConstantR0(true); auto operands = builder.Tuple({builder.ConstantR1({12.2f, 15.8f}), builder.ConstantR1({25.6f, 29.2f})}); @@ -373,7 +369,7 @@ XLA_TEST_F(ConditionalOpTest, ReturnTupleOfArrays) { // Test true and false computations that return a tuple of a predicate, a // scalar, and an array. 
XLA_TEST_F(ConditionalOpTest, ReturnTupleofPredicateScalarArray) { - ComputationBuilder true_builder(client_, TestName() + ".true"); + XlaBuilder true_builder(TestName() + ".true"); { true_builder.Parameter(0, empty_tuple_, "tuple"); auto true_pred = true_builder.ConstantR0(true); @@ -384,7 +380,7 @@ XLA_TEST_F(ConditionalOpTest, ReturnTupleofPredicateScalarArray) { auto true_builder_result = true_builder.Build(); EXPECT_IS_OK(true_builder_result.status()); - ComputationBuilder false_builder(client_, TestName() + ".false"); + XlaBuilder false_builder(TestName() + ".false"); { false_builder.Parameter(0, empty_tuple_, "tuple"); auto false_pred = false_builder.ConstantR0(false); @@ -395,7 +391,7 @@ XLA_TEST_F(ConditionalOpTest, ReturnTupleofPredicateScalarArray) { auto false_builder_result = false_builder.Build(); EXPECT_IS_OK(false_builder_result.status()); - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto pred = builder.ConstantR0(true); auto operands = builder.Tuple({}); builder.Conditional(pred, operands, true_builder_result.ConsumeValueOrDie(), @@ -411,7 +407,7 @@ XLA_TEST_F(ConditionalOpTest, ReturnTupleofPredicateScalarArray) { // Test true and false computations that return a nested tuple. 
XLA_TEST_F(ConditionalOpTest, ReturnNestedTuple) { - ComputationBuilder true_builder(client_, TestName() + ".true"); + XlaBuilder true_builder(TestName() + ".true"); { true_builder.Parameter(0, empty_tuple_, "tuple"); auto true_constant1 = true_builder.ConstantR0(12.2f); @@ -424,7 +420,7 @@ XLA_TEST_F(ConditionalOpTest, ReturnNestedTuple) { auto true_builder_result = true_builder.Build(); EXPECT_IS_OK(true_builder_result.status()); - ComputationBuilder false_builder(client_, TestName() + ".false"); + XlaBuilder false_builder(TestName() + ".false"); { false_builder.Parameter(0, empty_tuple_, "tuple"); auto false_constant1 = false_builder.ConstantR0(46.6f); @@ -438,7 +434,7 @@ XLA_TEST_F(ConditionalOpTest, ReturnNestedTuple) { auto false_builder_result = false_builder.Build(); EXPECT_IS_OK(false_builder_result.status()); - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto pred = builder.ConstantR0(false); auto operands = builder.Tuple({}); builder.Conditional(pred, operands, true_builder_result.ConsumeValueOrDie(), @@ -460,16 +456,16 @@ XLA_TEST_F(ConditionalOpTest, ReturnNestedTuple) { // params. 
XLA_TEST_F(ConditionalOpTest, ScalarOperandsFromExternalParams) { Shape r0bool = ShapeUtil::MakeShape(PRED, {}); - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); - ComputationDataHandle pred, operand1, operand2; + XlaOp pred, operand1, operand2; auto pred_arg = CreateR0Parameter(true, 0, "pred", &builder, &pred); auto operand1_param = CreateR0Parameter(56.3f, 1, "operand1", &builder, &operand1); auto operand2_param = CreateR0Parameter(12.7f, 2, "operand2", &builder, &operand2); - auto result = builder.Conditional(pred, operand1, CreateR0CeilComputation(), - operand2, CreateR0FloorComputation()); + builder.Conditional(pred, operand1, CreateR0CeilComputation(), operand2, + CreateR0FloorComputation()); ComputeAndCompareR0( &builder, 57.0f, @@ -480,16 +476,16 @@ XLA_TEST_F(ConditionalOpTest, ScalarOperandsFromExternalParams) { // Test conditional that takes in array operands in the form of external params. XLA_TEST_F(ConditionalOpTest, ArrayOperandsFromExternalParams) { Shape r0bool = ShapeUtil::MakeShape(PRED, {}); - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); - ComputationDataHandle pred, operand1, operand2; + XlaOp pred, operand1, operand2; auto pred_arg = CreateR0Parameter(false, 0, "pred", &builder, &pred); auto operand1_param = CreateR1Parameter({24.3f, 56.7f}, 1, "operand1", &builder, &operand1); auto operand2_param = CreateR1Parameter({10.2f, 11.6f}, 2, "operand2", &builder, &operand2); - auto result = builder.Conditional(pred, operand1, CreateR1CeilComputation(), - operand2, CreateR1FloorComputation()); + builder.Conditional(pred, operand1, CreateR1CeilComputation(), operand2, + CreateR1FloorComputation()); ComputeAndCompareR1( &builder, {10.0f, 11.0f}, @@ -499,7 +495,7 @@ XLA_TEST_F(ConditionalOpTest, ArrayOperandsFromExternalParams) { // Test the case where one conditional is nested within another. 
XLA_TEST_F(ConditionalOpTest, NestedConditionals) { - ComputationBuilder inner_builder(client_, TestName() + ".inner_conditional"); + XlaBuilder inner_builder(TestName() + ".inner_conditional"); { Shape r0bool = ShapeUtil::MakeShape(PRED, {}); Shape tuple_shape = ShapeUtil::MakeTupleShape({r0bool, r0f32_, r0f32_}); @@ -514,7 +510,7 @@ XLA_TEST_F(ConditionalOpTest, NestedConditionals) { auto inner_builder_result = inner_builder.Build(); EXPECT_IS_OK(inner_builder_result.status()); - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto pred1 = builder.ConstantR0(true); auto pred2 = builder.ConstantR0(false); auto operand1 = builder.ConstantR0(1.1f); @@ -529,7 +525,7 @@ XLA_TEST_F(ConditionalOpTest, NestedConditionals) { } XLA_TEST_F(ConditionalOpTest, ConditionalInNestedComputation) { - ComputationBuilder inner_builder(client_, TestName() + ".inner_conditional"); + XlaBuilder inner_builder(TestName() + ".inner_conditional"); { Shape r0bool = ShapeUtil::MakeShape(PRED, {}); Shape tuple_shape = ShapeUtil::MakeTupleShape({r0bool, r0f32_, r0f32_}); @@ -544,7 +540,7 @@ XLA_TEST_F(ConditionalOpTest, ConditionalInNestedComputation) { auto inner_builder_result = inner_builder.Build(); EXPECT_IS_OK(inner_builder_result.status()); - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto pred2 = builder.ConstantR0(false); auto operand1 = builder.ConstantR0(1.1f); auto operand2 = builder.ConstantR0(12.2f); @@ -556,7 +552,7 @@ XLA_TEST_F(ConditionalOpTest, ConditionalInNestedComputation) { // Test a mismatch in the shape of the true operand and true computation. 
XLA_TEST_F(ConditionalOpTest, ShapeMismatch) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto pred = builder.ConstantR0(true); auto operand1 = builder.ConstantR0(56.0f); auto operand2 = builder.ConstantR0(12.0f); @@ -573,27 +569,27 @@ XLA_TEST_F(ConditionalOpTest, ShapeMismatch) { XLA_TEST_F(ConditionalOpTest, SwappedInputsInSequentialConditionals) { Shape tuple_shape = ShapeUtil::MakeTupleShape({r0f32_, r0f32_}); - Computation swapper; + XlaComputation swapper; { - ComputationBuilder builder(client_, TestName() + ".swapper"); + XlaBuilder builder(TestName() + ".swapper"); auto param0 = builder.Parameter(0, tuple_shape, "sp0"); auto x = builder.GetTupleElement(param0, 0); auto y = builder.GetTupleElement(param0, 1); builder.Tuple({y, x}); swapper = builder.Build().ConsumeValueOrDie(); } - Computation forwarder; + XlaComputation forwarder; { - ComputationBuilder builder(client_, TestName() + ".forwarder"); + XlaBuilder builder(TestName() + ".forwarder"); auto param0 = builder.Parameter(0, tuple_shape, "fp0"); auto x = builder.GetTupleElement(param0, 0); auto y = builder.GetTupleElement(param0, 1); builder.Tuple({x, y}); forwarder = builder.Build().ConsumeValueOrDie(); } - Computation main; + XlaComputation main; { - ComputationBuilder builder(client_, TestName() + ".main"); + XlaBuilder builder(TestName() + ".main"); auto param0 = builder.Parameter(0, tuple_shape, "mp0"); auto x = builder.GetTupleElement(param0, 0); auto y = builder.GetTupleElement(param0, 1); @@ -605,7 +601,7 @@ XLA_TEST_F(ConditionalOpTest, SwappedInputsInSequentialConditionals) { } auto test_swap = [&](float a, float b) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto x = builder.ConstantR0(a); auto y = builder.ConstantR0(b); auto tuple_operand = builder.Tuple({x, y}); diff --git a/tensorflow/compiler/xla/tests/convert_test.cc b/tensorflow/compiler/xla/tests/convert_test.cc index 
9a899b79141fbc35fabd8d2e5d4195fb589dd84c..e67a30d76c2fac1240c5f14a97dea421db2fed04 100644 --- a/tensorflow/compiler/xla/tests/convert_test.cc +++ b/tensorflow/compiler/xla/tests/convert_test.cc @@ -36,7 +36,7 @@ namespace { class ConvertTest : public ClientLibraryTestBase { public: - explicit ConvertTest(perftools::gputools::Platform* platform = nullptr) + explicit ConvertTest(se::Platform* platform = nullptr) : ClientLibraryTestBase(platform) { mutable_debug_options()->add_xla_disable_hlo_passes("algsimp"); mutable_debug_options()->add_xla_disable_hlo_passes("inline"); @@ -230,6 +230,43 @@ XLA_TEST_F(ConvertTest, ConvertR1S32ToR1S64) { ComputeAndCompareR1(&builder, expected, {arg_data.get()}); } +XLA_TEST_F(ConvertTest, ConvertR1F32ToR1S64) { + ComputationBuilder builder(client_, TestName()); + // Test cases from compiler_rt library. + std::vector arg{0.0f, + 0.5f, + 0.99f, + 1.0f, + 1.5f, + 1.99f, + 2.0f, + 2.01f, + 2147483648.f, + -0.5f, + -0.99f, + -1.0f, + -1.5f, + -1.99f, + -2.0f, + -2.01f, + 0x1.FFFFFEp+62F, + 0x1.FFFFFCp+62F, + -0x1.FFFFFEp+62F, + -0x1.FFFFFCp+62F}; + std::unique_ptr arg_literal = Literal::CreateR1({arg}); + auto arg_param = builder.Parameter(0, arg_literal->shape(), "arg_param"); + std::unique_ptr arg_data = + client_->TransferToServer(*arg_literal).ConsumeValueOrDie(); + + builder.ConvertElementType(arg_param, S64); + + std::vector expected(arg.size()); + for (int64 i = 0; i < arg.size(); ++i) { + expected[i] = static_cast(arg[i]); + } + ComputeAndCompareR1(&builder, expected, {arg_data.get()}); +} + XLA_TEST_F(ConvertTest, ConvertR1U8ToR1F32) { ComputationBuilder builder(client_, TestName()); auto a = builder.ConstantR1({32, 64}); diff --git a/tensorflow/compiler/xla/tests/convolution_test.cc b/tensorflow/compiler/xla/tests/convolution_test.cc index 72715398dea468d0000144759454c5f8d8673516..947959beb144e1509a77ad2f94b8493de46ba6f2 100644 --- a/tensorflow/compiler/xla/tests/convolution_test.cc +++ 
b/tensorflow/compiler/xla/tests/convolution_test.cc @@ -20,10 +20,10 @@ limitations under the License. #include "tensorflow/compiler/xla/array2d.h" #include "tensorflow/compiler/xla/array4d.h" -#include "tensorflow/compiler/xla/client/computation_builder.h" #include "tensorflow/compiler/xla/client/global_data.h" #include "tensorflow/compiler/xla/client/local_client.h" #include "tensorflow/compiler/xla/client/padding.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" #include "tensorflow/compiler/xla/layout_util.h" #include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/ptr_util.h" @@ -88,12 +88,12 @@ class ForwardPassConvolution_3x3x256_256_OutputZ_Iota : public ConvolutionTest { ASSERT_EQ(2, arhs->width()); ASSERT_EQ(2, arhs->height()); - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto lhs = builder.ConstantR4FromArray4D(*alhs); auto rhs = builder.ConstantR4FromArray4D(*arhs); - auto conv = builder.Conv(lhs, rhs, {1, 1}, Padding::kValid); + builder.Conv(lhs, rhs, {1, 1}, Padding::kValid); - ComputeAndCompare(&builder, conv, {}, error_spec_); + ComputeAndCompare(&builder, {}, error_spec_); } }; @@ -106,12 +106,12 @@ template class Convolve_1x1x1x2_1x1x1x2_Valid : public ConvolutionTest { public: void RunTest() { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); Shape input_shape = ShapeUtil::MakeShapeWithType({1, 1, 1, 2}); Shape filter_shape = ShapeUtil::MakeShapeWithType({1, 1, 1, 2}); auto input = builder.Parameter(0, input_shape, "input"); auto filter = builder.Parameter(1, filter_shape, "filter"); - auto conv = builder.Conv(input, filter, {1, 1}, Padding::kValid); + builder.Conv(input, filter, {1, 1}, Padding::kValid); Array4D input_data(1, 1, 1, 2); input_data.FillWithYX(Array2D({ @@ -122,7 +122,7 @@ class Convolve_1x1x1x2_1x1x1x2_Valid : public ConvolutionTest { {5.0f, 6.0f}, })); - ComputeAndCompare(&builder, conv, + 
ComputeAndCompare(&builder, {std::move(*Literal::CreateFromArray(input_data)), std::move(*Literal::CreateFromArray(filter_data))}, error_spec_); @@ -137,12 +137,12 @@ template class Convolve_1x1x4x4_1x1x2x2_Valid : public ConvolutionTest { public: void RunTest() { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); Shape input_shape = ShapeUtil::MakeShapeWithType({1, 1, 4, 4}); Shape filter_shape = ShapeUtil::MakeShapeWithType({1, 1, 2, 2}); auto input = builder.Parameter(0, input_shape, "input"); auto filter = builder.Parameter(1, filter_shape, "filter"); - auto conv = builder.Conv(input, filter, {1, 1}, Padding::kValid); + builder.Conv(input, filter, {1, 1}, Padding::kValid); Array4D input_data(1, 1, 4, 4); input_data.FillWithYX(Array2D({ @@ -156,7 +156,7 @@ class Convolve_1x1x4x4_1x1x2x2_Valid : public ConvolutionTest { {5.0f, 6.0f}, {7.0f, 8.0f}, })); - ComputeAndCompare(&builder, conv, + ComputeAndCompare(&builder, {std::move(*Literal::CreateFromArray(input_data)), std::move(*Literal::CreateFromArray(filter_data))}, error_spec_); @@ -171,12 +171,12 @@ template class Convolve_1x1x4x4_1x1x2x2_Same : public ConvolutionTest { public: void RunTest() { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); Shape input_shape = ShapeUtil::MakeShapeWithType({1, 1, 4, 4}); Shape filter_shape = ShapeUtil::MakeShapeWithType({1, 1, 2, 2}); auto input = builder.Parameter(0, input_shape, "input"); auto filter = builder.Parameter(1, filter_shape, "filter"); - auto conv = builder.Conv(input, filter, {1, 1}, Padding::kSame); + builder.Conv(input, filter, {1, 1}, Padding::kSame); Array4D input_data(1, 1, 4, 4); input_data.FillWithYX(Array2D({ @@ -191,7 +191,7 @@ class Convolve_1x1x4x4_1x1x2x2_Same : public ConvolutionTest { {7.0f, 8.0f}, })); - ComputeAndCompare(&builder, conv, + ComputeAndCompare(&builder, {std::move(*Literal::CreateFromArray(input_data)), std::move(*Literal::CreateFromArray(filter_data))}, 
error_spec_); @@ -207,12 +207,12 @@ template class Convolve_1x1x4x4_1x1x3x3_Same : public ConvolutionTest { public: void RunTest() { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); Shape input_shape = ShapeUtil::MakeShapeWithType({1, 1, 4, 4}); Shape filter_shape = ShapeUtil::MakeShapeWithType({1, 1, 3, 3}); auto input = builder.Parameter(0, input_shape, "input"); auto filter = builder.Parameter(1, filter_shape, "filter"); - auto conv = builder.Conv(input, filter, {1, 1}, Padding::kSame); + builder.Conv(input, filter, {1, 1}, Padding::kSame); Array4D input_data(1, 1, 4, 4); input_data.FillWithYX(Array2D({{1.0f, 2.0f, 3.0f, 4.0f}, @@ -223,7 +223,7 @@ class Convolve_1x1x4x4_1x1x3x3_Same : public ConvolutionTest { filter_data.FillWithYX(Array2D( {{5.0f, 6.0f, 7.0f}, {8.0f, 9.0f, 10.0f}, {11.0f, 12.0f, 13.0f}})); // clang-format on - ComputeAndCompare(&builder, conv, + ComputeAndCompare(&builder, {std::move(*Literal::CreateFromArray(input_data)), std::move(*Literal::CreateFromArray(filter_data))}, error_spec_); @@ -234,7 +234,7 @@ TYPED_TEST_CASE(Convolve_1x1x4x4_1x1x3x3_Same, TestTypes); TYPED_TEST(Convolve_1x1x4x4_1x1x3x3_Same, Types) { this->RunTest(); } XLA_TEST_F(ConvolutionTest, Convolve1D_1x2x5_1x2x2_Valid) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); { Shape input_shape = ShapeUtil::MakeShape(F32, {1, 2, 5}); Shape filter_shape = ShapeUtil::MakeShape(F32, {1, 2, 2}); @@ -264,7 +264,7 @@ template class Convolve1D_1x2x5_1x2x2_WithRHSDilation : public ConvolutionTest { public: void RunTest() { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); { Shape input_shape = ShapeUtil::MakeShapeWithType({1, 2, 5}); Shape filter_shape = ShapeUtil::MakeShapeWithType({1, 2, 2}); @@ -300,7 +300,7 @@ TYPED_TEST_CASE(Convolve1D_1x2x5_1x2x2_WithRHSDilation, TestTypes); TYPED_TEST(Convolve1D_1x2x5_1x2x2_WithRHSDilation, Types) { this->RunTest(); } 
XLA_TEST_F(ConvolutionTest, Convolve1D_1x2x5_1x2x2_WithLHSDilation) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); { Shape input_shape = ShapeUtil::MakeShape(F32, {1, 2, 5}); Shape filter_shape = ShapeUtil::MakeShape(F32, {1, 2, 2}); @@ -331,7 +331,7 @@ XLA_TEST_F(ConvolutionTest, Convolve1D_1x2x5_1x2x2_WithLHSDilation) { } XLA_TEST_F(ConvolutionTest, Convolve1D_1x2x5_1x2x2_WithLHSAndRHSDilation) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); { Shape input_shape = ShapeUtil::MakeShape(F32, {1, 2, 5}); Shape filter_shape = ShapeUtil::MakeShape(F32, {1, 2, 2}); @@ -365,7 +365,7 @@ template class Convolve1D_1x2x5_1x2x2_WithPadding : public ConvolutionTest { public: void RunTest() { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); { Shape input_shape = ShapeUtil::MakeShapeWithType({1, 2, 5}); Shape filter_shape = ShapeUtil::MakeShapeWithType({1, 2, 2}); @@ -402,7 +402,7 @@ TYPED_TEST_CASE(Convolve1D_1x2x5_1x2x2_WithPadding, TestTypes); TYPED_TEST(Convolve1D_1x2x5_1x2x2_WithPadding, Types) { this->RunTest(); } XLA_TEST_F(ConvolutionTest, Convolve3D_1x4x2x3x3_2x2x2x3x3_Valid) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); std::vector input_dims = {1, 4, 2, 3, 3}; std::vector filter_dims = {2, 2, 2, 3, 3}; Shape input_shape = ShapeUtil::MakeShape(F32, input_dims); @@ -469,7 +469,7 @@ template class Convolve2D_1x3x3x5_3x3x5x5_Valid : public ConvolutionTest { public: void RunTest() { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); std::vector input_dims = {1, 3, 3, 5}; std::vector filter_dims = {3, 3, 5, 3}; Shape input_shape = ShapeUtil::MakeShapeWithType(input_dims); @@ -537,7 +537,7 @@ XLA_TEST_P(ConvolveWithAndWithoutCanonicalization, execution_options_.mutable_debug_options()->add_xla_disable_hlo_passes( "convolution-canonicalization"); } - ComputationBuilder builder(client_, 
TestName()); + XlaBuilder builder(TestName()); Shape input_shape = ShapeUtil::MakeShape(F32, {4, 29}); Shape filter_shape = ShapeUtil::MakeShape(F32, {4, 10}); @@ -551,8 +551,7 @@ XLA_TEST_P(ConvolveWithAndWithoutCanonicalization, dnums.set_kernel_output_feature_dimension(1); dnums.set_output_batch_dimension(0); dnums.set_output_feature_dimension(1); - auto conv = builder.ConvWithGeneralDimensions(input, filter, {}, - Padding::kValid, dnums); + builder.ConvWithGeneralDimensions(input, filter, {}, Padding::kValid, dnums); Array2D param0(4, 29); param0.FillUnique(); @@ -563,7 +562,7 @@ XLA_TEST_P(ConvolveWithAndWithoutCanonicalization, Array2D expected_result(29, 10); expected_result.Fill(0); - ComputeAndCompare(&builder, conv, + ComputeAndCompare(&builder, {std::move(*Literal::CreateFromArray(param0)), std::move(*Literal::CreateFromArray(param1))}, error_spec_); @@ -587,7 +586,7 @@ class Convolve1D1WindowTestBase protected: template void TestImpl() { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); int64 input_feature = GetParam().input_feature; int64 output_feature = GetParam().output_feature; int64 batch = GetParam().batch; @@ -724,12 +723,12 @@ INSTANTIATE_TEST_CASE_P( #endif XLA_TEST_F(ConvolutionTest, Convolve_bf16_1x1x1x2_1x1x1x2_Valid) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); Shape input_shape = ShapeUtil::MakeShape(BF16, {1, 1, 1, 2}); Shape filter_shape = ShapeUtil::MakeShape(BF16, {1, 1, 1, 2}); auto input = builder.Parameter(0, input_shape, "input"); auto filter = builder.Parameter(1, filter_shape, "filter"); - auto conv = builder.Conv(input, filter, {1, 1}, Padding::kValid); + builder.Conv(input, filter, {1, 1}, Padding::kValid); Array4D input_data(1, 1, 1, 2); input_data.FillWithYX(Array2D({ @@ -740,11 +739,34 @@ XLA_TEST_F(ConvolutionTest, Convolve_bf16_1x1x1x2_1x1x1x2_Valid) { {bfloat16(5), bfloat16(6)}, })); - ComputeAndCompare(&builder, conv, + 
ComputeAndCompare(&builder, {std::move(*Literal::CreateFromArray(input_data)), std::move(*Literal::CreateFromArray(filter_data))}, error_spec_); } +// Check that GPU convs still work if the CudnnAlgorithmPicker pass is disabled. +// (We run this test on all platforms, because, what the heck.) +XLA_TEST_F(ConvolutionTest, NoCudnnAlgorithmPicker) { + execution_options_.mutable_debug_options()->add_xla_disable_hlo_passes( + "cudnn-convolution-algorithm-picker"); + + XlaBuilder builder(TestName()); + Shape input_shape = ShapeUtil::MakeShape(F32, {1, 1, 1, 2}); + Shape filter_shape = ShapeUtil::MakeShape(F32, {1, 1, 1, 2}); + auto input = builder.Parameter(0, input_shape, "input"); + auto filter = builder.Parameter(1, filter_shape, "filter"); + builder.Conv(input, filter, {1, 1}, Padding::kValid); + + Array4D input_data(1, 1, 1, 2); + input_data.FillIota(0); + Array4D filter_data(1, 1, 1, 2); + filter_data.FillIota(10); + + ComputeAndCompare(&builder, + {std::move(*Literal::CreateFromArray(input_data)), + std::move(*Literal::CreateFromArray(filter_data))}); +} + } // namespace } // namespace xla diff --git a/tensorflow/compiler/xla/tests/custom_call_test.cc b/tensorflow/compiler/xla/tests/custom_call_test.cc index 2d847a66b0ae7c8f09fa0cb181a4c84ea99be5b1..b43d5c9ff5d75ee0e1b3c9ceb2bc295e631ac107 100644 --- a/tensorflow/compiler/xla/tests/custom_call_test.cc +++ b/tensorflow/compiler/xla/tests/custom_call_test.cc @@ -134,9 +134,9 @@ class CustomCallClientAPITest : public ClientLibraryTestBase {}; // When using the client API, CustomCall targets can't begin with '$' -- these // are reserved for internal use. 
XLA_TEST_F(CustomCallClientAPITest, IllegalCustomCallTarget) { - ComputationBuilder builder(client_, TestName()); - auto call = builder.CustomCall("$illegal", /*operands=*/{}, - ShapeUtil::MakeShape(F32, {1})); + XlaBuilder builder(TestName()); + builder.CustomCall("$illegal", /*operands=*/{}, + ShapeUtil::MakeShape(F32, {1})); StatusOr> result = Execute(&builder, /*arguments=*/{}); diff --git a/tensorflow/compiler/xla/tests/dot_operation_test.cc b/tensorflow/compiler/xla/tests/dot_operation_test.cc index 7b994a4c172cafee53ede9bfd4f30b0e0c9888d5..c4031dfee593a13af6a5db15e43ed7bc418603c5 100644 --- a/tensorflow/compiler/xla/tests/dot_operation_test.cc +++ b/tensorflow/compiler/xla/tests/dot_operation_test.cc @@ -50,6 +50,13 @@ using TypesF16F32 = ::testing::Types; using TypesF16F32F64 = ::testing::Types; using TypesF16F32F64CF64 = ::testing::Types; +#elif !defined(XLA_BACKEND_DOES_NOT_SUPPORT_FLOAT16) && \ + defined(XLA_BACKEND_DOES_NOT_SUPPORT_FLOAT64) && \ + defined(XLA_BACKEND_DOES_NOT_SUPPORT_COMPLEX) +using TypesF16F32 = ::testing::Types; +using TypesF16F32F64 = ::testing::Types; +using TypesF16F32F64CF64 = + ::testing::Types; #else #error "Situation not handled yet" #endif diff --git a/tensorflow/compiler/xla/tests/dynamic_ops_test.cc b/tensorflow/compiler/xla/tests/dynamic_ops_test.cc index 4f354e6aefe70a51c09be1c0ca151af2bb9f0a2c..021fbcedb994eb4ea86a9a2da691900a56cd45f2 100644 --- a/tensorflow/compiler/xla/tests/dynamic_ops_test.cc +++ b/tensorflow/compiler/xla/tests/dynamic_ops_test.cc @@ -18,9 +18,8 @@ limitations under the License. 
#include "tensorflow/compiler/xla/array2d.h" #include "tensorflow/compiler/xla/client/client_library.h" -#include "tensorflow/compiler/xla/client/computation.h" -#include "tensorflow/compiler/xla/client/computation_builder.h" #include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" #include "tensorflow/compiler/xla/reference_util.h" #include "tensorflow/compiler/xla/service/device_memory_allocator.h" #include "tensorflow/compiler/xla/service/local_service.h" @@ -36,8 +35,6 @@ limitations under the License. #include "tensorflow/core/platform/test_benchmark.h" #include "tensorflow/core/platform/types.h" -namespace se = ::perftools::gputools; - namespace xla { namespace { @@ -112,10 +109,8 @@ class DynamicSliceTest : public ClientLibraryTestBase { void TestR3Wrap() { // Slice at dimension boundaries, but with sizes that cause indices to wrap. RunR3( - {{{1, 2}, {3, 4}, {5, 6}}, - {{7, 8}, {9, 10}, {11, 12}}}, - {0, 2, 1}, {2, 1, 2}, - {{{6, 5}}, {{12, 11}}}); + {{{1, 2}, {3, 4}, {5, 6}}, {{7, 8}, {9, 10}, {11, 12}}}, {0, 2, 1}, + {2, 1, 2}, {{{6, 5}}, {{12, 11}}}); } template @@ -137,9 +132,9 @@ class DynamicSliceTest : public ClientLibraryTestBase { ->Convert(primitive_util::NativeToPrimitiveType()) .ValueOrDie()); - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); // Initialize and transfer dynamic slice start indices parameter. - ComputationDataHandle starts; + XlaOp starts; std::unique_ptr start_data = CreateR1Parameter( slice_starts, 0, "slice_starts", &builder, &starts); // Build dynamic slice computation. @@ -163,9 +158,9 @@ class DynamicSliceTest : public ClientLibraryTestBase { ->Convert(primitive_util::NativeToPrimitiveType()) .ValueOrDie()); - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); // Initialize and transfer dynamic slice start indices parameter. 
- ComputationDataHandle starts; + XlaOp starts; std::unique_ptr start_data = CreateR1Parameter( slice_starts, 0, "slice_starts", &builder, &starts); // Build dynamic slice computation. @@ -189,9 +184,9 @@ class DynamicSliceTest : public ClientLibraryTestBase { ->Convert(primitive_util::NativeToPrimitiveType()) .ValueOrDie()); - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); // Initialize and transfer dynamic slice start indices parameter. - ComputationDataHandle starts; + XlaOp starts; std::unique_ptr start_data = CreateR1Parameter( slice_starts, 0, "slice_starts", &builder, &starts); // Build dynamic slice computation. @@ -281,6 +276,15 @@ XLA_TEST_F(DynamicSliceTest, Int32R3Pred) { class DynamicUpdateSliceTest : public ClientLibraryTestBase { protected: + template + void TestR0() { + // Disable algebraic simplifier, otherwise the op will be replaced by a + // constant. + execution_options_.mutable_debug_options()->add_xla_disable_hlo_passes( + "algsimp"); + RunR0(0, 123, {}, 123); + } + template void TestR1() { // Slice at dimension start. @@ -341,6 +345,35 @@ class DynamicUpdateSliceTest : public ClientLibraryTestBase { {1, 2, 1}, {{{1, 2}, {3, 4}, {5, 6}}, {{7, 15}, {9, 10}, {11, 13}}}); } + template + void RunR0(int input_value_int, int update_value_int, + const std::vector slice_starts, int expected_value_int) { + Literal input_value = + std::move(*Literal::CreateR0(input_value_int) + ->Convert(primitive_util::NativeToPrimitiveType()) + .ValueOrDie()); + Literal update_value = + std::move(*Literal::CreateR0(update_value_int) + ->Convert(primitive_util::NativeToPrimitiveType()) + .ValueOrDie()); + Literal expected_value = + std::move(*Literal::CreateR0(expected_value_int) + ->Convert(primitive_util::NativeToPrimitiveType()) + .ValueOrDie()); + + ComputationBuilder builder(client_, TestName()); + // Initialize and transfer dynamic slice start indices parameter. 
+ ComputationDataHandle starts; + std::unique_ptr start_data = CreateR1Parameter( + slice_starts, 0, "slice_starts", &builder, &starts); + // Build dynamic slice computation. + auto input = builder.ConstantLiteral(input_value); + auto update = builder.ConstantLiteral(update_value); + builder.DynamicUpdateSlice(input, update, starts); + // Run computation and compare against expected values. + ComputeAndCompareLiteral(&builder, expected_value, {start_data.get()}); + } + template void RunR1(tensorflow::gtl::ArraySlice input_values_int, tensorflow::gtl::ArraySlice update_values_int, @@ -359,9 +392,9 @@ class DynamicUpdateSliceTest : public ClientLibraryTestBase { ->Convert(primitive_util::NativeToPrimitiveType()) .ValueOrDie()); - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); // Initialize and transfer dynamic slice start indices parameter. - ComputationDataHandle starts; + XlaOp starts; std::unique_ptr start_data = CreateR1Parameter( slice_starts, 0, "slice_starts", &builder, &starts); // Build dynamic slice computation. @@ -390,9 +423,9 @@ class DynamicUpdateSliceTest : public ClientLibraryTestBase { ->Convert(primitive_util::NativeToPrimitiveType()) .ValueOrDie()); - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); // Initialize and transfer dynamic slice start indices parameter. - ComputationDataHandle starts; + XlaOp starts; std::unique_ptr start_data = CreateR1Parameter( slice_starts, 0, "slice_starts", &builder, &starts); // Build dynamic slice computation. @@ -421,9 +454,9 @@ class DynamicUpdateSliceTest : public ClientLibraryTestBase { ->Convert(primitive_util::NativeToPrimitiveType()) .ValueOrDie()); - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); // Initialize and transfer dynamic slice start indices parameter. 
- ComputationDataHandle starts; + XlaOp starts; std::unique_ptr start_data = CreateR1Parameter( slice_starts, 0, "slice_starts", &builder, &starts); // Build dynamic slice computation. @@ -474,13 +507,13 @@ class DynamicUpdateSliceTest : public ClientLibraryTestBase { } // Build dynamic slice computation. - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); // Initialize and transfer input parameter. - ComputationDataHandle input; + XlaOp input; std::unique_ptr input_data = CreateR3Parameter(input_values, 0, "input_values", &builder, &input); // Initialize and transfer update parameter. - ComputationDataHandle update; + XlaOp update; std::unique_ptr update_data = CreateR3Parameter( update_values, 1, "update_values", &builder, &update); auto starts = builder.ConstantR1({index, 0, 0}); @@ -500,6 +533,11 @@ class DynamicUpdateSliceTest : public ClientLibraryTestBase { } }; +XLA_TEST_F(DynamicUpdateSliceTest, Int32R0BF16) { TestR0(); } +XLA_TEST_F(DynamicUpdateSliceTest, Int32R0) { TestR0(); } +XLA_TEST_F(DynamicUpdateSliceTest, Int64R0) { TestR0(); } +XLA_TEST_F(DynamicUpdateSliceTest, UInt64R0) { TestR0(); } + // TODO(b/71820067): The CPU parallel backend failed for this on 2018-01-10. 
XLA_TEST_F(DynamicUpdateSliceTest, DISABLED_ON_CPU_PARALLEL(Int32R1BF16)) { TestR1(); @@ -672,7 +710,7 @@ void BM_DynamicSlice(int num_iters) { TransferManager::GetForPlatform(platform).ValueOrDie(); int device_ordinal = client->default_device_ordinal(); - ComputationBuilder builder(client, "DynamicSlice"); + XlaBuilder builder("DynamicSlice"); // Create input as a constant: shape [1, 2, 3, 4] auto input_literal = Literal::CreateR4( @@ -697,11 +735,11 @@ void BM_DynamicSlice(int num_iters) { auto start_indices_literal = Literal::CreateR1({0, 1, 2, 3}); ASSERT_IS_OK(transfer_manager->TransferLiteralToDevice( - executors[device_ordinal], *start_indices_literal, *buffer)); + executors[device_ordinal], *start_indices_literal, buffer)); std::unique_ptr executable = client - ->Compile(computation, {&buffer->on_host_shape()}, + ->Compile(computation, {&buffer.on_host_shape()}, ExecutableBuildOptions()) .ConsumeValueOrDie(); @@ -710,14 +748,14 @@ void BM_DynamicSlice(int num_iters) { options.set_allocator(&allocator); const int kWarmups = 2; for (int i = 0; i < kWarmups; ++i) { - auto result = executable->Run({buffer.get()}, options); + auto result = executable->Run({&buffer}, options); ASSERT_TRUE(result.ok()); } // Run benchmark. tensorflow::testing::StartTiming(); for (int i = 0; i < num_iters; ++i) { - auto result = executable->Run({buffer.get()}, options); + auto result = executable->Run({&buffer}, options); ASSERT_TRUE(result.ok()); } } diff --git a/tensorflow/compiler/xla/tests/fusion_test.cc b/tensorflow/compiler/xla/tests/fusion_test.cc index a292eab1d198fbf69c6dc81c780487ea46756f72..c7f64d856097350a948f6b15cefb147aae7cee18 100644 --- a/tensorflow/compiler/xla/tests/fusion_test.cc +++ b/tensorflow/compiler/xla/tests/fusion_test.cc @@ -50,8 +50,6 @@ limitations under the License. 
using tensorflow::gtl::ArraySlice; -namespace se = ::perftools::gputools; - namespace xla { namespace { @@ -796,19 +794,19 @@ void BM_ParallelFusion(int num_iters) { // Transfer literals to device. auto param0_literal = Literal::CreateR2F32Linspace(1.0, 2.0, param0_dim0, param0_dim1); - std::unique_ptr buffer0 = + ShapedBuffer buffer0 = client->LiteralToShapedBuffer(*param0_literal, device_ordinal) .ConsumeValueOrDie(); auto param1_literal = Literal::CreateR2F32Linspace(1.0, 2.0, param1_dim0, param1_dim1); - std::unique_ptr buffer1 = + ShapedBuffer buffer1 = client->LiteralToShapedBuffer(*param1_literal, device_ordinal) .ConsumeValueOrDie(); auto param2_literal = Literal::CreateR2F32Linspace(1.0, 2.0, param2_dim0, param2_dim1); - std::unique_ptr buffer2 = + ShapedBuffer buffer2 = client->LiteralToShapedBuffer(*param2_literal, device_ordinal) .ConsumeValueOrDie(); @@ -816,8 +814,8 @@ void BM_ParallelFusion(int num_iters) { std::unique_ptr executable = client ->Compile(computation, - {&buffer0->on_host_shape(), &buffer1->on_host_shape(), - &buffer2->on_host_shape()}, + {&buffer0.on_host_shape(), &buffer1.on_host_shape(), + &buffer2.on_host_shape()}, ExecutableBuildOptions()) .ConsumeValueOrDie(); @@ -838,8 +836,7 @@ void BM_ParallelFusion(int num_iters) { // Run some warm-up executions. 
const int kWarmups = 2; for (int i = 0; i < kWarmups; ++i) { - auto result = - executable->Run({buffer0.get(), buffer1.get(), buffer2.get()}, options); + auto result = executable->Run({&buffer0, &buffer1, &buffer2}, options); ASSERT_TRUE(result.ok()); } @@ -852,8 +849,7 @@ void BM_ParallelFusion(int num_iters) { tensorflow::testing::UseRealTime(); tensorflow::testing::StartTiming(); for (int i = 0; i < num_iters; ++i) { - auto result = - executable->Run({buffer0.get(), buffer1.get(), buffer2.get()}, options); + auto result = executable->Run({&buffer0, &buffer1, &buffer2}, options); ASSERT_TRUE(result.ok()); } } diff --git a/tensorflow/compiler/xla/tests/gather_operation_test.cc b/tensorflow/compiler/xla/tests/gather_operation_test.cc index 9db68ff7a6dcbd9204fb2b3a37734a9aaed35dfd..90496d55e60b4f45fc2d46b2746f94d775cf9f94 100644 --- a/tensorflow/compiler/xla/tests/gather_operation_test.cc +++ b/tensorflow/compiler/xla/tests/gather_operation_test.cc @@ -405,7 +405,7 @@ class GatherClientLibraryTest : public ClientLibraryTestBase {}; // GPU and CPU_PARALLEL. XLA_TEST_F(GatherClientLibraryTest, DISABLED_ON_CPU_PARALLEL(DISABLED_ON_GPU(Basic))) { - // We create this HLO, but using the ComputationBuilder API. + // We create this HLO, but using the XlaBuilder API. 
// // ENTRY main { // operand = s32[3,3] parameter(0) @@ -418,7 +418,7 @@ XLA_TEST_F(GatherClientLibraryTest, // window_bounds={1, 3} // } - ComputationBuilder builder(client_, "gather_basic"); + XlaBuilder builder("gather_basic"); Shape operand_shape = ShapeUtil::MakeShape(S32, {3, 3}); Shape indices_shape = ShapeUtil::MakeShape(S32, {2}); @@ -443,8 +443,8 @@ XLA_TEST_F(GatherClientLibraryTest, client_->GetDeviceHandles(1)); xla::ExecutionOptions execution_options = CreateDefaultExecutionOptions(); *execution_options.add_device_handles() = devices[0]; - TF_ASSERT_OK_AND_ASSIGN(Computation computation, builder.Build()); - std::vector computation_instances = { + TF_ASSERT_OK_AND_ASSIGN(XlaComputation computation, builder.Build()); + std::vector computation_instances = { {computation, {operand_arg.get(), indices_arg.get()}, execution_options, diff --git a/tensorflow/compiler/xla/tests/hlo_test_base.cc b/tensorflow/compiler/xla/tests/hlo_test_base.cc index e574644dea7c1ba144ba87fbeb7f28cc52312e26..9984aba089be89072c5a49f93df68ec805658b68 100644 --- a/tensorflow/compiler/xla/tests/hlo_test_base.cc +++ b/tensorflow/compiler/xla/tests/hlo_test_base.cc @@ -35,8 +35,6 @@ limitations under the License. 
#include "tensorflow/core/platform/test.h" #include "tensorflow/core/platform/types.h" -namespace se = ::perftools::gputools; - namespace xla { namespace { @@ -91,7 +89,7 @@ HloTestBase::HloTestBase() HloTestBase::HloTestBase(se::Platform* test_platform, se::Platform* reference_platform) : test_runner_(test_platform), reference_runner_(reference_platform) { - hlo_verifier_ = MakeUnique(); + hlo_verifier_ = MakeUnique(/*allow_mixed_precision=*/true); } /* static */ @@ -115,11 +113,13 @@ StatusOr> HloTestBase::Execute( return test_runner_.Execute(std::move(module), arguments); } -StatusOr> HloTestBase::ExecuteNoHloPasses( +std::unique_ptr HloTestBase::ExecuteNoHloPasses( std::unique_ptr module, tensorflow::gtl::ArraySlice arguments) { - return test_runner_.Execute(std::move(module), arguments, - /*run_hlo_passes=*/false); + return test_runner_ + .Execute(std::move(module), arguments, + /*run_hlo_passes=*/false) + .ValueOrDie(); } std::unique_ptr HloTestBase::ExecuteAndTransfer( @@ -142,8 +142,7 @@ StatusOr> HloTestBase::MakeReferenceModule( "reference preprocessor must not modify the program shape"); } } - TF_RETURN_IF_ERROR(VerifyHloModule(*reference_runner_.backend().platform(), - reference_module.get())); + TF_RETURN_IF_ERROR(hlo_verifier_->Run(reference_module.get()).status()); return std::move(reference_module); } @@ -151,8 +150,7 @@ StatusOr<::testing::AssertionResult> HloTestBase::RunAndCompareInternal( std::unique_ptr module, const ArraySlice arguments, const optional& error, bool run_hlo_passes, const std::function& reference_preprocessor) { - TF_RETURN_IF_ERROR( - VerifyHloModule(*test_runner_.backend().platform(), module.get())); + TF_RETURN_IF_ERROR(hlo_verifier_->Run(module.get()).status()); TF_ASSIGN_OR_RETURN(auto reference_module, MakeReferenceModule(*module, reference_preprocessor)); diff --git a/tensorflow/compiler/xla/tests/hlo_test_base.h b/tensorflow/compiler/xla/tests/hlo_test_base.h index 
3e8e2360bb3a87e127920cd222803c0f7b9161f4..79fcea9403e6e2dfc989c86ce8c6609f44acd12b 100644 --- a/tensorflow/compiler/xla/tests/hlo_test_base.h +++ b/tensorflow/compiler/xla/tests/hlo_test_base.h @@ -76,8 +76,7 @@ class HloTestBase : public ::testing::Test { // If your test doesn't use interpreter as the reference backend, you can use // this constructor. Note that your test target is responsible for linking in // both needed backends. - HloTestBase(::perftools::gputools::Platform* test_platform, - ::perftools::gputools::Platform* reference_platform); + HloTestBase(se::Platform* test_platform, se::Platform* reference_platform); ~HloTestBase() override {} @@ -100,7 +99,7 @@ class HloTestBase : public ::testing::Test { // Same as above, except the module will be executed without running any HLO // passes on it. - StatusOr> ExecuteNoHloPasses( + std::unique_ptr ExecuteNoHloPasses( std::unique_ptr module, tensorflow::gtl::ArraySlice arguments); diff --git a/tensorflow/compiler/xla/tests/literal_test_util.cc b/tensorflow/compiler/xla/tests/literal_test_util.cc index 81630df34c58526b6d41492b2b4b3892a02a21c2..c28f79ae386670ca80d603a42f6629dfd30e0bc9 100644 --- a/tensorflow/compiler/xla/tests/literal_test_util.cc +++ b/tensorflow/compiler/xla/tests/literal_test_util.cc @@ -39,6 +39,11 @@ limitations under the License. 
namespace xla { +using ::tensorflow::strings::Appendf; +using ::tensorflow::strings::Printf; +using ::tensorflow::strings::StrAppend; +using ::tensorflow::strings::StrCat; + /* static */ ::testing::AssertionResult LiteralTestUtil::EqualShapes( const Shape& expected, const Shape& actual) { if (ShapeUtil::IsTuple(expected) != ShapeUtil::IsTuple(actual)) { @@ -173,14 +178,11 @@ template auto lhs_double = static_cast(lhs); auto rhs_double = static_cast(rhs); if (ulhs != urhs) { - return ::testing::AssertionFailure() << tensorflow::strings::Printf( + return ::testing::AssertionFailure() << Printf( "floating values are not bitwise-equal; and equality testing " "was requested: %s=%g=%a vs %s=%g=%a", - tensorflow::strings::StrCat(tensorflow::strings::Hex(ulhs)) - .c_str(), - lhs_double, lhs_double, - tensorflow::strings::StrCat(tensorflow::strings::Hex(urhs)) - .c_str(), + StrCat(tensorflow::strings::Hex(ulhs)).c_str(), lhs_double, + lhs_double, StrCat(tensorflow::strings::Hex(urhs)).c_str(), rhs_double, rhs_double); } return ::testing::AssertionSuccess(); @@ -264,9 +266,7 @@ bool ExpectLiteralsEqual(const Literal& expected, const Literal& actual, << "expected:\n" << expected.ToString() << "\n\tvs actual:\n" << actual.ToString() - << (message.empty() - ? "" - : tensorflow::strings::StrCat("\nmessage: ", message)); + << (message.empty() ? "" : StrCat("\nmessage: ", message)); } /* static */ void LiteralTestUtil::ExpectNotEqual(const Literal& expected, @@ -321,9 +321,8 @@ bool ExpectLiteralsEqual(const Literal& expected, const Literal& actual, case TUPLE: { bool tuple_match = true; for (int i = 0; i < ShapeUtil::TupleElementCount(expected.shape()); ++i) { - SCOPED_TRACE(tensorflow::strings::StrCat( - "Tuple index ", i, " in ", - ShapeUtil::HumanString(expected.shape()))); + SCOPED_TRACE(StrCat("Tuple index ", i, " in ", + ShapeUtil::HumanString(expected.shape()))); // Create LiteralViews of the expected and actual elements. 
auto result = Equal(LiteralView::Create(expected, {i}), @@ -350,227 +349,301 @@ bool ExpectLiteralsEqual(const Literal& expected, const Literal& actual, namespace { +// Gets the total element count. For tuples, this is not the count of tuple +// elements, but the sum of elements of each tuple element. +int64 RecursiveElementCount(const Shape& shape) { + if (ShapeUtil::IsTuple(shape)) { + const int64 tuple_elements = ShapeUtil::TupleElementCount(shape); + int64 total = 0; + for (int64 i = 0; i < tuple_elements; ++i) { + total += RecursiveElementCount(ShapeUtil::GetTupleElementShape(shape, i)); + } + return total; + } else { + return ShapeUtil::ElementsIn(shape); + } +} + +// Calling ToString on a literal with over 100 million elements takes around +// 3 minutes. The utility of printing a literal with >1000 elements is +// questionable, especially when writing the Literal proto to disk is orders +// of magnitude faster. +string TruncateHugeLiteral(const Literal& literal) { + return RecursiveElementCount(literal.shape()) < 1000 + ? literal.ToString() + : "[TRUNCATED, Literal with more than 1000 values]"; +} + +// Returns whether the actual and expected values are mismatched with respect to +// nans. 'relaxed_nans' is interpreted as in xla::ErrorSpec. +template +bool NanMismatch(NativeT expected, NativeT actual, bool relaxed_nans) { + if (relaxed_nans) { + return !std::isnan(expected) && std::isnan(actual); + } else { + return std::isnan(expected) != std::isnan(actual); + } +} + +template <> +bool NanMismatch(complex64 expected, complex64 actual, + bool relaxed_nans) { + return NanMismatch(expected.real(), actual.real(), relaxed_nans) || + NanMismatch(expected.imag(), actual.imag(), relaxed_nans); +} + +template <> +bool NanMismatch(half expected, half actual, bool relaxed_nans) { + return NanMismatch(static_cast(expected), + static_cast(actual), relaxed_nans); +} + +// Converts the given floating-point value to a string. 
+template +string FpValueToString(NativeT value) { + return Printf("%8.4g", static_cast(value)); +} + +template <> +string FpValueToString(complex64 value) { + return Printf("%8.4g + %8.4fi", value.real(), value.imag()); +} + +// Returns the absolute value of the given floating point value. This function +// is used instead of std::abs directly in order to allow type-dependent +// implementations for NearComparator. +template +float FpAbsoluteValue(NativeT value) { + return std::abs(value); +} + +template <> +float FpAbsoluteValue(bfloat16 value) { + return FpAbsoluteValue(static_cast(value)); +} + +template <> +float FpAbsoluteValue(half value) { + return FpAbsoluteValue(static_cast(value)); +} + // Helper class for comparing floating-point literals within an error bound. +template class NearComparator { public: - explicit NearComparator(ErrorSpec error) : error_(error) {} + // Compares the two array literals elementwise and returns an assertion + // result. The assertion result is successful if all actual and expected + // elements are within the given error bound. In case of error, the assertion + // result contains a detailed error message in case of failure. + static ::testing::AssertionResult Compare(const Literal& expected, + const Literal& actual, + ErrorSpec error, + bool detailed_message) { + NearComparator comparator(expected, actual, error, + detailed_message); + return comparator.Run(); + } + + private: + // Data structure encapsulating metadata about a single element mismatch. + struct Mismatch { + NativeT actual; + NativeT expected; + float rel_error; + float abs_error; + + // The linear index of the failure within the shape. This linear index is + // from the 'actual' literal. + int64 linear_index; + + bool operator<(const Mismatch& other) const { + return rel_error < other.rel_error; + } - // Compares the two literals elementwise. EXPECTs each pair of elements to be - // within the error bound. 
Emits useful log messages and dumps literals to - // temporary files on failure. Returns true if literals match. - bool ExpectNear(const Literal& expected, const Literal& actual) { + string ToString(const Shape& shape) const { + return Printf( + "actual %s, expected %s, index %s, rel error %8.3g, abs error %8.3g", + FpValueToString(actual).c_str(), FpValueToString(expected).c_str(), + LiteralTestUtil::MultiIndexAsString( + IndexUtil::LinearIndexToMultidimensionalIndex(shape, + linear_index)) + .c_str(), + rel_error, abs_error); + } + }; + + explicit NearComparator(const Literal& expected, const Literal& actual, + ErrorSpec error, bool detailed_message) + : expected_(expected), + actual_(actual), + error_(error), + detailed_message_(detailed_message), + abs_value_buckets_(kAbsValueBucketBounds.size() - 1, {0, 0}), + abs_error_buckets_(kErrorBucketBounds.size(), 0), + rel_error_buckets_(kErrorBucketBounds.size(), 0) {} + + // Runs the comparison between expected and actual literals. + ::testing::AssertionResult Run() { VLOG(1) << "expected:"; - XLA_VLOG_LINES(1, TruncateHugeLiteral(expected)); + XLA_VLOG_LINES(1, TruncateHugeLiteral(expected_)); VLOG(1) << "actual:"; - XLA_VLOG_LINES(1, TruncateHugeLiteral(actual)); + XLA_VLOG_LINES(1, TruncateHugeLiteral(actual_)); // If the shapes mismatch, we simply fail the expectation instead of // printing out data, as it's a type error rather than a value error. ::testing::AssertionResult equal_shapes = - LiteralTestUtil::EqualShapes(expected.shape(), actual.shape()); + LiteralTestUtil::EqualShapes(expected_.shape(), actual_.shape()); if (!equal_shapes) { - EXPECT_TRUE(equal_shapes); - return false; + return equal_shapes; } - - // Set up members used during the comparison. 
- num_miscompares_ = 0; - abs_diff_sum_ = 0.0; - abs_expected_sum_ = 0.0; - abs_diff_miscompare_sum_ = 0.0; - abs_expected_miscompare_sum_ = 0.0; - max_rel_err_ = 0.0; - max_abs_err_ = 0.0; - first_linear_index_ = -1; - last_linear_index_ = -1; - max_rel_linear_index_ = -1; - max_abs_linear_index_ = -1; - miscompares_ = Literal(ShapeUtil::ChangeElementType(actual.shape(), PRED)); - miscompares_.PopulateWithValue(false); - multi_index_.resize(expected.shape().dimensions_size(), 0); - - switch (expected.shape().element_type()) { - case BF16: - ExpectLiteralsNear(expected, actual, 0); - break; - case F16: - ExpectLiteralsNear(expected, actual, 0); - break; - case F32: - ExpectLiteralsNear(expected, actual, 0); - break; - case F64: - ExpectLiteralsNear(expected, actual, 0); - break; - case C64: - ExpectLiteralsNear(expected, actual, 0); - break; - default: - LOG(FATAL) << "Unsupported primitive type in near comparator: " - << PrimitiveType_Name(expected.shape().element_type()) - << ". Must be floating-point type."; + if (!ShapeUtil::IsArray(expected_.shape())) { + return ::testing::AssertionFailure() << "Expected array shape"; } - if (num_miscompares_ > 0) { - if (!VLOG_IS_ON(1)) { - LOG(INFO) << "expected: " << ShapeUtil::HumanString(expected.shape()) - << " " << TruncateHugeLiteral(expected); - LOG(INFO) << "actual: " << ShapeUtil::HumanString(actual.shape()) - << " " << TruncateHugeLiteral(actual); - LOG(INFO) << "Dumping literals to temp files..."; - WriteLiteralToTempFile(expected, "expected"); - WriteLiteralToTempFile(actual, "actual"); - WriteLiteralToTempFile(miscompares_, "miscompares"); - } - EXPECT_TRUE(num_miscompares_ == 0) - << "\nmax relative mismatch at index " - << LiteralTestUtil::MultiIndexAsString( - IndexUtil::LinearIndexToMultidimensionalIndex( - actual.shape(), max_rel_linear_index_)) - << "\nmaximum relative error " << max_rel_err_ - << "\nmax absolute mismatch at index " - << LiteralTestUtil::MultiIndexAsString( - 
IndexUtil::LinearIndexToMultidimensionalIndex( - actual.shape(), max_abs_linear_index_)) - << "\nmaximum absolute error " << max_abs_err_ - << "\nfirst mismatch at index " - << LiteralTestUtil::MultiIndexAsString( - IndexUtil::LinearIndexToMultidimensionalIndex( - actual.shape(), first_linear_index_)) - << "\nlast mismatch at index " - << LiteralTestUtil::MultiIndexAsString( - IndexUtil::LinearIndexToMultidimensionalIndex( - actual.shape(), last_linear_index_)) - << "\ntotal absolute error " << abs_diff_sum_ - << "\ntotal absolute error of miscompares " - << abs_diff_miscompare_sum_ << "\ntotal relative error " - << (abs_diff_sum_ / abs_expected_sum_) - << "\ntotal relative error of miscompares " - << (abs_diff_miscompare_sum_ / abs_expected_miscompare_sum_) - << "\nfailure count " << num_miscompares_; + mismatches_ = Literal(ShapeUtil::ChangeElementType(actual_.shape(), PRED)); + mismatches_.PopulateWithValue(false); + + CompareLiterals(); + + if (num_mismatches_ == 0) { + return ::testing::AssertionSuccess(); + } else if (!VLOG_IS_ON(1)) { + LOG(INFO) << "expected: " << ShapeUtil::HumanString(expected_.shape()) + << " " << TruncateHugeLiteral(expected_); + LOG(INFO) << "actual: " << ShapeUtil::HumanString(actual_.shape()) + << " " << TruncateHugeLiteral(actual_); + LOG(INFO) << "Dumping literals to temp files..."; + WriteLiteralToTempFile(expected_, "expected"); + WriteLiteralToTempFile(actual_, "actual"); + WriteLiteralToTempFile(mismatches_, "mismatches"); } - return num_miscompares_ == 0; + return ::testing::AssertionFailure() << ErrorMessage(); } - private: - template - bool NanMismatch(NativeT expected, NativeT actual, bool relaxed_nans) { - if (relaxed_nans) { - return !std::isnan(expected) && std::isnan(actual); - } else { - return std::isnan(expected) != std::isnan(actual); + // Insert the given absolute value into the absolute value bucket vector. The + // bounds of the buckets are given by kAbsValueBucketBounds. 
+ void UpdateAbsValueBucket(NativeT value, bool is_mismatch) { + // Adjust the bucket containing the absolute values of the 'actual' + // elements. + const float abs_value = FpAbsoluteValue(value); + for (int i = 0; i < abs_value_buckets_.size(); ++i) { + if (i == abs_value_buckets_.size() - 1 || + (abs_value >= kAbsValueBucketBounds[i] && + abs_value < kAbsValueBucketBounds[i + 1])) { + // The first value of the pair is the count of elements in the bucket, + // the second is the count of mismatches in the bucket. + abs_value_buckets_[i].first++; + if (is_mismatch) { + abs_value_buckets_[i].second++; + } + return; + } } } - template - void ExpectNear(NativeT expected, NativeT actual, - const ::testing::Message& message) { - EXPECT_NEAR(expected, actual, error_.abs) - << "expected:\n " << expected << "\n\tvs actual:\n " << actual << "\n" - << message; - } - - // EXPECTs that the two given scalar values are within the error bound. Keeps - // track of how many mismatches have occurred to keep the size of the output - // manageable. - template - bool ExpectValuesNear(NativeT expected, NativeT actual) { - if (expected == actual) { - return true; + // Insert the given error into the given error bucket vector. + void UpdateErrorBucket( + float error, tensorflow::gtl::MutableArraySlice error_buckets) { + CHECK_EQ(error_buckets.size(), kErrorBucketBounds.size()); + for (int i = 0; i < error_buckets.size(); ++i) { + if (error >= kErrorBucketBounds[i]) { + error_buckets[i]++; + } } - - const float abs_diff = std::abs(actual - expected); - const float rel_err = abs_diff / std::abs(expected); - const bool nan_mismatch = - NanMismatch(expected, actual, error_.relaxed_nans); - const bool mismatch = - (nan_mismatch || (abs_diff >= error_.abs && rel_err >= error_.rel)); - return !mismatch; } - // Assumes that expected vs actual fail ExpectValuesNear. 
- template - void UpdateAndLogMiscompares(const NativeT expected, const NativeT actual, - const Shape& shape, const int64 linear_index) { - const float abs_diff = std::abs(actual - expected); - const float rel_err = abs_diff / std::abs(expected); - abs_diff_sum_ += abs_diff; - abs_expected_sum_ += std::abs(expected); - if (rel_err > max_rel_err_ || std::isnan(rel_err)) { - max_rel_err_ = rel_err; - max_rel_linear_index_ = linear_index; + // Compares the two given elements from the expected and actual literals at + // the given literal_index and keeps track of various mismatch statistics. + void CompareValues(NativeT expected, NativeT actual, int64 linear_index) { + const bool is_nan_mismatch = + NanMismatch(expected, actual, error_.relaxed_nans); + float abs_error; + float rel_error; + if (actual == expected) { + abs_error = 0; + rel_error = 0; + } else if (is_nan_mismatch) { + num_nan_mismatches_++; + // A nan mismatch is considered to have infinite error. rel_error is used + // for sorting a std::set of the top mismatchs, and a nan value here will + // result in undefined behavior because nan's do not satisfy the strict + // weak ordering requirement of std containers. + abs_error = std::numeric_limits::infinity(); + rel_error = std::numeric_limits::infinity(); + } else { + abs_error = FpAbsoluteValue(actual - expected); + rel_error = abs_error / FpAbsoluteValue(expected); } - if (abs_diff > max_abs_err_ || std::isnan(abs_diff)) { - max_abs_err_ = abs_diff; - max_abs_linear_index_ = linear_index; + const bool is_abs_mismatch = abs_error > error_.abs; + const bool is_rel_mismatch = rel_error > error_.rel; + const bool is_mismatch = + is_nan_mismatch || (is_abs_mismatch && is_rel_mismatch); + + // Update the error of the relative bucket only if the *absolute* error + // bound is exceeded and vice versa. 
+ if (is_abs_mismatch) { + num_abs_mismatches_++; + UpdateErrorBucket(rel_error, &rel_error_buckets_); } - if (VLOG_IS_ON(10)) { - VLOG(10) << tensorflow::strings::Printf( - "index %s abs_diff %f rel_err %f", - LiteralTestUtil::MultiIndexAsString( - IndexUtil::LinearIndexToMultidimensionalIndex(shape, - linear_index)) - .c_str(), - abs_diff, rel_err); + if (is_rel_mismatch) { + num_rel_mismatches_++; + UpdateErrorBucket(abs_error, &abs_error_buckets_); } - abs_diff_miscompare_sum_ += abs_diff; - abs_expected_miscompare_sum_ += std::abs(expected); - const int64 kMaxFailures = 2; - if (num_miscompares_ < kMaxFailures) { - const auto multi_index = - IndexUtil::LinearIndexToMultidimensionalIndex(shape, linear_index); - ::testing::Message msg; - msg << "mismatch at index " - << LiteralTestUtil::MultiIndexAsString(multi_index) << " abs diff " - << abs_diff << " rel err " << rel_err << " failure #" - << num_miscompares_; - ExpectNear(expected, actual, msg); - } else if (num_miscompares_ == kMaxFailures) { - LOG(ERROR) << "reached max 'loud' failure count; silently proceeding..."; + + UpdateAbsValueBucket(actual, is_mismatch); + + if (!is_mismatch) { + return; } - if (num_miscompares_ == 0) { - first_linear_index_ = linear_index; + + num_mismatches_++; + + // Keep track of the kTopRelativeErrorCount relative error mismatches. + if (top_rel_mismatches_.size() < kTopRelativeErrorCount || + rel_error > top_rel_mismatches_.begin()->rel_error) { + Mismatch mismatch = {actual, expected, rel_error, abs_error, + linear_index}; + top_rel_mismatches_.insert(mismatch); + if (top_rel_mismatches_.size() > kTopRelativeErrorCount) { + top_rel_mismatches_.erase(top_rel_mismatches_.begin()); + } } - num_miscompares_++; - last_linear_index_ = linear_index; - miscompares_.data()[linear_index] = true; + + mismatches_.data()[linear_index] = true; } - // Recursive function which compares the two given literals elementwise. 
- template - void ExpectLiteralsNear(const Literal& expected, const Literal& actual, - int64 dimension) { + // Compares the two literals elementwise. + void CompareLiterals() { // Fast path optimization for the case were layouts match. - if (LayoutUtil::Equal(actual.shape().layout(), expected.shape().layout())) { + if (LayoutUtil::Equal(actual_.shape().layout(), + expected_.shape().layout())) { tensorflow::gtl::ArraySlice expected_data = - expected.data(); + expected_.data(); tensorflow::gtl::ArraySlice actual_data = - actual.data(); + actual_.data(); const int64 len = expected_data.size(); for (int64 i = 0; i < len; ++i) { - const bool near = ExpectValuesNear(expected_data[i], actual_data[i]); - if (!near) { - UpdateAndLogMiscompares(expected_data[i], actual_data[i], - actual.shape(), i); - } + CompareValues(expected_data[i], actual_data[i], i); } return; } + std::vector multi_index(ShapeUtil::Rank(actual_.shape()), 0); + CompareLiteralsSlow(0, &multi_index); + } - if (dimension == expected.shape().dimensions_size()) { - bool near = ExpectValuesNear(expected.Get(multi_index_), - actual.Get(multi_index_)); - if (!near) { - UpdateAndLogMiscompares( - expected.Get(multi_index_), - actual.Get(multi_index_), actual.shape(), - IndexUtil::MultidimensionalIndexToLinearIndex(actual.shape(), - multi_index_)); - } + // Slow path for CompareLiterals when 'actual' and 'expected' literals have + // different layouts. In this case, multidimensional indices are constructed + // and indexed for each element. 
+ void CompareLiteralsSlow(int64 dimension, std::vector* multi_index) { + if (dimension == multi_index->size()) { + CompareValues(expected_.Get(*multi_index), + actual_.Get(*multi_index), + IndexUtil::MultidimensionalIndexToLinearIndex( + actual_.shape(), *multi_index)); } else { - for (int64 i = 0; i < expected.shape().dimensions(dimension); ++i) { - multi_index_[dimension] = i; - ExpectLiteralsNear(expected, actual, dimension + 1); + for (int64 i = 0; i < expected_.shape().dimensions(dimension); ++i) { + (*multi_index)[dimension] = i; + CompareLiteralsSlow(dimension + 1, multi_index); } } } @@ -580,159 +653,247 @@ class NearComparator { int64 now_usec = tensorflow::Env::Default()->NowMicros(); string filename = tensorflow::io::JoinPath( tensorflow::testing::TmpDir(), - tensorflow::strings::Printf("tempfile-%s-%llx-%s", Hostname().c_str(), - now_usec, name.c_str())); + Printf("tempfile-%s-%llx-%s", Hostname().c_str(), now_usec, + name.c_str())); TF_CHECK_OK(tensorflow::WriteBinaryProto(tensorflow::Env::Default(), filename, literal.ToProto())); LOG(ERROR) << "wrote to " << name << " file: " << filename; } - // Gets the total element count. For tuples, this is not the count of tuple - // elements, but the sum of elements of each tuple element. - int64 RecursiveElementCount(const Shape& shape) { - if (ShapeUtil::IsTuple(shape)) { - const int64 tuple_elements = ShapeUtil::TupleElementCount(shape); - int64 total = 0; - for (int64 i = 0; i < tuple_elements; ++i) { - total += - RecursiveElementCount(ShapeUtil::GetTupleElementShape(shape, i)); - } - return total; - } else { - return ShapeUtil::ElementsIn(shape); + // Returns an error message string with a detailed breakdown of the + // mismatches. Called after calling Run(). + string ErrorMessage() { + string out; + int64 element_count = ShapeUtil::ElementsIn(actual_.shape()); + + auto percent_string = [](float a, float b) { + float pct = b == 0.0 ? 
0.0 : 100.0 * a / b; + return Printf("%0.4f%%", pct); + }; + + Appendf(&out, + "\nMismatch count %lld (%s) in shape %s (%lld elements), abs bound " + "%g, rel bound %g\n", + num_mismatches_, + percent_string(num_mismatches_, element_count).c_str(), + ShapeUtil::HumanString(actual_.shape()).c_str(), + ShapeUtil::ElementsIn(actual_.shape()), error_.abs, error_.rel); + if (num_nan_mismatches_ > 0) { + StrAppend(&out, "nan mismatches ", num_nan_mismatches_, "\n"); + } + Appendf(&out, "Top relative error mismatches:\n"); + for (auto it = top_rel_mismatches_.rbegin(); + it != top_rel_mismatches_.rend(); ++it) { + StrAppend(&out, " ", it->ToString(actual_.shape()).c_str(), "\n"); } - } - // Calling ToString on a literal with over 100 million elements takes around - // 3 minutes. The utility of printing a literal with >1000 elements is - // questionable, especially when writing the Literal proto to disk is orders - // of magnitude faster. - string TruncateHugeLiteral(const Literal& literal) { - return RecursiveElementCount(literal.shape()) < 1000 - ? literal.ToString() - : "[TRUNCATED, Literal with more than 1000 values]"; - } + if (!detailed_message_) { + return out; + } - ErrorSpec error_; + StrAppend(&out, "Absolute magnitude breakdown of actual values:\n"); + CHECK_EQ(abs_value_buckets_.size() + 1, kAbsValueBucketBounds.size()); + for (int i = 0; i < abs_value_buckets_.size(); ++i) { + const int64 bucket_size = abs_value_buckets_[i].first; + const int64 bucket_mismatches = abs_value_buckets_[i].second; + string mismatch_str = bucket_mismatches > 0 + ? Printf(", mismatches %lld", bucket_mismatches) + : ""; + Appendf(&out, " %-6g <= x < %-6g : %7lld (%9s)%s\n", + kAbsValueBucketBounds[i], kAbsValueBucketBounds[i + 1], + bucket_size, percent_string(bucket_size, element_count).c_str(), + mismatch_str.c_str()); + } - // Number of element miscomparisons encountered so far. 
- int64 num_miscompares_; + auto print_accum_buckets = [&](const string& header, int64 total, + tensorflow::gtl::ArraySlice buckets) { + StrAppend(&out, header, ":\n"); + Appendf(&out, " < %-6g : %7lld (%s)\n", kErrorBucketBounds[0], + total - buckets[0], + percent_string(total - buckets[0], total).c_str()); + CHECK_EQ(buckets.size(), kErrorBucketBounds.size()); + for (int i = 0; i < kErrorBucketBounds.size(); ++i) { + Appendf(&out, " >= %-6g : %7lld (%s)\n", kErrorBucketBounds[i], + buckets[i], percent_string(buckets[i], total).c_str()); + } + }; + Appendf(&out, "Elements exceeding abs error bound %g: %lld (%s)\n", + error_.abs, num_abs_mismatches_, + percent_string(num_abs_mismatches_, element_count).c_str()); + print_accum_buckets( + "Relative error breakdown of elements exceeding abs error bound", + num_abs_mismatches_, rel_error_buckets_); + Appendf(&out, "Elements exceeding rel error bound %g: %lld (%s)\n", + error_.rel, num_rel_mismatches_, + percent_string(num_rel_mismatches_, element_count).c_str()); + print_accum_buckets( + "Absolute error breakdown of elements exceeding rel error bound", + num_rel_mismatches_, abs_error_buckets_); + return out; + } - // A Literal containing which elements did not match in the expected and - // actual literals. miscompares_ contains PREDs and is of the same sizes as - // the comparison literals. - Literal miscompares_; - - // A multidimensional index used when performing the recursive comparison. - std::vector multi_index_; - - // Aggregated Statistics on input. - double abs_diff_sum_; - double abs_expected_sum_; - double abs_diff_miscompare_sum_; - double abs_expected_miscompare_sum_; - float max_rel_err_; - float max_abs_err_; - int64 first_linear_index_; - int64 last_linear_index_; - int64 max_rel_linear_index_; - int64 max_abs_linear_index_; -}; + // 'actual' and 'expected' literals being compared. 
+ const Literal& expected_; + const Literal& actual_; -template <> -bool NearComparator::NanMismatch(complex64 expected, - complex64 actual, - bool relaxed_nans) { - return NanMismatch(expected.real(), actual.real(), relaxed_nans) || - NanMismatch(expected.imag(), actual.imag(), relaxed_nans); -} + // The error bounds of the comparison. + ErrorSpec error_; -template <> -void NearComparator::ExpectNear(complex64 expected, complex64 actual, - const ::testing::Message& message) { - EXPECT_NEAR(expected.real(), actual.real(), error_.abs) - << "expected:\n " << expected << "\n\tvs actual:\n " << actual << "\n" - << message; - EXPECT_NEAR(expected.imag(), actual.imag(), error_.abs) - << "expected:\n " << expected << "\n\tvs actual:\n " << actual << "\n" - << message; -} + // Whether to include detailed breakdown of mismatches in the error message. + bool detailed_message_; -template <> -bool NearComparator::ExpectValuesNear(bfloat16 expected, - bfloat16 actual) { - return ExpectValuesNear(static_cast(expected), - static_cast(actual)); -} + // Number of element element mismatches encountered so far. + int64 num_mismatches_ = 0; -template <> -bool NearComparator::ExpectValuesNear(half expected, half actual) { - return ExpectValuesNear(static_cast(std::move(expected)), - static_cast(std::move(actual))); -} + // Number of elements with a nan mismatch. + int64 num_nan_mismatches_ = 0; -template <> -void NearComparator::UpdateAndLogMiscompares( - const bfloat16 expected, const bfloat16 actual, const Shape& shape, - const int64 linear_index) { - UpdateAndLogMiscompares(static_cast(expected), - static_cast(actual), shape, linear_index); -} + // Number of elements which exceed the absolute/relative error bound. 
+ int64 num_abs_mismatches_ = 0; + int64 num_rel_mismatches_ = 0; -template <> -void NearComparator::UpdateAndLogMiscompares(half expected, half actual, - const Shape& shape, - const int64 linear_index) { - UpdateAndLogMiscompares(static_cast(std::move(expected)), - static_cast(std::move(actual)), shape, - linear_index); -} - -} // namespace + // A Literal containing which elements did not match in the expected and + // actual literals. mismatches_ contains PREDs and is of the same sizes as + // the comparison literals. + Literal mismatches_; + + // The number of mismatches to report in the output, sorted by relative error + // magnitude. + static constexpr int64 kTopRelativeErrorCount = 5; + + // The set of mismatches with the largest relative error. The size of this set + // is bounded by kTopRelativeErrorCount. + std::multiset top_rel_mismatches_; + + // Actual values are bucketed by absolute value. kAbsValueBucketBounds is the + // bounds of these buckets. abs_value_buckets_ contains a pair for each + // bucket: the element count and failure count. + static constexpr std::array kAbsValueBucketBounds = { + 0.0, 0.0001, 0.001, 0.01, 0.1, 1, std::numeric_limits::infinity()}; + std::vector> abs_value_buckets_; + + // Buckets for relative and absolute errors. The relative error buckets only + // contains those elements which exceed the *absolute* error bound, and vice + // versa. This makes it easy to see the effect of adjusting the relative (or + // absolute) error bound on the success of the comparison. kErrorBucketBounds + // are the lower bounds of the buckets in both vectors. The error buckets are + // a cumulative distribution so an error value may appear in more than one + // bucket. For example an error value of 0.003 may appear in the buckets + // bounded by 0.01, 0.1, and 1.0. 
+ static constexpr std::array kErrorBucketBounds = {0.0001, 0.001, + 0.01, 0.1, 1}; + std::vector abs_error_buckets_; + std::vector rel_error_buckets_; +}; -/* static */ ::testing::AssertionResult LiteralTestUtil::Near( - const Literal& expected, const Literal& actual, const ErrorSpec& error) { +template +constexpr std::array NearComparator::kAbsValueBucketBounds; +template +constexpr std::array NearComparator::kErrorBucketBounds; + +// Helper function for comparing two literals for nearness. Handles tuple-shapes +// via recursion. shape_index is the ShapeIndex of expected (or actual) +// currently being compared. +::testing::AssertionResult NearHelper(const Literal& expected, + const Literal& actual, + const ErrorSpec& error, + bool detailed_message, + const ShapeIndex& shape_index) { ::testing::AssertionResult err = - EqualShapes(expected.shape(), actual.shape()); + LiteralTestUtil::EqualShapes(expected.shape(), actual.shape()); if (!err) { return err; } if (ShapeUtil::IsTuple(expected.shape())) { for (int64 i = 0; i < ShapeUtil::TupleElementCount(expected.shape()); ++i) { - SCOPED_TRACE(tensorflow::strings::StrCat( - "Tuple index ", i, " in ", ShapeUtil::HumanString(expected.shape()))); const auto expected_element = LiteralView::Create(expected, {i}); const auto actual_element = LiteralView::Create(actual, {i}); - + ShapeIndex element_index = shape_index; + element_index.push_back(i); ::testing::AssertionResult res = - Near(expected_element, actual_element, error); - if (err && !res) { - err = res; + NearHelper(expected_element, actual_element, error, detailed_message, + element_index); + if (!res) { + string err_message = + Printf("\nArray at shape index %s%s", + element_index.ToString().c_str(), res.message()); + if (err) { + err = ::testing::AssertionFailure() << err_message; + } else { + err << err_message; + } } } + if (!err && shape_index.empty()) { + // Emit a top-level error message containing the top-level shape in case + // of mismatch. 
+ int64 total_elements = RecursiveElementCount(actual.shape()); + err = ::testing::AssertionFailure() + << Printf("\nMismatches in shape %s (%lld elements):\n%s", + ShapeUtil::HumanString(actual.shape()).c_str(), + total_elements, err.message()); + } return err; } if (ShapeUtil::ElementIsFloating(expected.shape()) || ShapeUtil::ElementIsComplex(expected.shape())) { - NearComparator comparator(error); - return comparator.ExpectNear(expected, actual) - ? ::testing::AssertionSuccess() - : ::testing::AssertionFailure() << "values were not near"; + switch (expected.shape().element_type()) { + case BF16: + return NearComparator::Compare(expected, actual, error, + detailed_message); + break; + case F16: + return NearComparator::Compare(expected, actual, error, + detailed_message); + break; + case F32: + return NearComparator::Compare(expected, actual, error, + detailed_message); + break; + case F64: + return NearComparator::Compare(expected, actual, error, + detailed_message); + break; + case C64: + return NearComparator::Compare(expected, actual, error, + detailed_message); + break; + default: + LOG(FATAL) << "Unsupported primitive type in near comparator: " + << PrimitiveType_Name(expected.shape().element_type()) + << ". Must be floating-point type."; + } } - return Equal(expected, actual); + // Non-floating point literal. + return LiteralTestUtil::Equal(expected, actual); +} + +} // namespace + +/* static */ ::testing::AssertionResult LiteralTestUtil::Near( + const Literal& expected, const Literal& actual, const ErrorSpec& error, + bool detailed_message) { + return NearHelper(expected, actual, error, detailed_message, + /*shape_index=*/{}); } /* static */ void LiteralTestUtil::ExpectNear(const Literal& expected, const Literal& actual, const ErrorSpec& error, const string& message) { - EXPECT_TRUE(Near(expected, actual, error)) - << (message.empty() - ? 
"" - : tensorflow::strings::StrCat("\nmessage: ", message)); + ::testing::AssertionResult res = + Near(expected, actual, error, /*detailed_message=*/false); + if (!res) { + res << "Expected: " << TruncateHugeLiteral(expected) << "\n"; + res << "Actual: " << TruncateHugeLiteral(actual) << "\n"; + if (!message.empty()) { + res << StrCat("\nmessage: ", message); + } + } + EXPECT_TRUE(res); } /*static*/ ::testing::AssertionResult LiteralTestUtil::NearOrEqual( @@ -754,8 +915,7 @@ void NearComparator::UpdateAndLogMiscompares(half expected, half actual, /* static */ string LiteralTestUtil::MultiIndexAsString( tensorflow::gtl::ArraySlice multi_index) { - return tensorflow::strings::StrCat( - "{", tensorflow::str_util::Join(multi_index, ","), "}"); + return StrCat("{", tensorflow::str_util::Join(multi_index, ","), "}"); } /* static */ std::unique_ptr LiteralTestUtil::Reshape( diff --git a/tensorflow/compiler/xla/tests/literal_test_util.h b/tensorflow/compiler/xla/tests/literal_test_util.h index 7b757a4bd7e7592583b7596b4305ddb7e6c52d75..a755568c0f098e15512bd1d3720269c867bc9c49 100644 --- a/tensorflow/compiler/xla/tests/literal_test_util.h +++ b/tensorflow/compiler/xla/tests/literal_test_util.h @@ -122,16 +122,19 @@ class LiteralTestUtil { // bounds are equivalent. // // Tuples are matched recursively. When comparing tensors of - // non-floating-point type, checks for exact equality, ignoring the ErroSpec. + // non-floating-point type, checks for exact equality, ignoring the ErrorSpec. // // If the shape of the literals is neither a complex/floating-point tensor nor // a tuple which contains a complex/floating-point tensor, Near() is // equivalent to Equal(). We don't raise an error in this case, because we // want to allow callers to call Near() even if they have no preconceptions // about the shapes being compared. + // + // If detailed_message is true, then the error message in the assertion result + // will contain a more detailed breakdown of mismatches. 
static ::testing::AssertionResult Near( - const Literal& expected, const Literal& actual, - const ErrorSpec& error) TF_MUST_USE_RESULT; + const Literal& expected, const Literal& actual, const ErrorSpec& error, + bool detailed_message = false) TF_MUST_USE_RESULT; // Expects expected and actual to be Near with the given error. static void ExpectNear(const Literal& expected, const Literal& actual, diff --git a/tensorflow/compiler/xla/tests/literal_test_util_test.cc b/tensorflow/compiler/xla/tests/literal_test_util_test.cc index 3a421f8458268a14dcdd84889bcae4990c095ea4..9d619a77c7e8d6398b559e8f562cd7f8194e0811 100644 --- a/tensorflow/compiler/xla/tests/literal_test_util_test.cc +++ b/tensorflow/compiler/xla/tests/literal_test_util_test.cc @@ -89,7 +89,7 @@ TEST(LiteralTestUtilTest, ExpectNearFailurePlacesResultsInTemporaryDirectory) { EXPECT_EQ("2", literal->ToString()); } else if (result.find("actual") != string::npos) { EXPECT_EQ("4", literal->ToString()); - } else if (result.find("miscompares") != string::npos) { + } else if (result.find("mismatches") != string::npos) { EXPECT_EQ("true", literal->ToString()); } else { FAIL() << "unknown file in temporary directory: " << result; diff --git a/tensorflow/compiler/xla/tests/llvm_compiler_test.cc b/tensorflow/compiler/xla/tests/llvm_compiler_test.cc index 7e92439c494b677f718a63c71c20828d65bebef4..2f46ee0be216d7dabf1c476d3cfb7d528f8ab6a4 100644 --- a/tensorflow/compiler/xla/tests/llvm_compiler_test.cc +++ b/tensorflow/compiler/xla/tests/llvm_compiler_test.cc @@ -43,7 +43,7 @@ class LLVMCompilerTest : public ::testing::Test { ~LLVMCompilerTest() override {} protected: - using Platform = ::perftools::gputools::Platform; + using Platform = se::Platform; explicit LLVMCompilerTest(string platform_name) : platform_name_(std::move(platform_name)) {} @@ -95,7 +95,7 @@ class LLVMCompilerTest : public ::testing::Test { modules.push_back(hlo_module->Clone()); modules.push_back(std::move(hlo_module)); - std::vector> executors; + 
std::vector> executors; executors.push_back({backend_->default_stream_executor()}); executors.push_back({backend_->default_stream_executor()}); diff --git a/tensorflow/compiler/xla/tests/local_client_allocation_test.cc b/tensorflow/compiler/xla/tests/local_client_allocation_test.cc index 3d30ceeaf1b0369b6fdc0cd9620c04aae287941c..7209f91639b5f24e7da569135c3b9444ed522d6e 100644 --- a/tensorflow/compiler/xla/tests/local_client_allocation_test.cc +++ b/tensorflow/compiler/xla/tests/local_client_allocation_test.cc @@ -25,6 +25,7 @@ limitations under the License. #include "tensorflow/compiler/xla/tests/literal_test_util.h" #include "tensorflow/compiler/xla/tests/local_client_test_base.h" #include "tensorflow/compiler/xla/tests/test_macros.h" +#include "tensorflow/core/lib/gtl/optional.h" #include "tensorflow/core/platform/test.h" #include "tensorflow/core/platform/types.h" @@ -53,7 +54,7 @@ XLA_TEST_F(LocalClientAllocationTest, AddVectors) { // deallocation happen on the right allocator. ExecutableRunOptions options; options.set_allocator(allocator); - std::unique_ptr result = + tensorflow::gtl::optional result = ExecuteLocallyOrDie(builder.Build().ValueOrDie(), {}, DefaultExecutableBuildOptions(), options); @@ -66,7 +67,7 @@ XLA_TEST_F(LocalClientAllocationTest, AddVectors) { // Deallocate result and verify that deallocate was called once. 
int64 deallocation_count_before = allocator_->deallocation_count(); - result = nullptr; + result.reset(); EXPECT_EQ(deallocation_count_before + 1, allocator_->deallocation_count()); } @@ -92,7 +93,7 @@ XLA_TEST_F(LocalClientAllocationTest, RunOnDevices) { computation, {}, ExecutableBuildOptions().set_device_ordinal(d), ExecutableRunOptions().set_device_ordinal(d).set_allocator(allocator)); LiteralTestUtil::ExpectR1Near( - {2.0f, 4.0f, 6.0f}, *ShapedBufferToLiteral(*result), error_spec_); + {2.0f, 4.0f, 6.0f}, *ShapedBufferToLiteral(result), error_spec_); // At least one allocation should have been performed when executing the // computation. diff --git a/tensorflow/compiler/xla/tests/local_client_execute_test.cc b/tensorflow/compiler/xla/tests/local_client_execute_test.cc index 2462ea39f914b1dbb525ea777a48d9ce66035638..7e14e77366d1d4d7f21a585883bb2fc750f4247e 100644 --- a/tensorflow/compiler/xla/tests/local_client_execute_test.cc +++ b/tensorflow/compiler/xla/tests/local_client_execute_test.cc @@ -43,8 +43,6 @@ limitations under the License. 
#include "tensorflow/core/platform/test.h" #include "tensorflow/core/platform/test_benchmark.h" -namespace se = ::perftools::gputools; - namespace xla { namespace { @@ -59,10 +57,9 @@ XLA_TEST_F(LocalClientExecuteTest, Constant) { ComputationBuilder builder(local_client_, TestName()); auto y = builder.ConstantR0(123.0f); - std::unique_ptr result = + ScopedShapedBuffer result = ExecuteLocallyOrDie(builder.Build().ValueOrDie(), {}); - - LiteralTestUtil::ExpectR0Near(123.f, *ShapedBufferToLiteral(*result), + LiteralTestUtil::ExpectR0Near(123.f, *ShapedBufferToLiteral(result), error_spec_); } @@ -73,10 +70,9 @@ XLA_TEST_F(LocalClientExecuteTest, AddScalars) { builder.Add(x, y); auto x_value = LiteralToShapedBuffer(*Literal::CreateR0(42.0f)); - std::unique_ptr result = - ExecuteLocallyOrDie(builder.Build().ValueOrDie(), {x_value.get()}); - - LiteralTestUtil::ExpectR0Near(165.f, *ShapedBufferToLiteral(*result), + ScopedShapedBuffer result = + ExecuteLocallyOrDie(builder.Build().ValueOrDie(), {&x_value}); + LiteralTestUtil::ExpectR0Near(165.f, *ShapedBufferToLiteral(result), error_spec_); } @@ -87,10 +83,9 @@ XLA_TEST_F(LocalClientExecuteTest, AddZeroElementVectors) { builder.Add(x, y); auto x_array = LiteralToShapedBuffer(*Literal::CreateR1({})); - std::unique_ptr result = - ExecuteLocallyOrDie(builder.Build().ValueOrDie(), {x_array.get()}); - - LiteralTestUtil::ExpectR1Near({}, *ShapedBufferToLiteral(*result), + ScopedShapedBuffer result = + ExecuteLocallyOrDie(builder.Build().ValueOrDie(), {&x_array}); + LiteralTestUtil::ExpectR1Near({}, *ShapedBufferToLiteral(result), error_spec_); } @@ -102,11 +97,10 @@ XLA_TEST_F(LocalClientExecuteTest, AddVectors) { auto x_array = LiteralToShapedBuffer(*Literal::CreateR1({0.0f, 1.0f, 2.0f})); - std::unique_ptr result = - ExecuteLocallyOrDie(builder.Build().ValueOrDie(), {x_array.get()}); - + ScopedShapedBuffer result = + ExecuteLocallyOrDie(builder.Build().ValueOrDie(), {&x_array}); LiteralTestUtil::ExpectR1Near( - {2.0f, 4.0f, 
6.0f}, *ShapedBufferToLiteral(*result), error_spec_); + {2.0f, 4.0f, 6.0f}, *ShapedBufferToLiteral(result), error_spec_); } XLA_TEST_F(LocalClientExecuteTest, AddVectorsWithProfile) { @@ -118,13 +112,12 @@ XLA_TEST_F(LocalClientExecuteTest, AddVectorsWithProfile) { auto x_array = LiteralToShapedBuffer(*Literal::CreateR1({0.0f, 1.0f, 2.0f})); ExecutionProfile profile; - std::unique_ptr result = ExecuteLocallyOrDie( - builder.Build().ValueOrDie(), {x_array.get()}, - DefaultExecutableBuildOptions(), + ScopedShapedBuffer result = ExecuteLocallyOrDie( + builder.Build().ValueOrDie(), {&x_array}, DefaultExecutableBuildOptions(), DefaultExecutableRunOptions().set_execution_profile(&profile)); LiteralTestUtil::ExpectR1Near( - {2.0f, 4.0f, 6.0f}, *ShapedBufferToLiteral(*result), error_spec_); + {2.0f, 4.0f, 6.0f}, *ShapedBufferToLiteral(result), error_spec_); EXPECT_GT(profile.compute_and_transfer_time_ns(), 0); } @@ -138,27 +131,27 @@ XLA_TEST_F(LocalClientExecuteTest, AddArraysWithDifferentInputLayouts) { // Create x as a col-major array. auto x_array = LiteralToShapedBuffer(*Literal::CreateR2WithLayout( {{1.0f, 2.0f}, {3.0f, 4.0f}}, LayoutUtil::MakeLayout({0, 1}))); - EXPECT_TRUE(LayoutUtil::Equal(x_array->on_device_shape().layout(), + EXPECT_TRUE(LayoutUtil::Equal(x_array.on_device_shape().layout(), LayoutUtil::MakeLayout({0, 1}))); // Create y as a row-major array. 
auto y_array = LiteralToShapedBuffer(*Literal::CreateR2WithLayout( {{10.0f, 20.0f}, {30.0f, 40.0f}}, LayoutUtil::MakeLayout({1, 0}))); - EXPECT_TRUE(LayoutUtil::Equal(y_array->on_device_shape().layout(), + EXPECT_TRUE(LayoutUtil::Equal(y_array.on_device_shape().layout(), LayoutUtil::MakeLayout({1, 0}))); - std::unique_ptr result_colmaj = - ExecuteLocallyOrDie(computation, {x_array.get(), y_array.get()}); + ScopedShapedBuffer result_colmaj = + ExecuteLocallyOrDie(computation, {&x_array, &y_array}); LiteralTestUtil::ExpectR2Near({{11.0f, 22.0f}, {33.0f, 44.0f}}, - *ShapedBufferToLiteral(*result_colmaj), + *ShapedBufferToLiteral(result_colmaj), error_spec_); // Run with the parameter values in a different order. - std::unique_ptr result_param_swap = - ExecuteLocallyOrDie(computation, {y_array.get(), x_array.get()}); + ScopedShapedBuffer result_param_swap = + ExecuteLocallyOrDie(computation, {&y_array, &x_array}); LiteralTestUtil::ExpectR2Near( {{11.0f, 22.0f}, {33.0f, 44.0f}}, - *ShapedBufferToLiteral(*result_param_swap), error_spec_); + *ShapedBufferToLiteral(result_param_swap), error_spec_); } XLA_TEST_F(LocalClientExecuteTest, AddArraysWithDifferentOutputLayouts) { @@ -174,27 +167,27 @@ XLA_TEST_F(LocalClientExecuteTest, AddArraysWithDifferentOutputLayouts) { *Literal::CreateR2({{10.0f, 20.0f}, {30.0f, 40.0f}})); // Run with col-major result layout. 
- std::unique_ptr result_colmaj = ExecuteLocallyOrDie( - computation, {x_array.get(), y_array.get()}, + ScopedShapedBuffer result_colmaj = ExecuteLocallyOrDie( + computation, {&x_array, &y_array}, DefaultExecutableBuildOptions().set_result_layout( ShapeUtil::MakeShapeWithLayout(F32, /*dimensions=*/{2, 2}, {0, 1})), DefaultExecutableRunOptions()); - EXPECT_TRUE(LayoutUtil::Equal(result_colmaj->on_device_shape().layout(), + EXPECT_TRUE(LayoutUtil::Equal(result_colmaj.on_device_shape().layout(), LayoutUtil::MakeLayout({0, 1}))); LiteralTestUtil::ExpectR2Near({{11.0f, 22.0f}, {33.0f, 44.0f}}, - *ShapedBufferToLiteral(*result_colmaj), + *ShapedBufferToLiteral(result_colmaj), error_spec_); // Run with row-major result layout. - std::unique_ptr result_rowmaj = ExecuteLocallyOrDie( - computation, {x_array.get(), y_array.get()}, + ScopedShapedBuffer result_rowmaj = ExecuteLocallyOrDie( + computation, {&x_array, &y_array}, DefaultExecutableBuildOptions().set_result_layout( ShapeUtil::MakeShapeWithLayout(F32, /*dimensions=*/{2, 2}, {1, 0})), DefaultExecutableRunOptions()); - EXPECT_TRUE(LayoutUtil::Equal(result_rowmaj->on_device_shape().layout(), + EXPECT_TRUE(LayoutUtil::Equal(result_rowmaj.on_device_shape().layout(), LayoutUtil::MakeLayout({1, 0}))); LiteralTestUtil::ExpectR2Near({{11.0f, 22.0f}, {33.0f, 44.0f}}, - *ShapedBufferToLiteral(*result_rowmaj), + *ShapedBufferToLiteral(result_rowmaj), error_spec_); } @@ -210,13 +203,13 @@ XLA_TEST_F(LocalClientExecuteTest, TupleResult) { auto y_array = LiteralToShapedBuffer( *Literal::CreateR2({{10.0f, 20.0f}, {30.0f, 40.0f}})); - std::unique_ptr result = - ExecuteLocallyOrDie(computation, {x_array.get(), y_array.get()}); + ScopedShapedBuffer result = + ExecuteLocallyOrDie(computation, {&x_array, &y_array}); - EXPECT_TRUE(ShapeUtil::IsTuple(result->on_host_shape())); - EXPECT_EQ(3, ShapeUtil::TupleElementCount(result->on_host_shape())); + EXPECT_TRUE(ShapeUtil::IsTuple(result.on_host_shape())); + EXPECT_EQ(3, 
ShapeUtil::TupleElementCount(result.on_host_shape())); - std::unique_ptr result_literal = ShapedBufferToLiteral(*result); + std::unique_ptr result_literal = ShapedBufferToLiteral(result); LiteralTestUtil::ExpectR2Equal( {{1.0f, 2.0f}, {3.0f, 4.0f}}, LiteralView::Create(*result_literal, {0})); LiteralTestUtil::ExpectR2Equal( @@ -239,13 +232,13 @@ XLA_TEST_F(LocalClientExecuteTest, NestedTupleResult) { auto y_array = LiteralToShapedBuffer( *Literal::CreateR2({{10.0f, 20.0f}, {30.0f, 40.0f}})); - std::unique_ptr result = - ExecuteLocallyOrDie(computation, {x_array.get(), y_array.get()}); + ScopedShapedBuffer result = + ExecuteLocallyOrDie(computation, {&x_array, &y_array}); - EXPECT_TRUE(ShapeUtil::IsTuple(result->on_host_shape())); - EXPECT_EQ(2, ShapeUtil::TupleElementCount(result->on_host_shape())); + EXPECT_TRUE(ShapeUtil::IsTuple(result.on_host_shape())); + EXPECT_EQ(2, ShapeUtil::TupleElementCount(result.on_host_shape())); - std::unique_ptr result_literal = ShapedBufferToLiteral(*result); + std::unique_ptr result_literal = ShapedBufferToLiteral(result); LiteralTestUtil::ExpectR2Equal( {{1.0f, 2.0f}, {3.0f, 4.0f}}, LiteralView::Create(*result_literal, {1})); LiteralTestUtil::ExpectR2Equal( @@ -276,11 +269,11 @@ XLA_TEST_F(LocalClientExecuteTest, TupleResultWithLayout) { ShapeUtil::MakeShapeWithLayout(F32, /*dimensions=*/{2, 2}, /*minor_to_major=*/{1, 0})}); options.set_result_layout(shape_with_layout); - std::unique_ptr result = ExecuteLocallyOrDie( - builder.Build().ValueOrDie(), {array.get(), array.get()}, options, - DefaultExecutableRunOptions()); + ScopedShapedBuffer result = + ExecuteLocallyOrDie(builder.Build().ValueOrDie(), {&array, &array}, + options, DefaultExecutableRunOptions()); - std::unique_ptr result_literal = ShapedBufferToLiteral(*result); + std::unique_ptr result_literal = ShapedBufferToLiteral(result); LiteralTestUtil::ExpectR2Equal( {{1.0f, 2.0f}, {3.0f, 4.0f}}, LiteralView::Create(*result_literal, {0})); LiteralTestUtil::ExpectR2Equal( @@ 
-320,13 +313,13 @@ XLA_TEST_F(LocalClientExecuteTest, TupleArguments) { auto x_buffer = LiteralToShapedBuffer(*x_literal); auto y_buffer = LiteralToShapedBuffer(*y_literal); - std::unique_ptr result = - ExecuteLocallyOrDie(computation, {x_buffer.get(), y_buffer.get()}); + ScopedShapedBuffer result = + ExecuteLocallyOrDie(computation, {&x_buffer, &y_buffer}); - EXPECT_TRUE(ShapeUtil::IsTuple(result->on_host_shape())); - EXPECT_EQ(2, ShapeUtil::TupleElementCount(result->on_host_shape())); + EXPECT_TRUE(ShapeUtil::IsTuple(result.on_host_shape())); + EXPECT_EQ(2, ShapeUtil::TupleElementCount(result.on_host_shape())); - std::unique_ptr result_literal = ShapedBufferToLiteral(*result); + std::unique_ptr result_literal = ShapedBufferToLiteral(result); LiteralTestUtil::ExpectR2Equal( {{56.0f, 46.0f}, {36.0f, 26.0f}}, LiteralView::Create(*result_literal, {0})); @@ -365,10 +358,9 @@ XLA_TEST_F(LocalClientExecuteTest, NestedTupleArgument) { Literal::CreateR1({222.0, -2.0, 10.0}).get()}); auto arg_buffer = LiteralToShapedBuffer(*arg_literal); - std::unique_ptr result = - ExecuteLocallyOrDie(computation, {arg_buffer.get()}); + ScopedShapedBuffer result = ExecuteLocallyOrDie(computation, {&arg_buffer}); - std::unique_ptr result_literal = ShapedBufferToLiteral(*result); + std::unique_ptr result_literal = ShapedBufferToLiteral(result); LiteralTestUtil::ExpectR2Equal( {{-1.0, -2.0}, {-3.0, -4}}, LiteralView::Create(*result_literal, {0})); LiteralTestUtil::ExpectR1Equal( @@ -396,18 +388,16 @@ XLA_TEST_F(LocalClientExecuteTest, PassingTupleResultBackIntoComputation) { Literal::CreateR2({{11.0, 3.0}, {4.0, 5.0}}).get()}); auto arg_buffer = LiteralToShapedBuffer(*arg_literal); - std::unique_ptr result_0 = - ExecuteLocallyOrDie(computation, {arg_buffer.get()}); - std::unique_ptr result_0_literal = ShapedBufferToLiteral(*result_0); + ScopedShapedBuffer result_0 = ExecuteLocallyOrDie(computation, {&arg_buffer}); + std::unique_ptr result_0_literal = ShapedBufferToLiteral(result_0); 
LiteralTestUtil::ExpectR2Equal( {{-1.0, -2.0}, {-3.0, -4.0}}, LiteralView::Create(*result_0_literal, {0})); LiteralTestUtil::ExpectR2Equal( {{22.0, 6.0}, {8.0, 10}}, LiteralView::Create(*result_0_literal, {1})); - std::unique_ptr result_1 = - ExecuteLocallyOrDie(computation, {result_0.get()}); - std::unique_ptr result_1_literal = ShapedBufferToLiteral(*result_1); + ScopedShapedBuffer result_1 = ExecuteLocallyOrDie(computation, {&result_0}); + std::unique_ptr result_1_literal = ShapedBufferToLiteral(result_1); LiteralTestUtil::ExpectR2Equal( {{1.0, 2.0}, {3.0, 4.0}}, LiteralView::Create(*result_1_literal, {0})); LiteralTestUtil::ExpectR2Equal( @@ -453,10 +443,8 @@ XLA_TEST_F(LocalClientExecuteTest, LargeTuple) { Literal::MakeTupleOwned(std::move(arg_elements)); auto arg_buffer = LiteralToShapedBuffer(*arg_literal); - std::unique_ptr result = - ExecuteLocallyOrDie(computation, {arg_buffer.get()}); - - std::unique_ptr result_literal = ShapedBufferToLiteral(*result); + ScopedShapedBuffer result = ExecuteLocallyOrDie(computation, {&arg_buffer}); + std::unique_ptr result_literal = ShapedBufferToLiteral(result); for (int i = 0; i < kElementCount; ++i) { LiteralTestUtil::ExpectR1Near( @@ -511,9 +499,8 @@ XLA_TEST_F(LocalClientExecuteTest, DISABLED_ON_CPU_PARALLEL(LargeNestedTuple)) { auto arg_literal = Literal::MakeTupleOwned(std::move(outer_tuple_elements)); auto arg_buffer = LiteralToShapedBuffer(*arg_literal); - std::unique_ptr result = - ExecuteLocallyOrDie(computation, {arg_buffer.get()}); - std::unique_ptr result_literal = ShapedBufferToLiteral(*result); + ScopedShapedBuffer result = ExecuteLocallyOrDie(computation, {&arg_buffer}); + std::unique_ptr result_literal = ShapedBufferToLiteral(result); for (int i = 0; i < kFanout; ++i) { for (int j = 0; j < kFanout; ++j) { @@ -556,9 +543,8 @@ XLA_TEST_F(LocalClientExecuteTest, DeepTuple) { } auto arg_buffer = LiteralToShapedBuffer(*arg_literal); - std::unique_ptr result = - ExecuteLocallyOrDie(computation, 
{arg_buffer.get()}); - std::unique_ptr result_literal = ShapedBufferToLiteral(*result); + ScopedShapedBuffer result = ExecuteLocallyOrDie(computation, {&arg_buffer}); + std::unique_ptr result_literal = ShapedBufferToLiteral(result); ShapeIndex index; for (int i = 0; i < kTupleDepth; ++i) { @@ -578,7 +564,7 @@ XLA_TEST_F(LocalClientExecuteTest, InvalidNumberOfArguments) { auto x_array = LiteralToShapedBuffer(*Literal::CreateR1({1.0f, 2.0f, 3.0f})); auto execute_status = - ExecuteLocally(builder.Build().ValueOrDie(), {x_array.get()}); + ExecuteLocally(builder.Build().ValueOrDie(), {&x_array}); EXPECT_FALSE(execute_status.ok()); EXPECT_THAT(execute_status.status().error_message(), @@ -594,7 +580,7 @@ XLA_TEST_F(LocalClientExecuteTest, IncorrectArgumentShape) { auto x_array = LiteralToShapedBuffer( *Literal::CreateR2({{0.0f, 1.0f}, {2.0f, 3.0f}})); auto execute_status = - ExecuteLocally(builder.Build().ValueOrDie(), {x_array.get()}); + ExecuteLocally(builder.Build().ValueOrDie(), {&x_array}); EXPECT_FALSE(execute_status.ok()); EXPECT_THAT(execute_status.status().error_message(), @@ -611,7 +597,7 @@ XLA_TEST_F(LocalClientExecuteTest, InvalidResultLayout) { auto x_array = LiteralToShapedBuffer( *Literal::CreateR2({{0.0f, 1.0f}, {2.0f, 3.0f}})); auto execute_status = ExecuteLocally( - builder.Build().ValueOrDie(), {x_array.get()}, + builder.Build().ValueOrDie(), {&x_array}, DefaultExecutableBuildOptions().set_result_layout( ShapeUtil::MakeShapeWithLayout(F32, /*dimensions=*/{1, 2, 3, 4}, @@ -644,9 +630,9 @@ XLA_TEST_F(LocalClientExecuteTest, RunOnAllDeviceOrdinals) { computation, {}, DefaultExecutableBuildOptions().set_device_ordinal(d), DefaultExecutableRunOptions().set_device_ordinal(d)); - EXPECT_EQ(d, result->device_ordinal()); + EXPECT_EQ(d, result.device_ordinal()); LiteralTestUtil::ExpectR0Equal(42.0f, - *ShapedBufferToLiteral(*result)); + *ShapedBufferToLiteral(result)); } } } @@ -689,9 +675,9 @@ XLA_TEST_F(LocalClientExecuteTest, RunOnStream) { 
DefaultExecutableRunOptions().set_stream(&stream)); // As a check to verify that the computation ran of the device associated // with the stream. This is a weak check, but stronger verification is hard. - EXPECT_EQ(d, result->device_ordinal()); + EXPECT_EQ(d, result.device_ordinal()); LiteralTestUtil::ExpectR0Equal(42.0f, - *ShapedBufferToLiteral(*result)); + *ShapedBufferToLiteral(result)); } } @@ -767,9 +753,9 @@ XLA_TEST_F(LocalClientExecuteTest, SelectBetweenTuples) { {builder.ConstantR1(vec2), builder.ConstantR1(vec1)}); builder.Select(builder.ConstantR0(false), tuple12, tuple21); - std::unique_ptr result = + ScopedShapedBuffer result = ExecuteLocallyOrDie(builder.Build().ValueOrDie(), {}); - std::unique_ptr tuple_literal = ShapedBufferToLiteral(*result); + std::unique_ptr tuple_literal = ShapedBufferToLiteral(result); LiteralTestUtil::ExpectR1Equal( {2.0f, 4.0f, 6.0f}, LiteralView::Create(*tuple_literal, {0})); LiteralTestUtil::ExpectR1Equal( @@ -793,12 +779,12 @@ XLA_TEST_F(LocalClientExecuteTest, CompileExecutable) { auto x_array = LiteralToShapedBuffer(*Literal::CreateR1({0.0f, 1.0f, 2.0f})); - std::unique_ptr result = - executable->Run({x_array.get()}, DefaultExecutableRunOptions()) + ScopedShapedBuffer result = + executable->Run({&x_array}, DefaultExecutableRunOptions()) .ConsumeValueOrDie(); LiteralTestUtil::ExpectR1Near( - {2.0f, 4.0f, 6.0f}, *ShapedBufferToLiteral(*result), error_spec_); + {2.0f, 4.0f, 6.0f}, *ShapedBufferToLiteral(result), error_spec_); } XLA_TEST_F(LocalClientExecuteTest, ShapeBufferToLiteralConversion) { @@ -811,7 +797,7 @@ XLA_TEST_F(LocalClientExecuteTest, ShapeBufferToLiteralConversion) { literal, local_client_->default_device_ordinal(), allocator_)); TF_ASSERT_OK_AND_ASSIGN( auto transferred_literal, - local_client_->ShapedBufferToLiteral(*shaped_buffer)); + local_client_->ShapedBufferToLiteral(shaped_buffer)); EXPECT_EQ(literal, *transferred_literal); }; @@ -851,7 +837,7 @@ XLA_TEST_F(LocalClientExecuteTest, 
ShapeBufferToLiteralConversion64bit) { literal, local_client_->default_device_ordinal(), allocator_)); TF_ASSERT_OK_AND_ASSIGN( auto transferred_literal, - local_client_->ShapedBufferToLiteral(*shaped_buffer)); + local_client_->ShapedBufferToLiteral(shaped_buffer)); EXPECT_EQ(literal, *transferred_literal); }; @@ -919,12 +905,12 @@ void BM_LocalClientOverhead(int num_iters) { .ConsumeValueOrDie(); auto literal = Literal::CreateR2({{0, 0, 0}, {0, 0, 0}}); ASSERT_IS_OK(transfer_manager->TransferLiteralToDevice( - executors[device_ordinal], *literal, *buffer)); + executors[device_ordinal], *literal, buffer)); const int kWarmups = 2; auto executable_status = client->Compile( - computation, {&buffer->on_host_shape()}, ExecutableBuildOptions()); + computation, {&buffer.on_host_shape()}, ExecutableBuildOptions()); ASSERT_IS_OK(executable_status); std::unique_ptr executable = executable_status.ConsumeValueOrDie(); @@ -936,13 +922,13 @@ void BM_LocalClientOverhead(int num_iters) { run_options.set_allocator(&allocator).set_stream(&stream); for (int i = 0; i < kWarmups; ++i) { - auto result = executable->Run({buffer.get()}, run_options); + auto result = executable->Run({&buffer}, run_options); ASSERT_IS_OK(result); } tensorflow::testing::StartTiming(); for (int i = 0; i < num_iters; ++i) { - auto result = executable->Run({buffer.get()}, run_options); + auto result = executable->Run({&buffer}, run_options); ASSERT_IS_OK(result); } } diff --git a/tensorflow/compiler/xla/tests/local_client_test_base.cc b/tensorflow/compiler/xla/tests/local_client_test_base.cc index 96b976d25d75d35f46adfd104a03aceb363661eb..bb5aabb214da41f8bcd5da7d23d6d732e1277133 100644 --- a/tensorflow/compiler/xla/tests/local_client_test_base.cc +++ b/tensorflow/compiler/xla/tests/local_client_test_base.cc @@ -35,19 +35,21 @@ namespace xla { /* static */ TestAllocator* LocalClientTestBase::allocator_; -StatusOr TestAllocator::Allocate( - int device_ordinal, uint64 size, bool retry_on_failure) { +StatusOr 
TestAllocator::Allocate(int device_ordinal, + uint64 size, + bool retry_on_failure) { VLOG(2) << "Allocate(" << device_ordinal << ", " << size << ")"; { tensorflow::mutex_lock lock(count_mutex_); allocation_count_++; device_allocation_count_[device_ordinal]++; } - return StreamExecutorMemoryAllocator::Allocate(device_ordinal, size); + return StreamExecutorMemoryAllocator::Allocate(device_ordinal, size, + retry_on_failure); } -tensorflow::Status TestAllocator::Deallocate( - int device_ordinal, perftools::gputools::DeviceMemoryBase* mem) { +tensorflow::Status TestAllocator::Deallocate(int device_ordinal, + se::DeviceMemoryBase* mem) { VLOG(2) << "Deallocate(" << device_ordinal << ")"; { tensorflow::mutex_lock lock(count_mutex_); @@ -88,7 +90,7 @@ int64 TestAllocator::deallocation_count(int device_ordinal) const { } /* static */ TestAllocator* LocalClientTestBase::GetOrCreateAllocator( - perftools::gputools::Platform* platform) { + se::Platform* platform) { static tensorflow::mutex mu(tensorflow::LINKER_INITIALIZED); tensorflow::mutex_lock lock(mu); @@ -115,8 +117,7 @@ struct LocalClientTestBase::EigenThreadPoolWrapper { std::unique_ptr device; }; -LocalClientTestBase::LocalClientTestBase( - perftools::gputools::Platform* platform) +LocalClientTestBase::LocalClientTestBase(se::Platform* platform) : local_client_( ClientLibrary::GetOrCreateLocalClient(platform).ValueOrDie()), thread_pool_wrapper_(new EigenThreadPoolWrapper()) { @@ -128,7 +129,7 @@ LocalClientTestBase::LocalClientTestBase( LocalClientTestBase::~LocalClientTestBase() {} -std::unique_ptr LocalClientTestBase::LiteralToShapedBuffer( +ScopedShapedBuffer LocalClientTestBase::LiteralToShapedBuffer( const Literal& literal) { return local_client_ ->LiteralToShapedBuffer(literal, local_client_->default_device_ordinal()) @@ -155,7 +156,7 @@ ExecutableRunOptions LocalClientTestBase::DefaultExecutableRunOptions() const { return run_options; } -std::unique_ptr LocalClientTestBase::ExecuteLocallyOrDie( 
+ScopedShapedBuffer LocalClientTestBase::ExecuteLocallyOrDie( const Computation& computation, tensorflow::gtl::ArraySlice arguments) { return ExecuteLocally(computation, arguments, DefaultExecutableBuildOptions(), @@ -163,7 +164,7 @@ std::unique_ptr LocalClientTestBase::ExecuteLocallyOrDie( .ConsumeValueOrDie(); } -std::unique_ptr LocalClientTestBase::ExecuteLocallyOrDie( +ScopedShapedBuffer LocalClientTestBase::ExecuteLocallyOrDie( const Computation& computation, tensorflow::gtl::ArraySlice arguments, const ExecutableBuildOptions& build_options, @@ -172,16 +173,14 @@ std::unique_ptr LocalClientTestBase::ExecuteLocallyOrDie( .ConsumeValueOrDie(); } -StatusOr> -LocalClientTestBase::ExecuteLocally( +StatusOr LocalClientTestBase::ExecuteLocally( const Computation& computation, tensorflow::gtl::ArraySlice arguments) { return ExecuteLocally(computation, arguments, DefaultExecutableBuildOptions(), DefaultExecutableRunOptions()); } -StatusOr> -LocalClientTestBase::ExecuteLocally( +StatusOr LocalClientTestBase::ExecuteLocally( const Computation& computation, tensorflow::gtl::ArraySlice arguments, const ExecutableBuildOptions& build_options, diff --git a/tensorflow/compiler/xla/tests/local_client_test_base.h b/tensorflow/compiler/xla/tests/local_client_test_base.h index f0c73f04f6eb67b2e9cb5e111eccdc3818059b2b..4ee56a05ec6ec1ddf3fba0b357e2c3b586eb456e 100644 --- a/tensorflow/compiler/xla/tests/local_client_test_base.h +++ b/tensorflow/compiler/xla/tests/local_client_test_base.h @@ -41,15 +41,15 @@ namespace xla { class TestAllocator : public StreamExecutorMemoryAllocator { public: - explicit TestAllocator(perftools::gputools::Platform* platform) + explicit TestAllocator(se::Platform* platform) : StreamExecutorMemoryAllocator( platform, PlatformUtil::GetStreamExecutors(platform).ValueOrDie()) { } - StatusOr Allocate( - int device_ordinal, uint64 size, bool retry_on_failure) override; - tensorflow::Status Deallocate( - int device_ordinal, 
perftools::gputools::DeviceMemoryBase* mem) override; + StatusOr Allocate(int device_ordinal, uint64 size, + bool retry_on_failure) override; + tensorflow::Status Deallocate(int device_ordinal, + se::DeviceMemoryBase* mem) override; // Return the number of allocations that have been performed. int64 allocation_count() const; @@ -75,18 +75,15 @@ class TestAllocator : public StreamExecutorMemoryAllocator { class LocalClientTestBase : public ::testing::Test { protected: struct EigenThreadPoolWrapper; - explicit LocalClientTestBase( - perftools::gputools::Platform* platform = nullptr); + explicit LocalClientTestBase(se::Platform* platform = nullptr); virtual ~LocalClientTestBase(); - static TestAllocator* GetOrCreateAllocator( - perftools::gputools::Platform* platform); + static TestAllocator* GetOrCreateAllocator(se::Platform* platform); // Copy the given literal onto the default device and return a // ScopedShapedBuffer. Convenience wrapper around // LocalClient::LiteralToShapedBuffer. - std::unique_ptr LiteralToShapedBuffer( - const Literal& literal); + ScopedShapedBuffer LiteralToShapedBuffer(const Literal& literal); // Construct and return a literal containing the array represented by // shaped_buffer. @@ -95,19 +92,19 @@ class LocalClientTestBase : public ::testing::Test { // Execute the given computation on the local client. With and without // options. 
- StatusOr> ExecuteLocally( + StatusOr ExecuteLocally( const Computation& computation, tensorflow::gtl::ArraySlice arguments); - StatusOr> ExecuteLocally( + StatusOr ExecuteLocally( const Computation& computation, tensorflow::gtl::ArraySlice arguments, const ExecutableBuildOptions& build_options, const ExecutableRunOptions& run_options); - std::unique_ptr ExecuteLocallyOrDie( + ScopedShapedBuffer ExecuteLocallyOrDie( const Computation& computation, tensorflow::gtl::ArraySlice arguments); - std::unique_ptr ExecuteLocallyOrDie( + ScopedShapedBuffer ExecuteLocallyOrDie( const Computation& computation, tensorflow::gtl::ArraySlice arguments, const ExecutableBuildOptions& build_options, @@ -128,7 +125,7 @@ class LocalClientTestBase : public ::testing::Test { // of the process. So make the allocator static. static TestAllocator* allocator_; - perftools::gputools::StreamExecutor* stream_executor_; + se::StreamExecutor* stream_executor_; TransferManager* transfer_manager_; LocalClient* local_client_; diff --git a/tensorflow/compiler/xla/tests/map_test.cc b/tensorflow/compiler/xla/tests/map_test.cc index 0cd812fd1b4bc69c34b70d3ca0fd0aa6cf57fa4c..8fabcaca1b968dbd0546e16b97f48c43dbb60e3c 100644 --- a/tensorflow/compiler/xla/tests/map_test.cc +++ b/tensorflow/compiler/xla/tests/map_test.cc @@ -21,6 +21,8 @@ limitations under the License. 
#include "tensorflow/compiler/xla/client/global_data.h" #include "tensorflow/compiler/xla/client/lib/arithmetic.h" #include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_computation.h" #include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/statusor.h" @@ -39,7 +41,7 @@ namespace { class MapTest : public ClientLibraryTestBase { public: - explicit MapTest(perftools::gputools::Platform* platform = nullptr) + explicit MapTest(se::Platform* platform = nullptr) : ClientLibraryTestBase(platform) { mutable_debug_options()->add_xla_disable_hlo_passes("algsimp"); mutable_debug_options()->add_xla_disable_hlo_passes("inline"); @@ -50,18 +52,18 @@ class MapTest : public ClientLibraryTestBase { // x {R0F32} ----> (add) // / // 1.0f ---------/ - Computation CreateAdderToOne() { - ComputationBuilder mapped_builder(client_, TestName()); + XlaComputation CreateAdderToOne() { + XlaBuilder mapped_builder(TestName()); auto x = mapped_builder.Parameter(0, ShapeUtil::MakeShape(F32, {}), "x"); auto one = mapped_builder.ConstantR0(1.0); - auto adder_to_one = mapped_builder.Add(x, one); + mapped_builder.Add(x, one); auto computation_status = mapped_builder.Build(); TF_CHECK_OK(computation_status.status()); return computation_status.ConsumeValueOrDie(); } - Computation CreateMax() { - ComputationBuilder b(client_, TestName()); + XlaComputation CreateMax() { + XlaBuilder b(TestName()); auto lhs = b.Parameter(0, ShapeUtil::MakeShape(F32, {}), "x"); auto rhs = b.Parameter(1, ShapeUtil::MakeShape(F32, {}), "y"); b.Max(lhs, rhs); @@ -73,8 +75,8 @@ class MapTest : public ClientLibraryTestBase { // Creates a computation that accepts an F32 and returns T(1) (ignoring the // argument). 
template - Computation CreateScalarOne() { - ComputationBuilder mapped_builder(client_, "scalar_one"); + XlaComputation CreateScalarOne() { + XlaBuilder mapped_builder("scalar_one"); (void)mapped_builder.Parameter(0, ShapeUtil::MakeShape(F32, {}), "x"); mapped_builder.ConstantR0(1); auto computation_status = mapped_builder.Build(); @@ -87,11 +89,11 @@ class MapTest : public ClientLibraryTestBase { // x {R0F32} ----> (mul) // / // 2.0f ---------/ - Computation CreateMulByTwo() { - ComputationBuilder mapped_builder(client_, TestName()); + XlaComputation CreateMulByTwo() { + XlaBuilder mapped_builder(TestName()); auto x = mapped_builder.Parameter(0, ShapeUtil::MakeShape(F32, {}), "x"); auto two = mapped_builder.ConstantR0(2.0); - auto mul_by_two = mapped_builder.Mul(x, two); + mapped_builder.Mul(x, two); auto computation_status = mapped_builder.Build(); TF_CHECK_OK(computation_status.status()); return computation_status.ConsumeValueOrDie(); @@ -105,12 +107,12 @@ class MapTest : public ClientLibraryTestBase { // x {R0F32} ----> (add) ----> (mul) // / // 1.0f ---------/ - Computation CreateAdderToOneTimesItself() { - ComputationBuilder mapped_builder(client_, TestName()); + XlaComputation CreateAdderToOneTimesItself() { + XlaBuilder mapped_builder(TestName()); auto x = mapped_builder.Parameter(0, ShapeUtil::MakeShape(F32, {}), "x"); auto one = mapped_builder.ConstantR0(1.0); auto adder_to_one = mapped_builder.Add(x, one); - auto result = mapped_builder.Mul(x, adder_to_one); + mapped_builder.Mul(x, adder_to_one); auto computation_status = mapped_builder.Build(); TF_CHECK_OK(computation_status.status()); return computation_status.ConsumeValueOrDie(); @@ -122,12 +124,13 @@ class MapTest : public ClientLibraryTestBase { // x {R0F32} -----------> (map) ----> (add) // / / // embedded_computation --/ n --/ - Computation CreateMapPlusN(const Computation& embedded_computation, float n) { - ComputationBuilder builder(client_, TestName()); + XlaComputation CreateMapPlusN(const 
XlaComputation& embedded_computation, + float n) { + XlaBuilder builder(TestName()); auto x = builder.Parameter(0, ShapeUtil::MakeShape(F32, {}), "x"); auto map = builder.Map({x}, embedded_computation, {}); auto constant_n = builder.ConstantR0(n); - auto add = builder.Add(map, constant_n); + builder.Add(map, constant_n); auto computation_status = builder.Build(); TF_CHECK_OK(computation_status.status()); return computation_status.ConsumeValueOrDie(); @@ -135,11 +138,11 @@ class MapTest : public ClientLibraryTestBase { // Creates a binary function with signature (F32, F32) -> Pred // defined by (x, y) -> x > y. - Computation CreateGt() { - ComputationBuilder b(client_, "Gt"); + XlaComputation CreateGt() { + XlaBuilder b("Gt"); auto x = b.Parameter(0, ShapeUtil::MakeShape(F32, {}), "x"); auto y = b.Parameter(1, ShapeUtil::MakeShape(F32, {}), "y"); - auto gt = b.Gt(x, y); + b.Gt(x, y); auto computation_status = b.Build(); TF_CHECK_OK(computation_status.status()); return computation_status.ConsumeValueOrDie(); @@ -152,13 +155,13 @@ class MapTest : public ClientLibraryTestBase { // y {R0F32} ----> (add) ---> (add) // / // z {R0F32} ---------------/ - Computation CreateTernaryAdder() { - ComputationBuilder mapped_builder(client_, "TernaryAdder"); + XlaComputation CreateTernaryAdder() { + XlaBuilder mapped_builder("TernaryAdder"); auto x = mapped_builder.Parameter(0, ShapeUtil::MakeShape(F32, {}), "x"); auto y = mapped_builder.Parameter(1, ShapeUtil::MakeShape(F32, {}), "y"); auto z = mapped_builder.Parameter(2, ShapeUtil::MakeShape(F32, {}), "z"); auto xy = mapped_builder.Add(x, y); - auto xyz = mapped_builder.Add(xy, z); + mapped_builder.Add(xy, z); auto computation_status = mapped_builder.Build(); TF_CHECK_OK(computation_status.status()); return computation_status.ConsumeValueOrDie(); @@ -167,13 +170,13 @@ class MapTest : public ClientLibraryTestBase { TEST_F(MapTest, MapEachElemPlusOneR0) { // Applies lambda (x) (+ x 1)) to an input scalar. 
- ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); std::unique_ptr param0_literal = Literal::CreateR0(42.0); std::unique_ptr param0_data = client_->TransferToServer(*param0_literal).ConsumeValueOrDie(); auto param = builder.Parameter(0, param0_literal->shape(), "param0"); - auto map = builder.Map({param}, CreateAdderToOne(), {}); + builder.Map({param}, CreateAdderToOne(), {}); ComputeAndCompareR0(&builder, 43.0, {param0_data.get()}, ErrorSpec(0.01f)); @@ -181,13 +184,13 @@ TEST_F(MapTest, MapEachElemPlusOneR0) { XLA_TEST_F(MapTest, MapEachElemPlusOneR1S0) { // Maps (lambda (x) (+ x 1)) onto an input R1F32 vector of length 0. - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); std::unique_ptr param0_literal = Literal::CreateR1({}); std::unique_ptr param0_data = client_->TransferToServer(*param0_literal).ConsumeValueOrDie(); auto param = builder.Parameter(0, param0_literal->shape(), "param0"); - auto map = builder.Map({param}, CreateAdderToOne(), {0}); + builder.Map({param}, CreateAdderToOne(), {0}); ComputeAndCompareR1(&builder, {}, {param0_data.get()}, ErrorSpec(0.01f)); @@ -195,55 +198,55 @@ XLA_TEST_F(MapTest, MapEachElemPlusOneR1S0) { TEST_F(MapTest, MapEachElemPlusOneR1S4) { // Maps (lambda (x) (+ x 1)) onto an input R1F32 vector of length 4. 
- ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); std::unique_ptr param0_literal = Literal::CreateR1({2.2f, 3.3f, 4.4f, 5.5f}); std::unique_ptr param0_data = client_->TransferToServer(*param0_literal).ConsumeValueOrDie(); auto param = builder.Parameter(0, param0_literal->shape(), "param0"); - auto map = builder.Map({param}, CreateAdderToOne(), {0}); + builder.Map({param}, CreateAdderToOne(), {0}); ComputeAndCompareR1(&builder, {3.2f, 4.3f, 5.4f, 6.5f}, {param0_data.get()}, ErrorSpec(0.01f)); } TEST_F(MapTest, MapEachF32ElementToS32Constant) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); std::unique_ptr param0_literal = Literal::CreateR1({2.2f, 3.3f, 4.4f, 5.5f}); std::unique_ptr param0_data = client_->TransferToServer(*param0_literal).ConsumeValueOrDie(); auto param = builder.Parameter(0, param0_literal->shape(), "param0"); - auto map = builder.Map({param}, CreateScalarOne(), {0}); + builder.Map({param}, CreateScalarOne(), {0}); ComputeAndCompareR1(&builder, {1, 1, 1, 1}, {param0_data.get()}); } TEST_F(MapTest, MapEachF32ElementToU32Constant) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); std::unique_ptr param0_literal = Literal::CreateR1({2.2f, 3.3f, 4.4f, 5.5f}); std::unique_ptr param0_data = client_->TransferToServer(*param0_literal).ConsumeValueOrDie(); auto param = builder.Parameter(0, param0_literal->shape(), "param0"); - auto map = builder.Map({param}, CreateScalarOne(), {0}); + builder.Map({param}, CreateScalarOne(), {0}); ComputeAndCompareR1(&builder, {1, 1, 1, 1}, {param0_data.get()}); } TEST_F(MapTest, MapEachElemLongerChainR1) { // Maps (lambda (x) (* (+ x 1) x)) onto an input R1F32 vector. 
- ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); std::unique_ptr param0_literal = Literal::CreateR1({2.6f, -5.1f, 0.1f, 0.2f, 999.0f, 255.5f}); std::unique_ptr param0_data = client_->TransferToServer(*param0_literal).ConsumeValueOrDie(); auto param = builder.Parameter(0, param0_literal->shape(), "param0"); - auto map = builder.Map({param}, CreateAdderToOneTimesItself(), {0}); + builder.Map({param}, CreateAdderToOneTimesItself(), {0}); ComputeAndCompareR1( &builder, {9.36f, 20.91f, 0.11f, 0.24f, 999000.0f, 65535.75f}, @@ -253,14 +256,14 @@ TEST_F(MapTest, MapEachElemLongerChainR1) { XLA_TEST_F(MapTest, MapMultipleMapsR1S0) { // Maps (lambda (x) (+ x 1)) onto an input R1F32 vector of length 0, and then // maps (lambda (x) (* x 2)) on the result. - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); std::unique_ptr param0_literal = Literal::CreateR1({}); std::unique_ptr param0_data = client_->TransferToServer(*param0_literal).ConsumeValueOrDie(); auto param = builder.Parameter(0, param0_literal->shape(), "param0"); auto map1 = builder.Map({param}, CreateAdderToOne(), {0}); - auto map2 = builder.Map({map1}, CreateMulByTwo(), {0}); + builder.Map({map1}, CreateMulByTwo(), {0}); ComputeAndCompareR1(&builder, {}, {param0_data.get()}, ErrorSpec(0.01f)); @@ -269,7 +272,7 @@ XLA_TEST_F(MapTest, MapMultipleMapsR1S0) { TEST_F(MapTest, MapMultipleMapsR1S4) { // Maps (lambda (x) (+ x 1)) onto an input R1F32 vector of length 4, and then // maps (lambda (x) (* x 2)) on the result. 
- ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); std::unique_ptr param0_literal = Literal::CreateR1({2.2f, 3.3f, 4.4f, 5.5f}); std::unique_ptr param0_data = @@ -277,7 +280,7 @@ TEST_F(MapTest, MapMultipleMapsR1S4) { auto param = builder.Parameter(0, param0_literal->shape(), "param0"); auto map1 = builder.Map({param}, CreateAdderToOne(), {0}); - auto map2 = builder.Map({map1}, CreateMulByTwo(), {0}); + builder.Map({map1}, CreateMulByTwo(), {0}); ComputeAndCompareR1(&builder, {6.4f, 8.6f, 10.8f, 13.0f}, {param0_data.get()}, ErrorSpec(0.01f)); @@ -285,14 +288,14 @@ TEST_F(MapTest, MapMultipleMapsR1S4) { TEST_F(MapTest, MapEachElemPlusOneR2) { // Maps (lambda (x) (+ x 1)) onto an input R2F32 vector. - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); std::unique_ptr param0_literal = Literal::CreateR2( {{13.25f, 14.0f}, {-7.1f, -7.2f}, {-8.8f, 8.8f}}); std::unique_ptr param0_data = client_->TransferToServer(*param0_literal).ConsumeValueOrDie(); auto param = builder.Parameter(0, param0_literal->shape(), "param0"); - auto map = builder.Map({param}, CreateAdderToOne(), {0, 1}); + builder.Map({param}, CreateAdderToOne(), {0, 1}); Array2D expected_array( {{14.25f, 15.0f}, {-6.1f, -6.2f}, {-7.8f, 9.8f}}); @@ -317,18 +320,18 @@ XLA_TEST_F(MapTest, ComplexNestedMaps) { auto embed2 = CreateMapPlusN(embed1, 2.0); auto embed3 = CreateMapPlusN(embed1, 4.0); - ComputationBuilder embed4_builder(client_, "embed4"); + XlaBuilder embed4_builder("embed4"); auto embed4_param = embed4_builder.Parameter(0, scalar_shape, "x"); auto embed4_map_lhs = embed4_builder.Map({embed4_param}, embed2, {}); auto embed4_map_rhs = embed4_builder.Map({embed4_param}, embed3, {}); - auto embed4_add = embed4_builder.Add(embed4_map_lhs, embed4_map_rhs); + embed4_builder.Add(embed4_map_lhs, embed4_map_rhs); auto embed4_status = embed4_builder.Build(); ASSERT_IS_OK(embed4_status.status()); auto embed4 = embed4_status.ConsumeValueOrDie(); auto 
embed5 = CreateMapPlusN(embed2, 6.0); - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto constant_42 = builder.ConstantR0(42.0); auto constant_7 = builder.ConstantR0(7.0); auto map_42 = builder.Map({constant_42}, embed5, {}); @@ -359,7 +362,8 @@ TEST_F(MapTest, VersionedEmbeddedComputation) { // Add another Add(1) operation to the existing embedded computation. This // requires using the stub interface because the ComputationBuilder does not - // allow modification to the Computation objects after they have been built. + // allow modification to the XlaComputation objects after they have been + // built. BinaryOpRequest request; request.set_binop(BINOP_ADD); *request.mutable_lhs() = adder_to_one; @@ -381,7 +385,7 @@ TEST_F(MapTest, VersionedEmbeddedComputation) { TEST_F(MapTest, MapBinaryAdder) { // Maps (lambda (x y) (+ x y)) onto two R1F32 vectors. - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); std::unique_ptr param0_literal = Literal::CreateR1({2.2f, 3.3f, 4.4f, 5.5f}); std::unique_ptr param0_data = @@ -393,8 +397,7 @@ TEST_F(MapTest, MapBinaryAdder) { auto param0 = builder.Parameter(0, param0_literal->shape(), "param0"); auto param1 = builder.Parameter(1, param1_literal->shape(), "param1"); - auto map = builder.Map({param0, param1}, - CreateScalarAddComputation(F32, &builder), {0}); + builder.Map({param0, param1}, CreateScalarAddComputation(F32, &builder), {0}); ComputeAndCompareR1(&builder, {7.3f, 7.7, 4.3f, 0}, {param0_data.get(), param1_data.get()}, @@ -404,7 +407,7 @@ TEST_F(MapTest, MapBinaryAdder) { // Adds two rank-2 arrays with different layouts. This test exercises a path // for Map that used to fail in shape inference (b/28989438). 
XLA_TEST_F(MapTest, AddWithMixedLayouts) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); std::unique_ptr param0_literal = Literal::CreateR2WithLayout( {{1, 2}, {3, 4}}, LayoutUtil::MakeLayout({1, 0})); std::unique_ptr param0_data = @@ -417,8 +420,8 @@ XLA_TEST_F(MapTest, AddWithMixedLayouts) { auto param0 = builder.Parameter(0, param0_literal->shape(), "param0"); auto param1 = builder.Parameter(1, param1_literal->shape(), "param1"); - auto map = builder.Map({param0, param1}, - CreateScalarAddComputation(S32, &builder), {0, 1}); + builder.Map({param0, param1}, CreateScalarAddComputation(S32, &builder), + {0, 1}); Array2D expected(2, 2); expected(0, 0) = 11; @@ -430,7 +433,7 @@ XLA_TEST_F(MapTest, AddWithMixedLayouts) { } XLA_TEST_F(MapTest, AddR3_3x0x2) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); std::unique_ptr param0_literal = Literal::CreateR3FromArray3D(Array3D(3, 0, 2)); std::unique_ptr param0_data = @@ -443,8 +446,8 @@ XLA_TEST_F(MapTest, AddR3_3x0x2) { auto param0 = builder.Parameter(0, param0_literal->shape(), "param0"); auto param1 = builder.Parameter(1, param1_literal->shape(), "param1"); - auto map = builder.Map({param0, param1}, - CreateScalarAddComputation(S32, &builder), {0, 1, 2}); + builder.Map({param0, param1}, CreateScalarAddComputation(S32, &builder), + {0, 1, 2}); ComputeAndCompareR3(&builder, Array3D(3, 0, 2), {param0_data.get(), param1_data.get()}); @@ -452,7 +455,7 @@ XLA_TEST_F(MapTest, AddR3_3x0x2) { TEST_F(MapTest, MapTernaryAdder) { // Maps (lambda (x y z) (+ x y z)) onto three R1F32 vectors. 
- ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); std::unique_ptr param0_literal = Literal::CreateR1({2.2f, 3.3f, 4.4f, 5.5f}); std::unique_ptr param0_data = @@ -469,7 +472,7 @@ TEST_F(MapTest, MapTernaryAdder) { auto param0 = builder.Parameter(0, param0_literal->shape(), "param0"); auto param1 = builder.Parameter(1, param1_literal->shape(), "param1"); auto param2 = builder.Parameter(2, param2_literal->shape(), "param2"); - auto map = builder.Map({param0, param1, param2}, CreateTernaryAdder(), {0}); + builder.Map({param0, param1, param2}, CreateTernaryAdder(), {0}); ComputeAndCompareR1( &builder, {-2.7f, -92.3f, -895.7f, -400.0f}, @@ -479,24 +482,24 @@ TEST_F(MapTest, MapTernaryAdder) { TEST_F(MapTest, MapGt) { // Maps (x,y) -> x > y onto two R1F32 vectors. - ComputationBuilder b(client_, TestName()); + XlaBuilder b(TestName()); auto gt = CreateGt(); b.Map({b.ConstantR1({1, 20}), b.ConstantR1({10, 2})}, gt, {0}); ComputeAndCompareR1(&b, {false, true}, {}); } TEST_F(MapTest, NestedBinaryMap) { - Computation max_with_square; + XlaComputation max_with_square; { // max_with_square(x) = do max(x, x^2) via a map. 
- ComputationBuilder b(client_, "max_with_square"); + XlaBuilder b("max_with_square"); auto x = b.Parameter(0, ShapeUtil::MakeShape(F32, {}), "x"); b.Map({x, b.Mul(x, x)}, CreateMax(), {}); auto computation_status = b.Build(); ASSERT_IS_OK(computation_status.status()); max_with_square = computation_status.ConsumeValueOrDie(); } - ComputationBuilder b(client_, TestName()); + XlaBuilder b(TestName()); auto input = b.ConstantR1({0.1f, 0.5f, -0.5f, 1.0f, 2.0f}); b.Map({input}, max_with_square, {0}); ComputeAndCompareR1(&b, {0.1f, 0.5f, 0.25f, 1.0f, 4.0f}, {}); @@ -505,13 +508,13 @@ TEST_F(MapTest, NestedBinaryMap) { TEST_F(MapTest, MapOperantionWithBuildError) { // Maps (lambda (x y) (+ x y)) onto two R1F32 vectors but uses an unsupported // type combination (F32 + U16) to test that the error is reported to the - // outermost ComputationBuilder. - ComputationBuilder builder(client_, TestName()); + // outermost XlaBuilder. + XlaBuilder builder(TestName()); auto sub_builder = builder.CreateSubBuilder("ErrorAdd"); auto x = sub_builder->Parameter(0, ShapeUtil::MakeShape(F32, {}), "x"); auto y = sub_builder->Parameter(1, ShapeUtil::MakeShape(U16, {}), "y"); - auto adder = sub_builder->Add(x, y); + sub_builder->Add(x, y); auto error_add = sub_builder->BuildAndNoteError(); std::unique_ptr param0_literal = @@ -525,9 +528,9 @@ TEST_F(MapTest, MapOperantionWithBuildError) { auto param0 = builder.Parameter(0, param0_literal->shape(), "param0"); auto param1 = builder.Parameter(1, param1_literal->shape(), "param1"); - auto map = builder.Map({param0, param1}, error_add, {0}); + builder.Map({param0, param1}, error_add, {0}); - StatusOr computation_status = builder.Build(); + StatusOr computation_status = builder.Build(); ASSERT_TRUE(!computation_status.ok()); EXPECT_THAT( computation_status.status().ToString(), @@ -545,7 +548,7 @@ using MapTestWithFullOpt = ClientLibraryTestBase; // to have issues with such patterns and maybe invalidate the pointer to entry // computation. 
TEST_F(MapTestWithFullOpt, MapScalarPower) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto sub_builder = builder.CreateSubBuilder("power"); auto x = sub_builder->Parameter(0, ShapeUtil::MakeShape(F32, {}), "x"); @@ -572,7 +575,7 @@ TEST_F(MapTestWithFullOpt, MapScalarPower) { // Regression test for b/35786417, where the inliner would not notice the change // of parameter order inside the map. TEST_F(MapTestWithFullOpt, MapSubtractOppositeOrder) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto sub_builder = builder.CreateSubBuilder("power"); auto x = sub_builder->Parameter(0, ShapeUtil::MakeShape(F32, {}), "x"); @@ -598,7 +601,7 @@ TEST_F(MapTestWithFullOpt, MapSubtractOppositeOrder) { // Regression test for b/35786417, where the inliner would CHECK-fail due to the // mul inside the map having more parameters than the map does. TEST_F(MapTestWithFullOpt, MapSquare) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto sub_builder = builder.CreateSubBuilder("power"); auto x = sub_builder->Parameter(0, ShapeUtil::MakeShape(F32, {}), "x"); diff --git a/tensorflow/compiler/xla/tests/pad_test.cc b/tensorflow/compiler/xla/tests/pad_test.cc index 8cef8dd34dc7b16b1e58ded67d6b6a4ba79f20db..ce295b832d79e4f00656f2893c2ba1162693dd73 100644 --- a/tensorflow/compiler/xla/tests/pad_test.cc +++ b/tensorflow/compiler/xla/tests/pad_test.cc @@ -18,9 +18,9 @@ limitations under the License. 
#include "tensorflow/compiler/xla/array2d.h" #include "tensorflow/compiler/xla/array4d.h" -#include "tensorflow/compiler/xla/client/computation_builder.h" #include "tensorflow/compiler/xla/client/lib/arithmetic.h" #include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" #include "tensorflow/compiler/xla/ptr_util.h" #include "tensorflow/compiler/xla/reference_util.h" #include "tensorflow/compiler/xla/tests/client_library_test_base.h" @@ -85,7 +85,7 @@ class PadTestFloat : public PadTest, // Tests a Pad() with a zero-element input and output. XLA_TEST_P(PadTestFloat, Pad1DS0ToS0Array) { - ComputationBuilder b(client_, TestName()); + XlaBuilder b(TestName()); // Set up the padding configuration {low: 0, high: 0, interior: 0}. PaddingConfig padding_config; auto dimension = padding_config.add_dimensions(); @@ -100,7 +100,7 @@ XLA_TEST_P(PadTestFloat, Pad1DS0ToS0Array) { // Tests a Pad() with a zero-element input but a non-zero-element output. XLA_TEST_P(PadTestFloat, Pad1DS0ToS5Array) { - ComputationBuilder b(client_, TestName()); + XlaBuilder b(TestName()); // Set up the padding configuration {low: 3, high: 0, interior: 1}. PaddingConfig padding_config; auto dimension = padding_config.add_dimensions(); @@ -115,7 +115,7 @@ XLA_TEST_P(PadTestFloat, Pad1DS0ToS5Array) { } XLA_TEST_P(PadTestFloat, Pad1DS3Array) { - ComputationBuilder b(client_, TestName()); + XlaBuilder b(TestName()); // Set up the padding configuration {low: 3, high: 0, interior: 1}. 
PaddingConfig padding_config; auto dimension = padding_config.add_dimensions(); @@ -130,7 +130,7 @@ XLA_TEST_P(PadTestFloat, Pad1DS3Array) { } XLA_TEST_P(PadTestFloat, Pad4D_2x0x3x2_FloatArray) { - ComputationBuilder b(client_, TestName()); + XlaBuilder b(TestName()); b.Pad(AddParam(Array4D(2, 0, 3, 2), &b), AddParam(*Literal::CreateR0(1.5), &b), r4_padding_on_dim0_dim1_); ComputeAndCompareR4(&b, Array4D(5, 2, 3, 2, 1.5f), {}, @@ -138,7 +138,7 @@ XLA_TEST_P(PadTestFloat, Pad4D_2x0x3x2_FloatArray) { } TEST_P(PadTestFloat, Pad4DFloat_1x1x3x2_Array) { - ComputationBuilder b(client_, TestName()); + XlaBuilder b(TestName()); auto input = MakeUnique>(1, 1, 3, 2); Array2D input_xy({ {1.0f, 2.0f}, // row 0 @@ -162,7 +162,7 @@ TEST_P(PadTestFloat, Pad4DFloat_1x1x3x2_Array) { } TEST_P(PadTestFloat, Pad4DFloatArrayWithInteriorPadding) { - ComputationBuilder b(client_, TestName()); + XlaBuilder b(TestName()); const float pad_value = 1.5f; Array4D input(3, 2, 1, 1, {1, 2, 3, 4, 5, 6}); @@ -181,7 +181,7 @@ TEST_P(PadTestFloat, Pad4DFloatArrayWithInteriorPadding) { } TEST_P(PadTestFloat, Pad4DFloatArrayMinorFirstSmall) { - ComputationBuilder b(client_, TestName()); + XlaBuilder b(TestName()); PaddingConfig padding_config; auto dimension0 = padding_config.add_dimensions(); @@ -223,7 +223,7 @@ TEST_P(PadTestFloat, Pad4DFloatArrayMinorFirstSmall) { } XLA_TEST_P(PadTestFloat, Pad4DFloatArrayMinorFirstNonTrivialMinorDimensions) { - ComputationBuilder b(client_, TestName()); + XlaBuilder b(TestName()); PaddingConfig padding_config; auto dimension0 = padding_config.add_dimensions(); @@ -266,7 +266,7 @@ XLA_TEST_P(PadTestFloat, Pad4DFloatArrayMinorFirstNonTrivialMinorDimensions) { } XLA_TEST_F(PadTest, Pad4DU8Array) { - ComputationBuilder b(client_, TestName()); + XlaBuilder b(TestName()); auto input = MakeUnique>(1, 1, 3, 2); Array2D input_xy({ {1, 2}, // row 0 @@ -290,7 +290,7 @@ XLA_TEST_F(PadTest, Pad4DU8Array) { } XLA_TEST_F(PadTest, Pad4DPredArray) { - ComputationBuilder b(client_, 
TestName()); + XlaBuilder b(TestName()); // Since bool is currently not well supported, use Broadcast operation to // create the operand for Pad. @@ -317,7 +317,7 @@ XLA_TEST_F(PadTest, Pad4DPredArray) { } XLA_TEST_P(PadTestFloat, Large2DPad) { - ComputationBuilder b(client_, TestName()); + XlaBuilder b(TestName()); auto ones = MakeUnique>(4, 4); ones->Fill(1.0f); @@ -329,15 +329,14 @@ XLA_TEST_P(PadTestFloat, Large2DPad) { padding_config.mutable_dimensions(dim)->set_edge_padding_high(58 + 100 * dim); } - auto padded = b.Pad(input, AddParam(*Literal::CreateR0(0.0f), &b), - padding_config); + b.Pad(input, AddParam(*Literal::CreateR0(0.0f), &b), padding_config); auto expected = ReferenceUtil::PadArray2D(*ones, padding_config, 0.0f); ComputeAndCompareR2(&b, *expected, {}, DefaultErrorSpec()); } XLA_TEST_P(PadTestFloat, AllTypes2DPad) { - ComputationBuilder b(client_, TestName()); + XlaBuilder b(TestName()); constexpr int64 in_rows = 35; constexpr int64 in_cols = 35; @@ -352,15 +351,14 @@ XLA_TEST_P(PadTestFloat, AllTypes2DPad) { padding_config.mutable_dimensions(1)->set_edge_padding_low(6); padding_config.mutable_dimensions(1)->set_edge_padding_high(4); padding_config.mutable_dimensions(1)->set_interior_padding(2); - auto padded = b.Pad(input, AddParam(*Literal::CreateR0(3.14f), &b), - padding_config); + b.Pad(input, AddParam(*Literal::CreateR0(3.14f), &b), padding_config); auto expected = ReferenceUtil::PadArray2D(*operand, padding_config, 3.14f); ComputeAndCompareR2(&b, *expected, {}, DefaultErrorSpec()); } XLA_TEST_P(PadTestFloat, High2DPad) { - ComputationBuilder b(client_, TestName()); + XlaBuilder b(TestName()); constexpr int64 in_rows = 129; constexpr int64 in_cols = 129; @@ -378,8 +376,7 @@ XLA_TEST_P(PadTestFloat, High2DPad) { padding_config.mutable_dimensions(dim)->set_interior_padding( interior_padding); } - auto padded = b.Pad(input, AddParam(*Literal::CreateR0(2.718f), &b), - padding_config); + b.Pad(input, AddParam(*Literal::CreateR0(2.718f), &b), 
padding_config); auto expected = ReferenceUtil::PadArray2D(*operand, padding_config, 2.718f); @@ -387,7 +384,7 @@ XLA_TEST_P(PadTestFloat, High2DPad) { } XLA_TEST_P(PadTestFloat, NegativePadding2D) { - ComputationBuilder b(client_, TestName()); + XlaBuilder b(TestName()); constexpr int64 in_rows = 129; constexpr int64 in_cols = 129; @@ -406,8 +403,7 @@ XLA_TEST_P(PadTestFloat, NegativePadding2D) { padding_config.mutable_dimensions(dim)->set_interior_padding( interior_padding); } - auto padded = b.Pad(input, AddParam(*Literal::CreateR0(2.718f), &b), - padding_config); + b.Pad(input, AddParam(*Literal::CreateR0(2.718f), &b), padding_config); auto expected = ReferenceUtil::PadArray2D(*operand, padding_config, 2.718f); @@ -415,7 +411,7 @@ XLA_TEST_P(PadTestFloat, NegativePadding2D) { } XLA_TEST_P(PadTestFloat, NegativeAndInteriorPadding2D) { - ComputationBuilder b(client_, TestName()); + XlaBuilder b(TestName()); constexpr int64 in_rows = 8; constexpr int64 in_cols = 11; @@ -434,8 +430,7 @@ XLA_TEST_P(PadTestFloat, NegativeAndInteriorPadding2D) { padding_config.mutable_dimensions(dim)->set_interior_padding( interior_padding[dim]); } - auto padded = b.Pad(input, AddParam(*Literal::CreateR0(2.718f), &b), - padding_config); + b.Pad(input, AddParam(*Literal::CreateR0(2.718f), &b), padding_config); auto expected = ReferenceUtil::PadArray2D(*operand, padding_config, 2.718f); @@ -444,20 +439,19 @@ XLA_TEST_P(PadTestFloat, NegativeAndInteriorPadding2D) { // Regression test for b/31827337. 
XLA_TEST_P(PadTestFloat, ReducePad) { - ComputationBuilder b(client_, TestName()); + XlaBuilder b(TestName()); auto ones = MakeUnique>(2, 2, 2, 2); ones->Fill(1.0); auto input = AddParam(*ones, &b); - Computation add = CreateScalarAddComputation(FloatType(), &b); + XlaComputation add = CreateScalarAddComputation(FloatType(), &b); auto reduce = b.Reduce(input, AddParam(*Literal::CreateR0(0.0), &b), add, {0}); PaddingConfig padding_config = MakeNoPaddingConfig(3); padding_config.mutable_dimensions(0)->set_edge_padding_low(1); padding_config.mutable_dimensions(0)->set_edge_padding_high(1); - auto padded = b.Pad(reduce, AddParam(*Literal::CreateR0(0.0f), &b), - padding_config); + b.Pad(reduce, AddParam(*Literal::CreateR0(0.0f), &b), padding_config); Array3D expected({{{0.0, 0.0}, {0.0, 0.0}}, {{2.0, 2.0}, {2.0, 2.0}}, diff --git a/tensorflow/compiler/xla/tests/reduce_precision_test.cc b/tensorflow/compiler/xla/tests/reduce_precision_test.cc index dc7ce3253cee255a7949326fa5b49fc8917432b8..b311785449f1774c3bc1e4d7ad35c2866e3b4061 100644 --- a/tensorflow/compiler/xla/tests/reduce_precision_test.cc +++ b/tensorflow/compiler/xla/tests/reduce_precision_test.cc @@ -20,9 +20,9 @@ limitations under the License. #include #include "tensorflow/compiler/xla/array2d.h" -#include "tensorflow/compiler/xla/client/computation_builder.h" #include "tensorflow/compiler/xla/client/global_data.h" #include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" #include "tensorflow/compiler/xla/layout_util.h" #include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/service/reduce_precision_insertion.h" @@ -228,15 +228,14 @@ XLA_TEST_P(ReducePrecisionAccuracyTest, ReducePrecisionF32) { // This is required for proper handling of NaN values. 
SetFastMathDisabled(true); - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); std::unique_ptr a_literal = Literal::CreateR1({input_values}); std::unique_ptr a_data = client_->TransferToServer(*a_literal).ConsumeValueOrDie(); auto a = builder.Parameter(0, a_literal->shape(), "a"); - auto reduce_precision = - builder.ReducePrecision(a, exponent_bits, mantissa_bits); + builder.ReducePrecision(a, exponent_bits, mantissa_bits); ComputeAndCompareR1(&builder, expected_values, {a_data.get()}); } @@ -252,7 +251,7 @@ class ReducePrecisionInsertionTest : public ClientLibraryTestBase {}; // The interpreter has no fusion pass, so skip this test. XLA_TEST_F(ReducePrecisionInsertionTest, DISABLED_ON_INTERPRETER(ReducePrecisionBeforeFusion)) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); std::unique_ptr a_literal = Literal::CreateR1({1.00001}); std::unique_ptr a_data = @@ -265,7 +264,7 @@ XLA_TEST_F(ReducePrecisionInsertionTest, // Near 1.0, Log(x) approximates x - 1; this lets us confirm that the // reduce-precision operation showed up in the correct place in the // graph. - auto log = builder.Log(abs); + builder.Log(abs); // Insert precision-reduction after the Abs(x) operation, rounding that // result to exactly 1.0f. @@ -281,7 +280,7 @@ XLA_TEST_F(ReducePrecisionInsertionTest, // The interpreter has no fusion pass, so skip this test. XLA_TEST_F(ReducePrecisionInsertionTest, DISABLED_ON_INTERPRETER(ReducePrecisionSkippedAfterFusion)) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); std::unique_ptr a_literal = Literal::CreateR1({1.00001}); std::unique_ptr a_data = @@ -290,7 +289,7 @@ XLA_TEST_F(ReducePrecisionInsertionTest, // These two operations should be fused by any reasonable backend. auto abs = builder.Abs(a); - auto neg = builder.Neg(abs); + builder.Neg(abs); // Add a pass after operation fusion, suffixing kAbs operations. 
This // should not see into the fusion nodes and thus should not affect the @@ -307,7 +306,7 @@ XLA_TEST_F(ReducePrecisionInsertionTest, // The interpreter has no fusion pass, so skip this test. XLA_TEST_F(ReducePrecisionInsertionTest, DISABLED_ON_INTERPRETER(ReducePrecisionAddedAfterFusion)) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); std::unique_ptr a_literal = Literal::CreateR1({1.00001}); std::unique_ptr a_data = @@ -316,7 +315,7 @@ XLA_TEST_F(ReducePrecisionInsertionTest, // These two operations should be fused by any reasonable backend. auto abs = builder.Abs(a); - auto neg = builder.Neg(abs); + builder.Neg(abs); // Add a pass after operation fusion, suffixing kFusion operations. auto reduce_precision_pass = execution_options_.mutable_debug_options() @@ -331,7 +330,7 @@ XLA_TEST_F(ReducePrecisionInsertionTest, // The interpreter has no fusion pass, so skip this test. XLA_TEST_F(ReducePrecisionInsertionTest, DISABLED_ON_INTERPRETER(ReducePrecisionSkippedFusionContains)) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); std::unique_ptr a_literal = Literal::CreateR1({1.00001}); std::unique_ptr a_data = @@ -340,7 +339,7 @@ XLA_TEST_F(ReducePrecisionInsertionTest, // These two operations should be fused by any reasonable backend. auto abs = builder.Abs(a); - auto neg = builder.Neg(abs); + builder.Neg(abs); // Add a pass suffixing fusion nodes containing kCos operations. This // should have no effect. @@ -356,7 +355,7 @@ XLA_TEST_F(ReducePrecisionInsertionTest, // The interpreter has no fusion pass, so skip this test. 
XLA_TEST_F(ReducePrecisionInsertionTest, DISABLED_ON_INTERPRETER(ReducePrecisionAddedFusionContains)) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); std::unique_ptr a_literal = Literal::CreateR1({1.00001}); std::unique_ptr a_data = @@ -365,7 +364,7 @@ XLA_TEST_F(ReducePrecisionInsertionTest, // These two operations should be fused by any reasonable backend. auto abs = builder.Abs(a); - auto neg = builder.Neg(abs); + builder.Neg(abs); // Add a pass suffixing fusion nodes containing kAbs operations. This // should see the kAbs operation within the above fusion node. diff --git a/tensorflow/compiler/xla/tests/reduce_test.cc b/tensorflow/compiler/xla/tests/reduce_test.cc index 3a097a01ab095b8a21a39f0d738a43c3d6a4d1d7..423ccadb5b3b7df950824349737a833c08870d77 100644 --- a/tensorflow/compiler/xla/tests/reduce_test.cc +++ b/tensorflow/compiler/xla/tests/reduce_test.cc @@ -39,6 +39,8 @@ limitations under the License. #include "tensorflow/compiler/xla/client/global_data.h" #include "tensorflow/compiler/xla/client/lib/arithmetic.h" #include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_computation.h" #include "tensorflow/compiler/xla/layout_util.h" #include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/reference_util.h" @@ -50,6 +52,7 @@ limitations under the License. #include "tensorflow/compiler/xla/tests/test_macros.h" #include "tensorflow/compiler/xla/util.h" #include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/lib/core/status_test_util.h" #include "tensorflow/core/lib/gtl/array_slice.h" #include "tensorflow/core/platform/test.h" #include "tensorflow/core/platform/types.h" @@ -57,6 +60,11 @@ limitations under the License. 
namespace xla { namespace { +using FuncGeneratorForType = Computation (*)(PrimitiveType, + ComputationBuilder*); + +using FuncGenerator = Computation (*)(ComputationBuilder*); + class ReduceTest : public ClientLibraryTestBase { protected: ReduceTest() { @@ -497,21 +505,18 @@ XLA_TEST_F(ReduceTest, TransposeAndReduceElementwiseR2_111x50_To_R1) { // Test that algebraic simplifier does not incorrectly fold a transpose into a // reduction operation. XLA_TEST_F(ReduceTest, TransposeAndReduceR3_12x111x50_To_R2) { - ComputationBuilder builder(client_, TestName()); - Computation add_f32 = CreateScalarAddComputation(F32, &builder); + XlaBuilder builder(TestName()); + XlaComputation add_f32 = CreateScalarAddComputation(F32, &builder); const Shape input_shape = ShapeUtil::MakeShape(F32, {12, 111, 50}); - ComputationDataHandle input = builder.Parameter(0, input_shape, "input"); - ComputationDataHandle zero = builder.ConstantR0(0.0); - ComputationDataHandle transpose = - builder.Transpose(input, /*permutation=*/{1, 0, 2}); - ComputationDataHandle reduce = - builder.Reduce(transpose, zero, add_f32, /*dimensions_to_reduce=*/{0}); + XlaOp input = builder.Parameter(0, input_shape, "input"); + XlaOp zero = builder.ConstantR0(0.0); + XlaOp transpose = builder.Transpose(input, /*permutation=*/{1, 0, 2}); + builder.Reduce(transpose, zero, add_f32, /*dimensions_to_reduce=*/{0}); TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr input_data, MakeFakeLiteral(input_shape)); - ComputeAndCompare(&builder, reduce, {std::move(*input_data)}, - ErrorSpec(0.01, 1e-4)); + ComputeAndCompare(&builder, {std::move(*input_data)}, ErrorSpec(0.01, 1e-4)); } XLA_TEST_F(ReduceTest, Reshape_111x2x25Reduce_111x50_To_R1) { @@ -755,53 +760,57 @@ XLA_TEST_F(ReduceTest, ReduceR3AmongDim2) { } XLA_TEST_F(ReduceTest, VectorizedReduce_Add) { - RunVectorizedReduceTest(CreateScalarAddComputation, - [](float a, float b) { return a + b; }, - [](int32 a, int32 b) { - return static_cast(static_cast(a) + - static_cast(b)); - }, - 
[](uint32 a, uint32 b) { return a + b; }, 0.0, 0, 0); + RunVectorizedReduceTest( + static_cast(CreateScalarAddComputation), + [](float a, float b) { return a + b; }, + [](int32 a, int32 b) { + return static_cast(static_cast(a) + + static_cast(b)); + }, + [](uint32 a, uint32 b) { return a + b; }, 0.0, 0, 0); } XLA_TEST_F(ReduceTest, VectorizedReduce_Multiply) { - RunVectorizedReduceTest(CreateScalarMultiplyComputation, - [](float a, float b) { return a * b; }, - [](int32 a, int32 b) { - return static_cast(static_cast(a) * - static_cast(b)); - }, - [](uint32 a, uint32 b) { return a * b; }, 1.0, 1, 1); + RunVectorizedReduceTest( + static_cast(CreateScalarMultiplyComputation), + [](float a, float b) { return a * b; }, + [](int32 a, int32 b) { + return static_cast(static_cast(a) * + static_cast(b)); + }, + [](uint32 a, uint32 b) { return a * b; }, 1.0, 1, 1); } XLA_TEST_F(ReduceTest, VectorizedReduce_Max) { - RunVectorizedReduceTest(CreateScalarMaxComputation, - [](float a, float b) { return std::max(a, b); }, - [](int32 a, int32 b) { return std::max(a, b); }, - [](uint32 a, uint32 b) { return std::max(a, b); }, - std::numeric_limits::min(), - std::numeric_limits::min(), - std::numeric_limits::min()); + RunVectorizedReduceTest( + static_cast(CreateScalarMaxComputation), + [](float a, float b) { return std::max(a, b); }, + [](int32 a, int32 b) { return std::max(a, b); }, + [](uint32 a, uint32 b) { return std::max(a, b); }, + std::numeric_limits::min(), std::numeric_limits::min(), + std::numeric_limits::min()); } XLA_TEST_F(ReduceTest, VectorizedReduce_Min) { - RunVectorizedReduceTest(CreateScalarMinComputation, - [](float a, float b) { return std::min(a, b); }, - [](int32 a, int32 b) { return std::min(a, b); }, - [](uint32 a, uint32 b) { return std::min(a, b); }, - std::numeric_limits::max(), - std::numeric_limits::max(), - std::numeric_limits::max()); + RunVectorizedReduceTest( + static_cast(CreateScalarMinComputation), + [](float a, float b) { return std::min(a, b); }, 
+ [](int32 a, int32 b) { return std::min(a, b); }, + [](uint32 a, uint32 b) { return std::min(a, b); }, + std::numeric_limits::max(), std::numeric_limits::max(), + std::numeric_limits::max()); } XLA_TEST_F(ReduceTest, VectorizedReduce_BooleanAnd) { RunVectorizedReduceTestForType( - CreateScalarAndComputation, [](bool a, bool b) { return a && b; }, true); + static_cast(CreateScalarAndComputation), + [](bool a, bool b) { return a && b; }, true); } XLA_TEST_F(ReduceTest, VectorizedReduce_BooleanOr) { RunVectorizedReduceTestForType( - CreateScalarOrComputation, [](bool a, bool b) { return a || b; }, false); + static_cast(CreateScalarOrComputation), + [](bool a, bool b) { return a || b; }, false); } class ReduceR3ToR2Test : public ReduceTest, @@ -926,5 +935,36 @@ XLA_TEST_F(ReduceInitializerTest, U64InitializerBigValue) { DoTest(1234556789123, 1024); } +// Test the operational semantic that the init value is passed on the lhs for +// reduces. Can be tested by performing an "identity" reduce (that simply +// returns one of the parameters). In this case, we return the rhs, which for +// a 1D array with one element, should not be the init value. 
+XLA_TEST_F(ReduceTest, ReduceIdentity) { + ComputationBuilder builder(client_, TestName()); + Shape single_float = ShapeUtil::MakeShape(F32, {}); + builder.Parameter(0, single_float, "lhs-unused"); + builder.Parameter(1, single_float, "rhs-used"); + auto computation_status = builder.Build(); + TF_ASSERT_OK(computation_status.status()); + + Shape operand_shape = ShapeUtil::MakeShape(F32, {1}); + builder.Reduce(builder.Parameter(0, operand_shape, "operand"), + builder.Parameter(1, single_float, "init"), + computation_status.ValueOrDie(), {0}); + + float operand[] = {42.0f}; + float init = 58.5f; + float expected = 42.0f; + std::unique_ptr input_literal = Literal::CreateR1(operand); + std::unique_ptr input_global_data = + client_->TransferToServer(*input_literal).ConsumeValueOrDie(); + std::unique_ptr input_literal2 = Literal::CreateR0(init); + std::unique_ptr input_global_data2 = + client_->TransferToServer(*input_literal2).ConsumeValueOrDie(); + ComputeAndCompareR0( + &builder, expected, {input_global_data.get(), input_global_data2.get()}, + ErrorSpec(0.0001)); +} + } // namespace } // namespace xla diff --git a/tensorflow/compiler/xla/tests/reduce_window_test.cc b/tensorflow/compiler/xla/tests/reduce_window_test.cc index 9c317fe579394c5b7a1d599169f471d484950199..0a097667222d8c315cb9943688d6dbcb4426a8b3 100644 --- a/tensorflow/compiler/xla/tests/reduce_window_test.cc +++ b/tensorflow/compiler/xla/tests/reduce_window_test.cc @@ -21,10 +21,11 @@ limitations under the License. 
#include "tensorflow/compiler/xla/array2d.h" #include "tensorflow/compiler/xla/array3d.h" #include "tensorflow/compiler/xla/array4d.h" -#include "tensorflow/compiler/xla/client/computation_builder.h" #include "tensorflow/compiler/xla/client/lib/arithmetic.h" #include "tensorflow/compiler/xla/client/local_client.h" #include "tensorflow/compiler/xla/client/padding.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_computation.h" #include "tensorflow/compiler/xla/reference_util.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/tests/client_library_test_base.h" @@ -63,11 +64,9 @@ class ReduceWindowTestBase : public ClientLibraryTestBase { class ReduceWindowTest : public ::testing::WithParamInterface, public ReduceWindowTestBase { public: - ReduceWindowTest() : builder_(client_, TestName()) { - set_use_bfloat16(GetParam()); - } + ReduceWindowTest() : builder_(TestName()) { set_use_bfloat16(GetParam()); } - void ReduceWindowAdd(const ComputationDataHandle& input, + void ReduceWindowAdd(const XlaOp& input, tensorflow::gtl::ArraySlice window_dimensions, tensorflow::gtl::ArraySlice window_strides, Padding padding) { @@ -78,16 +77,17 @@ class ReduceWindowTest : public ::testing::WithParamInterface, window_dimensions, window_strides, padding); } - void ReduceWindowMax(const ComputationDataHandle& input, + void ReduceWindowMax(const XlaOp& input, tensorflow::gtl::ArraySlice window_dimensions, tensorflow::gtl::ArraySlice window_strides, Padding padding) { auto init = CreateConstantFromLiteral(Literal::MinValue(F32), &builder_); - builder_.ReduceWindow(input, init, CreateScalarMax(), window_dimensions, - window_strides, padding); + builder_.ReduceWindow(input, init, + CreateScalarMaxComputation(FloatType(), &builder_), + window_dimensions, window_strides, padding); } - void ReduceWindowMin(const ComputationDataHandle& input, + void ReduceWindowMin(const XlaOp& input, 
tensorflow::gtl::ArraySlice window_dimensions, tensorflow::gtl::ArraySlice window_strides, Padding padding) { @@ -97,7 +97,7 @@ class ReduceWindowTest : public ::testing::WithParamInterface, window_dimensions, window_strides, padding); } - ComputationBuilder builder_; + XlaBuilder builder_; }; TEST_P(ReduceWindowTest, MismatchedRanksGivesErrorStatus) { @@ -252,6 +252,48 @@ TEST_P(ReduceWindowTest, AmongMajor2DimsMediumSize) { DefaultErrorSpec()); } +// Tests the super windowing logic w.r.t handling prime number of windows in a +// major dimension with reduction. +TEST_P(ReduceWindowTest, PrimeWindowsInReductionDimension) { + Array4D input_array(15, 15, 4, 128); + input_array.FillRandom(2.f, 4.f); + + int win_len = 3; + int win_stride = 2; + + const auto input_data_handle = + CreateConstantFromArray(input_array, &builder_); + + Padding padding = Padding::kSame; + // Reduce only along the x and y dimensions, according to the win_len. + ReduceWindowAdd(input_data_handle, {win_len, win_len, 1, 1}, + {win_stride, win_stride, 1, 1}, padding); + + auto result = ReferenceUtil::ReduceWindow4DAdd( + input_array, 0.0f, {win_len, win_len, 1, 1}, + {win_stride, win_stride, 1, 1}, padding); + + ComputeAndCompareLiteral(&builder_, *Literal::CreateFromArray(*result), {}, + DefaultErrorSpec()); +} + +TEST_P(ReduceWindowTest, ReduceAlongLaneDimension) { + Array4D input_array(19, 17, 8, 256); + input_array.FillWithMinorDimNum(); + + const auto input_data_handle = + CreateConstantFromArray(input_array, &builder_); + + Padding padding = Padding::kSame; + ReduceWindowAdd(input_data_handle, {1, 1, 1, 11}, {1, 1, 1, 1}, padding); + + auto result = ReferenceUtil::ReduceWindow4DAdd( + input_array, 0.0f, {1, 1, 1, 11}, {1, 1, 1, 1}, padding); + + ComputeAndCompareLiteral(&builder_, *Literal::CreateFromArray(*result), {}, + DefaultErrorSpec()); +} + // Tests a reduction function that is not a simple add/min/max/etc. 
XLA_TEST_P(ReduceWindowTest, NonstandardReduceFunction) { Array4D input_array(1, 2, 2, 1); @@ -268,7 +310,7 @@ XLA_TEST_P(ReduceWindowTest, NonstandardReduceFunction) { auto rhs = b->Parameter(1, scalar, "rhs"); b->Min(b->Add(lhs, rhs), CreateConstantFromLiteral(*Literal::CreateR0(8.0f), b.get())); - Computation reduce_fn = b->BuildAndNoteError(); + XlaComputation reduce_fn = b->BuildAndNoteError(); builder_.ReduceWindow( input, @@ -296,7 +338,7 @@ TEST_P(ReduceWindowTest, R4UnitWindow) { std::unique_ptr input_literal = Literal::CreateR4FromArray4DWithLayout( input_array, LayoutUtil::MakeLayout({0, 3, 2, 1})); - ComputationDataHandle input; + XlaOp input; auto input_data = CreateParameterAndTransferLiteral( 0, *input_literal, "parameter", &builder_, &input); @@ -364,7 +406,7 @@ XLA_TEST_P(ReduceWindowTest, R4SecondMinorStride) { std::unique_ptr input_literal = Literal::CreateR4FromArray4DWithLayout( input_array, LayoutUtil::MakeLayout({3, 2, 1, 0})); - ComputationDataHandle input; + XlaOp input; auto input_data = CreateParameterAndTransferLiteral( 0, *input_literal, "parameter", &builder_, &input); @@ -386,7 +428,7 @@ XLA_TEST_P(ReduceWindowTest, R4SecondMinorUnitStride) { std::unique_ptr input_literal = Literal::CreateR4FromArray4DWithLayout( input_array, LayoutUtil::MakeLayout({3, 2, 1, 0})); - ComputationDataHandle input; + XlaOp input; auto input_data = CreateParameterAndTransferLiteral( 0, *input_literal, "parameter", &builder_, &input); @@ -408,7 +450,7 @@ XLA_TEST_P(ReduceWindowTest, R4SecondMinorWin) { std::unique_ptr input_literal = Literal::CreateR4FromArray4DWithLayout( input_array, LayoutUtil::MakeLayout({3, 2, 1, 0})); - ComputationDataHandle input; + XlaOp input; auto input_data = CreateParameterAndTransferLiteral( 0, *input_literal, "parameter", &builder_, &input); @@ -509,7 +551,7 @@ TEST_P(ReduceWindowTest, R2ReduceWindowInceptionFromBroadcast) { TEST_P(ReduceWindowTest, R2ReduceWindowNonOverlappingFromBroadcast) { Array2D input_array(6, 4, 1.0f); 
- ComputationDataHandle input = builder_.Broadcast( + XlaOp input = builder_.Broadcast( CreateConstantFromLiteral(Literal::One(F32), &builder_), {6, 4}); Padding padding = Padding::kSame; @@ -568,7 +610,7 @@ class R4ReduceWindowTest : public ReduceWindowTestBase, R4ReduceWindowTest() { set_use_bfloat16(::testing::get<1>(GetParam())); } void DoIt() { - ComputationBuilder b(client_, TestName()); + XlaBuilder b(TestName()); const auto& param = ::testing::get<0>(GetParam()); const float kInitValue = 0.0f; @@ -579,7 +621,7 @@ class R4ReduceWindowTest : public ReduceWindowTestBase, std::unique_ptr input_literal = Literal::CreateR4FromArray4DWithLayout( input, LayoutUtil::MakeLayout(param.layout)); - ComputationDataHandle parameter; + XlaOp parameter; auto input_arg = CreateParameterAndTransferLiteral(0, *input_literal, "p0", &b, ¶meter); @@ -920,7 +962,7 @@ class R3ReduceWindowTest : public ReduceWindowTestBase, }; TEST_P(R3ReduceWindowTest, Add) { - ComputationBuilder b(client_, TestName()); + XlaBuilder b(TestName()); const auto& param = ::testing::get<0>(GetParam()); CHECK(param.reducer == kAdd); @@ -931,7 +973,7 @@ TEST_P(R3ReduceWindowTest, Add) { Literal::CreateR3FromArray3DWithLayout( input, LayoutUtil::MakeLayout(param.layout)); - ComputationDataHandle parameter; + XlaOp parameter; auto input_arg = CreateParameterAndTransferLiteral(0, *input_literal, "p0", &b, ¶meter); auto init_value = @@ -1021,15 +1063,12 @@ struct R2ReduceWindowTestData { /*strides=*/{1, 1}, /*pad_low=*/{0, 130}, /*pad_high=*/{0, 0}, /*layout=*/{1, 0}, /*reducer=*/Reducer::kAdd}, -// TODO(b/76025683): These tests fail on TPU. 
-#if defined(XLA_TEST_BACKEND_CPU) || defined(XLA_TEST_BACKEND_GPU) - {/*base_bounds=*/{4096, 4096}, /*window_bounds=*/{1, 4}, - /*strides=*/{1, 1024}, /*pad_low=*/{0, 0}, /*pad-high=*/{0, 0}, - /*layout=*/{1, 0}, /*reducer=*/Reducer::kAdd}, {/*base_bounds=*/{8, 256}, /*window_bounds=*/{1, 4}, /*strides=*/{1, 64}, /*pad_low=*/{0, 0}, /*pad_high=*/{0, 0}, /*layout=*/{1, 0}, /*reducer=*/Reducer::kAdd}, -#endif + {/*base_bounds=*/{4096, 4096}, /*window_bounds=*/{1, 4}, + /*strides=*/{1, 1024}, /*pad_low=*/{0, 0}, /*pad-high=*/{0, 0}, + /*layout=*/{1, 0}, /*reducer=*/Reducer::kAdd}, }; string R2ReduceWindowTestDataToString( @@ -1058,7 +1097,7 @@ class R2ReduceWindowTest : public ReduceWindowTestBase, R2ReduceWindowTest() { set_use_bfloat16(::testing::get<1>(GetParam())); } void DoIt() { - ComputationBuilder b(client_, TestName()); + XlaBuilder b(TestName()); const auto& param = ::testing::get<0>(GetParam()); CHECK(param.reducer == kAdd); @@ -1068,7 +1107,7 @@ class R2ReduceWindowTest : public ReduceWindowTestBase, Literal::CreateR2FromArray2DWithLayout( input, LayoutUtil::MakeLayout(param.layout)); - ComputationDataHandle parameter; + XlaOp parameter; auto input_arg = CreateParameterAndTransferLiteral(0, *input_literal, "p0", &b, ¶meter); std::vector> padding(2); @@ -1256,7 +1295,7 @@ class R1ReduceWindowTest : public ReduceWindowTestBase, }; TEST_P(R1ReduceWindowTest, DoIt) { - ComputationBuilder b(client_, TestName()); + XlaBuilder b(TestName()); const auto& param = ::testing::get<0>(GetParam()); CHECK(param.reducer == kAdd || param.reducer == kMax); @@ -1265,7 +1304,7 @@ TEST_P(R1ReduceWindowTest, DoIt) { std::iota(std::begin(input_vector), std::end(input_vector), 0); std::unique_ptr input_literal = Literal::CreateR1(tensorflow::gtl::ArraySlice(input_vector)); - ComputationDataHandle parameter; + XlaOp parameter; auto input_arg = CreateParameterAndTransferLiteral(0, *input_literal, "p0", &b, ¶meter); @@ -1396,5 +1435,22 @@ ENTRY R3Window { 
EXPECT_TRUE(RunAndCompare(hlo_string, ErrorSpec{0.001})); } +TEST_F(HloTestBase, ReduceWindowIdentity) { + const string& hlo_string = R"( +HloModule ReduceWindowIdentity +identity.pad_to_reduce_window { + param0 = f32[] parameter(0) + ROOT param1 = f32[] parameter(1) +} +ENTRY reduce-window-identity { + operand = f32[1,32,64]{2,1,0} parameter(0) + constant.4466 = f32[] constant(0) + ROOT reduce-window = f32[1,33,64]{2,1,0} reduce-window(operand, constant.4466), window={size=1x1x1 pad=0_0x1_0x0_0}, to_apply=identity.pad_to_reduce_window +} + +)"; + EXPECT_TRUE(RunAndCompare(hlo_string, tensorflow::gtl::nullopt)); +} + } // namespace } // namespace xla diff --git a/tensorflow/compiler/xla/tests/reshape_test.cc b/tensorflow/compiler/xla/tests/reshape_test.cc index 02272d60171c70896f44b0d6b96f176ea52e686f..d7462d581b8596dc43b81b0162b3f5020cebb546 100644 --- a/tensorflow/compiler/xla/tests/reshape_test.cc +++ b/tensorflow/compiler/xla/tests/reshape_test.cc @@ -20,11 +20,10 @@ limitations under the License. #include "tensorflow/compiler/xla/array2d.h" #include "tensorflow/compiler/xla/array4d.h" -#include "tensorflow/compiler/xla/client/computation.h" -#include "tensorflow/compiler/xla/client/computation_builder.h" #include "tensorflow/compiler/xla/client/global_data.h" #include "tensorflow/compiler/xla/client/local_client.h" #include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_computation.h" #include "tensorflow/compiler/xla/layout_util.h" #include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/reference_util.h" @@ -53,11 +52,11 @@ class ReshapeTest : public ::testing::WithParamInterface, // Collapses 2-dimensional pseudo-scalar (single-element array) to 1 dimension. 
XLA_TEST_P(ReshapeTest, CollapseTrivial1x1) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); Array2D input_array(1, 1); input_array.Fill(1.0f); auto input_literal = Literal::CreateR2FromArray2D(input_array); - ComputationDataHandle parameter; + XlaOp parameter; auto input = CreateParameterAndTransferLiteral(0, *input_literal, "parameter", &builder, ¶meter); builder.Collapse(/*operand=*/parameter, /*dimensions=*/{0, 1}); @@ -68,9 +67,9 @@ XLA_TEST_P(ReshapeTest, CollapseTrivial1x1) { } XLA_TEST_P(ReshapeTest, CollapseTrivialR1EmptyDims) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto input_literal = Literal::CreateR1({1.0f}); - ComputationDataHandle parameter; + XlaOp parameter; auto input = CreateParameterAndTransferLiteral(0, *input_literal, "parameter", &builder, ¶meter); builder.Collapse(/*operand=*/parameter, /*dimensions=*/{}); @@ -81,9 +80,9 @@ XLA_TEST_P(ReshapeTest, CollapseTrivialR1EmptyDims) { } XLA_TEST_P(ReshapeTest, CollapseTrivialR1OnlyDim) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto input_literal = Literal::CreateR1({1.0f}); - ComputationDataHandle parameter; + XlaOp parameter; auto input = CreateParameterAndTransferLiteral(0, *input_literal, "parameter", &builder, ¶meter); builder.Collapse(/*operand=*/parameter, /*dimensions=*/{0}); @@ -95,11 +94,11 @@ XLA_TEST_P(ReshapeTest, CollapseTrivialR1OnlyDim) { // Collapses 2-dimensional pseudo-scalar (single-element array) to scalar. 
XLA_TEST_P(ReshapeTest, SingleElementArrayToScalar) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); Array2D input_array(1, 1); input_array.Fill(1.0f); auto input_literal = Literal::CreateR2FromArray2D(input_array); - ComputationDataHandle parameter; + XlaOp parameter; auto input = CreateParameterAndTransferLiteral(0, *input_literal, "parameter", &builder, ¶meter); auto reshape = builder.Reshape(/*operand=*/parameter, /*dimensions=*/{0, 1}, @@ -112,15 +111,14 @@ XLA_TEST_P(ReshapeTest, SingleElementArrayToScalar) { } XLA_TEST_P(ReshapeTest, ScalarToSingleElementArray) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); std::unique_ptr param0_literal = Literal::CreateR0(1.0f); - ComputationDataHandle parameter; + XlaOp parameter; auto input = CreateParameterAndTransferLiteral(0, *param0_literal, "param0", &builder, ¶meter); auto a = builder.Neg(parameter); - auto reshape = - builder.Reshape(/*operand=*/a, /*dimensions=*/{}, /*new_sizes=*/{1}); + builder.Reshape(/*operand=*/a, /*dimensions=*/{}, /*new_sizes=*/{1}); auto expected_literal = Literal::CreateR1({-1.0f}); ComputeAndCompareLiteral(&builder, *expected_literal, {input.get()}, @@ -131,10 +129,10 @@ XLA_TEST_P(ReshapeTest, ScalarToSingleElementArray) { // does not handle zero-sized shapes correctly. Failed last on 2017-11-30 // with an incorrect result rank. XLA_TEST_P(ReshapeTest, DISABLED_ON_GPU(Trivial0x3)) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); Array2D input_array(0, 3); auto input_literal = Literal::CreateR2FromArray2D(input_array); - ComputationDataHandle parameter; + XlaOp parameter; auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input", &builder, ¶meter); builder.Collapse(/*operand=*/parameter, /*dimensions=*/{0, 1}); @@ -147,11 +145,11 @@ XLA_TEST_P(ReshapeTest, DISABLED_ON_GPU(Trivial0x3)) { // does not handle zero-sized shapes correctly. 
Failed last on 2017-05-15 // with an incorrect result rank. XLA_TEST_P(ReshapeTest, DISABLED_ON_GPU(Trivial0x3WithParameter)) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); std::unique_ptr param0_literal = Literal::CreateR2FromArray2D(Array2D(0, 3)); - ComputationDataHandle parameter; + XlaOp parameter; auto input = CreateParameterAndTransferLiteral(0, *param0_literal, "param0", &builder, ¶meter); builder.Collapse(/*operand=*/parameter, /*dimensions=*/{0, 1}); @@ -164,10 +162,10 @@ XLA_TEST_P(ReshapeTest, DISABLED_ON_GPU(Trivial0x3WithParameter)) { // does not handle zero-sized shapes correctly. Failed last on 2017-11-30 // with an incorrect result rank. XLA_TEST_P(ReshapeTest, DISABLED_ON_GPU(Trivial3x0)) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); Array2D input_array(3, 0); auto input_literal = Literal::CreateR2FromArray2D(input_array); - ComputationDataHandle parameter; + XlaOp parameter; auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input", &builder, ¶meter); builder.Collapse(/*operand=*/parameter, /*dimensions=*/{0, 1}); @@ -178,9 +176,9 @@ XLA_TEST_P(ReshapeTest, DISABLED_ON_GPU(Trivial3x0)) { // Collapses a 2-dimensional row vector to 1 dimension. XLA_TEST_P(ReshapeTest, Trivial1x3) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto input_literal = Literal::CreateR2({{1.0f, 2.0f, 3.0f}}); - ComputationDataHandle parameter; + XlaOp parameter; auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input", &builder, ¶meter); builder.Collapse(/*operand=*/parameter, /*dimensions=*/{0, 1}); @@ -191,9 +189,9 @@ XLA_TEST_P(ReshapeTest, Trivial1x3) { // Collapses a 2-dimensional column vector to 1 dimension. 
XLA_TEST_P(ReshapeTest, Trivial3x1) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto input_literal = Literal::CreateR2({{1.0f}, {2.0f}, {3.0f}}); - ComputationDataHandle parameter; + XlaOp parameter; auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input", &builder, ¶meter); builder.Collapse(/*operand=*/parameter, /*dimensions=*/{0, 1}); @@ -344,9 +342,9 @@ XLA_TEST_P(ReshapeTest, DISABLED_ON_GPU(ReshapeSplitNoShuffleZeroElements)) { // does not handle zero-sized shapes correctly. Failed last on 2017-11-30 // with an incorrect result rank. XLA_TEST_P(ReshapeTest, DISABLED_ON_GPU(ReshapeR4ToR2ZeroElements)) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto input_literal = Literal::CreateFromArray(Array4D(2, 3, 4, 0)); - ComputationDataHandle parameter; + XlaOp parameter; auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input", &builder, ¶meter); builder.Reshape(/*operand=*/parameter, /*dimensions=*/{0, 1, 2, 3}, @@ -359,10 +357,10 @@ XLA_TEST_P(ReshapeTest, DISABLED_ON_GPU(ReshapeR4ToR2ZeroElements)) { // Reshapes a 2-dimensional array with dimensions that are not just a // rearrangement of the originals (split), but no reordering (no shuffle). XLA_TEST_P(ReshapeTest, ReshapeSplitNoShuffle) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a4x3 = MakeLinspaceArray2D(1.0f, 12.0f, 4, 3); auto input_literal = Literal::CreateFromArray(*a4x3); - ComputationDataHandle parameter; + XlaOp parameter; auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input", &builder, ¶meter); builder.Reshape(/*operand=*/parameter, /*dimensions=*/{0, 1}, @@ -379,9 +377,9 @@ XLA_TEST_P(ReshapeTest, ReshapeSplitNoShuffle) { // with an incorrect result rank. 
// XLA_TEST_P(ReshapeTest, DISABLED_ON_GPU(ReshapeSplitAndShuffleZeroElements)) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto input_literal = Literal::CreateFromArray(Array2D(0, 6)); - ComputationDataHandle parameter; + XlaOp parameter; auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input", &builder, ¶meter); builder.Reshape(/*operand=*/parameter, /*dimensions=*/{1, 0}, @@ -394,10 +392,10 @@ XLA_TEST_P(ReshapeTest, DISABLED_ON_GPU(ReshapeSplitAndShuffleZeroElements)) { // Reshapes a 2-dimensional array with dimensions that are not just a // rearrangement of the originals (split), and reorder the input (shuffle). XLA_TEST_P(ReshapeTest, ReshapeSplitAndShuffle) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a4x3 = MakeLinspaceArray2D(1.0f, 12.0f, 4, 3); auto input_literal = Literal::CreateFromArray(*a4x3); - ComputationDataHandle parameter; + XlaOp parameter; auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input", &builder, ¶meter); builder.Reshape(/*operand=*/parameter, /*dimensions=*/{1, 0}, @@ -421,9 +419,9 @@ static Array3D ArrayForDocR3Tests() { } XLA_TEST_P(ReshapeTest, DocR3_R1_Collapse_012) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto input_literal = Literal::CreateFromArray(ArrayForDocR3Tests()); - ComputationDataHandle parameter; + XlaOp parameter; auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input", &builder, ¶meter); builder.Reshape(/*operand=*/parameter, /*dimensions=*/{0, 1, 2}, @@ -436,9 +434,9 @@ XLA_TEST_P(ReshapeTest, DocR3_R1_Collapse_012) { } XLA_TEST_P(ReshapeTest, DocR3_R2_Collapse_012_Refine_83) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto input_literal = Literal::CreateFromArray(ArrayForDocR3Tests()); - ComputationDataHandle parameter; + XlaOp parameter; auto input = 
CreateParameterAndTransferLiteral(0, *input_literal, "input", &builder, ¶meter); builder.Reshape(/*operand=*/parameter, /*dimensions=*/{0, 1, 2}, @@ -456,9 +454,9 @@ XLA_TEST_P(ReshapeTest, DocR3_R2_Collapse_012_Refine_83) { } XLA_TEST_P(ReshapeTest, DocR3_R1_Collapse_120) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto input_literal = Literal::CreateFromArray(ArrayForDocR3Tests()); - ComputationDataHandle parameter; + XlaOp parameter; auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input", &builder, ¶meter); builder.Reshape(/*operand=*/parameter, /*dimensions=*/{1, 2, 0}, @@ -471,9 +469,9 @@ XLA_TEST_P(ReshapeTest, DocR3_R1_Collapse_120) { } XLA_TEST_P(ReshapeTest, DocR3_R2_Collapse_120_Refine_83) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto input_literal = Literal::CreateFromArray(ArrayForDocR3Tests()); - ComputationDataHandle parameter; + XlaOp parameter; auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input", &builder, ¶meter); builder.Reshape(/*operand=*/parameter, /*dimensions=*/{1, 2, 0}, @@ -491,9 +489,9 @@ XLA_TEST_P(ReshapeTest, DocR3_R2_Collapse_120_Refine_83) { } XLA_TEST_P(ReshapeTest, DocR3_R3_Collapse_120_Refine_262) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto input_literal = Literal::CreateFromArray(ArrayForDocR3Tests()); - ComputationDataHandle parameter; + XlaOp parameter; auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input", &builder, ¶meter); builder.Reshape(/*operand=*/parameter, /*dimensions=*/{1, 2, 0}, @@ -521,12 +519,12 @@ XLA_TEST_P(ReshapeTest, DocR3_R3_Collapse_120_Refine_262) { // // 1 2 3 4 5 6 1 2 3 4 5 6 XLA_TEST_P(ReshapeTest, FullyConnectedCollapse) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); Array4D t2x2x2x3(2, 2, 2, 3); auto filler2x3 = MakeLinspaceArray2D(1.0f, 6.0f, 2, 3); 
t2x2x2x3.FillWithYX(*filler2x3); auto input_literal = Literal::CreateFromArray(t2x2x2x3); - ComputationDataHandle parameter; + XlaOp parameter; auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input", &builder, ¶meter); builder.Collapse(/*operand=*/parameter, /*dimensions=*/{1, 2, 3}); @@ -540,7 +538,7 @@ XLA_TEST_P(ReshapeTest, FullyConnectedCollapse) { // As above, but uses reshape directly. XLA_TEST_P(ReshapeTest, FullyConnectedCollapseDesugared) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); Array4D t(2, 1, 2, 2); t(0, 0, 0, 0) = 0; t(0, 0, 0, 1) = 1; @@ -551,7 +549,7 @@ XLA_TEST_P(ReshapeTest, FullyConnectedCollapseDesugared) { t(1, 0, 1, 0) = 6; t(1, 0, 1, 1) = 7; auto input_literal = Literal::CreateFromArray(t); - ComputationDataHandle parameter; + XlaOp parameter; auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input", &builder, ¶meter); builder.Reshape(/*operand=*/parameter, /*dimensions=*/{0, 1, 2, 3}, @@ -566,7 +564,7 @@ XLA_TEST_P(ReshapeTest, FullyConnectedCollapseDesugared) { // Reshape various ranks to a scalar. XLA_TEST_P(ReshapeTest, ToScalar) { for (int rank = 0; rank < 8; ++rank) { - ComputationBuilder b(client_, TestName()); + XlaBuilder b(TestName()); std::vector ones(rank, 1); // this is {1, ..., 1}. std::vector dimensions(rank); std::iota(dimensions.begin(), dimensions.end(), 0); @@ -574,7 +572,7 @@ XLA_TEST_P(ReshapeTest, ToScalar) { std::vector zeros(rank, 0); // this is {0, ..., 0}. 
input_literal.Set(zeros, 83.0f); - ComputationDataHandle parameter; + XlaOp parameter; auto input = CreateParameterAndTransferLiteral(0, input_literal, "input", &b, ¶meter); b.Reshape(parameter, dimensions, {}); @@ -586,9 +584,9 @@ XLA_TEST_P(ReshapeTest, ToScalar) { } XLA_TEST_P(ReshapeTest, BadDimensions) { - ComputationBuilder b(client_, TestName()); + XlaBuilder b(TestName()); auto input_literal = Literal::CreateR1({1.0f}); - ComputationDataHandle parameter; + XlaOp parameter; auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input", &b, ¶meter); b.Reshape(parameter, {}, {}); @@ -598,9 +596,9 @@ XLA_TEST_P(ReshapeTest, BadDimensions) { } XLA_TEST_P(ReshapeTest, BadNewSizes) { - ComputationBuilder b(client_, TestName()); + XlaBuilder b(TestName()); auto input_literal = Literal::CreateR1({1.0f, 2.0f}); - ComputationDataHandle parameter; + XlaOp parameter; auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input", &b, ¶meter); b.Reshape(parameter, {1}, {}); @@ -609,7 +607,7 @@ XLA_TEST_P(ReshapeTest, BadNewSizes) { } XLA_TEST_P(ReshapeTest, R4Dim0MinorLayoutToR2Dim0MajorLayout) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); // clang-format off auto input_literal = Literal::CreateR4FromArray4DWithLayout(Array4D{ { @@ -635,7 +633,7 @@ XLA_TEST_P(ReshapeTest, R4Dim0MinorLayoutToR2Dim0MajorLayout) { }, LayoutUtil::MakeLayout({0, 1, 2, 3})); // clang-format on - ComputationDataHandle parameter; + XlaOp parameter; auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input", &builder, ¶meter); @@ -646,7 +644,7 @@ XLA_TEST_P(ReshapeTest, R4Dim0MinorLayoutToR2Dim0MajorLayout) { {222, 333, 444, 555, 666, 777, 888, 999}, }); - Computation computation = builder.Build().ConsumeValueOrDie(); + XlaComputation computation = builder.Build().ConsumeValueOrDie(); ExecutionOptions execution_options = execution_options_; *execution_options.mutable_shape_with_output_layout() = 
ShapeUtil::MakeShapeWithLayout(use_bfloat16() ? BF16 : F32, {2, 8}, @@ -664,13 +662,13 @@ XLA_TEST_P(ReshapeTest, R4Dim0MinorLayoutToR2Dim0MajorLayout) { } XLA_TEST_P(ReshapeTest, R2ToR4_3x8_To_3x2x1x4) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); std::unique_ptr input_literal = Literal::CreateR2({ {0, 1, 2, 3, 4, 5, 6, 7}, {100, 101, 102, 103, 104, 105, 106, 107}, {200, 201, 202, 203, 204, 205, 206, 207}, }); - ComputationDataHandle parameter; + XlaOp parameter; auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input", &builder, ¶meter); builder.Reshape(parameter, /*dimensions=*/{0, 1}, /*new_sizes=*/{3, 2, 1, 4}); @@ -691,13 +689,13 @@ XLA_TEST_P(ReshapeTest, R2ToR4_3x8_To_3x2x1x4) { // Tests R2->R4 reshape with the reshape dimensions {1, 0}. XLA_TEST_P(ReshapeTest, R2ToR4_3x8_To_3x2x1x4_Dimensions_10) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); std::unique_ptr input_literal = Literal::CreateR2({ {0, 1, 2, 3, 4, 5, 6, 7}, {100, 101, 102, 103, 104, 105, 106, 107}, {200, 201, 202, 203, 204, 205, 206, 207}, }); - ComputationDataHandle parameter; + XlaOp parameter; auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input", &builder, ¶meter); builder.Reshape(parameter, /*dimensions=*/{1, 0}, /*new_sizes=*/{3, 2, 1, 4}); @@ -717,7 +715,7 @@ XLA_TEST_P(ReshapeTest, R2ToR4_3x8_To_3x2x1x4_Dimensions_10) { } XLA_TEST_P(ReshapeTest, R4ToR2_2x1x1x1_To_2x1) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); std::mt19937 rng; std::uniform_real_distribution distribution; Array4D input(2, 1, 1, 1); @@ -727,7 +725,7 @@ XLA_TEST_P(ReshapeTest, R4ToR2_2x1x1x1_To_2x1) { std::unique_ptr input_literal = Literal::CreateR4FromArray4DWithLayout( input, LayoutUtil::MakeLayout({3, 2, 1, 0})); - ComputationDataHandle parameter; + XlaOp parameter; auto input_data = CreateParameterAndTransferLiteral( 0, *input_literal, "input", &builder, 
¶meter); builder.Reshape(parameter, /*dimensions=*/{0, 1, 2, 3}, /*new_sizes=*/{2, 1}); @@ -739,7 +737,7 @@ XLA_TEST_P(ReshapeTest, R4ToR2_2x1x1x1_To_2x1) { } XLA_TEST_P(ReshapeTest, R4ToR2_2x1x4x1_To_4x2) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); std::mt19937 rng; std::uniform_real_distribution distribution; Array4D input(2, 1, 4, 1); @@ -749,7 +747,7 @@ XLA_TEST_P(ReshapeTest, R4ToR2_2x1x4x1_To_4x2) { std::unique_ptr input_literal = Literal::CreateR4FromArray4DWithLayout( input, LayoutUtil::MakeLayout({3, 2, 1, 0})); - ComputationDataHandle parameter; + XlaOp parameter; auto input_data = CreateParameterAndTransferLiteral( 0, *input_literal, "input", &builder, ¶meter); builder.Reshape(parameter, /*dimensions=*/{0, 1, 2, 3}, /*new_sizes=*/{4, 2}); @@ -762,7 +760,7 @@ XLA_TEST_P(ReshapeTest, R4ToR2_2x1x4x1_To_4x2) { // Tests R4->R2 reshape with the reshape dimensions {0, 2, 1, 3}. XLA_TEST_P(ReshapeTest, R4ToR2_5x10x2x3_To_5x60_Dimensions_0213) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); std::mt19937 rng; std::uniform_real_distribution distribution; Array4D input(5, 10, 2, 3); @@ -772,7 +770,7 @@ XLA_TEST_P(ReshapeTest, R4ToR2_5x10x2x3_To_5x60_Dimensions_0213) { std::unique_ptr input_literal = Literal::CreateR4FromArray4DWithLayout( input, LayoutUtil::MakeLayout({3, 2, 1, 0})); - ComputationDataHandle parameter; + XlaOp parameter; auto input_data = CreateParameterAndTransferLiteral( 0, *input_literal, "input", &builder, ¶meter); builder.Reshape(parameter, /*dimensions=*/{0, 2, 1, 3}, @@ -789,7 +787,7 @@ XLA_TEST_P(ReshapeTest, R4ToR2_5x10x2x3_To_5x60_Dimensions_0213) { } XLA_TEST_P(ReshapeTest, NoopReshape) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); std::mt19937 rng; std::uniform_real_distribution distribution; Array4D input_array(2, 3, 5, 7); @@ -799,12 +797,12 @@ XLA_TEST_P(ReshapeTest, NoopReshape) { std::unique_ptr input_literal = 
Literal::CreateR4FromArray4DWithLayout( input_array, LayoutUtil::MakeLayout({1, 2, 3, 0})); - ComputationDataHandle parameter; + XlaOp parameter; auto input_data = CreateParameterAndTransferLiteral( 0, *input_literal, "input", &builder, ¶meter); builder.Reshape(parameter, /*dimensions=*/{3, 0, 1, 2}, /*new_sizes=*/{7, 2, 3, 5}); - Computation computation = builder.Build().ConsumeValueOrDie(); + XlaComputation computation = builder.Build().ConsumeValueOrDie(); ExecutionOptions execution_options = execution_options_; *execution_options.mutable_shape_with_output_layout() = @@ -827,12 +825,12 @@ XLA_TEST_P(ReshapeTest, NoopReshape) { } XLA_TEST_P(ReshapeTest, R4ToR4Reshape_Trivial) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto literal_1x2x3x4 = Literal::CreateR4( {{{{1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10, 11, 12}}, {{13, 14, 15, 16}, {17, 18, 19, 20}, {21, 22, 23, 24}}}}); - ComputationDataHandle parameter; + XlaOp parameter; auto input = CreateParameterAndTransferLiteral(0, *literal_1x2x3x4, "input", &builder, ¶meter); builder.Reshape(parameter, /*dimensions=*/{0, 1, 2, 3}, @@ -846,8 +844,8 @@ XLA_TEST_P(ReshapeTest, R4ToR4Reshape) { {{{{1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10, 11, 12}}, {{13, 14, 15, 16}, {17, 18, 19, 20}, {21, 22, 23, 24}}}}); - ComputationBuilder builder(client_, TestName()); - ComputationDataHandle parameter; + XlaBuilder builder(TestName()); + XlaOp parameter; auto input = CreateParameterAndTransferLiteral(0, *literal_1x2x3x4, "input", &builder, ¶meter); builder.Reshape(parameter, /*dimensions=*/{1, 3, 2, 0}, @@ -880,8 +878,8 @@ XLA_TEST_P(ReshapeTest, R4TwoMinorTransposeSimple) { std::unique_ptr input_literal = Literal::CreateR4FromArray4DWithLayout( input, LayoutUtil::MakeLayout({3, 2, 1, 0})); - ComputationBuilder builder(client_, TestName()); - ComputationDataHandle parameter; + XlaBuilder builder(TestName()); + XlaOp parameter; auto input_data = CreateParameterAndTransferLiteral( 0, *input_literal, 
"input", &builder, ¶meter); builder.Reshape(parameter, /*dimensions=*/{0, 1, 3, 2}, @@ -909,8 +907,8 @@ XLA_TEST_P(ReshapeTest, R4TwoMinorTransposeMajorFirstEffectiveR2) { std::unique_ptr input_literal = Literal::CreateR4FromArray4DWithLayout( input, LayoutUtil::MakeLayout({3, 2, 1, 0})); - ComputationBuilder builder(client_, TestName()); - ComputationDataHandle parameter; + XlaBuilder builder(TestName()); + XlaOp parameter; auto input_data = CreateParameterAndTransferLiteral( 0, *input_literal, "input", &builder, ¶meter); builder.Reshape(parameter, /*dimensions=*/{0, 1, 3, 2}, @@ -938,8 +936,8 @@ XLA_TEST_P(ReshapeTest, R4TwoMinorTransposeMajorFirstMinorEffectiveR1) { std::unique_ptr input_literal = Literal::CreateR4FromArray4DWithLayout( input, LayoutUtil::MakeLayout({3, 2, 1, 0})); - ComputationBuilder builder(client_, TestName()); - ComputationDataHandle parameter; + XlaBuilder builder(TestName()); + XlaOp parameter; auto input_data = CreateParameterAndTransferLiteral( 0, *input_literal, "input", &builder, ¶meter); builder.Reshape(parameter, /*dimensions=*/{0, 1, 3, 2}, @@ -968,8 +966,8 @@ XLA_TEST_P(ReshapeTest, R4TwoMinorTransposeMajorFirstMinorEffectiveR1InR2) { std::unique_ptr input_literal = Literal::CreateR4FromArray4DWithLayout( input, LayoutUtil::MakeLayout({3, 2, 1, 0})); - ComputationBuilder builder(client_, TestName()); - ComputationDataHandle parameter; + XlaBuilder builder(TestName()); + XlaOp parameter; auto input_data = CreateParameterAndTransferLiteral( 0, *input_literal, "input", &builder, ¶meter); builder.Reshape(parameter, /*dimensions=*/{0, 1, 3, 2}, @@ -997,8 +995,8 @@ XLA_TEST_P(ReshapeTest, R4TwoMinorTransposeTrivialR2) { std::unique_ptr input_literal = Literal::CreateR4FromArray4DWithLayout( input, LayoutUtil::MakeLayout({0, 1, 2, 3})); - ComputationBuilder builder(client_, TestName()); - ComputationDataHandle parameter; + XlaBuilder builder(TestName()); + XlaOp parameter; auto input_data = CreateParameterAndTransferLiteral( 0, 
*input_literal, "input", &builder, ¶meter); builder.Reshape(parameter, /*dimensions=*/{1, 0, 2, 3}, diff --git a/tensorflow/compiler/xla/tests/reverse_test.cc b/tensorflow/compiler/xla/tests/reverse_test.cc index 8fc841f14087cdea02fe44cdaea521ff92122aec..6959c95502cb7af6b720592e7836c6789719a528 100644 --- a/tensorflow/compiler/xla/tests/reverse_test.cc +++ b/tensorflow/compiler/xla/tests/reverse_test.cc @@ -17,8 +17,8 @@ limitations under the License. #include "tensorflow/compiler/xla/array2d.h" #include "tensorflow/compiler/xla/array4d.h" -#include "tensorflow/compiler/xla/client/computation_builder.h" #include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" #include "tensorflow/compiler/xla/tests/client_library_test_base.h" #include "tensorflow/compiler/xla/tests/literal_test_util.h" #include "tensorflow/compiler/xla/tests/test_macros.h" @@ -85,7 +85,7 @@ TEST_P(FloatReverseTest, Reverses) { auto r1_literal = Literal::CreateR1(input_vector); auto input_literal = r1_literal->Reshape(spec.input_dims).ConsumeValueOrDie(); - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = AddParam(*input_literal, &builder); builder.Rev(a, spec.reversal); diff --git a/tensorflow/compiler/xla/tests/select_and_scatter_test.cc b/tensorflow/compiler/xla/tests/select_and_scatter_test.cc index d268fdcacebcb162bf61bc7dd4b208f4db6c4a5f..7015e5a6a31f506d30c2629d7735482cf354455a 100644 --- a/tensorflow/compiler/xla/tests/select_and_scatter_test.cc +++ b/tensorflow/compiler/xla/tests/select_and_scatter_test.cc @@ -19,11 +19,11 @@ limitations under the License. 
#include #include "tensorflow/compiler/xla/array2d.h" -#include "tensorflow/compiler/xla/client/computation.h" -#include "tensorflow/compiler/xla/client/computation_builder.h" #include "tensorflow/compiler/xla/client/lib/arithmetic.h" #include "tensorflow/compiler/xla/client/local_client.h" #include "tensorflow/compiler/xla/client/padding.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_computation.h" #include "tensorflow/compiler/xla/layout_util.h" #include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/reference_util.h" @@ -50,7 +50,7 @@ class SelectAndScatterTest : public ClientLibraryTestBase, public ::testing::WithParamInterface { public: - SelectAndScatterTest() : builder_(client_, TestName()) { + SelectAndScatterTest() : builder_(TestName()) { // Create S32 GE and ADD computations for select and scatter respectively. ge_s32_ = CreateScalarGeComputation(S32, &builder_); add_s32_ = CreateScalarAddComputation(S32, &builder_); @@ -60,13 +60,13 @@ class SelectAndScatterTest min_f32_ = CreateScalarMinComputation(F32, &builder_); } - ComputationBuilder builder_; - Computation ge_s32_; - Computation add_s32_; - Computation ge_f32_; - Computation add_f32_; - Computation max_f32_; - Computation min_f32_; + XlaBuilder builder_; + XlaComputation ge_s32_; + XlaComputation add_s32_; + XlaComputation ge_f32_; + XlaComputation add_f32_; + XlaComputation max_f32_; + XlaComputation min_f32_; }; XLA_TEST_P(SelectAndScatterTest, ParamTest) { @@ -80,12 +80,11 @@ XLA_TEST_P(SelectAndScatterTest, ParamTest) { s.FillRandom(12.0f); auto source = builder_.ConstantFromArray(s); - auto select_and_scatter = builder_.SelectAndScatter( - operand, ge_f32_, GetParam().window_dimensions, GetParam().window_strides, - GetParam().padding_type, source, builder_.ConstantR0(0.0f), - add_f32_); + builder_.SelectAndScatter(operand, ge_f32_, GetParam().window_dimensions, + 
GetParam().window_strides, GetParam().padding_type, + source, builder_.ConstantR0(0.0f), add_f32_); - ComputeAndCompare(&builder_, select_and_scatter, {}, ErrorSpec(1e-5)); + ComputeAndCompare(&builder_, {}, ErrorSpec(1e-5)); } INSTANTIATE_TEST_CASE_P( diff --git a/tensorflow/compiler/xla/tests/slice_test.cc b/tensorflow/compiler/xla/tests/slice_test.cc index a14a365bd0529ba82a25cdfacfe3902a655c4876..52195db2aa74710b901dd7744a670764a034e96b 100644 --- a/tensorflow/compiler/xla/tests/slice_test.cc +++ b/tensorflow/compiler/xla/tests/slice_test.cc @@ -19,8 +19,8 @@ limitations under the License. #include #include "tensorflow/compiler/xla/array2d.h" -#include "tensorflow/compiler/xla/client/computation_builder.h" #include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" #include "tensorflow/compiler/xla/reference_util.h" #include "tensorflow/compiler/xla/tests/client_library_test_base.h" #include "tensorflow/compiler/xla/tests/literal_test_util.h" @@ -41,7 +41,7 @@ TEST_F(SliceTest, Slice3x3x3_To_3x3x1_F32) { Array3D values(3, 3, 3); values.FillIota(0); - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto original = builder.ConstantR3FromArray3D(values); builder.Slice(original, {0, 0, 0}, {3, 3, 1}, {1, 1, 1}); @@ -54,7 +54,7 @@ TEST_F(SliceTest, Slice3x3x3_To_3x1x3_F32) { Array3D values(3, 3, 3); values.FillIota(0); - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto original = builder.ConstantR3FromArray3D(values); builder.Slice(original, {0, 0, 0}, {3, 1, 3}, {1, 1, 1}); @@ -67,7 +67,7 @@ TEST_F(SliceTest, Slice3x3x3_To_1x3x3_F32) { Array3D values(3, 3, 3); values.FillIota(0); - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto original = builder.ConstantR3FromArray3D(values); builder.Slice(original, {0, 0, 0}, {1, 3, 3}, {1, 1, 1}); @@ -77,7 +77,7 @@ TEST_F(SliceTest, 
Slice3x3x3_To_1x3x3_F32) { } XLA_TEST_F(SliceTest, Slice0x0to0x0F32) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto original = builder.ConstantR2FromArray2D(Array2D(0, 0)); builder.Slice(original, {0, 0}, {0, 0}, {1, 1}); @@ -85,7 +85,7 @@ XLA_TEST_F(SliceTest, Slice0x0to0x0F32) { } XLA_TEST_F(SliceTest, Slice0x20to0x5F32) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto original = builder.ConstantR2FromArray2D(Array2D(0, 20)); builder.Slice(original, {0, 15}, {0, 20}, {1, 1}); @@ -93,7 +93,7 @@ XLA_TEST_F(SliceTest, Slice0x20to0x5F32) { } XLA_TEST_F(SliceTest, Slice3x0to2x0F32) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto original = builder.ConstantR2FromArray2D(Array2D(3, 0)); builder.Slice(original, {1, 0}, {3, 0}, {1, 1}); @@ -108,7 +108,7 @@ XLA_TEST_F(SliceTest, SliceQuadrantOf256x256) { } } - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto original = builder.ConstantR2FromArray2D(values); builder.Slice(original, {128, 128}, {256, 256}, {1, 1}); @@ -126,7 +126,7 @@ TEST_F(SliceTest, Slice_1x4096_To_1x1024) { Array2D values(1, 4096); std::iota(values.data(), values.data() + 4096, 0.0); - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto original = builder.ConstantR2FromArray2D(values); builder.Slice(original, {0, 3072}, {1, 4096}, {1, 1}); @@ -147,7 +147,7 @@ TEST_F(SliceTest, Slice_16x4_To_16x2) { } } } - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto original = builder.ConstantR2FromArray2D(values); builder.Slice(original, {0, 0}, {16, 2}, {1, 1}); ComputeAndCompareR2(&builder, expected, {}, ErrorSpec(0.000001)); @@ -159,7 +159,7 @@ TEST_F(SliceTest, SliceR4ThreeDimsMiddleMinor) { values.FillRandom(3.14f); auto expected = ReferenceUtil::Slice4D( values, {{1, 0, 8, 0}}, {{2, 2, 16, 128}}, /*strides=*/{{1, 1, 
1, 1}}); - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto original = builder.ConstantR4FromArray4D(values); builder.Slice(original, {1, 0, 8, 0}, {2, 2, 16, 128}, {1, 1, 1, 1}); ComputeAndCompareR4(&builder, *expected, {}, ErrorSpec(0.000001)); @@ -172,7 +172,7 @@ XLA_TEST_F(SliceTest, StridedSliceR4WithOutputLayout) { /*strides=*/{{1, 1, 2, 1}}); auto expected_literal = Literal::CreateR4FromArray4DWithLayout( *expected, LayoutUtil::MakeLayout({0, 1, 2, 3})); - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto original = builder.ConstantR4FromArray4D(values); builder.Slice(original, {0, 0, 0, 0}, {2, 4, 6, 8}, {1, 1, 2, 1}); ComputeAndCompareLiteral(&builder, *expected_literal, {}, ErrorSpec(0.000001), @@ -198,7 +198,7 @@ class SliceR1Test : public ClientLibraryTestBase, tensorflow::gtl::InlinedVector input(spec.input_dim0); std::iota(input.begin(), input.end(), NativeT()); - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto original = builder.ConstantR1(input); builder.Slice(original, {spec.slice_start}, {spec.slice_limit}, {spec.slice_stride}); @@ -214,6 +214,9 @@ class SliceR1Test : public ClientLibraryTestBase, } }; +// A version of SliceR1Test used to label and disable 'large' tests +class SliceR1LargeTest : public SliceR1Test {}; + string SliceR1TestDataToString(const ::testing::TestParamInfo& data) { const R1Spec& spec = data.param; return ::tensorflow::strings::Printf("%lld_%lld_%lld_%lld", spec.input_dim0, @@ -233,8 +236,21 @@ XLA_TEST_P(SliceR1Test, DoIt_U64) { Run(GetParam()); } XLA_TEST_P(SliceR1Test, DoIt_S64) { Run(GetParam()); } +XLA_TEST_P(SliceR1LargeTest, DoIt_F32) { Run(GetParam()); } + +XLA_TEST_P(SliceR1LargeTest, DoIt_F64) { Run(GetParam()); } + +XLA_TEST_P(SliceR1LargeTest, DoIt_U32) { Run(GetParam()); } + +XLA_TEST_P(SliceR1LargeTest, DoIt_S32) { Run(GetParam()); } + +XLA_TEST_P(SliceR1LargeTest, DoIt_U64) { Run(GetParam()); } 
+ +XLA_TEST_P(SliceR1LargeTest, DoIt_S64) { Run(GetParam()); } + XLA_TEST_P(SliceR1Test, DoIt_PRED) { Run(GetParam()); } + // Tests for R1 slice ops. // The format for each testcase is {input size, start, limit, stride}. // clang-format off @@ -242,12 +258,6 @@ INSTANTIATE_TEST_CASE_P( SliceR1TestInstantiation, SliceR1Test, ::testing::Values( -// TODO(b/69425338): This uses too much memory on GPU. -#ifndef XLA_TEST_BACKEND_GPU - R1Spec{16 * 1024 * 1024, 4 * 1024 * 1024, 12 * 1024 * 1024, 1}, - R1Spec{16 * 1024 * 1024, 4 * 1024 * 1024 + 1, 12 * 1024 * 1024 - 1, 1}, - R1Spec{16 * 1024 * 1024, 4 * 1024 * 1024 - 1, 12 * 1024 * 1024 + 1, 1}, -#endif R1Spec{10, 0, 0, 1}, R1Spec{10, 7, 7, 1}, R1Spec{10, 0, 5, 1}, @@ -283,6 +293,23 @@ INSTANTIATE_TEST_CASE_P( SliceR1TestDataToString ); +// TODO(b/69425338): This uses too much memory on GPU. +#ifndef XLA_TEST_BACKEND_GPU +INSTANTIATE_TEST_CASE_P( + SliceR1TestBigSlicesInstantiation, + SliceR1LargeTest, + ::testing::Values( + R1Spec{ + 16 * 1024 * 1024, 4 * 1024 * 1024, 12 * 1024 * 1024, 1}, + R1Spec{ + 16 * 1024 * 1024, 4 * 1024 * 1024 + 1, 12 * 1024 * 1024 - 1, 1}, + R1Spec{ + 16 * 1024 * 1024, 4 * 1024 * 1024 - 1, 12 * 1024 * 1024 + 1, 1} + ), + SliceR1TestDataToString +); +#endif + INSTANTIATE_TEST_CASE_P( SliceStridedR1TestInstantiation, SliceR1Test, @@ -339,7 +366,7 @@ XLA_TEST_P(SliceR2Test, DoIt) { Array2D input(spec.input_dim0, spec.input_dim1); input.FillUnique(); - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR2FromArray2DWithLayout( input, LayoutUtil::MakeLayout(spec.layout)); builder.Slice(a, spec.slice_starts, spec.slice_limits, spec.slice_strides); @@ -429,7 +456,7 @@ class SliceR4Test : public ClientLibraryTestBase, values.FillRandom(3.14f); auto expected = ReferenceUtil::Slice4D( values, spec.slice_starts, spec.slice_limits, spec.slice_strides); - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto literal = 
Literal::CreateR4FromArray4DWithLayout( values, LayoutUtil::MakeLayout(spec.input_layout)); auto parameter = builder.Parameter(0, literal->shape(), "p0"); diff --git a/tensorflow/compiler/xla/tests/test_utils.cc b/tensorflow/compiler/xla/tests/test_utils.cc index 0bc7df2a65b44a76f877b6513e6bf93b99fbc1a3..997a1d8273736af31994ebbd07ff3857d1e8e0b5 100644 --- a/tensorflow/compiler/xla/tests/test_utils.cc +++ b/tensorflow/compiler/xla/tests/test_utils.cc @@ -23,14 +23,14 @@ namespace xla { namespace { -template -void PopulateWithRandomFloatingPointData(Literal* literal, - std::minstd_rand0* engine) { +template +void PopulateWithRandomFloatingPointDataImpl(Literal* literal, + std::minstd_rand0* engine) { CHECK_EQ(literal->shape().element_type(), primitive_util::NativeToPrimitiveType()); // Create uniform numbers between 1 and 1.125 to avoid creating denormal // numbers. - std::uniform_real_distribution generator(1.0f, 1.125f); + std::uniform_real_distribution generator(1.0f, 1.125f); const bool should_index_bias = ShapeUtil::ElementsIn(literal->shape()) > 1000; TF_CHECK_OK(literal->Populate( [&](tensorflow::gtl::ArraySlice indices) { @@ -52,10 +52,22 @@ void PopulateWithRandomFloatingPointData(Literal* literal, FloatT index_bias = static_cast(index_product % 113 - negative_bias) / static_cast(256.0f); - return (generator(*engine) - 1.0625) + index_bias; + return static_cast(generator(*engine) - 1.0625f) + index_bias; })); } +template +void PopulateWithRandomFloatingPointData(Literal* literal, + std::minstd_rand0* engine) { + PopulateWithRandomFloatingPointDataImpl(literal, engine); +} + +template <> +void PopulateWithRandomFloatingPointData(Literal* literal, + std::minstd_rand0* engine) { + PopulateWithRandomFloatingPointDataImpl(literal, engine); +} + // The standard library does not have a case for bfloat16, unsurprisingly, so we // handle that one specially. 
template <> @@ -100,6 +112,9 @@ StatusOr> MakeFakeLiteralInternal( case BF16: PopulateWithRandomFloatingPointData(literal.get(), engine); break; + case F16: + PopulateWithRandomFloatingPointData(literal.get(), engine); + break; case F32: PopulateWithRandomFloatingPointData(literal.get(), engine); break; @@ -145,27 +160,38 @@ StatusOr> MakeFakeLiteralInternal( return std::move(literal); } -// Matches binary addition computations. -bool LooksLikeSum(const HloComputation& computation) { +enum class ConstantType { kUnknown, kZero, kOne }; + +// Return the constant type required by this computation, if known. +ConstantType GetInitValue(const HloComputation& computation) { const HloInstruction* const root = computation.root_instruction(); - return root->opcode() == HloOpcode::kAdd && - computation.num_parameters() == 2 && - root->operand(0)->opcode() == HloOpcode::kParameter && - root->operand(1)->opcode() == HloOpcode::kParameter && - root->operand(0) != root->operand(1); + if (computation.num_parameters() != 2 || root->operand_count() != 2 || + root->operand(0)->opcode() != HloOpcode::kParameter || + root->operand(1)->opcode() != HloOpcode::kParameter || + root->operand(0) == root->operand(1)) { + return ConstantType::kUnknown; + } + + switch (root->opcode()) { + case HloOpcode::kAdd: + return ConstantType::kZero; + case HloOpcode::kMultiply: + return ConstantType::kOne; + default: + return ConstantType::kUnknown; + } } -// Reduce, ReduceWindow, and SelectAndScatter ops may use binary addition, -// which requires an init_value of 0 rather than a random value. -bool NeedsZeroInitValue(const HloUse& use) { +// Reduce, ReduceWindow, and SelectAndScatter ops may need a non-random +// initialization value. 
+bool NeedsInitValue(const HloUse& use) { const HloInstruction* const instruction = use.instruction; const HloOpcode opcode = instruction->opcode(); const int64 op_num = use.operand_number; return ( ((opcode == HloOpcode::kReduce || opcode == HloOpcode::kReduceWindow) && - op_num == 1 && LooksLikeSum(*instruction->to_apply())) || - (opcode == HloOpcode::kSelectAndScatter && op_num == 2 && - LooksLikeSum(*instruction->scatter()))); + op_num == 1) || + (opcode == HloOpcode::kSelectAndScatter && op_num == 2)); } // Generate random values that are constrained to the input_shape minus the @@ -207,7 +233,7 @@ std::vector FindConstrainedUses( auto fused_uses = FindConstrainedUses(dataflow, *to_analyze); constrained_uses.insert(constrained_uses.end(), fused_uses.begin(), fused_uses.end()); - } else if (NeedsZeroInitValue(use)) { + } else if (NeedsInitValue(use)) { constrained_uses.push_back(instruction); } else if (opcode == HloOpcode::kConvert || opcode == HloOpcode::kReducePrecision) { @@ -228,7 +254,8 @@ StatusOr> CreateLiteralForConstrainedUses( const tensorflow::gtl::ArraySlice constrained_uses, const HloInstruction& param, std::minstd_rand0* engine) { HloInstruction* needs_index = nullptr; - HloInstruction* needs_zero = nullptr; + HloInstruction* needs_constant = nullptr; + ConstantType constant_type = ConstantType::kUnknown; for (HloInstruction* use : constrained_uses) { switch (use->opcode()) { case HloOpcode::kDynamicSlice: @@ -243,8 +270,13 @@ StatusOr> CreateLiteralForConstrainedUses( case HloOpcode::kReduce: case HloOpcode::kReduceWindow: + needs_constant = use; + constant_type = GetInitValue(*use->to_apply()); + break; + case HloOpcode::kSelectAndScatter: - needs_zero = use; + needs_constant = use; + constant_type = GetInitValue(*use->scatter()); break; default: @@ -253,17 +285,26 @@ StatusOr> CreateLiteralForConstrainedUses( use->ToString().c_str()); } } - if (needs_index != nullptr && needs_zero != nullptr) { + if (needs_index != nullptr && needs_constant != 
nullptr) { return Unimplemented( "Conflicting operand generation constraints.\nNeeds index: %s\nNeeds " - "zero: %s\n", - needs_index->ToString().c_str(), needs_zero->ToString().c_str()); + "constant: %s\n", + needs_index->ToString().c_str(), needs_constant->ToString().c_str()); } if (needs_index != nullptr) { return MakeRandomNonwrappingSliceIndex(needs_index->operand(0)->shape(), needs_index->shape(), engine); - } else if (needs_zero != nullptr) { - return Literal::CreateFromShape(param.shape()); + } else if (needs_constant != nullptr) { + switch (constant_type) { + case ConstantType::kZero: + return Literal::Zero(param.shape().element_type()).CloneToUnique(); + case ConstantType::kOne: + return Literal::One(param.shape().element_type()).CloneToUnique(); + case ConstantType::kUnknown: + // We want the identity element for the computation, but we don't really + // know what it is - so any value we generate will be just as wrong. + return MakeFakeLiteralInternal(param.shape(), engine); + } } else { return MakeFakeLiteralInternal(param.shape(), engine); } @@ -298,9 +339,9 @@ StatusOr>> MakeFakeArguments( return std::move(arguments); } -Status VerifyHloModule(const perftools::gputools::Platform& platform, - HloModule* const module) { - return HloVerifier().Run(module).status(); +Status VerifyHloModule(const se::Platform& platform, HloModule* const module, + bool allow_mixed_precision) { + return HloVerifier(allow_mixed_precision).Run(module).status(); } } // namespace xla diff --git a/tensorflow/compiler/xla/tests/test_utils.h b/tensorflow/compiler/xla/tests/test_utils.h index 0fb024ffb074f1c90b75022bc7f5a8b58b03c0c2..30c147910cae85e1ebdddc22e637a6c1fd577c20 100644 --- a/tensorflow/compiler/xla/tests/test_utils.h +++ b/tensorflow/compiler/xla/tests/test_utils.h @@ -68,8 +68,8 @@ StatusOr>> MakeFakeArguments( // Check that a given module satisfies various constraints before trying to // execute it. 
-Status VerifyHloModule(const perftools::gputools::Platform& platform, - HloModule* const module); +Status VerifyHloModule(const se::Platform& platform, HloModule* const module, + bool allow_mixed_precision = false); } // namespace xla diff --git a/tensorflow/compiler/xla/tests/test_utils_test.cc b/tensorflow/compiler/xla/tests/test_utils_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..e8efc6e2a83f42bf81fc1261ba508632cf3f85b3 --- /dev/null +++ b/tensorflow/compiler/xla/tests/test_utils_test.cc @@ -0,0 +1,57 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/tests/test_utils.h" + +#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/tests/local_client_test_base.h" +#include "tensorflow/compiler/xla/tests/test_macros.h" +#include "tensorflow/core/lib/core/status_test_util.h" + +namespace xla { +namespace { + +// A test fixture is used because we need a client for our computation builder. +class TestUtilsTest : public LocalClientTestBase {}; + +XLA_TEST_F(TestUtilsTest, UnusedParam) { + ComputationBuilder builder(local_client_, TestName()); + // Make the reduction lambda. 
+ Shape single_float = ShapeUtil::MakeShape(F32, {}); + builder.Parameter(0, single_float, "unused"); + builder.Parameter(1, single_float, "used"); + auto computation_status = builder.Build(); + TF_ASSERT_OK(computation_status.status()); + + // Make the reduction. + Shape pair_float = ShapeUtil::MakeShape(F32, {2}); + builder.Reduce(builder.Parameter(0, pair_float, "operand"), + builder.Parameter(1, single_float, "init"), + computation_status.ValueOrDie(), {0}); + computation_status = builder.Build(); + TF_ASSERT_OK(computation_status.status()); + + auto executable_status = local_client_->Compile( + computation_status.ValueOrDie(), {&pair_float, &single_float}, + ExecutableBuildOptions()); + TF_ASSERT_OK(executable_status.status()); + HloModule& module = const_cast( + executable_status.ValueOrDie()->executable()->module()); + TF_ASSERT_OK(MakeFakeArguments(&module).status()); +} + +} // namespace +} // namespace xla diff --git a/tensorflow/compiler/xla/tests/transfer_manager_test.cc b/tensorflow/compiler/xla/tests/transfer_manager_test.cc index 268ba338f2e6740a1d1a046d5a85494f3cf2e9f8..e2067bc1b835a946fc56801cbf227e05ef0686b4 100644 --- a/tensorflow/compiler/xla/tests/transfer_manager_test.cc +++ b/tensorflow/compiler/xla/tests/transfer_manager_test.cc @@ -45,7 +45,7 @@ class TransferManagerTest : public LocalClientTestBase { ~TransferManagerTest() override = default; - std::unique_ptr AllocateDeviceBuffer(const Shape& shape) { + ScopedShapedBuffer AllocateDeviceBuffer(const Shape& shape) { return transfer_manager_ ->AllocateScopedShapedBuffer( shape, GetOrCreateAllocator(local_client_->platform()), @@ -64,10 +64,10 @@ XLA_TEST_F(TransferManagerTest, TransferR0U32) { // Round trip literal through device. 
ASSERT_IS_OK(transfer_manager_->TransferLiteralToDevice( - stream_executor_, *literal, *device_buffer)); + stream_executor_, *literal, device_buffer)); TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr result, transfer_manager_->TransferLiteralFromDevice( - stream_executor_, *device_buffer)); + stream_executor_, device_buffer)); LiteralTestUtil::ExpectR0Equal(42, *result); } @@ -80,10 +80,10 @@ XLA_TEST_F(TransferManagerTest, TransferR1F32) { // Round trip literal through device. ASSERT_IS_OK(transfer_manager_->TransferLiteralToDevice( - stream_executor_, *literal, *device_buffer)); + stream_executor_, *literal, device_buffer)); TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr result, transfer_manager_->TransferLiteralFromDevice( - stream_executor_, *device_buffer)); + stream_executor_, device_buffer)); LiteralTestUtil::ExpectR1Equal({1.25f, 2.5f, -17.0f, -20.125f}, *result); @@ -98,10 +98,10 @@ XLA_TEST_F(TransferManagerTest, TransferR1LargeF32) { // Round trip literal through device. ASSERT_IS_OK(transfer_manager_->TransferLiteralToDevice( - stream_executor_, *literal, *device_buffer)); + stream_executor_, *literal, device_buffer)); TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr result, transfer_manager_->TransferLiteralFromDevice( - stream_executor_, *device_buffer)); + stream_executor_, device_buffer)); LiteralTestUtil::ExpectR1Equal(test_vector, *result); } @@ -114,10 +114,10 @@ XLA_TEST_F(TransferManagerTest, TransferR1U8) { // Round trip literal through device. ASSERT_IS_OK(transfer_manager_->TransferLiteralToDevice( - stream_executor_, *literal, *device_buffer)); + stream_executor_, *literal, device_buffer)); TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr result, transfer_manager_->TransferLiteralFromDevice( - stream_executor_, *device_buffer)); + stream_executor_, device_buffer)); EXPECT_EQ(result->GetR1U8AsString(), test_string); } @@ -130,10 +130,10 @@ XLA_TEST_F(TransferManagerTest, TransferR2F32) { // Round trip literal through device. 
ASSERT_IS_OK(transfer_manager_->TransferLiteralToDevice( - stream_executor_, *literal, *device_buffer)); + stream_executor_, *literal, device_buffer)); TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr result, transfer_manager_->TransferLiteralFromDevice( - stream_executor_, *device_buffer)); + stream_executor_, device_buffer)); LiteralTestUtil::ExpectR2Equal( {{1.0f, 2.0f, 3.0f}, {4.0f, 5.0f, 6.0f}}, *result); @@ -150,10 +150,10 @@ XLA_TEST_F(TransferManagerTest, // Round trip literal through device. Set the on-device layout to something // different than the literal layout. ASSERT_IS_OK(transfer_manager_->TransferLiteralToDevice( - stream_executor_, *literal, *device_buffer)); + stream_executor_, *literal, device_buffer)); TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr result, transfer_manager_->TransferLiteralFromDevice( - stream_executor_, *device_buffer)); + stream_executor_, device_buffer)); EXPECT_FALSE( LayoutUtil::Equal(result->shape().layout(), literal->shape().layout())); @@ -170,10 +170,10 @@ XLA_TEST_F(TransferManagerTest, TransferTuple) { // Round trip literal through device. ASSERT_IS_OK(transfer_manager_->TransferLiteralToDevice( - stream_executor_, *literal, *device_buffer)); + stream_executor_, *literal, device_buffer)); TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr result, transfer_manager_->TransferLiteralFromDevice( - stream_executor_, *device_buffer)); + stream_executor_, device_buffer)); LiteralTestUtil::ExpectEqual(*literal, *result); } @@ -184,10 +184,10 @@ XLA_TEST_F(TransferManagerTest, TransferEmptyTuple) { // Round trip literal through device. 
ASSERT_IS_OK(transfer_manager_->TransferLiteralToDevice( - stream_executor_, *literal, *device_buffer)); + stream_executor_, *literal, device_buffer)); TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr result, transfer_manager_->TransferLiteralFromDevice( - stream_executor_, *device_buffer)); + stream_executor_, device_buffer)); LiteralTestUtil::ExpectEqual(*literal, *result); } @@ -204,10 +204,10 @@ XLA_TEST_F(TransferManagerTest, TransferNestedTuple) { // Round trip literal through device. ASSERT_IS_OK(transfer_manager_->TransferLiteralToDevice( - stream_executor_, *literal, *device_buffer)); + stream_executor_, *literal, device_buffer)); TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr result, transfer_manager_->TransferLiteralFromDevice( - stream_executor_, *device_buffer)); + stream_executor_, device_buffer)); LiteralTestUtil::ExpectEqual(*literal, *result); } @@ -219,10 +219,10 @@ XLA_TEST_F(TransferManagerTest, TransferComplexValue) { // Round trip literal through device. ASSERT_IS_OK(transfer_manager_->TransferLiteralToDevice( - stream_executor_, *literal, *device_buffer)); + stream_executor_, *literal, device_buffer)); TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr result, transfer_manager_->TransferLiteralFromDevice( - stream_executor_, *device_buffer)); + stream_executor_, device_buffer)); LiteralTestUtil::ExpectEqual(*literal, *result); } @@ -238,10 +238,10 @@ XLA_TEST_F(TransferManagerTest, TransferComplexValueInTuple) { // Round trip literal through device. 
ASSERT_IS_OK(transfer_manager_->TransferLiteralToDevice( - stream_executor_, *literal, *device_buffer)); + stream_executor_, *literal, device_buffer)); TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr result, transfer_manager_->TransferLiteralFromDevice( - stream_executor_, *device_buffer)); + stream_executor_, device_buffer)); LiteralTestUtil::ExpectEqual(*literal, *result); } diff --git a/tensorflow/compiler/xla/tests/tuple_test.cc b/tensorflow/compiler/xla/tests/tuple_test.cc index 098be6d7aabe88d0deef600716229ddbd0bcae2f..61d0fa02aba80ddf60ab83f8355a5e3c1ef44518 100644 --- a/tensorflow/compiler/xla/tests/tuple_test.cc +++ b/tensorflow/compiler/xla/tests/tuple_test.cc @@ -535,8 +535,7 @@ TEST_F(TupleHloTest, HloRunner::CreateModuleFromString(testcase, GetDebugOptionsForTest()) .ValueOrDie(); auto param = Literal::MakeTupleOwned(Literal::CreateR1({1, 2, 3})); - TF_ASSERT_OK_AND_ASSIGN(auto result, - ExecuteNoHloPasses(std::move(module), {param.get()})); + auto result = ExecuteNoHloPasses(std::move(module), {param.get()}); EXPECT_TRUE(LiteralTestUtil::Equal( *result, *Literal::MakeTupleOwned(Literal::CreateR2({{1, 2, 3}})))); diff --git a/tensorflow/compiler/xla/tests/vector_ops_simple_test.cc b/tensorflow/compiler/xla/tests/vector_ops_simple_test.cc index b52c718814d4ffeff68c60588a6637a2159d57e5..697d78fe6e9abf6136f9ec4a708ef983732d0013 100644 --- a/tensorflow/compiler/xla/tests/vector_ops_simple_test.cc +++ b/tensorflow/compiler/xla/tests/vector_ops_simple_test.cc @@ -39,7 +39,7 @@ namespace { class VecOpsSimpleTest : public ClientLibraryTestBase { public: - explicit VecOpsSimpleTest(perftools::gputools::Platform* platform = nullptr) + explicit VecOpsSimpleTest(se::Platform* platform = nullptr) : ClientLibraryTestBase(platform) { mutable_debug_options()->add_xla_disable_hlo_passes("algsimp"); mutable_debug_options()->add_xla_disable_hlo_passes("inline"); diff --git a/tensorflow/compiler/xla/tests/while_test.cc b/tensorflow/compiler/xla/tests/while_test.cc index 
33d457c70bac84c2da10e3cf9302c2c952cf1bc2..1e18b5679956710d7e43c6209be3fdfe4877b4d3 100644 --- a/tensorflow/compiler/xla/tests/while_test.cc +++ b/tensorflow/compiler/xla/tests/while_test.cc @@ -18,10 +18,10 @@ limitations under the License. #include #include "tensorflow/compiler/xla/client/client_library.h" -#include "tensorflow/compiler/xla/client/computation.h" -#include "tensorflow/compiler/xla/client/computation_builder.h" #include "tensorflow/compiler/xla/client/lib/arithmetic.h" #include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_computation.h" #include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/service/platform_util.h" #include "tensorflow/compiler/xla/shape_util.h" @@ -37,8 +37,6 @@ limitations under the License. #include "tensorflow/core/platform/test_benchmark.h" #include "tensorflow/core/platform/types.h" -namespace se = ::perftools::gputools; - namespace xla { namespace { @@ -54,29 +52,28 @@ TEST_F(WhileTest, WhileWithScalarS32Result) { auto result_shape = ShapeUtil::MakeShape(S32, {}); // Create a computation for the condition: repeat for 5 iterations. - Computation condition; + XlaComputation condition; { - ComputationBuilder builder(client_, "condition"); + XlaBuilder builder("condition"); auto prev = builder.Parameter(0, result_shape, "prev"); builder.Gt(builder.ConstantR0(5), prev); condition = builder.Build().ConsumeValueOrDie(); } // Create a computation for the body: add 1 to the result variable. 
- Computation body; + XlaComputation body; { - ComputationBuilder builder(client_, "body"); + XlaBuilder builder("body"); auto prev = builder.Parameter(0, result_shape, "prev"); auto input = builder.ConstantR0(1); - auto result = builder.Add(input, prev); + builder.Add(input, prev); body = builder.Build().ConsumeValueOrDie(); } // Create a While node with computations for the condition and the body. - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto init = builder.ConstantR0(0); - auto result = builder.While(condition, body, init); - auto shape = builder.GetShape(result).ConsumeValueOrDie(); + builder.While(condition, body, init); ComputeAndCompareR0(&builder, 5, {}); } @@ -91,29 +88,28 @@ TEST_F(WhileTest, WhileWithScalarS64Result) { auto result_shape = ShapeUtil::MakeShape(S64, {}); // Create a computation for the condition: repeat for 5 iterations. - Computation condition; + XlaComputation condition; { - ComputationBuilder builder(client_, "condition"); + XlaBuilder builder("condition"); auto prev = builder.Parameter(0, result_shape, "prev"); builder.Gt(builder.ConstantR0(5), prev); condition = builder.Build().ConsumeValueOrDie(); } // Create a computation for the body: add 1 to the result variable. - Computation body; + XlaComputation body; { - ComputationBuilder builder(client_, "body"); + XlaBuilder builder("body"); auto prev = builder.Parameter(0, result_shape, "prev"); auto input = builder.ConstantR0(1); - auto result = builder.Add(input, prev); + builder.Add(input, prev); body = builder.Build().ConsumeValueOrDie(); } // Create a While node with computations for the condition and the body. 
- ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto init = builder.ConstantR0(0); - auto result = builder.While(condition, body, init); - auto shape = builder.GetShape(result).ConsumeValueOrDie(); + builder.While(condition, body, init); ComputeAndCompareR0(&builder, 5, {}); } @@ -123,31 +119,30 @@ TEST_F(WhileTest, WhileWithScalarResultNonConstInit) { auto orig_shape = ShapeUtil::MakeShape(S32, {2}); // Create a computation for the condition: repeat for 5 iterations. - Computation condition; + XlaComputation condition; { - ComputationBuilder builder(client_, "condition"); + XlaBuilder builder("condition"); auto prev = builder.Parameter(0, result_shape, "prev"); builder.Gt(builder.ConstantR0(5), prev); condition = builder.Build().ConsumeValueOrDie(); } // Create a computation for the body: add 1 to the result variable. - Computation body; + XlaComputation body; { - ComputationBuilder builder(client_, "body"); + XlaBuilder builder("body"); auto prev = builder.Parameter(0, result_shape, "prev"); auto input = builder.ConstantR0(1); - auto result = builder.Add(input, prev); + builder.Add(input, prev); body = builder.Build().ConsumeValueOrDie(); } // Create a While node with computations for the condition and the body. - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto init = builder.Reduce(builder.ConstantR1(2, 1), builder.ConstantR0(0), CreateScalarAddComputation(S32, &builder), {0}); - auto result = builder.While(condition, body, init); - auto shape = builder.GetShape(result).ConsumeValueOrDie(); + builder.While(condition, body, init); ComputeAndCompareR0(&builder, 5, {}); } @@ -156,28 +151,28 @@ TEST_F(WhileTest, WhileWithPredicateResult) { auto result_shape = ShapeUtil::MakeShape(PRED, {}); // Create a computation for the condition: run until condition is true. 
- Computation condition; + XlaComputation condition; { - ComputationBuilder builder(client_, "condition"); + XlaBuilder builder("condition"); auto prev = builder.Parameter(0, result_shape, "prev"); builder.Ne(builder.ConstantR0(true), prev); condition = builder.Build().ConsumeValueOrDie(); } // Create a computation for the body: or condition with true. - Computation body; + XlaComputation body; { - ComputationBuilder builder(client_, "body"); + XlaBuilder builder("body"); auto prev = builder.Parameter(0, result_shape, "prev"); - auto result = builder.Or(prev, builder.ConstantR0(true)); + builder.Or(prev, builder.ConstantR0(true)); body = builder.Build().ConsumeValueOrDie(); } // Create a While node with computations for the condition and the body. - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto init = builder.Ne(builder.ConstantR0(false), builder.ConstantR0(true)); - auto result = builder.While(condition, body, init); + builder.While(condition, body, init); ComputeAndCompareR0(&builder, true, {}); } @@ -194,9 +189,9 @@ TEST_F(WhileTest, DISABLED_WhileWithEmptyVectorResult) { Shape result_shape = ShapeUtil::MakeShape(F32, {0}); // Create a computation for the reduction. - Computation add; + XlaComputation add; { - ComputationBuilder builder(client_, "add"); + XlaBuilder builder("add"); auto x = builder.Parameter(0, ShapeUtil::MakeShape(F32, {}), "x"); auto y = builder.Parameter(1, ShapeUtil::MakeShape(F32, {}), "y"); builder.Add(x, y); @@ -205,33 +200,34 @@ TEST_F(WhileTest, DISABLED_WhileWithEmptyVectorResult) { // Create a computation for the condition. // Repeat until the sum of the result vector is less than 15.5f. 
- Computation condition; + XlaComputation condition; { - ComputationBuilder builder(client_, "condition"); + XlaBuilder builder("condition"); auto prev = builder.Parameter(0, result_shape, "prev"); auto sum = builder.Reduce(prev, builder.ConstantR0(0.0f), add, /*dimensions_to_reduce=*/{0}); - auto test = builder.Gt(builder.ConstantR0(15.5f), sum); + builder.Gt(builder.ConstantR0(15.5f), sum); condition = builder.Build().ConsumeValueOrDie(); } // Create a computation for the body. // Add a constant vector of 1.f to the result vector. - Computation body; + XlaComputation body; { - ComputationBuilder builder(client_, "body"); + XlaBuilder builder("body"); auto prev = builder.Parameter(0, result_shape, "prev"); auto input = builder.ConstantR1({}); - auto result = builder.Add(input, prev); + builder.Add(input, prev); body = builder.Build().ConsumeValueOrDie(); } // Create a While node with computations for the condition and the body. - ComputationBuilder builder(client_, "while"); + XlaBuilder builder("while"); auto init = builder.ConstantR1({}); auto result = builder.While(condition, body, init); - VLOG(2) << "while = " << ShapeUtil::HumanString( - *builder.GetShape(result).ConsumeValueOrDie()); + VLOG(2) << "while = " + << ShapeUtil::HumanString( + builder.GetShape(result).ConsumeValueOrDie()); ComputeAndCompareR1(&builder, {}, {}, ErrorSpec(0.0001)); } @@ -247,9 +243,9 @@ TEST_F(WhileTest, WhileWithVectorResult) { Shape result_shape = ShapeUtil::MakeShape(F32, {8}); // Create a computation for the reduction. - Computation add; + XlaComputation add; { - ComputationBuilder builder(client_, "add"); + XlaBuilder builder("add"); auto x = builder.Parameter(0, ShapeUtil::MakeShape(F32, {}), "x"); auto y = builder.Parameter(1, ShapeUtil::MakeShape(F32, {}), "y"); builder.Add(x, y); @@ -258,33 +254,34 @@ TEST_F(WhileTest, WhileWithVectorResult) { // Create a computation for the condition. // Repeat until the sum of the result vector is less than 5.5f. 
- Computation condition; + XlaComputation condition; { - ComputationBuilder builder(client_, "condition"); + XlaBuilder builder("condition"); auto prev = builder.Parameter(0, result_shape, "prev"); auto sum = builder.Reduce(prev, builder.ConstantR0(0.0f), add, /*dimensions_to_reduce=*/{0}); - auto test = builder.Gt(builder.ConstantR0(15.5f), sum); + builder.Gt(builder.ConstantR0(15.5f), sum); condition = builder.Build().ConsumeValueOrDie(); } // Create a computation for the body. // Add a constant vector of 1.f to the result vector. - Computation body; + XlaComputation body; { - ComputationBuilder builder(client_, "body"); + XlaBuilder builder("body"); auto prev = builder.Parameter(0, result_shape, "prev"); auto input = builder.ConstantR1(8, 0.125f); - auto result = builder.Add(input, prev); + builder.Add(input, prev); body = builder.Build().ConsumeValueOrDie(); } // Create a While node with computations for the condition and the body. - ComputationBuilder builder(client_, "while"); + XlaBuilder builder("while"); auto init = builder.ConstantR1(8, 0.f); auto result = builder.While(condition, body, init); - VLOG(2) << "while = " << ShapeUtil::HumanString( - *builder.GetShape(result).ConsumeValueOrDie()); + VLOG(2) << "while = " + << ShapeUtil::HumanString( + builder.GetShape(result).ConsumeValueOrDie()); // Individual elements with increase by 1/8 each time through the loop, so // the sum will increase by 1.0. It will first be >15.5 when the elements @@ -306,9 +303,9 @@ TEST_F(WhileTest, WhileWithVectorResultIntoTuple) { Shape result_shape = ShapeUtil::MakeShape(F32, {8}); // Create a computation for the reduction. 
- Computation add; + XlaComputation add; { - ComputationBuilder builder(client_, "add"); + XlaBuilder builder("add"); auto x = builder.Parameter(0, ShapeUtil::MakeShape(F32, {}), "x"); auto y = builder.Parameter(1, ShapeUtil::MakeShape(F32, {}), "y"); builder.Add(x, y); @@ -317,34 +314,34 @@ TEST_F(WhileTest, WhileWithVectorResultIntoTuple) { // Create a computation for the condition. // Repeat until the sum of the result vector is less than 5.5f. - Computation condition; + XlaComputation condition; { - ComputationBuilder builder(client_, "condition"); + XlaBuilder builder("condition"); auto prev = builder.Parameter(0, result_shape, "prev"); auto sum = builder.Reduce(prev, builder.ConstantR0(0.0f), add, /*dimensions_to_reduce=*/{0}); - auto test = builder.Gt(builder.ConstantR0(15.5f), sum); + builder.Gt(builder.ConstantR0(15.5f), sum); condition = builder.Build().ConsumeValueOrDie(); } // Create a computation for the body. // Add a constant vector of 1.f to the result vector. - Computation body; + XlaComputation body; { - ComputationBuilder builder(client_, "body"); + XlaBuilder builder("body"); auto prev = builder.Parameter(0, result_shape, "prev"); auto input = builder.ConstantR1(8, 0.125f); - auto result = builder.Add(input, prev); + builder.Add(input, prev); body = builder.Build().ConsumeValueOrDie(); } // Create a While node with computations for the condition and the body. - ComputationBuilder builder(client_, "while"); + XlaBuilder builder("while"); auto init = builder.ConstantR1(8, 0.f); auto result = builder.While(condition, body, init); VLOG(2) << "while = " << ShapeUtil::HumanString( - *builder.GetShape(result).ConsumeValueOrDie()); + builder.GetShape(result).ConsumeValueOrDie()); builder.Tuple({result}); // Individual elements with increase by 1/8 each time through the loop, so @@ -366,9 +363,9 @@ TEST_F(WhileTest, WhileWithPermutationAndTupleResult) { // Create a computation for the condition. // Repeat for N iterations. 
const int N = 2; - Computation condition; + XlaComputation condition; { - ComputationBuilder builder(client_, "condition"); + XlaBuilder builder("condition"); auto prev = builder.Parameter(0, result_shape, "prev"); auto iteration = builder.GetTupleElement(prev, 0); builder.Gt(builder.ConstantR0(N), iteration); @@ -377,28 +374,28 @@ TEST_F(WhileTest, WhileWithPermutationAndTupleResult) { // Create a computation for the body. // Add 1 to the iteration variable and permute the weights. - Computation body; + XlaComputation body; { - ComputationBuilder builder(client_, "body"); + XlaBuilder builder("body"); auto prev = builder.Parameter(0, result_shape, "prev"); auto iteration = builder.GetTupleElement(prev, 0); auto w1 = builder.GetTupleElement(prev, 1); auto w2 = builder.GetTupleElement(prev, 2); auto w3 = builder.GetTupleElement(prev, 3); - auto result = builder.Tuple( + builder.Tuple( {builder.Add(iteration, builder.ConstantR0(1)), w3, w1, w2}); body = builder.Build().ConsumeValueOrDie(); } // Create a While node with computations for the condition and the body. - ComputationBuilder builder(client_, "while"); + XlaBuilder builder("while"); auto init = builder.Tuple( {builder.ConstantR0(0), builder.ConstantR1(3, 1.f), builder.ConstantR1(3, 2.f), builder.ConstantR1(3, 3.f)}); auto result = builder.While(condition, body, init); VLOG(2) << "result = " << ShapeUtil::HumanString( - *builder.GetShape(result).ConsumeValueOrDie()); + builder.GetShape(result).ConsumeValueOrDie()); auto expected_counter = Literal::CreateR0(N); auto expected_w1 = Literal::CreateR1({1.0f, 1.0f, 1.0f}); @@ -419,9 +416,9 @@ TEST_F(WhileTest, WhileWithPermutationAndVectorResult) { // Create a computation for the condition. // Repeat for N iterations. 
const int N = 2; - Computation condition; + XlaComputation condition; { - ComputationBuilder builder(client_, "condition"); + XlaBuilder builder("condition"); auto prev = builder.Parameter(0, result_shape, "prev"); auto iteration = builder.GetTupleElement(prev, 0); builder.Gt(builder.ConstantR0(N), iteration); @@ -430,21 +427,21 @@ TEST_F(WhileTest, WhileWithPermutationAndVectorResult) { // Create a computation for the body. // Add 1 to the iteration variable permute the weights. - Computation body; + XlaComputation body; { - ComputationBuilder builder(client_, "body"); + XlaBuilder builder("body"); auto prev = builder.Parameter(0, result_shape, "prev"); auto iteration = builder.GetTupleElement(prev, 0); auto w1 = builder.GetTupleElement(prev, 1); auto w2 = builder.GetTupleElement(prev, 2); auto w3 = builder.GetTupleElement(prev, 3); - auto result = builder.Tuple( + builder.Tuple( {builder.Add(iteration, builder.ConstantR0(1)), w3, w1, w2}); body = builder.Build().ConsumeValueOrDie(); } // Create a While node with computations for the condition and the body. - ComputationBuilder builder(client_, "while"); + XlaBuilder builder("while"); auto init = builder.Tuple( {builder.ConstantR0(0), builder.ConstantR1(3, 1.f), builder.ConstantR1(3, 2.f), builder.ConstantR1(3, 3.f)}); @@ -455,7 +452,7 @@ TEST_F(WhileTest, WhileWithPermutationAndVectorResult) { auto result = builder.Add(add12, builder.GetTupleElement(xla_while, 3)); VLOG(2) << "result = " << ShapeUtil::HumanString( - *builder.GetShape(result).ConsumeValueOrDie()); + builder.GetShape(result).ConsumeValueOrDie()); std::vector expected = {6.f, 6.f, 6.f}; ComputeAndCompareR1(&builder, expected, {}, ErrorSpec(0.0001)); } @@ -474,9 +471,9 @@ TEST_F(WhileTest, WhileWithTupleResult) { // Create a computation for the condition. // Repeat for 5 iterations. 
- Computation condition; + XlaComputation condition; { - ComputationBuilder builder(client_, "condition"); + XlaBuilder builder("condition"); auto prev = builder.Parameter(0, result_shape, "prev"); auto iteration = builder.GetTupleElement(prev, 0); builder.Gt(builder.ConstantR0(5), iteration); @@ -486,26 +483,27 @@ TEST_F(WhileTest, WhileWithTupleResult) { // Create a computation for the body. // Add 1 to the iteration variable and add a constant vector of 1.0f to // the weight variable, both of which are tuple elements. - Computation body; + XlaComputation body; { - ComputationBuilder builder(client_, "body"); + XlaBuilder builder("body"); auto prev = builder.Parameter(0, result_shape, "prev"); auto iteration = builder.GetTupleElement(prev, 0); auto weights = builder.GetTupleElement(prev, 1); auto input = builder.ConstantR1(10, 1.f); auto new_weights = builder.Add(weights, input); - auto result = builder.Tuple( + builder.Tuple( {builder.Add(iteration, builder.ConstantR0(1)), new_weights}); body = builder.Build().ConsumeValueOrDie(); } // Create a While node with computations for the condition and the body. - ComputationBuilder builder(client_, "while"); + XlaBuilder builder("while"); auto init = builder.Tuple( {builder.ConstantR0(0), builder.ConstantR1(10, 0.f)}); auto result = builder.While(condition, body, init); - VLOG(2) << "while = " << ShapeUtil::HumanString( - *builder.GetShape(result).ConsumeValueOrDie()); + VLOG(2) << "while = " + << ShapeUtil::HumanString( + builder.GetShape(result).ConsumeValueOrDie()); auto expected_counter = Literal::CreateR0(5); auto expected_data = Literal::CreateR1( @@ -523,9 +521,9 @@ TEST_F(WhileTest, WhileWithPredicateTupleResult) { // Create a computation for the condition. // Repeat for 5 iterations. 
- Computation condition; + XlaComputation condition; { - ComputationBuilder builder(client_, "condition"); + XlaBuilder builder("condition"); auto prev = builder.Parameter(0, result_shape, "prev"); auto iteration = builder.GetTupleElement(prev, 0); builder.Gt(builder.ConstantR0(5), iteration); @@ -534,27 +532,27 @@ TEST_F(WhileTest, WhileWithPredicateTupleResult) { // Create a computation for the body. // Add 1 to the iteration variable and or the predicate with true - Computation body; + XlaComputation body; { - ComputationBuilder builder(client_, "body"); + XlaBuilder builder("body"); auto prev = builder.Parameter(0, result_shape, "prev"); auto iteration = builder.GetTupleElement(prev, 0); auto pred = builder.GetTupleElement(prev, 1); auto new_pred = builder.Or(pred, builder.ConstantR0(true)); - auto result = builder.Tuple( + builder.Tuple( {builder.Add(iteration, builder.ConstantR0(1)), new_pred}); body = builder.Build().ConsumeValueOrDie(); } // Create a While node with computations for the condition and the body. - ComputationBuilder builder(client_, "while"); + XlaBuilder builder("while"); auto init = builder.Tuple({builder.ConstantR0(0), builder.Ne(builder.ConstantR0(false), builder.ConstantR0(true))}); auto result = builder.While(condition, body, init); VLOG(2) << "while = " << ShapeUtil::HumanString( - *builder.GetShape(result).ConsumeValueOrDie()); + builder.GetShape(result).ConsumeValueOrDie()); auto expected_counter = Literal::CreateR0(5); auto expected_predicate = Literal::CreateR0(true); @@ -570,9 +568,9 @@ TEST_F(WhileTest, WhileWithTupleConstantScalarResult) { // Create a computation for the condition. // Repeat for 5 iterations. 
- Computation condition; + XlaComputation condition; { - ComputationBuilder builder(client_, "condition"); + XlaBuilder builder("condition"); auto prev = builder.Parameter(0, result_shape, "prev"); auto iteration = builder.GetTupleElement(prev, 0); builder.Gt(builder.ConstantR0(5), iteration); @@ -582,25 +580,24 @@ TEST_F(WhileTest, WhileWithTupleConstantScalarResult) { // Create a computation for the body. // Add 1 to the iteration variable and set the other tuple element to a // constant. - Computation body; + XlaComputation body; { - ComputationBuilder builder(client_, "body"); + XlaBuilder builder("body"); auto prev = builder.Parameter(0, result_shape, "prev"); auto iteration = builder.GetTupleElement(prev, 0); - auto result = - builder.Tuple({builder.Add(iteration, builder.ConstantR0(1)), - builder.ConstantR0(7)}); + builder.Tuple({builder.Add(iteration, builder.ConstantR0(1)), + builder.ConstantR0(7)}); body = builder.Build().ConsumeValueOrDie(); } // Create a While node with computations for the condition and the body. - ComputationBuilder builder(client_, "while"); + XlaBuilder builder("while"); auto init = builder.Tuple( {builder.ConstantR0(0), builder.ConstantR0(7)}); auto result = builder.While(condition, body, init); VLOG(2) << "while = " << ShapeUtil::HumanString( - *builder.GetShape(result).ConsumeValueOrDie()); + builder.GetShape(result).ConsumeValueOrDie()); auto expected_counter = Literal::CreateR0(5); auto expected_data = Literal::CreateR0(7); @@ -631,20 +628,20 @@ TEST_F(WhileTest, TwoWhileWithTupleResult) { // Create a computation for the condition. // Repeat for 5 iterations. 
- Computation condition; + XlaComputation condition; const int c1 = 5; { - ComputationBuilder builder(client_, "condition"); + XlaBuilder builder("condition"); auto prev = builder.Parameter(0, result_shape, "prev"); auto iteration = builder.GetTupleElement(prev, 0); builder.Lt(iteration, builder.ConstantR0(c1)); TF_ASSERT_OK_AND_ASSIGN(condition, builder.Build()); } - Computation condition2; + XlaComputation condition2; const int c2 = 7; { - ComputationBuilder builder(client_, "condition2"); + XlaBuilder builder("condition2"); auto prev = builder.Parameter(0, result_shape, "prev"); auto iteration = builder.GetTupleElement(prev, 0); builder.Lt(iteration, builder.ConstantR0(c2)); @@ -654,34 +651,34 @@ TEST_F(WhileTest, TwoWhileWithTupleResult) { // Create a computation for the body. // Add 1 to the iteration variable and add a constant vector of 1.0f to // the weight variable, both of which are tuple elements. - Computation body; + XlaComputation body; { - ComputationBuilder builder(client_, "body"); + XlaBuilder builder("body"); auto prev = builder.Parameter(0, result_shape, "prev"); auto iteration = builder.GetTupleElement(prev, 0); auto weights = builder.GetTupleElement(prev, 1); auto input = builder.ConstantR1(10, 1.f); auto new_weights = builder.Add(weights, input); - auto result = builder.Tuple( + builder.Tuple( {builder.Add(iteration, builder.ConstantR0(1)), new_weights}); TF_ASSERT_OK_AND_ASSIGN(body, builder.Build()); } - Computation body2; + XlaComputation body2; { - ComputationBuilder builder(client_, "body"); + XlaBuilder builder("body"); auto prev = builder.Parameter(0, result_shape, "prev"); auto iteration = builder.GetTupleElement(prev, 0); auto weights = builder.GetTupleElement(prev, 1); auto input = builder.ConstantR1(10, 1.f); auto new_weights = builder.Add(weights, input); - auto result = builder.Tuple( + builder.Tuple( {builder.Add(iteration, builder.ConstantR0(1)), new_weights}); TF_ASSERT_OK_AND_ASSIGN(body2, builder.Build()); } // Create a 
While node with computations for the condition and the body. - ComputationBuilder builder(client_, "while"); + XlaBuilder builder("while"); auto init = builder.Tuple( {builder.ConstantR0(0), builder.ConstantR1(10, 0.f)}); auto while1 = builder.While(condition, body, init); @@ -692,11 +689,11 @@ TEST_F(WhileTest, TwoWhileWithTupleResult) { auto while_result2 = builder.GetTupleElement(while2, 1); VLOG(2) << "while_result2 = " << ShapeUtil::HumanString( - *builder.GetShape(while_result2).ConsumeValueOrDie()); + builder.GetShape(while_result2).ConsumeValueOrDie()); auto result = builder.Add(while_result1, while_result2); VLOG(2) << "result = " << ShapeUtil::HumanString( - *builder.GetShape(result).ConsumeValueOrDie()); + builder.GetShape(result).ConsumeValueOrDie()); const float sum = c1 + c2; std::vector expected(10, sum); ComputeAndCompareR1(&builder, expected, {}, ErrorSpec(0.0001)); @@ -710,20 +707,20 @@ TEST_F(WhileTest, TwoWhileLoopsAndSharedBody) { // Create a computation for the condition. // Repeat for 5 iterations. - Computation condition; + XlaComputation condition; const int c1 = 5; { - ComputationBuilder builder(client_, "condition"); + XlaBuilder builder("condition"); auto prev = builder.Parameter(0, result_shape, "prev"); auto iteration = builder.GetTupleElement(prev, 0); builder.Lt(iteration, builder.ConstantR0(c1)); TF_ASSERT_OK_AND_ASSIGN(condition, builder.Build()); } - Computation condition2; + XlaComputation condition2; const int c2 = 7; { - ComputationBuilder builder(client_, "condition2"); + XlaBuilder builder("condition2"); auto prev = builder.Parameter(0, result_shape, "prev"); auto iteration = builder.GetTupleElement(prev, 0); builder.Lt(iteration, builder.ConstantR0(c2)); @@ -733,21 +730,21 @@ TEST_F(WhileTest, TwoWhileLoopsAndSharedBody) { // Create a computation for the body. // Add 1 to the iteration variable and add a constant vector of 1.0f to // the weight variable, both of which are tuple elements. 
- Computation body; + XlaComputation body; { - ComputationBuilder builder(client_, "body"); + XlaBuilder builder("body"); auto prev = builder.Parameter(0, result_shape, "prev"); auto iteration = builder.GetTupleElement(prev, 0); auto weights = builder.GetTupleElement(prev, 1); auto input = builder.ConstantR1(10, 1.f); auto new_weights = builder.Add(weights, input); - auto result = builder.Tuple( + builder.Tuple( {builder.Add(iteration, builder.ConstantR0(1)), new_weights}); TF_ASSERT_OK_AND_ASSIGN(body, builder.Build()); } // Create a While node with computations for the condition and the body. - ComputationBuilder builder(client_, "while"); + XlaBuilder builder("while"); auto init = builder.Tuple( {builder.ConstantR0(0), builder.ConstantR1(10, 0.f)}); auto while1 = builder.While(condition, body, init); @@ -758,11 +755,11 @@ TEST_F(WhileTest, TwoWhileLoopsAndSharedBody) { auto while_result2 = builder.GetTupleElement(while2, 1); VLOG(2) << "while_result2 = " << ShapeUtil::HumanString( - *builder.GetShape(while_result2).ConsumeValueOrDie()); + builder.GetShape(while_result2).ConsumeValueOrDie()); auto result = builder.Add(while_result1, while_result2); VLOG(2) << "result = " << ShapeUtil::HumanString( - *builder.GetShape(result).ConsumeValueOrDie()); + builder.GetShape(result).ConsumeValueOrDie()); const float sum = c1 + c2; std::vector expected(10, sum); ComputeAndCompareR1(&builder, expected, {}, ErrorSpec(0.0001)); @@ -777,20 +774,20 @@ TEST_F(WhileTest, DISABLED_ON_GPU(WhileLoopsWithSharedBodyAndInit)) { // Create a computation for the condition. // Repeat for 5 iterations. 
- Computation condition; + XlaComputation condition; const int c1 = 5; { - ComputationBuilder builder(client_, "condition"); + XlaBuilder builder("condition"); auto prev = builder.Parameter(0, result_shape, "prev"); auto iteration = builder.GetTupleElement(prev, 0); builder.Lt(iteration, builder.ConstantR0(c1)); TF_ASSERT_OK_AND_ASSIGN(condition, builder.Build()); } - Computation condition2; + XlaComputation condition2; const int c2 = 7; { - ComputationBuilder builder(client_, "condition2"); + XlaBuilder builder("condition2"); auto prev = builder.Parameter(0, result_shape, "prev"); auto iteration = builder.GetTupleElement(prev, 0); builder.Lt(iteration, builder.ConstantR0(c2)); @@ -800,21 +797,21 @@ TEST_F(WhileTest, DISABLED_ON_GPU(WhileLoopsWithSharedBodyAndInit)) { // Create a computation for the body. // Add 1 to the iteration variable and add a constant vector of 1.0f to // the weight variable, both of which are tuple elements. - Computation body; + XlaComputation body; { - ComputationBuilder builder(client_, "body"); + XlaBuilder builder("body"); auto prev = builder.Parameter(0, result_shape, "prev"); auto iteration = builder.GetTupleElement(prev, 0); auto weights = builder.GetTupleElement(prev, 1); auto input = builder.ConstantR1(10, 1.f); auto new_weights = builder.Add(weights, input); - auto result = builder.Tuple( + builder.Tuple( {builder.Add(iteration, builder.ConstantR0(1)), new_weights}); TF_ASSERT_OK_AND_ASSIGN(body, builder.Build()); } // Create a While node with computations for the condition and the body. 
- ComputationBuilder builder(client_, "while"); + XlaBuilder builder("while"); auto init = builder.Tuple( {builder.ConstantR0(0), builder.ConstantR1(10, 0.f)}); auto while1 = builder.While(condition, body, init); @@ -824,11 +821,11 @@ TEST_F(WhileTest, DISABLED_ON_GPU(WhileLoopsWithSharedBodyAndInit)) { auto while_result2 = builder.GetTupleElement(while2, 1); VLOG(2) << "while_result2 = " << ShapeUtil::HumanString( - *builder.GetShape(while_result2).ConsumeValueOrDie()); + builder.GetShape(while_result2).ConsumeValueOrDie()); auto result = builder.Add(while_result1, while_result2); VLOG(2) << "result = " << ShapeUtil::HumanString( - *builder.GetShape(result).ConsumeValueOrDie()); + builder.GetShape(result).ConsumeValueOrDie()); const float sum = c1 + c2; std::vector expected(10, sum); ComputeAndCompareR1(&builder, expected, {}, ErrorSpec(0.0001)); @@ -844,9 +841,9 @@ XLA_TEST_F(WhileTest, WhileWithDynamicUpdateSlice) { // Create a computation for the condition. // Repeat for 5 iterations. - Computation condition; + XlaComputation condition; { - ComputationBuilder builder(client_, "condition"); + XlaBuilder builder("condition"); auto prev = builder.Parameter(0, result_shape, "prev"); auto iteration = builder.GetTupleElement(prev, 0); builder.Gt(builder.ConstantR0(5), iteration); @@ -856,9 +853,9 @@ XLA_TEST_F(WhileTest, WhileWithDynamicUpdateSlice) { // Create a computation for the body. // Add 1 to the iteration variable and add a constant vector of 1.0f to // the weight variable, both of which are tuple elements. - Computation body; + XlaComputation body; { - ComputationBuilder builder(client_, "body"); + XlaBuilder builder("body"); auto prev = builder.Parameter(0, result_shape, "prev"); // TupleElement 0 auto iteration = builder.GetTupleElement(prev, 0); @@ -873,18 +870,18 @@ XLA_TEST_F(WhileTest, WhileWithDynamicUpdateSlice) { // UpdateSlice. 
auto out1 = builder.DynamicUpdateSlice(input, update, starts); - auto result = builder.Tuple({out0, out1}); + builder.Tuple({out0, out1}); body = builder.Build().ConsumeValueOrDie(); } // Create a While node with computations for the condition and the body. - ComputationBuilder builder(client_, "while"); + XlaBuilder builder("while"); auto init = builder.Tuple( {builder.ConstantR0(0), builder.ConstantR1(10, 0.f)}); auto result = builder.While(condition, body, init); VLOG(2) << "while = " << ShapeUtil::HumanString( - *builder.GetShape(result).ConsumeValueOrDie()); + builder.GetShape(result).ConsumeValueOrDie()); auto expected_counter = Literal::CreateR0(5); auto expected_data = Literal::CreateR1( @@ -915,18 +912,18 @@ TEST_F(WhileTest, DISABLED_ON_INTERPRETER(WhileWithPrngScalarResult)) { // Create a computation for the condition: repeat for count iterations. auto build_condition = [this, v6s32](int count) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto prev = builder.Reshape( builder.Slice(builder.Parameter(0, v6s32, "prev"), {0}, {1}, {1}), {0}, - {}); + {}); builder.Gt(builder.ConstantR0(count), prev); return builder.Build().ConsumeValueOrDie(); }; // Create a computation for the body: add 1 to the result variable. - Computation body; + XlaComputation body; { - ComputationBuilder builder(client_, "body"); + XlaBuilder builder("body"); auto prev = builder.Parameter(0, v6s32, "prev"); auto inc = builder.ConcatInDim( {builder.ConstantR1({1}), @@ -934,16 +931,15 @@ TEST_F(WhileTest, DISABLED_ON_INTERPRETER(WhileWithPrngScalarResult)) { builder.ConstantR0(100), ShapeUtil::MakeShape(S32, {5}))}, 0); - auto result = builder.Add(inc, prev); + builder.Add(inc, prev); body = builder.Build().ConsumeValueOrDie(); } // Create a While node with computations for the condition and the body. 
auto while_loop = [this, &body, build_condition](int count) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto init = builder.ConstantR1({0, 0, 0, 0, 0, 0}); - auto result = builder.While(build_condition(count), body, init); - auto shape = builder.GetShape(result).ConsumeValueOrDie(); + builder.While(build_condition(count), body, init); return builder.Build(); }; @@ -1107,9 +1103,9 @@ XLA_TEST_F(WhileTest, NestedWhileWithScalarResult) { auto inner_result_shape = ShapeUtil::MakeTupleShape( {ShapeUtil::MakeShape(S32, {}), ShapeUtil::MakeShape(S32, {})}); - Computation inner_condition; + XlaComputation inner_condition; { - ComputationBuilder builder(client_, "inner_condition"); + XlaBuilder builder("inner_condition"); auto params = builder.Parameter(0, inner_result_shape, "prev"); auto i = builder.GetTupleElement(params, 0); builder.Lt(i, builder.ConstantR0(7)); @@ -1118,9 +1114,9 @@ XLA_TEST_F(WhileTest, NestedWhileWithScalarResult) { // Creates a computation for the outer loop condition: // repeat while result < 30. - Computation outer_condition; + XlaComputation outer_condition; { - ComputationBuilder builder(client_, "outer_condition"); + XlaBuilder builder("outer_condition"); auto prev = builder.Parameter(0, outer_result_shape, "prev"); builder.Lt(prev, builder.ConstantR0(30)); outer_condition = builder.Build().ConsumeValueOrDie(); @@ -1128,34 +1124,33 @@ XLA_TEST_F(WhileTest, NestedWhileWithScalarResult) { // Creates a computation for the inner loop body: add 1 to `i`, and add 2 to // `result`. 
- Computation inner_body; + XlaComputation inner_body; { - ComputationBuilder builder(client_, "inner_body"); + XlaBuilder builder("inner_body"); auto params = builder.Parameter(0, inner_result_shape, "prev"); auto i = builder.GetTupleElement(params, 0); auto result = builder.GetTupleElement(params, 1); i = builder.Add(builder.ConstantR0(1), i); result = builder.Add(builder.ConstantR0(2), result); - auto output = builder.Tuple({i, result}); + builder.Tuple({i, result}); inner_body = builder.Build().ConsumeValueOrDie(); } // Creates a computation for the outer loop: run the inner loop with i = 0. - Computation outer_body; + XlaComputation outer_body; { - ComputationBuilder builder(client_, "outer_body"); + XlaBuilder builder("outer_body"); auto prev = builder.Parameter(0, outer_result_shape, "prev"); auto init = builder.Tuple({builder.ConstantR0(0), prev}); auto result = builder.While(inner_condition, inner_body, init); - auto output = builder.GetTupleElement(result, 1); + builder.GetTupleElement(result, 1); outer_body = builder.Build().ConsumeValueOrDie(); } // Create a While node with computations for the condition and the body. - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto init = builder.ConstantR0(0); - auto result = builder.While(outer_condition, outer_body, init); - auto shape = builder.GetShape(result).ConsumeValueOrDie(); + builder.While(outer_condition, outer_body, init); ComputeAndCompareR0(&builder, 42, {}); } @@ -1170,18 +1165,18 @@ TEST_F(WhileTest, DISABLED_ON_INTERPRETER(WhileWithCallInsideCondition)) { auto result_shape = ShapeUtil::MakeShape(S32, {}); // Create a computation for the condition: repeat for 5 iterations. 
- Computation condition_callee; + XlaComputation condition_callee; { - ComputationBuilder builder(client_, "condition_callee"); + XlaBuilder builder("condition_callee"); auto prev = builder.Parameter(0, result_shape, "prev"); builder.Tuple({builder.Gt(builder.ConstantR0(5), prev)}); condition_callee = builder.Build().ConsumeValueOrDie(); } - Computation condition; + XlaComputation condition; { - ComputationBuilder builder(client_, "condition"); + XlaBuilder builder("condition"); auto prev = builder.Parameter(0, result_shape, "prev"); auto result = builder.Call(condition_callee, {prev}); builder.GetTupleElement(result, 0); @@ -1189,20 +1184,19 @@ TEST_F(WhileTest, DISABLED_ON_INTERPRETER(WhileWithCallInsideCondition)) { } // Create a computation for the body: add 1 to the result variable. - Computation body; + XlaComputation body; { - ComputationBuilder builder(client_, "body"); + XlaBuilder builder("body"); auto prev = builder.Parameter(0, result_shape, "prev"); auto input = builder.ConstantR0(1); - auto result = builder.Add(input, prev); + builder.Add(input, prev); body = builder.Build().ConsumeValueOrDie(); } // Create a While node with computations for the condition and the body. - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto init = builder.ConstantR0(0); - auto result = builder.While(condition, body, init); - auto shape = builder.GetShape(result).ConsumeValueOrDie(); + builder.While(condition, body, init); ComputeAndCompareR0(&builder, 5, {}); } @@ -1214,28 +1208,28 @@ TEST_F(WhileTest, WhileWithLoopInvariantOperation) { {scalar_s32, matrix_shape, matrix_shape, matrix_shape}); // Create a computation for the condition: repeat for 5 iterations. 
- Computation condition; + XlaComputation condition; { - ComputationBuilder builder(client_, "condition"); + XlaBuilder builder("condition"); auto state = builder.Parameter(0, while_shape, "state"); builder.Gt(builder.ConstantR0(5), builder.GetTupleElement(state, 0)); TF_ASSERT_OK_AND_ASSIGN(condition, builder.Build()); } - Computation body; + XlaComputation body; { - ComputationBuilder builder(client_, "body"); + XlaBuilder builder("body"); auto state = builder.Parameter(0, while_shape, "state"); auto indvar = builder.GetTupleElement(state, 0); auto input_0 = builder.GetTupleElement(state, 1); auto input_1 = builder.GetTupleElement(state, 2); auto output = builder.Tanh(builder.Dot(input_0, input_1)); auto indvar_next = builder.Add(indvar, builder.ConstantR0(1)); - auto tuple_result = builder.Tuple({indvar_next, input_0, input_1, output}); + builder.Tuple({indvar_next, input_0, input_1, output}); TF_ASSERT_OK_AND_ASSIGN(body, builder.Build()); } - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto matrix_input = builder.Parameter(0, matrix_shape, "matrix"); auto init = builder.Tuple( {builder.ConstantR0(0), matrix_input, matrix_input, matrix_input}); @@ -1268,9 +1262,9 @@ void BM_WhileLoop(int num_iters) { // Create while condition computation with 'loop_limit'. const int32 loop_limit = 100; - Computation condition; + XlaComputation condition; { - ComputationBuilder builder(client, "condition"); + XlaBuilder builder("condition"); auto prev = builder.Parameter(0, loop_state_shape, "prev"); auto iteration = builder.GetTupleElement(prev, 0); builder.Lt(iteration, builder.ConstantR0(loop_limit)); @@ -1278,9 +1272,9 @@ void BM_WhileLoop(int num_iters) { } // Create while body computation with unit loop increment. 
- Computation body; + XlaComputation body; { - ComputationBuilder builder(client, "body"); + XlaBuilder builder("body"); auto prev = builder.Parameter(0, loop_state_shape, "prev"); // TupleElement 0 auto iteration = builder.GetTupleElement(prev, 0); @@ -1294,12 +1288,12 @@ void BM_WhileLoop(int num_iters) { auto starts = builder.ConstantR1({0, 0, 0}); // UpdateSlice. auto out1 = builder.DynamicUpdateSlice(input, update, starts); - auto result = builder.Tuple({out0, out1}); + builder.Tuple({out0, out1}); body = builder.Build().ConsumeValueOrDie(); } // Create a While instruction. - ComputationBuilder builder(client, "while"); + XlaBuilder builder("while"); auto zero = builder.ConstantR0(0.0); auto input = builder.Broadcast(zero, {seq_len, 1024, 1024}); auto init = builder.Tuple({builder.ConstantR0(0), input}); diff --git a/tensorflow/compiler/xla/tests/xla_hlo_profile_test.cc b/tensorflow/compiler/xla/tests/xla_hlo_profile_test.cc index ff3418a128eed82b730a6602d6e3faba4ad7be32..837a01e873e1cfbcf7c52c13833bbe4366cc9163 100644 --- a/tensorflow/compiler/xla/tests/xla_hlo_profile_test.cc +++ b/tensorflow/compiler/xla/tests/xla_hlo_profile_test.cc @@ -34,7 +34,7 @@ limitations under the License. 
namespace xla { namespace { -namespace se = ::perftools::gputools; + namespace gtl = ::tensorflow::gtl; class HloProfileTest : public ClientLibraryTestBase {}; @@ -129,18 +129,18 @@ void ExecuteAndFetchProfile(string* profile_output, LocalClient* client, auto* transfer_manager = backend->transfer_manager(); TF_ASSERT_OK_AND_ASSIGN( - std::unique_ptr lhs_arg, + ScopedShapedBuffer lhs_arg, transfer_manager->AllocateScopedShapedBuffer( lhs_arg_shape, allocator, backend->default_device_ordinal())); TF_ASSERT_OK(transfer_manager->TransferLiteralToDevice( - executor, *Literal::CreateFromShape(lhs_arg_shape), *lhs_arg)); + executor, *Literal::CreateFromShape(lhs_arg_shape), lhs_arg)); TF_ASSERT_OK_AND_ASSIGN( - std::unique_ptr rhs_arg, + ScopedShapedBuffer rhs_arg, transfer_manager->AllocateScopedShapedBuffer( rhs_arg_shape, allocator, backend->default_device_ordinal())); TF_ASSERT_OK(transfer_manager->TransferLiteralToDevice( - executor, *Literal::CreateFromShape(rhs_arg_shape), *rhs_arg)); + executor, *Literal::CreateFromShape(rhs_arg_shape), rhs_arg)); TF_ASSERT_OK_AND_ASSIGN( std::unique_ptr local_executable, @@ -165,7 +165,7 @@ void ExecuteAndFetchProfile(string* profile_output, LocalClient* client, backend->eigen_intra_op_thread_pool()); TF_ASSERT_OK_AND_ASSIGN( auto execution_result, - executable->ExecuteOnStream(&run_options, {lhs_arg.get(), rhs_arg.get()}, + executable->ExecuteOnStream(&run_options, {&lhs_arg, &rhs_arg}, &hlo_execution_profile)); (void)execution_result; diff --git a/tensorflow/compiler/xla/tests/xla_internal_test_main.cc b/tensorflow/compiler/xla/tests/xla_internal_test_main.cc index 0af40bc15a41f7c4ef6382b1a94412afe5741a86..a9f2915b458b1816926de727b3da21982d06f6c0 100644 --- a/tensorflow/compiler/xla/tests/xla_internal_test_main.cc +++ b/tensorflow/compiler/xla/tests/xla_internal_test_main.cc @@ -15,6 +15,7 @@ limitations under the License. 
#include "tensorflow/compiler/xla/legacy_flags/debug_options_flags.h" #include "tensorflow/core/lib/core/stringpiece.h" +#include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/test.h" #include "tensorflow/core/platform/test_benchmark.h" @@ -32,14 +33,15 @@ GTEST_API_ int main(int argc, char** argv) { // tests. for (int i = 1; i < argc; i++) { tensorflow::StringPiece arg(argv[i]); - if (arg == "--benchmarks" || arg.starts_with("--benchmarks=")) { + if (arg == "--benchmarks" || + tensorflow::str_util::StartsWith(arg, "--benchmarks=")) { const char* pattern = nullptr; - if (arg.starts_with("--benchmarks=")) { + if (tensorflow::str_util::StartsWith(arg, "--benchmarks=")) { pattern = argv[i] + strlen("--benchmarks="); } else { // Handle flag of the form '--benchmarks foo' (no '='). if (i + 1 >= argc || - tensorflow::StringPiece(argv[i + 1]).starts_with("--")) { + tensorflow::str_util::StartsWith(argv[i + 1], "--")) { LOG(ERROR) << "--benchmarks flag requires an argument."; return 2; } diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc index e60a5a4919f2207939821e787c3c59a08ff3ba4e..95d3fd28b38a5945f5ed685f07db046701250273 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc +++ b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc @@ -470,6 +470,7 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, case HloOpcode::kRoundNearestAfz: case HloOpcode::kBitcast: case HloOpcode::kCeil: + case HloOpcode::kClz: case HloOpcode::kCopy: case HloOpcode::kCos: case HloOpcode::kExp: diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc index 863081d654390440aa6506bab4576b3cc5c1cbd1..adc8b1d620eb65fdca19072831360b71847abf9e 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc +++ 
b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc @@ -18,6 +18,7 @@ limitations under the License. #include #include "tensorflow/core/lib/core/status_test_util.h" #include "tensorflow/core/lib/core/stringpiece.h" +#include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/platform/test.h" namespace xla { @@ -894,7 +895,7 @@ class HloParserTest : public ::testing::Test, public ::testing::WithParamInterface { protected: static void ExpectHasSubstr(StringPiece s, StringPiece expected) { - EXPECT_TRUE(StringPiece(s).contains(expected)) + EXPECT_TRUE(tensorflow::str_util::StrContains(s, expected)) << "'" << s << "' does not contain '" << expected << "'"; } diff --git a/tensorflow/compiler/xla/types.h b/tensorflow/compiler/xla/types.h index 9fa4297523bab0748863479be52dff1b7b523a8b..b645acb700b0f168112a40c9c72b4669435f717d 100644 --- a/tensorflow/compiler/xla/types.h +++ b/tensorflow/compiler/xla/types.h @@ -46,4 +46,10 @@ using ::Eigen::half; } // namespace xla +// Alias namespace ::stream_executor as ::xla::se. +namespace stream_executor {} +namespace xla { +namespace se = ::stream_executor; +} // namespace xla + #endif // TENSORFLOW_COMPILER_XLA_TYPES_H_ diff --git a/tensorflow/compiler/xla/util.h b/tensorflow/compiler/xla/util.h index 2da9f9ed6f40fcf5b2512f974519df0b355da10f..be33bd6dd1304fa8fc6e5aed1d4c4d65bf97e692 100644 --- a/tensorflow/compiler/xla/util.h +++ b/tensorflow/compiler/xla/util.h @@ -528,6 +528,16 @@ bool IsInt32(T x) { // value is implementation-defined." return static_cast(x) == x; } + +template +Status EraseElementFromVector(std::vector* container, const T& value) { + // c_find returns a const_iterator which does not seem to work on gcc 4.8.4, + // and this breaks the ubuntu/xla_gpu build bot. 
+ auto it = std::find(container->begin(), container->end(), value); + TF_RET_CHECK(it != container->end()); + container->erase(it); + return Status::OK(); +} } // namespace xla #define XLA_LOG_LINES(SEV, STRING) \ diff --git a/tensorflow/compiler/xla/xla.bzl b/tensorflow/compiler/xla/xla.bzl index 6b136d333bbf079efd314833f46fe3b98743fbac..1439f1bcc5cec39203a7cb4b1f8604e7349382c6 100644 --- a/tensorflow/compiler/xla/xla.bzl +++ b/tensorflow/compiler/xla/xla.bzl @@ -6,7 +6,9 @@ load("//tensorflow/core:platform/default/build_config_root.bzl", "if_static") # xla_proto_library() is a convenience wrapper around cc_proto_library. -def xla_proto_library(name, srcs=[], deps=[], visibility=None, testonly=0): +def xla_proto_library(name, srcs=[], deps=[], visibility=None, testonly=0, **kwargs): + if kwargs.get('use_grpc_plugin'): + kwargs['use_grpc_namespace'] = True cc_proto_library(name=name, srcs=srcs, deps=deps, @@ -16,6 +18,13 @@ def xla_proto_library(name, srcs=[], deps=[], visibility=None, testonly=0): ), protoc="@protobuf_archive//:protoc", testonly=testonly, - visibility=visibility,) + visibility=visibility, + **kwargs) + +def xla_py_grpc_library(**kwargs): + # Note: we don't currently define any special targets for Python GRPC in OSS. + _ignore = kwargs + pass + ORC_JIT_MEMORY_MAPPER_TARGETS = [] diff --git a/tensorflow/compiler/xla/xla.proto b/tensorflow/compiler/xla/xla.proto index 5cb18113e5ba9c49809c4410d56ca7bb5a50dae5..f619b8dc24038af64a27fc0565c74447ca9d09cf 100644 --- a/tensorflow/compiler/xla/xla.proto +++ b/tensorflow/compiler/xla/xla.proto @@ -189,6 +189,12 @@ message DebugOptions { // directory. string xla_dump_per_pass_hlo_proto_to = 96; + // Generate calls to MKL-DNN in the CPU backend. + bool xla_cpu_use_mkl_dnn = 97; + + // Maximum kernel unroll factor for the GPU backend. + int32 xla_gpu_max_kernel_unroll_factor = 98; + // Extra options to pass to the compilation backend; specific interpretation // of these values is left to the backend. 
map xla_backend_extra_options = 500; @@ -414,6 +420,11 @@ message ComputeConstantRequest { repeated LiteralProto parameters = 4; } +message ComputeConstantGraphRequest { + HloModuleProto computation = 1; + Layout output_layout = 2; +} + message ComputeConstantResponse { // A LiteralProto is returned directly for this request, instead of a // ComputationDataHandle. diff --git a/tensorflow/compiler/xla/xla_data.proto b/tensorflow/compiler/xla/xla_data.proto index 1f16e6d25178fd9c10a30b0c500e090ee2e08117..d23f9e5918f54c4f385f3b16fd84bbee51ed5a95 100644 --- a/tensorflow/compiler/xla/xla_data.proto +++ b/tensorflow/compiler/xla/xla_data.proto @@ -355,17 +355,19 @@ message WindowDimension { // positions of the window in this dimension. int64 stride = 2; - // If positive, means the amount of padding with zeroes to add to the base - // area at the low end of this dimension; if negative, its negative means the - // number of elements removed from the low end of this dimension. For example, - // in the horizontal dimension of a rectangle, this would be the number of - // zeroes to pad on the left, given that indices increase when going right. + // If positive, means the amount of padding to add to the base area at the low + // end of this dimension; if negative, its negative means the number of + // elements removed from the low end of this dimension. For example, in the + // horizontal dimension of a rectangle, this would be the number of padding + // values to pad on the left, given that indices increase when going right. + // The actual padding value depends upon the context. Convolution pads with + // zeros. ReduceWindow and SelectAndScatter pads with the reduce function's + // init value. int64 padding_low = 3; - // As padding_low, but on the high end of this dimension. For - // example, in the horizontal dimension of a rectangle, this would - // be the number of zeroes to pad on the right, given that indices - // increase when going right. 
+ // As padding_low, but on the high end of this dimension. For example, in the + // horizontal dimension of a rectangle, this would be the number of values to + // pad on the right, given that indices increase when going right. int64 padding_high = 4; // Dilation factor of the sliding window in this dimension. A dilation factor @@ -799,6 +801,9 @@ enum UnaryOperation { // Elementwise, extract real component of complex x. UNOP_IMAG = 16; + + // Elementwise, computes clz(x). + UNOP_CLZ = 17; } message UnaryOpRequest { diff --git a/tensorflow/contrib/BUILD b/tensorflow/contrib/BUILD index 0cebb49afbba2657429542bf46a9dd2497e1517e..d28392a62c2fa9e34ec7912c77d29ac28adb5dee 100644 --- a/tensorflow/contrib/BUILD +++ b/tensorflow/contrib/BUILD @@ -8,6 +8,7 @@ package(default_visibility = ["//tensorflow:__subpackages__"]) load("//third_party/mpi:mpi.bzl", "if_mpi") load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda") load("@local_config_tensorrt//:build_defs.bzl", "if_tensorrt") +load("//tensorflow:tensorflow.bzl", "if_not_windows") py_library( name = "contrib_py", @@ -24,6 +25,7 @@ py_library( "//tensorflow/contrib/batching:batch_py", "//tensorflow/contrib/bayesflow:bayesflow_py", "//tensorflow/contrib/boosted_trees:init_py", + "//tensorflow/contrib/checkpoint/python:checkpoint", "//tensorflow/contrib/cloud:cloud_py", "//tensorflow/contrib/cluster_resolver:cluster_resolver_pip", "//tensorflow/contrib/cluster_resolver:cluster_resolver_py", @@ -40,7 +42,6 @@ py_library( "//tensorflow/contrib/estimator:estimator_py", "//tensorflow/contrib/factorization:factorization_py", "//tensorflow/contrib/feature_column:feature_column_py", - "//tensorflow/contrib/ffmpeg:ffmpeg_ops_py", "//tensorflow/contrib/framework:framework_py", "//tensorflow/contrib/fused_conv:fused_conv_py", "//tensorflow/contrib/gan", @@ -63,7 +64,6 @@ py_library( "//tensorflow/contrib/linalg:linalg_py", "//tensorflow/contrib/linear_optimizer:sdca_estimator_py", 
"//tensorflow/contrib/linear_optimizer:sdca_ops_py", - "//tensorflow/contrib/lite/python:lite", "//tensorflow/contrib/lookup:lookup_py", "//tensorflow/contrib/losses:losses_py", "//tensorflow/contrib/losses:metric_learning_py", @@ -78,14 +78,17 @@ py_library( "//tensorflow/contrib/optimizer_v2:optimizer_v2_py", "//tensorflow/contrib/periodic_resample:init_py", "//tensorflow/contrib/predictor", + "//tensorflow/contrib/proto", "//tensorflow/contrib/quantization:quantization_py", "//tensorflow/contrib/quantize:quantize_graph", "//tensorflow/contrib/autograph", "//tensorflow/contrib/receptive_field:receptive_field_py", + "//tensorflow/contrib/recurrent:recurrent_py", "//tensorflow/contrib/reduce_slice_ops:reduce_slice_ops_py", "//tensorflow/contrib/remote_fused_graph/pylib:remote_fused_graph_ops_py", "//tensorflow/contrib/resampler:resampler_py", "//tensorflow/contrib/rnn:rnn_py", + "//tensorflow/contrib/rpc", "//tensorflow/contrib/saved_model:saved_model_py", "//tensorflow/contrib/seq2seq:seq2seq_py", "//tensorflow/contrib/signal:signal_py", @@ -117,7 +120,10 @@ py_library( "//tensorflow/contrib/kafka", ], "//conditions:default": [], - }), + }) + if_not_windows([ + "//tensorflow/contrib/ffmpeg:ffmpeg_ops_py", + "//tensorflow/contrib/lite/python:lite", # unix dependency, need to fix code + ]), ) cc_library( diff --git a/tensorflow/contrib/__init__.py b/tensorflow/contrib/__init__.py index a8e05df708135a3b5edab55dd6ed6b62429bcde1..0d163daa6e23435fe42aaa046860ea434039a619 100644 --- a/tensorflow/contrib/__init__.py +++ b/tensorflow/contrib/__init__.py @@ -1,3 +1,4 @@ +# pylint: disable=g-import-not-at-top # Copyright 2015 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -18,9 +19,12 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import os + # Add projects here, they will show up under tf.contrib. 
from tensorflow.contrib import batching from tensorflow.contrib import bayesflow +from tensorflow.contrib import checkpoint from tensorflow.contrib import cloud from tensorflow.contrib import cluster_resolver from tensorflow.contrib import coder @@ -61,11 +65,14 @@ from tensorflow.contrib import nn from tensorflow.contrib import opt from tensorflow.contrib import periodic_resample from tensorflow.contrib import predictor +from tensorflow.contrib import proto from tensorflow.contrib import quantization from tensorflow.contrib import quantize +from tensorflow.contrib import recurrent from tensorflow.contrib import reduce_slice_ops from tensorflow.contrib import resampler from tensorflow.contrib import rnn +from tensorflow.contrib import rpc from tensorflow.contrib import saved_model from tensorflow.contrib import seq2seq from tensorflow.contrib import signal @@ -84,7 +91,8 @@ from tensorflow.contrib import tpu from tensorflow.contrib import training from tensorflow.contrib import util from tensorflow.contrib.eager.python import tfe as eager -from tensorflow.contrib.lite.python import lite +if os.name != "nt": + from tensorflow.contrib.lite.python import lite from tensorflow.contrib.optimizer_v2 import optimizer_v2_symbols as optimizer_v2 from tensorflow.contrib.receptive_field import receptive_field_api as receptive_field from tensorflow.contrib.remote_fused_graph import pylib as remote_fused_graph @@ -94,6 +102,7 @@ from tensorflow.contrib.summary import summary from tensorflow.python.util.lazy_loader import LazyLoader ffmpeg = LazyLoader("ffmpeg", globals(), "tensorflow.contrib.ffmpeg") +del os del LazyLoader del absolute_import diff --git a/tensorflow/contrib/android/asset_manager_filesystem.cc b/tensorflow/contrib/android/asset_manager_filesystem.cc index 380a652435ad089f46f3ca80e4fd43097fd96e10..513d519eabbd54f46fde9ec0f004247c02277732 100644 --- a/tensorflow/contrib/android/asset_manager_filesystem.cc +++ b/tensorflow/contrib/android/asset_manager_filesystem.cc 
@@ -19,6 +19,7 @@ limitations under the License. #include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/platform/env.h" +#include "tensorflow/core/platform/file_system_helper.h" namespace tensorflow { namespace { @@ -228,9 +229,8 @@ string AssetManagerFileSystem::NormalizeDirectoryPath(const string& fname) { } string AssetManagerFileSystem::RemoveAssetPrefix(const string& name) { - string output(name); - StringPiece piece(output); - piece.Consume(prefix_); + StringPiece piece(name); + str_util::ConsumePrefix(&piece, prefix_); return piece.ToString(); } @@ -243,6 +243,11 @@ bool AssetManagerFileSystem::DirectoryExists(const std::string& fname) { return AAssetDir_getNextFileName(dir.get()) != NULL; } +Status AssetManagerFileSystem::GetMatchingPaths(const string& pattern, + std::vector* results) { + return internal::GetMatchingPaths(this, Env::Default(), pattern, results); +} + Status AssetManagerFileSystem::NewWritableFile( const string& fname, std::unique_ptr* result) { return errors::Unimplemented("Asset storage is read only."); diff --git a/tensorflow/contrib/android/asset_manager_filesystem.h b/tensorflow/contrib/android/asset_manager_filesystem.h index 665304b5eef1f8a3633c8c522259e20d744b1808..a87ff42ae217c429ecf5d2458b88b3431551ad97 100644 --- a/tensorflow/contrib/android/asset_manager_filesystem.h +++ b/tensorflow/contrib/android/asset_manager_filesystem.h @@ -66,6 +66,9 @@ class AssetManagerFileSystem : public FileSystem { Status DeleteDir(const string& d) override; Status RenameFile(const string& s, const string& t) override; + Status GetMatchingPaths(const string& pattern, + std::vector* results) override; + private: string RemoveAssetPrefix(const string& name); diff --git a/tensorflow/contrib/autograph/__init__.py b/tensorflow/contrib/autograph/__init__.py index a39f44b21aa0ddf683b30c18bbe15a43262f7db2..3386c4eca4b93e850f6fe3c6239d29c61d787ece 100644 --- a/tensorflow/contrib/autograph/__init__.py +++ 
b/tensorflow/contrib/autograph/__init__.py @@ -21,6 +21,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +# TODO(mdan): Bring only the relevant symbols to the top level. from tensorflow.contrib.autograph import utils from tensorflow.contrib.autograph.impl.api import convert from tensorflow.contrib.autograph.impl.api import converted_call diff --git a/tensorflow/contrib/autograph/converters/BUILD b/tensorflow/contrib/autograph/converters/BUILD index 608bd82722fa45a7009bd597cfd74060b1239a3b..8f9bffa55e44e4942bb3845945b3d440c7957cc9 100644 --- a/tensorflow/contrib/autograph/converters/BUILD +++ b/tensorflow/contrib/autograph/converters/BUILD @@ -24,7 +24,6 @@ py_library( "continue_statements.py", "control_flow.py", "decorators.py", - "for_loops.py", "ifexp.py", "list_comprehension.py", "lists.py", @@ -49,6 +48,7 @@ py_library( visibility = ["//tensorflow:__subpackages__"], deps = [ ":converters", + "//tensorflow/contrib/autograph/operators", "//tensorflow/contrib/autograph/pyct", "//tensorflow/contrib/autograph/pyct/static_analysis", "//tensorflow/contrib/autograph/utils", @@ -61,6 +61,7 @@ py_test( name = "asserts_test", srcs = ["asserts_test.py"], srcs_version = "PY2AND3", + tags = ["no_windows"], deps = [ ":test_lib", "//tensorflow/python:client_testlib", @@ -81,6 +82,7 @@ py_test( name = "builtin_functions_test", srcs = ["builtin_functions_test.py"], srcs_version = "PY2AND3", + tags = ["no_windows"], deps = [ ":test_lib", "//tensorflow/python:client_testlib", @@ -92,6 +94,7 @@ py_test( size = "large", srcs = ["call_trees_test.py"], srcs_version = "PY2AND3", + tags = ["no_windows"], deps = [ ":test_lib", "//tensorflow/contrib/autograph/impl", @@ -129,16 +132,6 @@ py_test( ], ) -py_test( - name = "for_loops_test", - srcs = ["for_loops_test.py"], - srcs_version = "PY2AND3", - deps = [ - ":test_lib", - "//tensorflow/python:client_testlib", - ], -) - py_test( name = "name_scopes_test", srcs = 
["name_scopes_test.py"], diff --git a/tensorflow/contrib/autograph/converters/asserts.py b/tensorflow/contrib/autograph/converters/asserts.py index f011a97ade94f2979486ef6329673a0160dd9bac..2d9e2c58e3afcef5c18f477a7a29e518e98e672e 100644 --- a/tensorflow/contrib/autograph/converters/asserts.py +++ b/tensorflow/contrib/autograph/converters/asserts.py @@ -27,8 +27,6 @@ from tensorflow.contrib.autograph.pyct import transformer class AssertsTransformer(transformer.Base): """Transforms Print nodes to Call so they can be handled as functions.""" - # pylint:disable=invalid-name - def visit_Assert(self, node): self.generic_visit(node) @@ -44,9 +42,7 @@ class AssertsTransformer(transformer.Base): elif isinstance(node.msg, gast.Str): return templates.replace(template, test=node.test, msg=node.msg) else: - raise NotImplementedError('Can only convert string messages for now.') - - # pylint:enable=invalid-name + raise NotImplementedError('can only convert string messages for now.') def transform(node, context): diff --git a/tensorflow/contrib/autograph/converters/break_statements.py b/tensorflow/contrib/autograph/converters/break_statements.py index 48026bccab5ff3474e9d54e365dad4a589b931fc..91de82f0a78ccae711298d78364810dd099a5c38 100644 --- a/tensorflow/contrib/autograph/converters/break_statements.py +++ b/tensorflow/contrib/autograph/converters/break_statements.py @@ -18,103 +18,108 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import gast - from tensorflow.contrib.autograph.pyct import anno from tensorflow.contrib.autograph.pyct import templates from tensorflow.contrib.autograph.pyct import transformer from tensorflow.contrib.autograph.pyct.static_analysis.annos import NodeAnno -class BreakCanonicalizationTransformer(transformer.Base): - """Canonicalizes break statements into additional conditionals.""" +# Tags for local state. 
+BREAK_USED = 'break_used' +CONTROL_VAR_NAME = 'control_var_name' - def __init__(self, context): - super(BreakCanonicalizationTransformer, self).__init__(context) - # This is a stack structure, to correctly process nested loops. - self.break_uses = [] - def _create_break_check(self): - template = """ - (not var_name) - """ - expr, = templates.replace(template, var_name=self.break_uses[-1][1]) - return expr.value +class BreakStatementTransformer(transformer.Base): + """Canonicalizes break statements into additional conditionals.""" + + def _track_body(self, nodes, break_var): + self.enter_local_scope() + self.set_local(CONTROL_VAR_NAME, break_var) + nodes = self.visit_block(nodes) + break_used = self.get_local(BREAK_USED, False) + self.exit_local_scope() + return nodes, break_used - def _create_break_trigger(self): + def visit_Break(self, node): + self.set_local(BREAK_USED, True) + var_name = self.get_local(CONTROL_VAR_NAME) + # TODO(mdan): This will fail when expanded inside a top-level else block. template = """ var_name = True + continue """ - block = templates.replace(template, var_name=self.break_uses[-1][1]) - block.append(gast.Continue()) - return block + return templates.replace(template, var_name=var_name) - def _create_break_init(self): + def _guard_if_present(self, block, var_name): + """Prevents the block from executing if var_name is set.""" + if not block: + return block template = """ - var_name = False - """ - assign, = templates.replace(template, var_name=self.break_uses[-1][1]) - return assign - - # TODO(mdan): Surely the transformer supports this better? 
- def _manual_visit_list(self, block): - new_block = [] - for n in block: - new_n = self.visit(n) - if isinstance(new_n, list): - new_block.extend(new_n) - else: - new_block.append(new_n) - return new_block + if not var_name: + block + """ + node = templates.replace( + template, + var_name=var_name, + block=block) + return node def visit_While(self, node): - self.generic_visit(node.test) scope = anno.getanno(node, NodeAnno.BODY_SCOPE) - - break_var = self.context.namer.new_symbol('break_requested', - scope.referenced) - self.break_uses.append([False, break_var]) - node.body = self._manual_visit_list(node.body) - if self.break_uses[-1][0]: - node.test = gast.BoolOp(gast.And(), [ - node.test, - gast.UnaryOp(gast.Not(), gast.Name(break_var, gast.Load(), None)) - ]) - final_nodes = [self._create_break_init(), node] - else: - final_nodes = node - self.break_uses.pop() - - for n in node.orelse: - self.generic_visit(n) - return final_nodes + break_var = self.context.namer.new_symbol('break__', scope.referenced) + + node.test = self.visit(node.test) + node.body, break_used = self._track_body(node.body, break_var) + # A break in the else clause applies to the containing scope. + node.orelse = self.visit_block(node.orelse) + + if break_used: + template = """ + var_name = False + while test and not var_name: + body + else: + orelse + """ + # Python's else clause only triggers if the loop exited cleanly (e.g. + # break did not trigger). 
+ node = templates.replace( + template, + var_name=break_var, + test=node.test, + body=node.body, + orelse=self._guard_if_present(node.orelse, break_var)) + + return node def visit_For(self, node): - self.generic_visit(node.target) - self.generic_visit(node.iter) scope = anno.getanno(node, NodeAnno.BODY_SCOPE) - - break_var = self.context.namer.new_symbol('break_requested', - scope.referenced) - self.break_uses.append([False, break_var]) - node.body = self._manual_visit_list(node.body) - if self.break_uses[-1][0]: - anno.setanno(node, 'extra_cond', - gast.UnaryOp(gast.Not(), - gast.Name(break_var, gast.Load(), None))) - final_nodes = [self._create_break_init(), node] - else: - final_nodes = node - self.break_uses.pop() - - for n in node.orelse: - self.generic_visit(n) - return final_nodes - - def visit_Break(self, node): - self.break_uses[-1][0] = True - return self._create_break_trigger() + break_var = self.context.namer.new_symbol('break__', scope.referenced) + + node.target = self.visit(node.target) + node.iter = self.visit(node.iter) + node.body, break_used = self._track_body(node.body, break_var) + # A break in the else clause applies to the containing scope. + node.orelse = self.visit_block(node.orelse) + + if break_used: + node.orelse = self._guard_if_present(node.orelse, break_var) + template = """ + var_name = False + for_stmt + """ + # Python's else clause only triggers if the loop exited cleanly (e.g. + # break did not trigger). 
+ node = templates.replace( + template, + var_name=break_var, + for_stmt=node) + extra_cond = templates.replace_as_expression( + 'not var_name', var_name=break_var) + anno.setanno(node[1], 'extra_cond', extra_cond) + + return node def transform(node, context): - return BreakCanonicalizationTransformer(context).visit(node) + return BreakStatementTransformer(context).visit(node) diff --git a/tensorflow/contrib/autograph/converters/break_statements_test.py b/tensorflow/contrib/autograph/converters/break_statements_test.py index dd4914a022f57b3bb4a19ec132f311f12269fa9e..1af59e9b5260fe0d3a3ef72c7a003dc451e230f3 100644 --- a/tensorflow/contrib/autograph/converters/break_statements_test.py +++ b/tensorflow/contrib/autograph/converters/break_statements_test.py @@ -25,7 +25,7 @@ from tensorflow.python.platform import test class BreakCanonicalizationTest(converter_test_base.TestCase): - def test_basic_break(self): + def test_basic_while(self): def test_fn(x): v = [] @@ -40,13 +40,11 @@ class BreakCanonicalizationTest(converter_test_base.TestCase): node = break_statements.transform(node, self.ctx) with self.compiled(node) as result: - self.assertEqual(test_fn(0), result.test_fn(0)) - self.assertEqual(test_fn(1), result.test_fn(1)) - self.assertEqual(test_fn(2), result.test_fn(2)) - self.assertEqual(test_fn(3), result.test_fn(3)) - self.assertEqual(test_fn(4), result.test_fn(4)) + self.assertEqual([], result.test_fn(0)) + self.assertEqual([], result.test_fn(1)) + self.assertEqual([3], result.test_fn(4)) - def test_basic_break_for_loop(self): + def test_basic_for(self): def test_fn(a): v = [] @@ -57,30 +55,18 @@ class BreakCanonicalizationTest(converter_test_base.TestCase): v.append(x) return v - # The break is incompletely canonicalized for for loops. Everything is - # in place except for the condition verification. 
- def test_equiv_fn(a): - v = [] - for x in a: - x -= 1 - if x % 2 == 0: - continue - v.append(x) - return v - node = self.parse_and_analyze(test_fn, {}) node = break_statements.transform(node, self.ctx) with self.compiled(node) as result: - # The break is incompletely canonicalized. Everything is in place, but - # the loop does not break. - self.assertEqual(test_equiv_fn([]), result.test_fn([])) - self.assertEqual(test_equiv_fn([1]), result.test_fn([1])) - self.assertEqual(test_equiv_fn([2]), result.test_fn([2])) - self.assertEqual( - test_equiv_fn([1, 2, 3, 4]), result.test_fn([1, 2, 3, 4])) + # The break is incompletely canonicalized. The loop will not interrupt, + # but the section following the break will be skipped. + self.assertEqual([], result.test_fn([])) + self.assertEqual([3, 3], result.test_fn([4, 4])) + self.assertEqual([3], result.test_fn([4, 5])) + self.assertEqual([3], result.test_fn([5, 4])) - def test_continue_deeply_nested(self): + def test_deeply_nested(self): def test_fn(x): v = [] @@ -93,7 +79,7 @@ class BreakCanonicalizationTest(converter_test_base.TestCase): u.append(x) else: w.append(x) - continue + break v.append(x) return v, u, w @@ -101,11 +87,60 @@ class BreakCanonicalizationTest(converter_test_base.TestCase): node = break_statements.transform(node, self.ctx) with self.compiled(node) as result: - self.assertEqual(test_fn(0), result.test_fn(0)) - self.assertEqual(test_fn(1), result.test_fn(1)) - self.assertEqual(test_fn(2), result.test_fn(2)) - self.assertEqual(test_fn(3), result.test_fn(3)) - self.assertEqual(test_fn(4), result.test_fn(4)) + self.assertEqual(([], [], []), result.test_fn(0)) + self.assertEqual(([2, 1], [2], [0]), result.test_fn(3)) + self.assertEqual(([10, 9, 8, 7], [10, 8], [6]), result.test_fn(11)) + + def test_nested_loops(self): + + def test_fn(x): + v = [] + u = [] + while x > 0: + x -= 1 + y = x + while y > 0: + y -= 1 + if y % 2 == 0: + break + u.append(y) + if x == 0: + break + v.append(x) + return v, u + + node 
= self.parse_and_analyze(test_fn, {}) + node = break_statements.transform(node, self.ctx) + + with self.compiled(node) as result: + self.assertEqual(([], []), result.test_fn(0)) + self.assertEqual(([1], []), result.test_fn(2)) + self.assertEqual(([2, 1], [1]), result.test_fn(3)) + self.assertEqual(([4, 3, 2, 1], [3, 1]), result.test_fn(5)) + + def test_loop_else(self): + + def test_fn(x): + v = [] + u = [] + while x > 0: + x -= 1 + y = x + while y > 1: + break + else: + u.append(y) + break + v.append(x) + return v, u + + node = self.parse_and_analyze(test_fn, {}) + node = break_statements.transform(node, self.ctx) + + with self.compiled(node) as result: + self.assertEqual(([], []), result.test_fn(0)) + self.assertEqual(([], [1]), result.test_fn(2)) + self.assertEqual(([2], [1]), result.test_fn(3)) if __name__ == '__main__': diff --git a/tensorflow/contrib/autograph/converters/builtin_functions.py b/tensorflow/contrib/autograph/converters/builtin_functions.py index 0349ce29ceb097fbebc36a0378b9072750772416..317711a866f731de1b497295a2752dee0eb544f5 100644 --- a/tensorflow/contrib/autograph/converters/builtin_functions.py +++ b/tensorflow/contrib/autograph/converters/builtin_functions.py @@ -34,24 +34,24 @@ class BuiltinFunctionTransformer(transformer.Base): def __init__(self, context): super(BuiltinFunctionTransformer, self).__init__(context) - # pylint:disable=invalid-name - def _convert_builtin(self, node): template = """ - autograph_utils.dynamic_builtin(func, args) + ag__.utils.dynamic_builtin(func, args) """ return templates.replace(template, func=node.func, args=node.args)[0].value def _convert_print(self, node): template = """ - autograph_utils.dynamic_print(args) + ag__.utils.dynamic_print(args) """ return templates.replace(template, args=node.args)[0].value def visit_Call(self, node): self.generic_visit(node) # TODO(mdan): This won't work if the function was hidden. 
- if isinstance(node.func, gast.Name) and node.func.id in ('len', 'range'): + # TODO(mdan): Rely on the live_val and use inspect_utils.is_builtin instead. + if (isinstance(node.func, gast.Name) and + node.func.id in ('len', 'range', 'xrange')): return self._convert_builtin(node) # Print needs to be handled separately because it can be read as statement. if isinstance(node.func, gast.Name) and node.func.id == 'print': @@ -70,8 +70,6 @@ class BuiltinFunctionTransformer(transformer.Base): function_call = templates.replace(template, fname='print', args=args)[0] return self.visit(function_call) - # pylint:enable=invalid-name - def transform(node, context): return BuiltinFunctionTransformer(context).visit(node) diff --git a/tensorflow/contrib/autograph/converters/builtin_functions_test.py b/tensorflow/contrib/autograph/converters/builtin_functions_test.py index ac7e756c47c31816ad34a7ea6926917712afa6c3..30272409df322560b04ba75b3e1cb6f9ad5ff0af 100644 --- a/tensorflow/contrib/autograph/converters/builtin_functions_test.py +++ b/tensorflow/contrib/autograph/converters/builtin_functions_test.py @@ -26,8 +26,6 @@ from tensorflow.contrib.autograph.converters import builtin_functions from tensorflow.contrib.autograph.converters import converter_test_base from tensorflow.python.framework import constant_op from tensorflow.python.ops import array_ops -from tensorflow.python.ops import logging_ops -from tensorflow.python.ops import script_ops from tensorflow.python.platform import test @@ -49,7 +47,7 @@ class BuiltinFunctionsTest(converter_test_base.TestCase): self.assertEqual(3, result.test_fn([0, 0, 0])) - def test_print_with_op(self): + def test_print(self): def test_fn(a): print(a) @@ -57,14 +55,12 @@ class BuiltinFunctionsTest(converter_test_base.TestCase): node = self.parse_and_analyze(test_fn, {'print': print}) node = builtin_functions.transform(node, self.ctx) - # Note: it's relevant not to include script_ops.py_func here, to verify - # that tf.Print is used. 
- with self.compiled(node, logging_ops.Print) as result: + with self.compiled(node) as result: with self.test_session() as sess: try: out_capturer = six.StringIO() sys.stdout = out_capturer - result.test_fn('a') + result.test_fn(constant_op.constant('a')) sess.run(sess.graph.get_operations()) self.assertEqual(out_capturer.getvalue(), 'a\n') finally: @@ -72,41 +68,19 @@ class BuiltinFunctionsTest(converter_test_base.TestCase): def test_print_with_op_multiple_values(self): - def test_fn(a, b): - print(a, b) - - node = self.parse_and_analyze(test_fn, {'print': print}) - node = builtin_functions.transform(node, self.ctx) - - # Note: it's relevant not to include script_ops.py_func here, to verify - # that tf.Print is used. - with self.compiled(node, logging_ops.Print) as result: - with self.test_session() as sess: - try: - out_capturer = six.StringIO() - sys.stdout = out_capturer - result.test_fn('a', 1) - sess.run(sess.graph.get_operations()) - self.assertEqual(out_capturer.getvalue(), 'a 1\n') - finally: - sys.stdout = sys.__stdout__ - - def test_print_with_py_func(self): - def test_fn(a, b, c): print(a, b, c) node = self.parse_and_analyze(test_fn, {'print': print}) node = builtin_functions.transform(node, self.ctx) - # Note: it's relevant not to include logging_ops.Print here, to verify - # that py_func is used. 
- with self.compiled(node, script_ops.py_func) as result: + with self.compiled(node) as result: with self.test_session() as sess: try: out_capturer = six.StringIO() sys.stdout = out_capturer - result.test_fn('a', 1, [2, 3]) + result.test_fn( + constant_op.constant('a'), constant_op.constant(1), [2, 3]) sess.run(sess.graph.get_operations()) self.assertEqual(out_capturer.getvalue(), 'a 1 [2, 3]\n') finally: diff --git a/tensorflow/contrib/autograph/converters/call_trees.py b/tensorflow/contrib/autograph/converters/call_trees.py index 61f6bfd7e733fc3e2e0bea35a955509c39d57bc9..554f0471d44d54194c45c3855b1483796ae65a6a 100644 --- a/tensorflow/contrib/autograph/converters/call_trees.py +++ b/tensorflow/contrib/autograph/converters/call_trees.py @@ -23,7 +23,6 @@ from __future__ import division from __future__ import print_function from collections import namedtuple -import types import gast @@ -114,7 +113,7 @@ class CallTreeTransformer(transformer.Base): def _function_is_compilable(self, target_entity): """Determines whether an entity can be compiled at all.""" # TODO(mdan): This is just a placeholder. Implement. - return not isinstance(target_entity, types.BuiltinFunctionType) + return not inspect_utils.isbuiltin(target_entity) def _should_compile(self, node, fqn): """Determines whether an entity should be compiled in the context.""" @@ -147,7 +146,7 @@ class CallTreeTransformer(transformer.Base): # Inspect the target function decorators. If any include a @convert # or @graph_ready annotation, then they must be called as they are. # TODO(mdan): This may be quite heavy. - # To parse and re-analize each function for every call site could be quite + # To parse and re-analyze each function for every call site could be quite # wasteful. Maybe we could cache the parsed AST? 
try: target_node, _ = parser.parse_entity(target_entity) @@ -199,7 +198,7 @@ class CallTreeTransformer(transformer.Base): def _wrap_to_py_func_no_return(self, node): # TODO(mdan): Properly handle varargs, etc. template = """ - autograph_utils.wrap_py_func(func, None, (args,), kwargs, True) + ag__.utils.wrap_py_func(func, None, (args,), kwargs, True) """ return templates.replace( template, @@ -210,7 +209,7 @@ class CallTreeTransformer(transformer.Base): def _wrap_to_py_func_single_return(self, node, dtype): # TODO(mdan): Properly handle varargs, etc. template = """ - autograph_utils.wrap_py_func(func, dtype, (args,), kwargs, False) + ag__.utils.wrap_py_func(func, dtype, (args,), kwargs, False) """ return templates.replace_as_expression( template, @@ -238,7 +237,7 @@ class CallTreeTransformer(transformer.Base): # Before we could convert all the time though, we'd need a reasonable # caching mechanism. template = """ - autograph_api.converted_call(func, True, False, {}, args) + ag__.converted_call(func, True, False, {}, args) """ call_expr = templates.replace(template, func=node.func, args=node.args) new_call = call_expr[0].value @@ -246,8 +245,6 @@ class CallTreeTransformer(transformer.Base): new_call.keywords = node.keywords return new_call - # pylint:disable=invalid-name - def visit_Expr(self, node): if isinstance(node.value, gast.Call): if anno.hasanno(node.value.func, 'live_val'): @@ -295,15 +292,17 @@ class CallTreeTransformer(transformer.Base): raise NotImplementedError( 'py_func with return values (unknown function)') else: - if self.context.recursive: + if ast_util.matches(node, 'super(_)'): + # super() calls are preserved. The class conversion mechanism will + # ensure that they return the correct value. + pass + elif self.context.recursive: node = self._insert_dynamic_conversion(node) else: # Unresolved functions are allowed in non-recursive mode. 
pass return node - # pylint:enable=invalid-name - def transform(node, context, uncompiled_modules, nocompile_decorators): """Transform function call to the compiled counterparts. diff --git a/tensorflow/contrib/autograph/converters/call_trees_test.py b/tensorflow/contrib/autograph/converters/call_trees_test.py index c666dcb73b232ce443898cfe3359f74605af98f2..303dd54a4ee49de27fad0c5cdc2d6274abfe0fa8 100644 --- a/tensorflow/contrib/autograph/converters/call_trees_test.py +++ b/tensorflow/contrib/autograph/converters/call_trees_test.py @@ -34,7 +34,7 @@ class CallTreesTest(converter_test_base.TestCase): def test_basic(self): def test_fn_1(_): - raise ValueError('This should not be called in the compiled verison.') + raise ValueError('This should not be called in the compiled version.') def renamed_test_fn_1(a): return a + 1 diff --git a/tensorflow/contrib/autograph/converters/control_flow.py b/tensorflow/contrib/autograph/converters/control_flow.py index 49d932026ffa9e79e7ddc640f7d3deaec0f4b8a6..2e26cdb3d9387d358e0225555506f199e9945d0b 100644 --- a/tensorflow/contrib/autograph/converters/control_flow.py +++ b/tensorflow/contrib/autograph/converters/control_flow.py @@ -22,6 +22,7 @@ import gast from tensorflow.contrib.autograph.pyct import anno from tensorflow.contrib.autograph.pyct import ast_util +from tensorflow.contrib.autograph.pyct import parser from tensorflow.contrib.autograph.pyct import templates from tensorflow.contrib.autograph.pyct import transformer from tensorflow.contrib.autograph.pyct.static_analysis.annos import NodeAnno @@ -49,11 +50,6 @@ class ControlFlowTransformer(transformer.Base): def __init__(self, context): super(ControlFlowTransformer, self).__init__(context) - # pylint:disable=invalid-name - - def visit_For(self, node): - assert False, 'for statement should have been canonicalized at this point' - def _create_cond_branch(self, body_name, aliased_orig_names, aliased_new_names, body, returns): if aliased_orig_names: @@ -82,7 +78,7 @@ class 
ControlFlowTransformer(transformer.Base): def _create_cond_expr(self, results, test, body_name, orelse_name): if results is not None: template = """ - results = autograph_utils.run_cond(test, body_name, orelse_name) + results = ag__.utils.run_cond(test, body_name, orelse_name) """ return templates.replace( template, @@ -92,7 +88,7 @@ class ControlFlowTransformer(transformer.Base): orelse_name=orelse_name) else: template = """ - autograph_utils.run_cond(test, body_name, orelse_name) + ag__.utils.run_cond(test, body_name, orelse_name) """ return templates.replace( template, test=test, body_name=body_name, orelse_name=orelse_name) @@ -170,6 +166,13 @@ class ControlFlowTransformer(transformer.Base): body_closure = body_scope.modified - body_scope.created all_referenced = body_scope.referenced + cond_scope = anno.getanno(node, NodeAnno.COND_SCOPE) + cond_closure = set() + for s in cond_scope.referenced: + for root in s.support_set: + if root not in body_scope.created: + cond_closure.add(root) + state = list(body_closure) if not state: # TODO(mdan): Implement this properly. 
@@ -204,7 +207,8 @@ class ControlFlowTransformer(transformer.Base): def body_name(state_ssf): body return state_ssf, - state_ast_tuple = autograph_utils.run_while(test_name, body_name, [state]) + state_ast_tuple = ag__.while_loop( + test_name, body_name, (state,), (extra_deps,)) """ node = templates.replace( template, @@ -216,11 +220,67 @@ class ControlFlowTransformer(transformer.Base): test=test, body_name=self.context.namer.new_symbol('loop_body', body_scope.referenced), - body=node_body) + body=node_body, + extra_deps=tuple(s.ast() for s in cond_closure), + ) return node - # pylint:enable=invalid-name + def visit_For(self, node): + self.generic_visit(node) + + body_scope = anno.getanno(node, NodeAnno.BODY_SCOPE) + body_closure = body_scope.modified - body_scope.created + all_referenced = body_scope.referenced + + state = list(body_closure) + + state_ssf = [ + self.context.namer.new_symbol(s.ssf(), all_referenced) for s in state + ] + ssf_map = { + name: ssf + for name, ssf in zip(state, state_ssf) + if str(name) != ssf + } + + if len(state) == 1: + state = state[0] + state_ssf = state_ssf[0] + state_ast_tuple = state + else: + state_ast_tuple = gast.Tuple([n.ast() for n in state], None) + + node_body = ast_util.rename_symbols(node.body, ssf_map) + if anno.hasanno(node, 'extra_cond'): + extra_cond = anno.getanno(node, 'extra_cond') + extra_cond = ast_util.rename_symbols(extra_cond, ssf_map) + else: + extra_cond = parser.parse_expression('True') + + template = """ + def extra_cond_name(state_ssf): + return extra_cond_expr + def body_name(iterate, state_ssf): + body + return state_ssf, + state_ast_tuple = ag__.for_loop( + iterated, extra_cond_name, body_name, (state,)) + """ + node = templates.replace( + template, + state=state, + state_ssf=state_ssf, + state_ast_tuple=state_ast_tuple, + iterated=node.iter, + iterate=node.target, + extra_cond_name=self.context.namer.new_symbol('extra_cond', + all_referenced), + extra_cond_expr=extra_cond, + 
body_name=self.context.namer.new_symbol('loop_body', all_referenced), + body=node_body) + + return node def transform(node, context): diff --git a/tensorflow/contrib/autograph/converters/control_flow_test.py b/tensorflow/contrib/autograph/converters/control_flow_test.py index 86fed51f27bee07f772633f3928ac5263bf57652..c5610b16b4e5de374f404307d3583660707d5e0b 100644 --- a/tensorflow/contrib/autograph/converters/control_flow_test.py +++ b/tensorflow/contrib/autograph/converters/control_flow_test.py @@ -21,6 +21,7 @@ from __future__ import print_function from tensorflow.contrib.autograph.converters import control_flow from tensorflow.contrib.autograph.converters import converter_test_base from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes from tensorflow.python.ops import control_flow_ops from tensorflow.python.platform import test @@ -94,6 +95,77 @@ class ControlFlowTest(converter_test_base.TestCase): with self.test_session() as sess: self.assertEqual(-1, sess.run(result.test_fn(constant_op.constant(1)))) + def test_simple_for(self): + + def test_fn(l): + s1 = 0 + s2 = 0 + for e in l: + s1 += e + s2 += e * e + return s1, s2 + + node = self.parse_and_analyze(test_fn, {}) + node = control_flow.transform(node, self.ctx) + + with self.compiled(node) as result: + with self.test_session() as sess: + l = [1, 2, 3] + self.assertEqual( + test_fn(l), sess.run(result.test_fn(constant_op.constant(l)))) + l = [] + self.assertEqual( + test_fn(l), + sess.run( + result.test_fn( + constant_op.constant(l, shape=(0,), dtype=dtypes.int32)))) + + def test_for_single_var(self): + + def test_fn(l): + s = 0 + for e in l: + s += e + return s + + node = self.parse_and_analyze(test_fn, {}) + node = control_flow.transform(node, self.ctx) + + with self.compiled(node) as result: + with self.test_session() as sess: + l = [1, 2, 3] + self.assertEqual( + test_fn(l), sess.run(result.test_fn(constant_op.constant(l)))) + l = [] + self.assertEqual( + 
test_fn(l), + sess.run( + result.test_fn( + constant_op.constant(l, shape=(0,), dtype=dtypes.int32)))) + + def test_for_with_iterated_expression(self): + + eval_count = [0] + + def count_evals(x): + eval_count[0] += 1 + return x + + def test_fn(n): + s = 0 + for e in count_evals(range(n)): + s += e + return s + + node = self.parse_and_analyze(test_fn, {'count_evals': count_evals}) + node = control_flow.transform(node, self.ctx) + + with self.compiled(node) as result: + result.count_evals = count_evals + self.assertEqual(test_fn(5), result.test_fn(5)) + # count_evals ran twice, once for test_fn and another for result.test_fn + self.assertEqual(eval_count[0], 2) + if __name__ == '__main__': test.main() diff --git a/tensorflow/contrib/autograph/converters/converter_test_base.py b/tensorflow/contrib/autograph/converters/converter_test_base.py index 3ea2cfd668270a69427c24cdf1bbf11d32d66ebe..41c2e71702e7e3ee3811a2cbee27c8c988eb3a5c 100644 --- a/tensorflow/contrib/autograph/converters/converter_test_base.py +++ b/tensorflow/contrib/autograph/converters/converter_test_base.py @@ -21,6 +21,7 @@ from __future__ import print_function import contextlib import imp +from tensorflow.contrib.autograph import operators from tensorflow.contrib.autograph import utils from tensorflow.contrib.autograph.pyct import compiler from tensorflow.contrib.autograph.pyct import context @@ -34,14 +35,17 @@ from tensorflow.python.platform import test class FakeNamer(object): + """A fake namer that uses a global counter to generate unique names.""" + + def __init__(self): + self.i = 0 def new_symbol(self, name_root, used): - i = 0 while True: - name = '%s%d' % (name_root, i) + self.i += 1 + name = '%s%d' % (name_root, self.i) if name not in used: return name - i += 1 def compiled_function_name(self, original_fqn, @@ -75,8 +79,10 @@ class TestCase(test.TestCase): try: result, source = compiler.ast_to_object(node) result.tf = self.make_fake_mod('fake_tf', *symbols) - result.autograph_utils = utils - 
result.autograph_api = self.make_fake_mod('fake_api', converted_call) + fake_ag = self.make_fake_mod('fake_ag', converted_call) + fake_ag.__dict__.update(operators.__dict__) + fake_ag.__dict__['utils'] = utils + result.__dict__['ag__'] = fake_ag yield result except Exception: # pylint:disable=broad-except if source is None: diff --git a/tensorflow/contrib/autograph/converters/decorators_test.py b/tensorflow/contrib/autograph/converters/decorators_test.py index e67ab1cd6a15ceb66fe75140419c7abca9653ae4..9c01f689127dbedad7669c65b03e7da071b2d64d 100644 --- a/tensorflow/contrib/autograph/converters/decorators_test.py +++ b/tensorflow/contrib/autograph/converters/decorators_test.py @@ -28,7 +28,7 @@ from tensorflow.python.platform import test # The Python parser only briefly captures decorators into the AST. # The interpreter desugars them on load, and the decorated function loses any -# trace of the decorator (which is notmally what you would expect, since +# trace of the decorator (which is normally what you would expect, since # they are meant to be transparent). # However, decorators are still visible when you analyze the function # from inside a decorator, before it was applied - as is the case diff --git a/tensorflow/contrib/autograph/converters/for_loops.py b/tensorflow/contrib/autograph/converters/for_loops.py deleted file mode 100644 index 4999c47bdc79ec0ea352472cfd3e97b94ebc7cce..0000000000000000000000000000000000000000 --- a/tensorflow/contrib/autograph/converters/for_loops.py +++ /dev/null @@ -1,92 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Canonicalizes for loops into while loops. - -This canonicalizer uses the len function on its argument. That should be -converted to a tf.shape separately. -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from tensorflow.contrib.autograph.pyct import anno -from tensorflow.contrib.autograph.pyct import templates -from tensorflow.contrib.autograph.pyct import transformer -from tensorflow.contrib.autograph.pyct.static_analysis.annos import NodeAnno - - -class ForLoopCanonicalizationTransformer(transformer.Base): - """Canonicalizes for loops (e.g. into while loops).""" - - def __init__(self, context): - super(ForLoopCanonicalizationTransformer, self).__init__(context) - - def visit_For(self, node): - self.generic_visit(node) - body_scope = anno.getanno(node, NodeAnno.BODY_SCOPE) - i_var = self.context.namer.new_symbol('i', body_scope.referenced) - smart_loop_iter_var = self.context.namer.new_symbol('smart_loop_iter', - body_scope.referenced) - cont_var = self.context.namer.new_symbol('cont', body_scope.referenced) - # TODO(mdan): Use TensorListFromTensor(loop_iter) here. 
- if anno.hasanno(node, 'extra_cond'): - template = """ - i = 0 - smart_loop_iter = autograph_utils.dynamic_dataset(loop_iter) - cont, target = autograph_utils.dynamic_for_cond(i, smart_loop_iter) - while cont and extra_cond: - body - i += 1 - cont, target = autograph_utils.dynamic_for_cond(i, smart_loop_iter) - """ - return templates.replace( - template, - loop_iter=node.iter, - target=node.target, - body=node.body, - i=i_var, - smart_loop_iter=smart_loop_iter_var, - cont=cont_var, - extra_cond=anno.getanno(node, 'extra_cond')) - else: - template = """ - i = 0 - smart_loop_iter = autograph_utils.dynamic_dataset(loop_iter) - cont, target = autograph_utils.dynamic_for_cond(i, smart_loop_iter) - while cont: - body - i += 1 - cont, target = autograph_utils.dynamic_for_cond(i, smart_loop_iter) - """ - repl = templates.replace( - template, - loop_iter=node.iter, - target=node.target, - body=node.body, - i=i_var, - smart_loop_iter=smart_loop_iter_var, - cont=cont_var) - return repl - - def visit_Continue(self, node): - assert False, 'continue statement should be desugared at this point' - - def visit_Break(self, node): - assert False, 'break statement should be desugared at this point' - - -def transform(node, context): - return ForLoopCanonicalizationTransformer(context).visit(node) diff --git a/tensorflow/contrib/autograph/converters/for_loops_test.py b/tensorflow/contrib/autograph/converters/for_loops_test.py deleted file mode 100644 index 943f52de55a3629fdb18e6188e42269a4cb06275..0000000000000000000000000000000000000000 --- a/tensorflow/contrib/autograph/converters/for_loops_test.py +++ /dev/null @@ -1,70 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for for_loops module.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from tensorflow.contrib.autograph.converters import converter_test_base -from tensorflow.contrib.autograph.converters import for_loops -from tensorflow.python.platform import test - - -class ControlFlowTest(converter_test_base.TestCase): - - def test_basic_for(self): - - def test_fn(l): - s = 0 - for e in l: - s += e - return s - - node = self.parse_and_analyze(test_fn, {}) - node = for_loops.transform(node, self.ctx) - - with self.compiled(node) as result: - l = [1, 2, 3] - self.assertEqual(test_fn(l), result.test_fn(l)) - l = [] - self.assertEqual(test_fn(l), result.test_fn(l)) - - def test_for_with_iterated_expression(self): - - eval_count = [0] - - def count_evals(x): - eval_count[0] += 1 - return x - - def test_fn(n): - s = 0 - for e in count_evals(range(n)): - s += e - return s - - node = self.parse_and_analyze(test_fn, {'count_evals': count_evals}) - node = for_loops.transform(node, self.ctx) - - with self.compiled(node) as result: - result.count_evals = count_evals - self.assertEqual(test_fn(5), result.test_fn(5)) - # count_evals ran twice, once for test_fn and another for result.test_fn - self.assertEqual(eval_count[0], 2) - - -if __name__ == '__main__': - test.main() diff --git a/tensorflow/contrib/autograph/converters/ifexp.py b/tensorflow/contrib/autograph/converters/ifexp.py index 
bb0c0a36a7827e5c73e0fa67f09aa4f54d497a2c..616d222762e09feeba1809f119d915dfbe522283 100644 --- a/tensorflow/contrib/autograph/converters/ifexp.py +++ b/tensorflow/contrib/autograph/converters/ifexp.py @@ -27,7 +27,7 @@ class IfExp(transformer.Base): def visit_IfExp(self, node): template = """ - autograph_utils.run_cond(test, lambda: (body,), lambda: (orelse,)) + ag__.utils.run_cond(test, lambda: (body,), lambda: (orelse,)) """ desugared_ifexp = templates.replace_as_expression( template, test=node.test, body=node.body, orelse=node.orelse) diff --git a/tensorflow/contrib/autograph/converters/lists.py b/tensorflow/contrib/autograph/converters/lists.py index 234a0a7487d5fc9e068acf4a19af3bac84f4737e..b49521b2c328f418828a5e92890aa1b169384b70 100644 --- a/tensorflow/contrib/autograph/converters/lists.py +++ b/tensorflow/contrib/autograph/converters/lists.py @@ -45,7 +45,7 @@ class ListTransformer(transformer.Base): if not anno.hasanno(node, 'element_type'): raise NotImplementedError( 'type inference for empty lists is not yet supported; ' - 'use utils.set_element_type(, ) to continue') + 'use set_element_type(, ) to continue') dtype = anno.getanno(node, 'element_type') if not isinstance(dtype, dtypes.DType): # TODO(mdan): Allow non-TF dtypes? 
@@ -74,7 +74,7 @@ class ListTransformer(transformer.Base): if qn.qn[-1] == 'append' and (len(call_node.args) == 1): template = """ - target = autograph_utils.dynamic_list_append(target, element) + target = ag__.utils.dynamic_list_append(target, element) """ node = templates.replace( template, @@ -82,23 +82,33 @@ class ListTransformer(transformer.Base): element=call_node.args[0]) return node + def _replace_list_constructors(self, targets, values): + for target in targets: + if (isinstance(target, (gast.Tuple, gast.List)) and + isinstance(values, (gast.Tuple, gast.List))): + n_targets = len(target.elts) + for i in range(n_targets): + target_el, value_el = target.elts[i], values.elts[i] + values.elts[i] = self._replace_list_constructors( + (target_el,), value_el) + return values + if isinstance(values, gast.List): + if values.elts: + return self._pre_populated_list(values) + else: + return self._empty_list(values) + return values + def visit_Assign(self, node): node = self.generic_visit(node) # Only convert lists when they are assigned to a variable, e.g.: # l = [] - # TODO(mdan): This rule should be improved. - if len(node.targets) != 1: - return node - if not isinstance(node.value, gast.List): - return node - if not isinstance(node.value.ctx, gast.Load): - return node - - if node.value.elts: - node.value = self._pre_populated_list(node.value) - else: - node.value = self._empty_list(node.value) + # TODO(mdan): A similar pattern exists in type_info.py + # We should add a generic "unpack_assignment" function to the base + # transformer, that has the same effect as applying some logic to the SSA + # form. 
+ node.value = self._replace_list_constructors(node.targets, node.value) return node diff --git a/tensorflow/contrib/autograph/converters/lists_test.py b/tensorflow/contrib/autograph/converters/lists_test.py index 749ba14347314f975c5a6e1111133336e2f5c5e6..74c6dc64f197f75eb3e66c01fb078467e8e8ea89 100644 --- a/tensorflow/contrib/autograph/converters/lists_test.py +++ b/tensorflow/contrib/autograph/converters/lists_test.py @@ -45,7 +45,51 @@ class ListTest(converter_test_base.TestCase): result.utils = utils result.dtypes = dtypes with self.test_session() as sess: - self.assertEqual(test_fn(), sess.run(result.test_fn().stack())) + self.assertAllEqual([1], sess.run(result.test_fn().stack())) + + def test_empty_annotated_lists_unpacked(self): + + def test_fn(): + l, m = [], [] + utils.set_element_type(l, dtypes.int32) + utils.set_element_type(m, dtypes.int32) + l.append(1) + m.append(2) + return l, m + + node = self.parse_and_analyze(test_fn, {'dtypes': dtypes, 'utils': utils}) + node = lists.transform(node, self.ctx) + + with self.compiled(node, tensor_array_ops.TensorArray, + dtypes.int32) as result: + result.utils = utils + result.dtypes = dtypes + with self.test_session() as sess: + res_l, res_m = result.test_fn() + self.assertEqual([1], sess.run(res_l.stack())) + self.assertEqual([2], sess.run(res_m.stack())) + + def test_empty_annotated_lists_list_unpacked(self): + + def test_fn(): + [l, m] = [], [] + utils.set_element_type(l, dtypes.int32) + utils.set_element_type(m, dtypes.int32) + l.append(1) + m.append(2) + return l, m + + node = self.parse_and_analyze(test_fn, {'dtypes': dtypes, 'utils': utils}) + node = lists.transform(node, self.ctx) + + with self.compiled(node, tensor_array_ops.TensorArray, + dtypes.int32) as result: + result.utils = utils + result.dtypes = dtypes + with self.test_session() as sess: + res_l, res_m = result.test_fn() + self.assertEqual([1], sess.run(res_l.stack())) + self.assertEqual([2], sess.run(res_m.stack())) if __name__ == '__main__': 
diff --git a/tensorflow/contrib/autograph/converters/name_scopes.py b/tensorflow/contrib/autograph/converters/name_scopes.py index 2a3f474360e94635470bf9581222e4c79f46b7a1..dfee529abaa8c14d9b408819b32c5199500a2c2f 100644 --- a/tensorflow/contrib/autograph/converters/name_scopes.py +++ b/tensorflow/contrib/autograph/converters/name_scopes.py @@ -12,8 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Wraps a function body with a `name_scope` of the function name. -""" +"""Wraps a function body with a `name_scope` of the function name.""" from __future__ import absolute_import from __future__ import division @@ -28,23 +27,46 @@ from tensorflow.contrib.autograph.pyct import transformer class FunctionNameScopeTransformer(transformer.Base): """Wrap a function body with a `name_scope` of the function name.""" - def __init__(self, context): - super(FunctionNameScopeTransformer, self).__init__(context) - self._function_level = 0 + def _name_for_current_scope(self): + innermost = self.enclosing_entities[-1] + if len(self.enclosing_entities) > 1: + parent = self.enclosing_entities[-2] + if isinstance(parent, gast.ClassDef): + # Methods also take the name of their class. + name = '%s/%s' % (parent.name, innermost.name) + else: + name = innermost.name + else: + name = innermost.name + + # Sanitize the name. + # See https://www.tensorflow.org/api_docs/python/tf/Graph#name_scope + # TensorFlow doesn't like leading underscores at the top level. 
+ while name[0] == '_': + name = name[1:] + return name def visit_FunctionDef(self, node): - self._function_level += 1 - try: - self.generic_visit(node) - finally: - self._function_level -= 1 - scope_name = node.name - if self._function_level == 0 and self.context.owner_type is not None: - scope_name = '{}/{}'.format(self.context.owner_type.__name__, scope_name) - node.body = templates.replace( - 'with tf.name_scope(scope_name): body', - scope_name=gast.Str(scope_name), - body=node.body) + node = self.generic_visit(node) + + unscoped_body = [] + scoped_body = node.body + if scoped_body: + first = scoped_body[0] + if isinstance(first, gast.Expr) and isinstance(first.value, gast.Str): + # Skip any docstring. + unscoped_body = scoped_body[:1] + scoped_body = scoped_body[1:] + + template = """ + with tf.name_scope(scope_name): + body + """ + scoped_body = templates.replace( + template, + scope_name=gast.Str(self._name_for_current_scope()), + body=scoped_body) + node.body = unscoped_body + scoped_body return node diff --git a/tensorflow/contrib/autograph/converters/name_scopes_test.py b/tensorflow/contrib/autograph/converters/name_scopes_test.py index 61e5db2af826d0c2238f1af0f3240411596f7429..17692cbd880dbc1db4bb40ad7345e27907499f9d 100644 --- a/tensorflow/contrib/autograph/converters/name_scopes_test.py +++ b/tensorflow/contrib/autograph/converters/name_scopes_test.py @@ -27,9 +27,10 @@ from tensorflow.python.platform import test class FunctionNameScopeTransformer(converter_test_base.TestCase): - def test_basic_name(self): + def test_basic(self): def test_fn(l): + """This should stay here.""" a = 5 l += a return l @@ -38,41 +39,62 @@ class FunctionNameScopeTransformer(converter_test_base.TestCase): node = name_scopes.transform(node, self.ctx) with self.compiled(node, ops.name_scope) as result: - result_op = result.test_fn(constant_op.constant([1, 2, 3])) + result_op = result.test_fn(constant_op.constant(1)) self.assertIn('test_fn/', result_op.op.name) - def 
test_nested_name(self): + self.assertEqual('This should stay here.', result.test_fn.__doc__) + + def test_long_docstring(self): def test_fn(l): + """Multi-line docstring. + + Args: + l: A thing. + Returns: + l + """ + return l - def body(i): - return i**2 + node = self.parse_and_analyze(test_fn, {}) + node = name_scopes.transform(node, self.ctx) + + with self.compiled(node, ops.name_scope) as result: + self.assertIn('Multi-line', result.test_fn.__doc__) + self.assertIn('Returns:', result.test_fn.__doc__) - l += [4] - return body(l) + def test_nested_functions(self): + + def test_fn(l): + + def inner_fn(i): + return i ** 2 + + l += 4 + return inner_fn(l) node = self.parse_and_analyze(test_fn, {}) node = name_scopes.transform(node, self.ctx) with self.compiled(node, ops.name_scope) as result: - result_op = result.test_fn(constant_op.constant([1, 2, 3])) + result_op = result.test_fn(constant_op.constant(1)) first_result_input_name = result_op.op.inputs[0].name second_result_input_name = result_op.op.inputs[1].name self.assertIn('test_fn/', first_result_input_name) - self.assertNotIn('body/', first_result_input_name) - self.assertIn('test_fn/body/', second_result_input_name) + self.assertNotIn('inner_fn', first_result_input_name) + self.assertIn('test_fn/inner_fn/', second_result_input_name) - def test_class_name(self): + def test_method(self): class TestClass(object): def test_fn(self, l): - def body(i): - return i**2 + def inner_fn(i): + return i ** 2 - l += [4] - return body(l) + l += 4 + return inner_fn(l) # Note that 'TestClass' was needed in the namespace here. 
node = self.parse_and_analyze( @@ -80,12 +102,37 @@ class FunctionNameScopeTransformer(converter_test_base.TestCase): node = name_scopes.transform(node, self.ctx) with self.compiled(node, ops.name_scope) as result: - result_op = result.TestClass().test_fn(constant_op.constant([1, 2, 3])) + result_op = result.TestClass().test_fn(constant_op.constant(1)) first_result_input_name = result_op.op.inputs[0].name second_result_input_name = result_op.op.inputs[1].name self.assertIn('TestClass/test_fn/', first_result_input_name) - self.assertNotIn('body/', first_result_input_name) - self.assertIn('TestClass/test_fn/body/', second_result_input_name) + self.assertNotIn('inner_fn', first_result_input_name) + self.assertIn('TestClass/test_fn/inner_fn/', second_result_input_name) + + def test_operator(self): + + class TestClass(object): + + def __call__(self, l): + + def inner_fn(i): + return i ** 2 + + l += 4 + return inner_fn(l) + + # Note that 'TestClass' was needed in the namespace here. + node = self.parse_and_analyze( + TestClass.__call__, {'TestClass': TestClass}, owner_type=TestClass) + node = name_scopes.transform(node, self.ctx) + + with self.compiled(node, ops.name_scope) as result: + result_op = result.__call__(TestClass(), constant_op.constant(1)) + first_result_input_name = result_op.op.inputs[0].name + second_result_input_name = result_op.op.inputs[1].name + self.assertIn('call__/', first_result_input_name) + self.assertNotIn('inner_fn', first_result_input_name) + self.assertIn('call__/inner_fn/', second_result_input_name) if __name__ == '__main__': diff --git a/tensorflow/contrib/autograph/converters/side_effect_guards.py b/tensorflow/contrib/autograph/converters/side_effect_guards.py index 1c1293d2c411b51b563ac3965284a48725ed3278..3bcb2d3c42c6e0663c8f78523199a364b6ac231f 100644 --- a/tensorflow/contrib/autograph/converters/side_effect_guards.py +++ b/tensorflow/contrib/autograph/converters/side_effect_guards.py @@ -160,8 +160,8 @@ class 
SideEffectGuardTransformer(transformer.Base): [alias_map.get(s, s).ast() for s in guarded_args], None) template = """ - with autograph_utils.control_dependency_on_returns(call): - aliased_guarded_args = autograph_utils.alias_tensors(guarded_args) + with ag__.utils.control_dependency_on_returns(call): + aliased_guarded_args = ag__.utils.alias_tensors(guarded_args) """ control_deps_guard = templates.replace( template, @@ -172,7 +172,7 @@ class SideEffectGuardTransformer(transformer.Base): alias_map = {} template = """ - with autograph_utils.control_dependency_on_returns(call): + with ag__.utils.control_dependency_on_returns(call): pass """ control_deps_guard = templates.replace(template, call=node.value)[-1] diff --git a/tensorflow/contrib/autograph/examples/notebooks/rnn_keras_estimator.ipynb b/tensorflow/contrib/autograph/examples/notebooks/rnn_keras_estimator.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..324b23c24b5a7970d7f20ed955839ba1cf1774fc --- /dev/null +++ b/tensorflow/contrib/autograph/examples/notebooks/rnn_keras_estimator.ipynb @@ -0,0 +1,1078 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } + }, + "colab_type": "code", + "id": "LqNpENf-ec0X", + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [], + "source": [ + "!pip install -U tf-nightly" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } + }, + "colab_type": "code", + "id": "Pa2qpEmoVOGe", + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [], + "source": [ + "import os\n", + "import time\n", + "\n", + "import tensorflow as tf\n", + "from tensorflow.contrib import autograph\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import six\n", + "\n", + "from google.colab import widgets" + ] + }, + { + "cell_type": 
"markdown", + "metadata": { + "colab_type": "text", + "id": "HNqUFL4deCsL", + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "# Case study: training a custom RNN, using Keras and Estimators\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "YkC1k4HEQ7rw", + "slideshow": { + "slide_type": "-" + } + }, + "source": [ + "In this section, we show how you can use AutoGraph to build RNNColorbot, an RNN that takes as input names of colors and predicts their corresponding RGB tuples. The model will be trained by a [custom Estimator](https://www.tensorflow.org/get_started/custom_estimators)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "7nkPDl5CTCNb", + "slideshow": { + "slide_type": "-" + } + }, + "source": [ + "To get started, set up the dataset. The following cells defines methods that download and format the data needed for RNNColorbot; the details aren't important (read them in the privacy of your own home if you so wish), but make sure to run the cells before proceeding." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } + }, + "colab_type": "code", + "id": "A0uREmVXCQEw", + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [], + "source": [ + "def parse(line):\n", + " \"\"\"Parses a line from the colors dataset.\"\"\"\n", + " items = tf.string_split([line], \",\").values\n", + " rgb = tf.string_to_number(items[1:], out_type=tf.float32) / 255.0\n", + " color_name = items[0]\n", + " chars = tf.one_hot(tf.decode_raw(color_name, tf.uint8), depth=256)\n", + " length = tf.cast(tf.shape(chars)[0], dtype=tf.int64)\n", + " return rgb, chars, length\n", + "\n", + "\n", + "def set_static_batch_shape(batch_size):\n", + " def apply(rgb, chars, length):\n", + " rgb.set_shape((batch_size, None))\n", + " chars.set_shape((batch_size, None, 256))\n", + " length.set_shape((batch_size,))\n", + " return rgb, chars, length\n", + " return apply\n", + "\n", + "\n", + "def load_dataset(data_dir, url, batch_size, training=True):\n", + " \"\"\"Loads the colors data at path into a tf.PaddedDataset.\"\"\"\n", + " path = tf.keras.utils.get_file(os.path.basename(url), url, cache_dir=data_dir)\n", + " dataset = tf.data.TextLineDataset(path)\n", + " dataset = dataset.skip(1)\n", + " dataset = dataset.map(parse)\n", + " dataset = dataset.cache()\n", + " dataset = dataset.repeat()\n", + " if training:\n", + " dataset = dataset.shuffle(buffer_size=3000)\n", + " dataset = dataset.padded_batch(\n", + " batch_size, padded_shapes=((None,), (None, 256), ()))\n", + " # To simplify the model code, we statically set as many of the shapes that we\n", + " # know.\n", + " dataset = dataset.map(set_static_batch_shape(batch_size))\n", + " return dataset" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "waZ89t3DTUla", + "slideshow": { + "slide_type": "-" + } + }, + "source": [ + "To show the use of control flow, we write the RNN loop by 
hand, rather than using a pre-built RNN model.\n", + "\n", + "Note how we write the model code in Eager style, with regular `if` and `while` statements. Then, we annotate the functions with `@autograph.convert` to have them automatically compiled to run in graph mode.\n", + "We use Keras to define the model, and we will train it using Estimators." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } + }, + "colab_type": "code", + "id": "9v8AJouiC44V", + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [], + "source": [ + "@autograph.convert()\n", + "class RnnColorbot(tf.keras.Model):\n", + " \"\"\"RNN Colorbot model.\"\"\"\n", + "\n", + " def __init__(self):\n", + " super(RnnColorbot, self).__init__()\n", + " self.lower_cell = tf.contrib.rnn.LSTMBlockCell(256)\n", + " self.upper_cell = tf.contrib.rnn.LSTMBlockCell(128)\n", + " self.relu_layer = tf.layers.Dense(3, activation=tf.nn.relu)\n", + "\n", + "\n", + " def _rnn_layer(self, chars, cell, batch_size, training):\n", + " \"\"\"A single RNN layer.\n", + "\n", + " Args:\n", + " chars: A Tensor of shape (max_sequence_length, batch_size, input_size)\n", + " cell: An object of type tf.contrib.rnn.LSTMBlockCell\n", + " batch_size: Int, the batch size to use\n", + " training: Boolean, whether the layer is used for training\n", + "\n", + " Returns:\n", + " A Tensor of shape (max_sequence_length, batch_size, output_size).\n", + " \"\"\"\n", + " hidden_outputs = []\n", + " autograph.utils.set_element_type(hidden_outputs, tf.float32)\n", + " state, output = cell.zero_state(batch_size, tf.float32)\n", + " for ch in chars:\n", + " cell_output, (state, output) = cell.call(ch, (state, output))\n", + " hidden_outputs.append(cell_output)\n", + " hidden_outputs = hidden_outputs.stack()\n", + " if training:\n", + " hidden_outputs = tf.nn.dropout(hidden_outputs, 0.5)\n", + " return hidden_outputs\n", + "\n", + " def 
build(self, _):\n", + " \"\"\"Creates the model variables. See keras.Model.build().\"\"\"\n", + " self.lower_cell.build(tf.TensorShape((None, 256)))\n", + " self.upper_cell.build(tf.TensorShape((None, 256)))\n", + " self.relu_layer.build(tf.TensorShape((None, 128))) \n", + " self.built = True\n", + "\n", + "\n", + " def call(self, inputs, training=False):\n", + " \"\"\"The RNN model code. Uses Eager and \n", + "\n", + " The model consists of two RNN layers (made by lower_cell and upper_cell),\n", + " followed by a fully connected layer with ReLU activation.\n", + "\n", + " Args:\n", + " inputs: A tuple (chars, length)\n", + " training: Boolean, whether the layer is used for training\n", + "\n", + " Returns:\n", + " A Tensor of shape (batch_size, 3) - the model predictions.\n", + " \"\"\"\n", + " chars, length = inputs\n", + " batch_size = chars.shape[0]\n", + " seq = tf.transpose(chars, (1, 0, 2))\n", + "\n", + " seq = self._rnn_layer(seq, self.lower_cell, batch_size, training)\n", + " seq = self._rnn_layer(seq, self.upper_cell, batch_size, training)\n", + "\n", + " # Grab just the end-of-sequence from each output.\n", + " indices = tf.stack([length - 1, range(batch_size)], axis=1)\n", + " sequence_ends = tf.gather_nd(seq, indices)\n", + " return self.relu_layer(sequence_ends)\n", + "\n", + "@autograph.convert()\n", + "def loss_fn(labels, predictions):\n", + " return tf.reduce_mean((predictions - labels) ** 2)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "JjK4gXFvFsf4", + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "We will now create the model function for the custom Estimator.\n", + "\n", + "In the model function, we simply use the model class we defined above - that's it!" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } + }, + "colab_type": "code", + "id": "-yso_Nx23Gy1", + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [], + "source": [ + "def model_fn(features, labels, mode, params):\n", + " \"\"\"Estimator model function.\"\"\"\n", + " chars = features['chars']\n", + " sequence_length = features['sequence_length']\n", + " inputs = (chars, sequence_length)\n", + "\n", + " # Create the model. Simply using the AutoGraph-ed class just works!\n", + " colorbot = RnnColorbot()\n", + " colorbot.build(None)\n", + "\n", + " if mode == tf.estimator.ModeKeys.TRAIN:\n", + " predictions = colorbot(inputs, training=True)\n", + " loss = loss_fn(labels, predictions)\n", + "\n", + " learning_rate = params['learning_rate']\n", + " optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n", + " global_step = tf.train.get_global_step()\n", + " train_op = optimizer.minimize(loss, global_step=global_step)\n", + " return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)\n", + "\n", + " elif mode == tf.estimator.ModeKeys.EVAL:\n", + " predictions = colorbot(inputs)\n", + " loss = loss_fn(labels, predictions)\n", + "\n", + " return tf.estimator.EstimatorSpec(mode, loss=loss)\n", + "\n", + " elif mode == tf.estimator.ModeKeys.PREDICT:\n", + " predictions = colorbot(inputs)\n", + "\n", + " predictions = tf.minimum(predictions, 1.0)\n", + " return tf.estimator.EstimatorSpec(mode, predictions=predictions)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "HOQfoBnHC9CP", + "slideshow": { + "slide_type": "-" + } + }, + "source": [ + "We'll create an input function that will feed our training and eval data." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } + }, + "colab_type": "code", + "id": "FJZlx7yG2MP0", + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [], + "source": [ + "def input_fn(data_dir, data_url, params, training=True):\n", + " \"\"\"An input function for training\"\"\"\n", + " batch_size = params['batch_size']\n", + " \n", + " # load_dataset defined above\n", + " dataset = load_dataset(data_dir, data_url, batch_size, training=training)\n", + "\n", + " # Package the pipeline end in a format suitable for the estimator.\n", + " labels, chars, sequence_length = dataset.make_one_shot_iterator().get_next()\n", + " features = {\n", + " 'chars': chars,\n", + " 'sequence_length': sequence_length\n", + " }\n", + "\n", + " return features, labels" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "qsvv-lzbDqXd", + "slideshow": { + "slide_type": "-" + } + }, + "source": [ + "We now have everything in place to build our custom estimator and use it for training and eval!" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + }, + "height": 35 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 10604, + "status": "ok", + "timestamp": 1524095272039, + "user": { + "displayName": "", + "photoUrl": "", + "userId": "" + }, + "user_tz": 240 + }, + "id": "2pg1AfbxBJQq", + "outputId": "9c924b4f-06e1-4538-976c-a3e1ddac5660", + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Eval loss at step 100: 0.0674834\n" + ] + } + ], + "source": [ + "params = {\n", + " 'batch_size': 64,\n", + " 'learning_rate': 0.01,\n", + "}\n", + "\n", + "train_url = \"https://raw.githubusercontent.com/random-forests/tensorflow-workshop/master/extras/colorbot/data/train.csv\"\n", + "test_url = \"https://raw.githubusercontent.com/random-forests/tensorflow-workshop/master/extras/colorbot/data/test.csv\"\n", + "data_dir = \"tmp/rnn/data\"\n", + "\n", + "regressor = tf.estimator.Estimator(\n", + " model_fn=model_fn,\n", + " params=params)\n", + "\n", + "regressor.train(\n", + " input_fn=lambda: input_fn(data_dir, train_url, params),\n", + " steps=100)\n", + "eval_results = regressor.evaluate(\n", + " input_fn=lambda: input_fn(data_dir, test_url, params, training=False),\n", + " steps=2\n", + ")\n", + "\n", + "print('Eval loss at step %d: %s' % (eval_results['global_step'], eval_results['loss']))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "zG1YAjB_cUnQ", + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "And here's the same estimator used for inference." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + }, + "height": 343 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 7990, + "status": "ok", + "timestamp": 1524095280105, + "user": { + "displayName": "", + "photoUrl": "", + "userId": "" + }, + "user_tz": 240 + }, + "id": "dxHex2tUN_10", + "outputId": "2b889e5a-b9ed-4645-bf03-d98f26c72101", + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "\u003clink rel=stylesheet type=text/css href='/nbextensions/google.colab/tabbar.css'\u003e\u003c/link\u003e" + ], + "text/plain": [ + "\u003cIPython.core.display.HTML at 0x7f3f36aa6cd0\u003e" + ] + }, + "metadata": { + "tags": [ + "outputarea_id1" + ] + }, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\u003cscript src='/nbextensions/google.colab/tabbar_main.min.js'\u003e\u003c/script\u003e" + ], + "text/plain": [ + "\u003cIPython.core.display.HTML at 0x7f3eca67f7d0\u003e" + ] + }, + "metadata": { + "tags": [ + "outputarea_id1" + ] + }, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\u003cdiv id=\"id1\"\u003e\u003c/div\u003e" + ], + "text/plain": [ + "\u003cIPython.core.display.HTML at 0x7f3eca67f8d0\u003e" + ] + }, + "metadata": { + "tags": [ + "outputarea_id1" + ] + }, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": [ + "window[\"e8ddfa22-4362-11e8-91ec-c8d3ffb5fbe0\"] = colab_lib.createTabBar({\"contentBorder\": [\"0px\"], \"elementId\": \"id1\", \"borderColor\": [\"#a7a7a7\"], \"contentHeight\": [\"initial\"], \"tabNames\": [\"RNN Colorbot\"], \"location\": \"top\", \"initialSelection\": 0});\n", + "//# sourceURL=js_71b9087b6d" + ], + "text/plain": [ + "\u003cIPython.core.display.Javascript at 0x7f3eca67f950\u003e" + ] + }, + "metadata": { + "tags": [ + "outputarea_id1" + ] + }, + "output_type": "display_data" + }, + { + "data": { + 
"application/javascript": [ + "window[\"e8ddfa23-4362-11e8-91ec-c8d3ffb5fbe0\"] = window[\"id1\"].setSelectedTabIndex(0);\n", + "//# sourceURL=js_e390445f33" + ], + "text/plain": [ + "\u003cIPython.core.display.Javascript at 0x7f3eca67f990\u003e" + ] + }, + "metadata": { + "tags": [ + "outputarea_id1" + ] + }, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": [ + "window[\"e8ddfa24-4362-11e8-91ec-c8d3ffb5fbe0\"] = google.colab.output.getActiveOutputArea();\n", + "//# sourceURL=js_241dd76d85" + ], + "text/plain": [ + "\u003cIPython.core.display.Javascript at 0x7f3eca67fc50\u003e" + ] + }, + "metadata": { + "tags": [ + "id1_content_0", + "outputarea_id1" + ] + }, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": [ + "window[\"e8ddfa25-4362-11e8-91ec-c8d3ffb5fbe0\"] = document.querySelector(\"#id1_content_0\");\n", + "//# sourceURL=js_60c64e3d50" + ], + "text/plain": [ + "\u003cIPython.core.display.Javascript at 0x7f3eca67fd90\u003e" + ] + }, + "metadata": { + "tags": [ + "id1_content_0", + "outputarea_id1" + ] + }, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": [ + "window[\"e8ddfa26-4362-11e8-91ec-c8d3ffb5fbe0\"] = google.colab.output.setActiveOutputArea(window[\"e8ddfa25-4362-11e8-91ec-c8d3ffb5fbe0\"]);\n", + "//# sourceURL=js_14ea437cbd" + ], + "text/plain": [ + "\u003cIPython.core.display.Javascript at 0x7f3eca67fe10\u003e" + ] + }, + "metadata": { + "tags": [ + "id1_content_0", + "outputarea_id1" + ] + }, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": [ + "window[\"e8ddfa27-4362-11e8-91ec-c8d3ffb5fbe0\"] = window[\"id1\"].setSelectedTabIndex(0);\n", + "//# sourceURL=js_09294c2226" + ], + "text/plain": [ + "\u003cIPython.core.display.Javascript at 0x7f3eca67fcd0\u003e" + ] + }, + "metadata": { + "tags": [ + "id1_content_0", + "outputarea_id1" + ] + }, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": [ + 
"window[\"ec965514-4362-11e8-91ec-c8d3ffb5fbe0\"] = google.colab.output.setActiveOutputArea(window[\"e8ddfa24-4362-11e8-91ec-c8d3ffb5fbe0\"]);\n", + "//# sourceURL=js_e5e8266997" + ], + "text/plain": [ + "\u003cIPython.core.display.Javascript at 0x7f3eca67fe10\u003e" + ] + }, + "metadata": { + "tags": [ + "id1_content_0", + "outputarea_id1" + ] + }, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": [ + "window[\"ec965515-4362-11e8-91ec-c8d3ffb5fbe0\"] = google.colab.output.getActiveOutputArea();\n", + "//# sourceURL=js_07a097f0ee" + ], + "text/plain": [ + "\u003cIPython.core.display.Javascript at 0x7f3eca67fc90\u003e" + ] + }, + "metadata": { + "tags": [ + "id1_content_0", + "outputarea_id1" + ] + }, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": [ + "window[\"ec965516-4362-11e8-91ec-c8d3ffb5fbe0\"] = document.querySelector(\"#id1_content_0\");\n", + "//# sourceURL=js_790d669ca8" + ], + "text/plain": [ + "\u003cIPython.core.display.Javascript at 0x7f3eca67f8d0\u003e" + ] + }, + "metadata": { + "tags": [ + "id1_content_0", + "outputarea_id1" + ] + }, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": [ + "window[\"ec965517-4362-11e8-91ec-c8d3ffb5fbe0\"] = google.colab.output.setActiveOutputArea(window[\"ec965516-4362-11e8-91ec-c8d3ffb5fbe0\"]);\n", + "//# sourceURL=js_d30df771f0" + ], + "text/plain": [ + "\u003cIPython.core.display.Javascript at 0x7f3eca67fd90\u003e" + ] + }, + "metadata": { + "tags": [ + "id1_content_0", + "outputarea_id1" + ] + }, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": [ + "window[\"ec965518-4362-11e8-91ec-c8d3ffb5fbe0\"] = window[\"id1\"].setSelectedTabIndex(0);\n", + "//# sourceURL=js_8a43a2da4b" + ], + "text/plain": [ + "\u003cIPython.core.display.Javascript at 0x7f3eca67fc50\u003e" + ] + }, + "metadata": { + "tags": [ + "id1_content_0", + "outputarea_id1" + ] + }, + "output_type": "display_data" + }, + { + 
"data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAQwAAAENCAYAAAD60Fs2AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAACMBJREFUeJzt3F+I1XX+x/G32zjiFERUpgaFd2JBzOg5joX4h0SiMgmM\n/uhVGIlgFBlERGB3hUEkhkRdtDfRP1ACL6KpLBqcguxCjEAkmGamQcSohFHzsxe7O6zssvsydtff\n+ns8rs758j3f8z7fiyef7/k3o7XWCiDwh4s9APC/QzCAmGAAMcEAYoIBxAQDiAkGF8XTTz9d3W63\n7rvvvhoZGakVK1Zc7JEICMYlbvXq1TU8PHyxxzjPV199VcPDw/XZZ5/V22+/XVVVM2bMuMhTkRAM\n/qt+++23+uGHH+r666+vWbNmXexxuECCcQl76qmnanx8vLZs2VIDAwP1+uuv1zfffFP3339/dTqd\nWr9+fY2MjEzvv2nTpnr55ZfrgQceqIGBgXr44Yfr5MmTVVV1+vTp2r59ey1durQ6nU5t2LChTpw4\nUVVVk5OTtWXLllq6dGmtXbu23nnnnelj7tq1q7Zt21bbt2+vJUuW1HvvvVfPPvtsHTp0qAYGBmrX\nrl1/N/fRo0dr06ZN1el06u67766hoaGqqhodHa1OpzO93zPPPFO33nrr9P3t27fXm2+++e89iZyv\ncUlbtWpVGx4ebq21NjEx0brdbjtw4EBrrbUvvviidbvdduLEidZaaxs3bmxr1qxp33//fZuammob\nN25sO3fubK219tZbb7VHH320TU1NtXPnzrXDhw+3X375pbXW2kMPPdR27NjRTp8+3Y4cOdIGBwen\nn/OVV15pN910U/voo49aa61NTU21999/vz344IPTMx48eLCtWLGitdbamTNn2po1a9qePXvamTNn\n2vDwcOvv72/Hjh2bfj2HDx9urbW2du3advvtt7ejR4+21lpbuXJlO3LkyH/qVNJas8L4f6D95edC\n+/btq5UrV9by5curqmrZsmV1880316effjq977333ls33HBD9fb21h133FFHjhypqqqenp46efJk\nHTt2rGbMmFGLFi2qyy+/vCYmJurrr7+uJ598smbOnFkLFy6sDRs21N69e6eP2d/fX6tXr66qqt7e\n3n8666FDh+rUqVP1yCOPVE9PTw0ODtaqVavqgw8+qKqqJUuW1MjISB0/fryqqtauXVtffvlljY6O\n1q+//loLFy78N501/pGeiz0A/z1jY2O1f//++vjjj6vqzyE5e/ZsLVu2bHqfa665Zvr27Nmz69Sp\nU1VVdc8999TExEQ98cQT9fPPP9e6devq8ccfr8nJybryyitr9uzZ04+bP39+HT58ePr+3Llz4xkn\nJydr3rx5522bP39+TU5OVlVVp9OpoaGhuu6666rb7Va32629e/dWb29vLV68+ALOBr+HYFzi/vbT\nh3nz5tX69etrx44dF3ycnp6e2rp1a23durXGxsZq8+bNtWDBgrrtttvqp59+qlOnTlVfX19VVY2P\nj9ecOXP+4Qz/ypw5c2p8fPy8bWNjY7VgwYKqqup2u/Xiiy/WvHnzqtPp1MDAQD333HPV29tb3W73\ngl8XF8YlySXu2muvrdHR0aqqWrduXQ0NDdXnn39e586dq6mpqRoZGakff/zxXx7n4MGD9d1339W5\nc+eqr6+venp66rLLLqu5c+dWf39/vfTSS3X69On69ttv6913361169b9rnlvueWW6uvrq9dee63O\nnj1bBw8erE8++aTuvPPOqqq68cYba9asWbVv377qdDp1xRVX1NVXX10ffvjheW+I8p8hGJe4zZs3\n1+7du6vb7db+/ftr9+7dtWfPnlq2bFmtWrWq3njjjen3OP7ZSuD48eO1bdu2Wrx4cd111121dOnS\n6Sjs3LmzRkdHa/ny5bVt27Z6
7LHHzrvMuRAzZ86sV199tQ4cOFCDg4P1/PPP1wsvvDC9wqj68yrj\nqquumr7U+WsoFi1a9Luek9yM1vyBDpCxwgBiggHEBAOICQYQ+z/7PYzjf/QRGVxM12z68u+2WWEA\nMcEAYoIBxAQDiAkGEBMMICYYQEwwgJhgADHBAGKCAcQEA4gJBhATDCAmGEBMMICYYAAxwQBiggHE\nBAOICQYQEwwgJhhATDCAmGAAMcEAYoIBxAQDiAkGEBMMICYYQEwwgJhgADHBAGKCAcQEA4gJBhAT\nDCAmGEBMMICYYAAxwQBiggHEBAOICQYQEwwgJhhATDCAmGAAMcEAYoIBxAQDiAkGEBMMICYYQEww\ngJhgADHBAGKCAcQEA4gJBhATDCAmGEBMMICYYAAxwQBiggHEBAOICQYQEwwgJhhATDCAmGAAMcEA\nYoIBxAQDiAkGEBMMICYYQEwwgJhgADHBAGKCAcQEA4gJBhATDCAmGEBMMICYYAAxwQBiggHEBAOI\nCQYQEwwgJhhATDCAmGAAMcEAYoIBxAQDiAkGEBMMICYYQEwwgJhgADHBAGKCAcQEA4gJBhATDCAm\nGEBMMICYYAAxwQBiggHEBAOICQYQEwwgJhhATDCAmGAAMcEAYoIBxAQDiAkGEBMMICYYQEwwgJhg\nADHBAGKCAcQEA4gJBhATDCAmGEBMMICYYAAxwQBiggHEBAOICQYQEwwgJhhATDCAmGAAMcEAYoIB\nxAQDiAkGEBMMICYYQEwwgJhgADHBAGKCAcQEA4gJBhATDCAmGEBMMICYYAAxwQBiggHEBAOICQYQ\nEwwgJhhATDCAmGAAMcEAYoIBxAQDiAkGEBMMICYYQEwwgJhgADHBAGKCAcQEA4gJBhATDCAmGEBM\nMICYYAAxwQBiggHEBAOICQYQEwwgJhhATDCAmGAAMcEAYoIBxAQDiAkGEBMMICYYQEwwgJhgADHB\nAGKCAcQEA4gJBhATDCAmGEBMMICYYAAxwQBiggHEBAOICQYQEwwgJhhATDCAmGAAMcEAYoIBxAQD\niAkGEBMMIDajtdYu9hDA/wYrDCAmGEBMMICYYAAxwQBiggHEBAOICQYQEwwgJhhATDCAmGAAMcEA\nYoIBxAQDiAkGEBMMICYYQEwwgJhgADHBAGKCAcQEA4j9CY2LTAbbRbWuAAAAAElFTkSuQmCC\n", + "text/plain": [ + "\u003cmatplotlib.figure.Figure at 0x7f3ecc00bf10\u003e" + ] + }, + "metadata": { + "tags": [ + "id1_content_0", + "outputarea_id1", + "user_output" + ] + }, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": [ + "window[\"ec965519-4362-11e8-91ec-c8d3ffb5fbe0\"] = google.colab.output.setActiveOutputArea(window[\"ec965515-4362-11e8-91ec-c8d3ffb5fbe0\"]);\n", + "//# sourceURL=js_893ad561f4" + ], + "text/plain": [ + "\u003cIPython.core.display.Javascript at 0x7f3f31b55c90\u003e" + ] + }, + "metadata": { + "tags": [ + "id1_content_0", + "outputarea_id1" + ] + }, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": [ + "window[\"ec96551a-4362-11e8-91ec-c8d3ffb5fbe0\"] = 
google.colab.output.getActiveOutputArea();\n", + "//# sourceURL=js_2d99e0ac17" + ], + "text/plain": [ + "\u003cIPython.core.display.Javascript at 0x7f3eca67fe50\u003e" + ] + }, + "metadata": { + "tags": [ + "id1_content_0", + "outputarea_id1" + ] + }, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": [ + "window[\"ec96551b-4362-11e8-91ec-c8d3ffb5fbe0\"] = document.querySelector(\"#id1_content_0\");\n", + "//# sourceURL=js_5c19462e32" + ], + "text/plain": [ + "\u003cIPython.core.display.Javascript at 0x7f3f31b55dd0\u003e" + ] + }, + "metadata": { + "tags": [ + "id1_content_0", + "outputarea_id1" + ] + }, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": [ + "window[\"ec96551c-4362-11e8-91ec-c8d3ffb5fbe0\"] = google.colab.output.setActiveOutputArea(window[\"ec96551b-4362-11e8-91ec-c8d3ffb5fbe0\"]);\n", + "//# sourceURL=js_b9c8b7567b" + ], + "text/plain": [ + "\u003cIPython.core.display.Javascript at 0x7f3f31b55a50\u003e" + ] + }, + "metadata": { + "tags": [ + "id1_content_0", + "outputarea_id1" + ] + }, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": [ + "window[\"ec96551d-4362-11e8-91ec-c8d3ffb5fbe0\"] = window[\"id1\"].setSelectedTabIndex(0);\n", + "//# sourceURL=js_fd05186348" + ], + "text/plain": [ + "\u003cIPython.core.display.Javascript at 0x7f3f31b55810\u003e" + ] + }, + "metadata": { + "tags": [ + "id1_content_0", + "outputarea_id1" + ] + }, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\u003cdiv class=id_888646481 style=\"margin-right:10px; display:flex;align-items:center;\"\u003e\u003cspan style=\"margin-right: 3px;\"\u003e\u003c/span\u003e\u003c/div\u003e" + ], + "text/plain": [ + "\u003cIPython.core.display.HTML at 0x7f3f32414810\u003e" + ] + }, + "metadata": { + "tags": [ + "id1_content_0", + "outputarea_id1", + "user_output" + ] + }, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": [ + 
"window[\"ec96551e-4362-11e8-91ec-c8d3ffb5fbe0\"] = jQuery(\".id_888646481 span\");\n", + "//# sourceURL=js_efef96e882" + ], + "text/plain": [ + "\u003cIPython.core.display.Javascript at 0x7f3f31b55710\u003e" + ] + }, + "metadata": { + "tags": [ + "id1_content_0", + "outputarea_id1", + "user_output" + ] + }, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": [ + "window[\"ec96551f-4362-11e8-91ec-c8d3ffb5fbe0\"] = window[\"ec96551e-4362-11e8-91ec-c8d3ffb5fbe0\"].text(\"Give me a color name (or press 'enter' to exit): \");\n", + "//# sourceURL=js_6eca889864" + ], + "text/plain": [ + "\u003cIPython.core.display.Javascript at 0x7f3eca67f990\u003e" + ] + }, + "metadata": { + "tags": [ + "id1_content_0", + "outputarea_id1", + "user_output" + ] + }, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": [ + "window[\"ed8ea972-4362-11e8-91ec-c8d3ffb5fbe0\"] = jQuery(\".id_888646481 input\");\n", + "//# sourceURL=js_f02070cc60" + ], + "text/plain": [ + "\u003cIPython.core.display.Javascript at 0x7f3f31b553d0\u003e" + ] + }, + "metadata": { + "tags": [ + "id1_content_0", + "outputarea_id1", + "user_output" + ] + }, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": [ + "window[\"ed8ea973-4362-11e8-91ec-c8d3ffb5fbe0\"] = window[\"ed8ea972-4362-11e8-91ec-c8d3ffb5fbe0\"].remove();\n", + "//# sourceURL=js_ed9faba660" + ], + "text/plain": [ + "\u003cIPython.core.display.Javascript at 0x7f3f31a95450\u003e" + ] + }, + "metadata": { + "tags": [ + "id1_content_0", + "outputarea_id1", + "user_output" + ] + }, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": [ + "window[\"ed8ea974-4362-11e8-91ec-c8d3ffb5fbe0\"] = jQuery(\".id_888646481 span\");\n", + "//# sourceURL=js_f3458d7074" + ], + "text/plain": [ + "\u003cIPython.core.display.Javascript at 0x7f3f31a95250\u003e" + ] + }, + "metadata": { + "tags": [ + "id1_content_0", + "outputarea_id1", + "user_output" + ] + }, + 
"output_type": "display_data" + }, + { + "data": { + "application/javascript": [ + "window[\"ed8ea975-4362-11e8-91ec-c8d3ffb5fbe0\"] = window[\"ed8ea974-4362-11e8-91ec-c8d3ffb5fbe0\"].text(\"Give me a color name (or press 'enter' to exit): \");\n", + "//# sourceURL=js_3ffd97bd6f" + ], + "text/plain": [ + "\u003cIPython.core.display.Javascript at 0x7f3f31a953d0\u003e" + ] + }, + "metadata": { + "tags": [ + "id1_content_0", + "outputarea_id1", + "user_output" + ] + }, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": [ + "window[\"ed8ea976-4362-11e8-91ec-c8d3ffb5fbe0\"] = google.colab.output.setActiveOutputArea(window[\"ec96551a-4362-11e8-91ec-c8d3ffb5fbe0\"]);\n", + "//# sourceURL=js_7f73e8bcca" + ], + "text/plain": [ + "\u003cIPython.core.display.Javascript at 0x7f3f31b55710\u003e" + ] + }, + "metadata": { + "tags": [ + "id1_content_0", + "outputarea_id1" + ] + }, + "output_type": "display_data" + } + ], + "source": [ + "def predict_input_fn(color_name):\n", + " \"\"\"An input function for prediction.\"\"\"\n", + " _, chars, sequence_length = parse(color_name)\n", + "\n", + " # We create a batch of a single element.\n", + " features = {\n", + " 'chars': tf.expand_dims(chars, 0),\n", + " 'sequence_length': tf.expand_dims(sequence_length, 0)\n", + " }\n", + " return features, None\n", + "\n", + "\n", + "def draw_prediction(color_name, pred):\n", + " pred = pred * 255\n", + " pred = pred.astype(np.uint8)\n", + " plt.axis('off')\n", + " plt.imshow(pred)\n", + " plt.title(color_name)\n", + " plt.show()\n", + "\n", + "\n", + "def predict_with_estimator(color_name, regressor):\n", + " predictions = regressor.predict(\n", + " input_fn=lambda:predict_input_fn(color_name))\n", + " pred = next(predictions)\n", + " predictions.close()\n", + " pred = np.minimum(pred, 1.0)\n", + " pred = np.expand_dims(np.expand_dims(pred, 0), 0)\n", + "\n", + " draw_prediction(color_name, pred)\n", + "\n", + "tb = widgets.TabBar([\"RNN Colorbot\"])\n", + "while 
True:\n", + " with tb.output_to(0):\n", + " try:\n", + " color_name = six.moves.input(\"Give me a color name (or press 'enter' to exit): \")\n", + " except (EOFError, KeyboardInterrupt):\n", + " break\n", + " if not color_name:\n", + " break\n", + " with tb.output_to(0):\n", + " tb.clear_tab()\n", + " predict_with_estimator(color_name, regressor)\n", + " " + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "default_view": {}, + "last_runtime": { + "build_target": "", + "kind": "local" + }, + "name": "RNN Colorbot using Keras and Estimators", + "provenance": [ + { + "file_id": "1CtzefX39ffFibX_BqE6cRbT0UW_DdVKl", + "timestamp": 1523579810961 + }, + { + "file_id": "1DcfimonWU11tmyivKBGVrbpAl3BIOaRG", + "timestamp": 1523016192637 + }, + { + "file_id": "1wCZUh73zTNs1jzzYjqoxMIdaBWCdKJ2K", + "timestamp": 1522238054357 + }, + { + "file_id": "1_HpC-RrmIv4lNaqeoslUeWaX8zH5IXaJ", + "timestamp": 1521743157199 + }, + { + "file_id": "1mjO2fQ2F9hxpAzw2mnrrUkcgfb7xSGW-", + "timestamp": 1520522344607 + } + ], + "version": "0.3.2", + "views": {} + }, + "kernelspec": { + "display_name": "Python 2", + "name": "python2" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/tensorflow/contrib/autograph/impl/BUILD b/tensorflow/contrib/autograph/impl/BUILD index e468176da1724d8a7ce62647dc3c4b656c71affb..54424e26472b8466b8fe68ea848b5463c10224c9 100644 --- a/tensorflow/contrib/autograph/impl/BUILD +++ b/tensorflow/contrib/autograph/impl/BUILD @@ -26,6 +26,7 @@ py_library( visibility = ["//tensorflow:__subpackages__"], deps = [ "//tensorflow/contrib/autograph/converters", + "//tensorflow/contrib/autograph/operators", "//tensorflow/contrib/autograph/pyct", "//tensorflow/contrib/autograph/pyct/static_analysis", "//tensorflow/contrib/autograph/utils", @@ -38,6 +39,7 @@ py_test( name = "api_test", srcs = ["api_test.py"], srcs_version = "PY2AND3", + tags = ["no_windows"], deps = [ ":impl", "//tensorflow/contrib/autograph/utils", @@ -50,6 +52,7 @@ py_test( name = 
"conversion_test", srcs = ["conversion_test.py"], srcs_version = "PY2AND3", + tags = ["no_windows"], deps = [ ":impl", "//tensorflow/python:client_testlib", diff --git a/tensorflow/contrib/autograph/impl/api.py b/tensorflow/contrib/autograph/impl/api.py index dce994e50df60d8bd419f62207d77035beac9f5a..24f87b2c14da4a3523f1e580d4362cbd3679a2cd 100644 --- a/tensorflow/contrib/autograph/impl/api.py +++ b/tensorflow/contrib/autograph/impl/api.py @@ -49,7 +49,7 @@ def convert(recursive=False, verbose=False, arg_types=None): function is called. This means the parameter values are known at compilation. Args: - recursive: Whether to recusrively convert any functions that the decorator + recursive: Whether to recursively convert any functions that the decorator function may call. verbose: Whether to output the compiled code in the logs. arg_types: See to_graph. @@ -137,7 +137,7 @@ def converted_call(f, recursive, verbose, arg_types, *args, **kwargs): unknown_arg_value = object() # Sentinel for arguments of unknown value - if tf_inspect.isbuiltin(f): + if inspect_utils.isbuiltin(f): return builtins.dynamic_builtin(f, *args, **kwargs) if tf_inspect.isfunction(f) or tf_inspect.ismethod(f): @@ -156,7 +156,7 @@ def converted_call(f, recursive, verbose, arg_types, *args, **kwargs): # Constructors target_entity = f arg_map_target = f.__init__ - effective_args = (unknown_arg_value,) + args + effective_args = args partial_types = () elif hasattr(f, '__call__') and hasattr(f, '__class__'): @@ -215,7 +215,7 @@ def to_graph(e, Args: e: A Python entity. - recursive: Whether to recusrively convert any functions that the decorator + recursive: Whether to recursively convert any functions that the decorator function may call. verbose: Whether to output the compiled code in the logs. 
arg_values: A dict containing value hints for symbols like function @@ -235,22 +235,22 @@ def to_graph(e, nocompile_decorators=(convert, do_not_convert, converted_call), partial_types=partial_types, api_module=tf_inspect.getmodule(to_graph)) - _, name = conversion.entity_to_graph(e, conversion_map, arg_values, arg_types) + _, name, namespace = conversion.entity_to_graph(e, conversion_map, arg_values, + arg_types) module = gast.Module([]) for import_line in config.COMPILED_IMPORT_STATEMENTS: module.body.extend(parser.parse_str(import_line).body) - for dep in conversion_map.dependency_cache.values(): + for dep in reversed(conversion_map.dependency_cache.values()): module.body.append(dep) compiled_node, compiled_src = compiler.ast_to_object(module) - # The compiled code should see everything the entry function saw. + # The compiled code should see everything the entry entity saw. # TODO(mdan): This might not work well if the call tree spans modules? - if tf_inspect.isfunction(e): - for key, val in inspect_utils.getnamespace(e).items(): - # Avoid overwriting entities that have been transformed. - if key not in compiled_node.__dict__: - compiled_node.__dict__[key] = val + for key, val in namespace.items(): + # Avoid overwriting entities that have been transformed. 
+ if key not in compiled_node.__dict__: + compiled_node.__dict__[key] = val compiled_fn = getattr(compiled_node, name) if verbose: diff --git a/tensorflow/contrib/autograph/impl/api_test.py b/tensorflow/contrib/autograph/impl/api_test.py index ee2d301d7562ef5ba6bc7ca6d013b99dec78d4c3..a7737b7f448131b1c54951efa719b481e1f4d0c9 100644 --- a/tensorflow/contrib/autograph/impl/api_test.py +++ b/tensorflow/contrib/autograph/impl/api_test.py @@ -37,8 +37,10 @@ class ApiTest(test.TestCase): def setUp(self): config.COMPILED_IMPORT_STATEMENTS = ( 'from __future__ import print_function', - 'from tensorflow.contrib.autograph import utils as ' - 'autograph_utils', 'tf = autograph_utils.fake_tf()') + 'from tensorflow.contrib.autograph import utils' + ' as autograph_utils', + 'tf = autograph_utils.fake_tf()', + ) def test_decorator_recurses(self): @@ -175,6 +177,92 @@ class ApiTest(test.TestCase): constant_op.constant(-2)) self.assertListEqual([0, 1], sess.run(x).tolist()) + def test_converted_call_builtin(self): + x = api.converted_call(range, False, False, {}, 3) + self.assertEqual((0, 1, 2), tuple(x)) + + def test_converted_call_function(self): + + def test_fn(x): + if x < 0: + return -x + return x + + with self.test_session() as sess: + x = api.converted_call( + test_fn, False, False, {}, constant_op.constant(-1)) + self.assertEqual(1, sess.run(x)) + + def test_converted_call_method(self): + + class TestClass(object): + + def __init__(self, x): + self.x = x + + def test_method(self): + if self.x < 0: + return -self.x + return self.x + + with self.test_session() as sess: + tc = TestClass(constant_op.constant(-1)) + x = api.converted_call(tc.test_method, False, False, {}, tc) + self.assertEqual(1, sess.run(x)) + + def test_converted_call_method_by_class(self): + + class TestClass(object): + + def __init__(self, x): + self.x = x + + def test_method(self): + if self.x < 0: + return -self.x + return self.x + + with self.test_session() as sess: + tc = 
TestClass(constant_op.constant(-1)) + x = api.converted_call(TestClass.test_method, False, False, {}, tc) + self.assertEqual(1, sess.run(x)) + + def test_converted_call_callable_object(self): + + class TestClass(object): + + def __init__(self, x): + self.x = x + + def __call__(self): + if self.x < 0: + return -self.x + return self.x + + with self.test_session() as sess: + tc = TestClass(constant_op.constant(-1)) + x = api.converted_call(tc, False, False, {}) + self.assertEqual(1, sess.run(x)) + + def test_converted_call_constructor(self): + + class TestClass(object): + + def __init__(self, x): + self.x = x + + def test_method(self): + if self.x < 0: + return -self.x + return self.x + + with self.test_session() as sess: + tc = api.converted_call( + TestClass, False, False, {}, constant_op.constant(-1)) + # tc is now a converted object. + x = tc.test_method() + self.assertEqual(1, sess.run(x)) + def test_to_graph_basic(self): def test_fn(x, s): @@ -197,8 +285,7 @@ class ApiTest(test.TestCase): compiled_code = api.to_code(test_fn) - # Just check for some key words and that it is parseable Python code. - self.assertRegexpMatches(compiled_code, 'autograph_utils\\.run_while') + # Just check that it is parseable Python code. self.assertIsNotNone(parser.parse_str(compiled_code)) diff --git a/tensorflow/contrib/autograph/impl/config.py b/tensorflow/contrib/autograph/impl/config.py index 543c1486e657f4e7b16e5723cc294c09ebbcec00..2600088595a12761b1138c4649c06882bd8fd000 100644 --- a/tensorflow/contrib/autograph/impl/config.py +++ b/tensorflow/contrib/autograph/impl/config.py @@ -41,10 +41,9 @@ DEFAULT_UNCOMPILED_MODULES = set(( NO_SIDE_EFFECT_CONSTRUCTORS = set(('tensorflow',)) -# TODO(mdan): Also allow controlling the generated names (for testability). +# TODO(mdan): Also allow controlling the generated names. +# TODO(mdan); Consolidate all internal imports into a single __ag module. 
COMPILED_IMPORT_STATEMENTS = ( - 'from __future__ import print_function', 'import tensorflow as tf', - 'from tensorflow.contrib.autograph.impl import api as ' - 'autograph_api', - 'from tensorflow.contrib.autograph import utils as ' - 'autograph_utils') + 'from __future__ import print_function', + 'import tensorflow as tf', +) diff --git a/tensorflow/contrib/autograph/impl/conversion.py b/tensorflow/contrib/autograph/impl/conversion.py index 62a49cd92d835fb942f48354041cb0ab03d02c97..55a30dc127957b2a9caa053db843380c94bacfbf 100644 --- a/tensorflow/contrib/autograph/impl/conversion.py +++ b/tensorflow/contrib/autograph/impl/conversion.py @@ -18,8 +18,12 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import collections +import imp + import gast +from tensorflow.contrib.autograph import operators from tensorflow.contrib.autograph import utils from tensorflow.contrib.autograph.converters import asserts from tensorflow.contrib.autograph.converters import break_statements @@ -28,7 +32,6 @@ from tensorflow.contrib.autograph.converters import call_trees from tensorflow.contrib.autograph.converters import continue_statements from tensorflow.contrib.autograph.converters import control_flow from tensorflow.contrib.autograph.converters import decorators -from tensorflow.contrib.autograph.converters import for_loops from tensorflow.contrib.autograph.converters import ifexp from tensorflow.contrib.autograph.converters import lists from tensorflow.contrib.autograph.converters import logical_expressions @@ -37,6 +40,7 @@ from tensorflow.contrib.autograph.converters import side_effect_guards from tensorflow.contrib.autograph.converters import single_return from tensorflow.contrib.autograph.impl import config from tensorflow.contrib.autograph.impl import naming +from tensorflow.contrib.autograph.pyct import ast_util from tensorflow.contrib.autograph.pyct import context from tensorflow.contrib.autograph.pyct import 
inspect_utils from tensorflow.contrib.autograph.pyct import parser @@ -57,7 +61,7 @@ class ConversionMap(object): This object is mutable, and is updated as functions are converted. Attributes: - recursive: Whether to recusrively convert any functions that the decorator + recursive: Whether to recursively convert any functions that the decorator function may call. nocompile_decorators: tuple of decorator functions that toggle compilation off. @@ -79,7 +83,9 @@ class ConversionMap(object): self.recursive = recursive self.nocompile_decorators = nocompile_decorators self.partial_types = partial_types if partial_types else () - self.dependency_cache = {} + # Required to output dependencies in discovery order, which should match + # the reverse dependency order. + self.dependency_cache = collections.OrderedDict() self.additional_imports = set() self.name_map = {} self.api_module = api_module @@ -139,20 +145,31 @@ def entity_to_graph(o, conversion_map, arg_values, arg_types): parameters. Returns: - A tuple (ast, new_name): + A tuple (ast, new_name, namespace): * ast: An AST representing an entity with interface equivalent to `o`, but which when executed it creates TF a graph. * new_name: The symbol name under which the new entity can be found. + * namespace: A dict mapping all symbols visible to the converted entity, + keyed by their symbol name. Raises: ValueError: if the entity type is not supported. """ if tf_inspect.isclass(o): - node, new_name = class_to_graph(o, conversion_map) + node, name, ns = class_to_graph(o, conversion_map) elif tf_inspect.isfunction(o): - node, new_name = function_to_graph(o, conversion_map, arg_values, arg_types) + # TODO(mdan): This is not a reliable mechanism. 
+ # The most reliable way is to check the source code, the AST will contain + # a Lambda node instead of a FunctionDef + if o.__name__ == '': + raise NotImplementedError( + 'lambda functions are not yet supported; declare the function' + ' using def instead: %s' % o) + else: + node, name, ns = function_to_graph(o, conversion_map, arg_values, + arg_types) elif tf_inspect.ismethod(o): - node, new_name = function_to_graph(o, conversion_map, arg_values, arg_types) + node, name, ns = function_to_graph(o, conversion_map, arg_values, arg_types) else: raise ValueError( 'Entity "%s" has unsupported type "%s". Only functions and classes are ' @@ -175,7 +192,7 @@ def entity_to_graph(o, conversion_map, arg_values, arg_types): continue entity_to_graph(candidate, conversion_map, {}, {}) - return node, new_name + return node, name, ns def class_to_graph(c, conversion_map): @@ -186,46 +203,96 @@ def class_to_graph(c, conversion_map): if not members: raise ValueError('Cannot convert %s: it has no member methods.' % c) - class_namespace = None + class_namespace = {} for _, m in members: - node, _ = function_to_graph( + # Only convert the members that are directly defined by the class. + if inspect_utils.getdefiningclass(m, c) is not c: + continue + node, _, namespace = function_to_graph( m, conversion_map=conversion_map, arg_values={}, arg_types={'self': (c.__name__, c)}, owner_type=c) - # TODO(mdan): Do not assume all members have the same view of globals. if class_namespace is None: - class_namespace = inspect_utils.getnamespace(m) + class_namespace = namespace + else: + class_namespace.update(namespace) converted_members[m] = node namer = conversion_map.new_namer(class_namespace) class_name = namer.compiled_class_name(c.__name__, c) - node = gast.ClassDef( - class_name, - bases=[], - keywords=[], - body=list(converted_members.values()), - decorator_list=[]) - return node, class_name + # TODO(mdan): This needs to be explained more thoroughly. 
+ # Process any base classes: if the sueprclass if of a whitelisted type, an + # absolute import line is generated. Otherwise, it is marked for conversion + # (as a side effect of the call to namer.compiled_class_name() followed by + # conversion_map.update_name_map(namer)). + output_nodes = [] + renames = {} + bases = [] + for base in c.__bases__: + if isinstance(object, base): + bases.append('object') + continue + if is_whitelisted_for_graph(base): + alias = namer.new_symbol(base.__name__, ()) + output_nodes.append( + gast.ImportFrom( + module=base.__module__, + names=[gast.alias(name=base.__name__, asname=alias)], + level=0)) + else: + # This will trigger a conversion into a class with this name. + alias = namer.compiled_class_name(base.__name__, base) + bases.append(alias) + renames[qual_names.QN(base.__name__)] = qual_names.QN(alias) + conversion_map.update_name_map(namer) + # Generate the definition of the converted class. + output_nodes.append( + gast.ClassDef( + class_name, + bases=bases, + keywords=[], + body=list(converted_members.values()), + decorator_list=[])) + node = gast.Module(output_nodes) + + # Make a final pass to replace references to the class or its base classes. + # Most commonly, this occurs when making super().__init__() calls. + # TODO(mdan): Making direct references to superclass' superclass will fail. + node = qual_names.resolve(node) + renames[qual_names.QN(c.__name__)] = qual_names.QN(class_name) + node = ast_util.rename_symbols(node, renames) -def _add_self_references(namespace, api_module): - """Self refs are only required for analysis and are not used directly.""" - # Manually add the utils namespace which may be used from generated code. 
- if 'autograph_util' not in namespace: - namespace['autograph_utils'] = utils - elif namespace['autograph_utils'] != utils: - raise ValueError( - 'The module name "autograph_utils" is reserved and may not be used.') + return node, class_name, class_namespace - # We also make reference to the api module for dynamic conversion, but - # to avoid circular references we don't import it here. - if 'autograph_api' not in namespace: - namespace['autograph_api'] = api_module - elif namespace['autograph_api'] != api_module: - raise ValueError( - 'The module name "autograph_api" is reserved and may not be used.') + +def _add_reserved_symbol(namespace, name, entity): + if name not in namespace: + namespace[name] = entity + elif namespace[name] != entity: + raise ValueError('The name "%s" is reserved and may not be used.' % name) + + +ag_internal = None + + +def _add_self_references(namespace, api_module): + """Adds namespace references to the module that exposes the api itself.""" + global ag_internal + if ag_internal is None: + # Craft a module that exposes parts of the external API as well as certain + # internal modules. + ag_internal = imp.new_module('autograph') + ag_internal.converted_call = api_module.converted_call + ag_internal.utils = utils + # TODO(mdan): Add safeguards against name clashes. + # We don't want to create a submodule because we want the operators to be + # accessible as ag__. + ag_internal.__dict__.update(operators.__dict__) + + _add_reserved_symbol(namespace, 'ag__', ag_internal) def function_to_graph(f, conversion_map, arg_values, arg_types, @@ -262,7 +329,7 @@ def function_to_graph(f, conversion_map, arg_values, arg_types, # TODO(mdan): Use this at compilation. 
conversion_map.additional_imports.update(deps) - return node, new_name + return node, new_name, namespace def _static_analysis_pass(node, ctx): @@ -311,6 +378,8 @@ def node_to_graph(node, ctx, nocompile_decorators): node = ifexp.transform(node, ctx) node, deps = decorators.transform(node, nocompile_decorators) node = break_statements.transform(node, ctx) + node = _static_analysis_pass(node, ctx) + node = asserts.transform(node, ctx) # Note: sequencing continue canonicalization before for loop one avoids @@ -324,8 +393,6 @@ def node_to_graph(node, ctx, nocompile_decorators): node = _static_analysis_pass(node, ctx) node = lists.transform(node, ctx) - node = for_loops.transform(node, ctx) - # for_loops may insert new global references. node = builtin_functions.transform(node, ctx) node = _static_analysis_pass(node, ctx) diff --git a/tensorflow/contrib/autograph/impl/conversion_test.py b/tensorflow/contrib/autograph/impl/conversion_test.py index 7066739eb87f89ab98e906b10dab62baeaa2de8e..5edd8e74a8899a25fb51e2a4e133f3cb7933fa26 100644 --- a/tensorflow/contrib/autograph/impl/conversion_test.py +++ b/tensorflow/contrib/autograph/impl/conversion_test.py @@ -21,13 +21,18 @@ from __future__ import print_function import gast from tensorflow.contrib.autograph import utils +from tensorflow.contrib.autograph.impl import api from tensorflow.contrib.autograph.impl import conversion from tensorflow.python.framework import constant_op +from tensorflow.python.keras._impl.keras.engine import training from tensorflow.python.platform import test class ConversionTest(test.TestCase): + def _simple_conversion_map(self): + return conversion.ConversionMap(True, (), (), api) + def test_is_whitelisted_for_graph(self): def test_fn(): @@ -39,18 +44,19 @@ class ConversionTest(test.TestCase): def test_entity_to_graph_unsupported_types(self): with self.assertRaises(ValueError): - conversion_map = conversion.ConversionMap(True, (), (), None) + conversion_map = self._simple_conversion_map() 
conversion.entity_to_graph('dummy', conversion_map, None, None) def test_entity_to_graph_callable(self): - + b = 2 def f(a): - return a + return a + b - conversion_map = conversion.ConversionMap(True, (), (), None) - ast, new_name = conversion.entity_to_graph(f, conversion_map, None, None) + conversion_map = self._simple_conversion_map() + ast, name, ns = conversion.entity_to_graph(f, conversion_map, None, None) self.assertTrue(isinstance(ast, gast.FunctionDef), ast) - self.assertEqual('tf__f', new_name) + self.assertEqual('tf__f', name) + self.assertTrue(ns['b'] is b) def test_entity_to_graph_call_tree(self): @@ -60,7 +66,7 @@ class ConversionTest(test.TestCase): def f(a): return g(a) - conversion_map = conversion.ConversionMap(True, (), (), None) + conversion_map = self._simple_conversion_map() conversion.entity_to_graph(f, conversion_map, None, None) self.assertTrue(f in conversion_map.dependency_cache) @@ -73,6 +79,87 @@ class ConversionTest(test.TestCase): conversion_map.dependency_cache[f].body[0].body[0].value.func.id) self.assertEqual('tf__g', conversion_map.dependency_cache[g].name) + def test_entity_to_graph_class_hierarchy(self): + + class TestBase(object): + + def __init__(self, x='base'): + self.x = x + + def foo(self): + return self.x + + def bar(self): + return self.x + + class TestSubclass(TestBase): + + def __init__(self, y): + super(TestSubclass, self).__init__('sub') + self.y = y + + def foo(self): + return self.y + + def baz(self): + return self.y + + conversion_map = self._simple_conversion_map() + conversion.entity_to_graph(TestSubclass, conversion_map, None, None) + + self.assertTrue(TestBase in conversion_map.dependency_cache) + self.assertTrue(TestSubclass in conversion_map.dependency_cache) + self.assertEqual('TfTestBase', + conversion_map.dependency_cache[TestBase].body[-1].name) + self.assertEqual( + 'TfTestSubclass', + conversion_map.dependency_cache[TestSubclass].body[-1].name) + + def 
test_entity_to_graph_class_hierarchy_whitelisted(self): + + class TestSubclass(training.Model): + + def __init__(self, y): + super(TestSubclass, self).__init__() + self.built = False + + def call(self, x): + return 3 * x + + conversion_map = self._simple_conversion_map() + conversion.entity_to_graph(TestSubclass, conversion_map, None, None) + + self.assertTrue(TestSubclass in conversion_map.dependency_cache) + self.assertFalse(training.Model in conversion_map.dependency_cache) + self.assertEqual( + 'Model', + conversion_map.dependency_cache[TestSubclass].body[0].names[0].name) + self.assertEqual( + 'TfTestSubclass', + conversion_map.dependency_cache[TestSubclass].body[-1].name) + + def test_entity_to_graph_lambda(self): + f = lambda a: a + + with self.assertRaises(NotImplementedError): + conversion_map = self._simple_conversion_map() + conversion.entity_to_graph(f, conversion_map, None, None) + + def test_ag_module_cached(self): + def callee(): + return range(3) + + def caller(a): + return a() + + conversion_map = self._simple_conversion_map() + _, _, callee_ns = conversion.entity_to_graph( + callee, conversion_map, None, None) + _, _, caller_ns = conversion.entity_to_graph( + caller, conversion_map, None, None) + + self.assertTrue(callee_ns['ag__'] is caller_ns['ag__']) + if __name__ == '__main__': test.main() diff --git a/tensorflow/contrib/autograph/impl/naming.py b/tensorflow/contrib/autograph/impl/naming.py index 1facaa0ca0ebcc6d4281e7c92a462ceeb00b453a..b1d3f76be7763fada88fd0a1da9d3aa43b67ddfa 100644 --- a/tensorflow/contrib/autograph/impl/naming.py +++ b/tensorflow/contrib/autograph/impl/naming.py @@ -62,8 +62,6 @@ class Namer(object): n += 1 new_name = '%s_%d' % (new_name_root, n) - if live_entity is not None: - self.renamed_calls[live_entity] = new_name self.generated_names.add(new_name) if live_entity is not None: self.renamed_calls[live_entity] = new_name diff --git a/tensorflow/contrib/autograph/operators/BUILD 
b/tensorflow/contrib/autograph/operators/BUILD new file mode 100644 index 0000000000000000000000000000000000000000..efb8d441dd839bd34dcbc18f701c7993a2f03906 --- /dev/null +++ b/tensorflow/contrib/autograph/operators/BUILD @@ -0,0 +1,53 @@ +licenses(["notice"]) # Apache 2.0 + +exports_files(["LICENSE"]) + +load("//tensorflow:tensorflow.bzl", "py_test") + +filegroup( + name = "all_files", + srcs = glob( + ["**/*"], + exclude = [ + "**/METADATA", + "**/OWNERS", + ], + ), + visibility = ["//tensorflow:__subpackages__"], +) + +py_library( + name = "operators", + srcs = [ + "__init__.py", + "control_flow.py", + "data_structures.py", + ], + srcs_version = "PY2AND3", + visibility = ["//tensorflow:__subpackages__"], + deps = [ + "//tensorflow/contrib/autograph/utils", + "//tensorflow/python:tensor_array_ops", + "//tensorflow/python/data/ops:dataset_ops", + ], +) + +py_test( + name = "data_structures_test", + srcs = ["data_structures_test.py"], + srcs_version = "PY2AND3", + deps = [ + ":operators", + "//tensorflow/python:client_testlib", + ], +) + +py_test( + name = "control_flow_test", + srcs = ["control_flow_test.py"], + srcs_version = "PY2AND3", + deps = [ + ":operators", + "//tensorflow/python:client_testlib", + ], +) diff --git a/tensorflow/contrib/autograph/operators/__init__.py b/tensorflow/contrib/autograph/operators/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..04b4734551d3227a1c611d668f006a157c2c2dd3 --- /dev/null +++ b/tensorflow/contrib/autograph/operators/__init__.py @@ -0,0 +1,29 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""This module implements operators that we overload. + +Note that "operator" is used loosely here, and includes control structures like +conditionals and loops, implemented in functional form, using for example +closures for the body. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# TODO(mdan): Add a container for implementation-specific toggles (throughout). + +from tensorflow.contrib.autograph.operators.control_flow import for_loop +from tensorflow.contrib.autograph.operators.control_flow import while_loop diff --git a/tensorflow/contrib/autograph/operators/control_flow.py b/tensorflow/contrib/autograph/operators/control_flow.py new file mode 100644 index 0000000000000000000000000000000000000000..d9d8b0d593e5372942ca6423d10022f0f56d78ce --- /dev/null +++ b/tensorflow/contrib/autograph/operators/control_flow.py @@ -0,0 +1,216 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Control flow statements: loops, conditionals, etc.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.autograph.utils import builtins +from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_util +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import gen_math_ops + +# TODO(mdan): Rename _loop to _stmt to follow Python nomenclature. +# TODO(mdan): Rename arguments to match the AST names. + + +def for_loop(iterated, extra_cond, loop_body, init_state): + """Functional form of a for statement. + + The loop operates on a so-called state, which includes all symbols that are + variant across loop iterations, excluding the iterate. In what follows we + refer to state as either a tuple of entities that represent an actual state, + or a list of arguments of the corresponding types. + + Args: + iterated: The entity being iterated over. + extra_cond: Callable with the state as arguments, and boolean return type. + An additional loop condition. + loop_body: Callable with the iterate and the state as arguments, and + state as return type. The actual loop body. + init_state: Tuple containing the initial state. + + Returns: + Tuple containing the final state.
+ """ + if tensor_util.is_tensor(iterated): + return _known_len_for_loop(iterated, extra_cond, loop_body, init_state) + elif isinstance(iterated, dataset_ops.Dataset): + return _dataset_for_loop(iterated, extra_cond, loop_body, init_state) + else: + return _py_for_loop(iterated, extra_cond, loop_body, init_state) + + +def _py_for_loop(iterated, extra_cond, loop_body, init_state): + """Overload of for_loop that executes a Python for loop.""" + state = init_state + for iterate in iterated: + if not extra_cond(*state): + break + state = loop_body(iterate, *state) + + # TODO(mdan): Remove this special case. + if len(state) == 1: + return state[0] + return state + + +def _known_len_for_loop(iterated, extra_cond, loop_body, init_state): + """Overload of for_loop that iterates over objects that define a length.""" + n = builtins.dynamic_len(iterated) + + def while_body(iterate_index, *state): + iterate = iterated[iterate_index] + new_state = loop_body(iterate, *state) + return (iterate_index + 1,) + new_state + + def while_cond(iterate_index, *state): + return gen_math_ops.logical_and(iterate_index < n, extra_cond(*state)) + + results = while_loop( + while_cond, + while_body, + init_state=(0,) + init_state, + extra_deps=(iterated,), + opts=dict(maximum_iterations=n)) + # Dropping the iteration index because it's not syntactically visible. + results = results[1:] + + # TODO(mdan): Remove this special case. 
+ if len(results) == 1: + return results[0] + return results + + +def _dataset_for_loop(ds, extra_cond, loop_body, init_state): + """Overload of for_loop that iterates over TF Datasets.""" + # Because Datasets only expose get_next, in the style of Python iterators, + # we are forced to unpack the loop as: + # + # epoch_number, iterate = ds.get_next() + # while epoch_number < 1: + # + # epoch_number, iterate = ds.get_next() + epoch_numbers = dataset_ops.Dataset.range(2) + def tag_with(ds, tag): + return dataset_ops.Dataset.zip( + (dataset_ops.Dataset.from_tensors(tag).repeat(), ds)) + ds_with_epoch = epoch_numbers.flat_map(lambda i: tag_with(ds, i)) + + iterator = ds_with_epoch.make_initializable_iterator() + with ops.control_dependencies((iterator.initializer,)): + epoch_number, iterate = iterator.get_next() + + def while_body(epoch_number, iterate, *state): + new_state = loop_body(iterate, *state) + epoch_number, iterate = iterator.get_next() + return (epoch_number, iterate) + new_state + + def while_cond(epoch_number, iterate, *state): + del iterate + return gen_math_ops.logical_and(epoch_number < 1, extra_cond(*state)) + + results = while_loop( + while_cond, + while_body, + init_state=(epoch_number, iterate) + init_state, + extra_deps=()) + # Dropping the epoch number and iterate because they are not syntactically + # visible. + results = results[2:] + + # TODO(mdan): Remove this special case. + if len(results) == 1: + return results[0] + return results + + +def while_loop(loop_cond, loop_body, init_state, extra_deps, opts=None): + """Functional form of a while statement. + + The loop operates on a so-called state, which includes all symbols that are + variant across loop iterations. In what follows we refer to state as either + a tuple of entities that represent an actual state, or a list of arguments + of the corresponding types. + + Args: + loop_cond: Callable with the state as arguments, and boolean return type. + The loop condition.
+ loop_body: Callable with the state as arguments, and state as return type. + The actual loop body. + init_state: Tuple containing the initial state. + extra_deps: Tuple containing additional entities on which the loop may + depend, such as loop invariants referenced by loop_cond. Used + exclusively for dispatch control. + opts: Optional dict of extra loop parameters. + + Returns: + Tuple containing the final state. + """ + # TODO(mdan): Consider adding a generic mechanism for dynamic dispatch. + # That could be something as simple as a collection of dispatch rules, with + # some prioritization. + if any(tensor_util.is_tensor(v) for v in init_state + extra_deps): + return _tf_while_loop(loop_cond, loop_body, init_state, opts) + else: + return _py_while_loop(loop_cond, loop_body, init_state, opts) + + +def _tf_while_loop(loop_cond, loop_body, init_state, opts): + """Overload of while_loop that stages a TF while_loop.""" + if opts is None: + opts = {} + return control_flow_ops.while_loop(loop_cond, loop_body, init_state, **opts) + + +def _py_while_loop(loop_cond, loop_body, init_state, opts): + """Overload of while_loop that executes a Python while loop.""" + del opts + state = init_state + while loop_cond(*state): + state = loop_body(*state) + return state + + +def if_stmt(cond, body, orelse): + """Functional form of an if statement. + + Args: + cond: Boolean. + body: Callable with no arguments, and outputs of the positive (if) branch + as return type. + orelse: Callable with no arguments, and outputs of the negative (else) + branch as return type. + + Returns: + Tuple containing the statement outputs.
+ """ + if tensor_util.is_tensor(cond): + return _tf_if_stmt(cond, body, orelse) + else: + return _py_if_stmt(cond, body, orelse) + + +def _tf_if_stmt(cond, body, orelse): + """Overload of if_stmt that stages a TF cond.""" + return control_flow_ops.cond(cond, body, orelse) + + +def _py_if_stmt(cond, body, orelse): + """Overload of if_stmt that executes a Python if statement.""" + return body() if cond else orelse() diff --git a/tensorflow/contrib/autograph/operators/control_flow_test.py b/tensorflow/contrib/autograph/operators/control_flow_test.py new file mode 100644 index 0000000000000000000000000000000000000000..a0cd0bfa82bb052d55dfe30f8700fc33a794a59f --- /dev/null +++ b/tensorflow/contrib/autograph/operators/control_flow_test.py @@ -0,0 +1,99 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for control_flow module.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.autograph.operators import control_flow +from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.ops import math_ops +from tensorflow.python.platform import test + + +class ForLoopTest(test.TestCase): + + def test_tensor(self): + s = control_flow.for_loop( + constant_op.constant([1, 2, 3, 4]), + extra_cond=lambda s: True, + loop_body=lambda i, s: (s + i,), + init_state=(0,)) + with self.test_session() as sess: + self.assertEqual((10,), sess.run(s)) + + def test_python(self): + s = control_flow.for_loop( + range(5), + extra_cond=lambda s: True, + loop_body=lambda i, s: (s + i,), + init_state=(0,)) + self.assertEqual(10, s) + + def test_dataset(self): + to_int32 = lambda i: math_ops.cast(i, dtypes.int32) + s = control_flow.for_loop( + dataset_ops.Dataset.range(5).map(to_int32), + extra_cond=lambda s: True, + loop_body=lambda i, s: (s + i,), + init_state=(0,)) + with self.test_session() as sess: + self.assertEqual((10,), sess.run(s)) + + +class WhileLoopTest(test.TestCase): + + def test_tensor(self): + n = constant_op.constant(5) + results = control_flow.while_loop( + loop_cond=lambda i, s: i < n, + loop_body=lambda i, s: (i + 1, s + i,), + init_state=(0, 0), + extra_deps=(n,)) + with self.test_session() as sess: + self.assertEqual((5, 10), sess.run(results)) + + def test_python(self): + n = 5 + results = control_flow.while_loop( + loop_cond=lambda i, s: i < n, + loop_body=lambda i, s: (i + 1, s + i), + init_state=(0, 0), + extra_deps=(n,)) + self.assertEqual((5, 10), results) + + +class IfStmtTest(test.TestCase): + + def test_tensor(self): + def test_if_stmt(cond): + return 
control_flow.if_stmt( + cond=cond, + body=lambda: 1, + orelse=lambda: -1) + with self.test_session() as sess: + self.assertEqual(1, sess.run(test_if_stmt(constant_op.constant(True)))) + self.assertEqual(-1, sess.run(test_if_stmt(constant_op.constant(False)))) + + def test_python(self): + self.assertEqual(1, control_flow.if_stmt(True, lambda: 1, lambda: -1)) + self.assertEqual(-1, control_flow.if_stmt(False, lambda: 1, lambda: -1)) + + +if __name__ == '__main__': + test.main() diff --git a/tensorflow/contrib/autograph/operators/data_structures.py b/tensorflow/contrib/autograph/operators/data_structures.py new file mode 100644 index 0000000000000000000000000000000000000000..c862306baa9e8114a71a26323ddcbd35c8592c55 --- /dev/null +++ b/tensorflow/contrib/autograph/operators/data_structures.py @@ -0,0 +1,56 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Operators specific to data structures: list append, subscripts, etc.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.ops import tensor_array_ops + +# TODO(mdan): Add support for TensorList once functional. +# TODO(mdan): Add primitives for empty list, list with elements. + + +def append(target, element): + """The list append function. 
+ + Note: it is unspecified whether target will be mutated. If target is + a TensorFlow entity, it will typically not be mutated. If target is a plain + list, it will be. In general, if the target is mutated then the return value + should point to the original entity. + + Args: + target: An entity that supports append semantics. + element: The element to append. + + Returns: + Same as target, after the append was performed. + """ + if isinstance(target, tensor_array_ops.TensorArray): + return _tf_tensorarray_append(target, element) + else: + return _py_append(target, element) + + +def _tf_tensorarray_append(target, element): + """Overload of append that stages a TensorArray write at the last position.""" + return target.write(target.size(), element) + + +def _py_append(target, element): + """Overload of append that executes a Python list append.""" + target.append(element) + return target diff --git a/tensorflow/contrib/autograph/operators/data_structures_test.py b/tensorflow/contrib/autograph/operators/data_structures_test.py new file mode 100644 index 0000000000000000000000000000000000000000..577d28c34da39f1216669513c29a00ac07bec126 --- /dev/null +++ b/tensorflow/contrib/autograph/operators/data_structures_test.py @@ -0,0 +1,44 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+# ============================================================================== +"""Tests for data_structures module.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.autograph.operators import data_structures +from tensorflow.python.framework import dtypes +from tensorflow.python.ops import tensor_array_ops +from tensorflow.python.platform import test + + +class AppendTest(test.TestCase): + + def test_tf_tensorarray(self): + l = tensor_array_ops.TensorArray(dtypes.int32, size=0, dynamic_size=True) + l1 = data_structures.append(l, 1) + l2 = data_structures.append(l1, 2) + with self.test_session() as sess: + self.assertAllEqual(sess.run(l1.stack()), [1]) + self.assertAllEqual(sess.run(l2.stack()), [1, 2]) + + def test_python(self): + l = [] + self.assertAllEqual(data_structures.append(l, 1), [1]) + self.assertAllEqual(data_structures.append(l, 2), [1, 2]) + + +if __name__ == '__main__': + test.main() diff --git a/tensorflow/contrib/autograph/pyct/BUILD b/tensorflow/contrib/autograph/pyct/BUILD index edec5f7712d08247437c9e95d743e59dafffcd7b..796ab445c74128e1123e24b67c288e0e3c5ca24c 100644 --- a/tensorflow/contrib/autograph/pyct/BUILD +++ b/tensorflow/contrib/autograph/pyct/BUILD @@ -66,6 +66,7 @@ py_test( name = "compiler_test", srcs = ["compiler_test.py"], srcs_version = "PY2AND3", + tags = ["no_windows"], deps = [ ":pyct", "//tensorflow/python:client_testlib", @@ -124,3 +125,14 @@ py_test( "@gast_archive//:gast", ], ) + +py_test( + name = "transformer_test", + srcs = ["transformer_test.py"], + srcs_version = "PY2AND3", + deps = [ + ":pyct", + "//tensorflow/python:client_testlib", + "@gast_archive//:gast", + ], +) diff --git a/tensorflow/contrib/autograph/pyct/ast_util.py b/tensorflow/contrib/autograph/pyct/ast_util.py index 4f76a695228f7d84b80b2e4b03801e15e94b8f11..c4f82d11708393a6029d3f17be428b47eb9342ff 100644 --- a/tensorflow/contrib/autograph/pyct/ast_util.py +++ 
b/tensorflow/contrib/autograph/pyct/ast_util.py @@ -23,12 +23,13 @@ import ast import gast from tensorflow.contrib.autograph.pyct import anno +from tensorflow.contrib.autograph.pyct import parser class CleanCopier(gast.NodeVisitor): - """Copy AST nodes. + """Copies AST nodes. - The copied nodes will ignore almost all fields that prefixed by '__'. + The copied nodes will ignore almost all fields that are prefixed by '__'. Exceptions make some annotations. """ @@ -106,3 +107,79 @@ def keywords_to_dict(keywords): keys.append(gast.Str(kw.arg)) values.append(kw.value) return gast.Dict(keys=keys, values=values) + + +class PatternMatcher(gast.NodeVisitor): + """Matches a node against a pattern represented by a node. + + The pattern may contain wildcards represented by the symbol '_'. + """ + + def __init__(self, pattern): + self.pattern = pattern + self.pattern_stack = [] + self.matches = True + + def compare_and_visit(self, node, pattern): + self.pattern_stack.append(self.pattern) + self.pattern = pattern + self.generic_visit(node) + self.pattern = self.pattern_stack.pop() + + def no_match(self): + self.matches = False + return False + + def is_wildcard(self, p): + if isinstance(p, (list, tuple)) and len(p) == 1: + p, = p + if isinstance(p, gast.Name) and p.id == '_': + return True + if p == '_': + return True + return False + + def generic_visit(self, node): + if not self.matches: + return + + pattern = self.pattern + for f in node._fields: + if f.startswith('__'): + continue + + if not hasattr(node, f): + if hasattr(pattern, f) and getattr(pattern, f): + return self.no_match() + else: + continue + if not hasattr(pattern, f): + return self.no_match() + + v = getattr(node, f) + p = getattr(pattern, f) + + if self.is_wildcard(p): + continue + if isinstance(v, (list, tuple)): + if not isinstance(p, (list, tuple)) or len(v) != len(p): + return self.no_match() + for v_item, p_item in zip(v, p): + self.compare_and_visit(v_item, p_item) + elif isinstance(v, (gast.AST, 
ast.AST)): + if not isinstance(v, type(p)) and not isinstance(p, type(v)): + return self.no_match() + self.compare_and_visit(v, p) + else: + # Assume everything else is a value type. + if v != p: + return self.no_match() + + +def matches(node, pattern): + if isinstance(pattern, str): + pattern = parser.parse_expression(pattern) + matcher = PatternMatcher(pattern) + matcher.visit(node) + return matcher.matches + diff --git a/tensorflow/contrib/autograph/pyct/ast_util_test.py b/tensorflow/contrib/autograph/pyct/ast_util_test.py index 8faf92c705d997db298dbb1115981fd9da26372d..3afa04a50685d19c90944c14ed39f9d3ad35e486 100644 --- a/tensorflow/contrib/autograph/pyct/ast_util_test.py +++ b/tensorflow/contrib/autograph/pyct/ast_util_test.py @@ -85,7 +85,33 @@ class AstUtilTest(test.TestCase): output.body += (ast.Assign([ast.Name(id='d', ctx=ast.Store())], d),) result, _ = compiler.ast_to_object(output) self.assertDictEqual(result.d, {'a': 3, 'c': 1, 'd': 'e'}) - print(d) + + def assertMatch(self, target_str, pattern_str): + node = parser.parse_expression(target_str) + pattern = parser.parse_expression(pattern_str) + self.assertTrue(ast_util.matches(node, pattern)) + + def assertNoMatch(self, target_str, pattern_str): + node = parser.parse_expression(target_str) + pattern = parser.parse_expression(pattern_str) + self.assertFalse(ast_util.matches(node, pattern)) + + def test_matches_symbols(self): + self.assertMatch('foo', '_') + self.assertNoMatch('foo()', '_') + self.assertMatch('foo + bar', 'foo + _') + self.assertNoMatch('bar + bar', 'foo + _') + self.assertNoMatch('foo - bar', 'foo + _') + + def test_matches_function_args(self): + self.assertMatch('super(Foo, self).__init__(arg1, arg2)', + 'super(_).__init__(_)') + self.assertMatch('super().__init__()', 'super(_).__init__(_)') + self.assertNoMatch('super(Foo, self).bar(arg1, arg2)', + 'super(_).__init__(_)') + self.assertMatch('super(Foo, self).__init__()', 'super(Foo, _).__init__(_)') + self.assertNoMatch('super(Foo, 
self).__init__()', + 'super(Bar, _).__init__(_)') if __name__ == '__main__': diff --git a/tensorflow/contrib/autograph/pyct/inspect_utils.py b/tensorflow/contrib/autograph/pyct/inspect_utils.py index d19c6ed75e0f0651781d6e1ed80f7be11fb8a5a4..eef74599a7d5415b4b05d2f05fb094b1dcd33323 100644 --- a/tensorflow/contrib/autograph/pyct/inspect_utils.py +++ b/tensorflow/contrib/autograph/pyct/inspect_utils.py @@ -22,12 +22,25 @@ from __future__ import division from __future__ import print_function import itertools +import types import six from tensorflow.python.util import tf_inspect +def isbuiltin(f): + # Note these return false for isinstance(f, types.BuiltinFunctionType) so we + # need to specifically check for them. + if f in (range, int, float): + return True + if isinstance(f, types.BuiltinFunctionType): + return True + if tf_inspect.isbuiltin(f): + return True + return False + + def getnamespace(f): """Returns the complete namespace of a function. @@ -50,6 +63,29 @@ def getnamespace(f): return namespace +def _get_unbound_function(m): + # TODO(mdan): Figure out why six.get_unbound_function fails in some cases. + # The failure case is for tf.keras.Model. + if hasattr(m, 'im_func'): + return m.im_func + return m + + +def getdefiningclass(m, owner_class): + """Resolves the class (e.g. one of the superclasses) that defined a method.""" + # Normalize bound functions to their respective unbound versions. + m = _get_unbound_function(m) + for superclass in owner_class.__bases__: + if hasattr(superclass, m.__name__): + superclass_m = getattr(superclass, m.__name__) + if _get_unbound_function(superclass_m) is m: + return superclass + elif hasattr(m, '__self__') and m.__self__ == owner_class: + # Python 3 class methods only work this way it seems :S + return superclass + return owner_class + + def getmethodclass(m): """Resolves a function's owner, e.g. a method's class. 
@@ -74,6 +110,12 @@ def getmethodclass(m): ValueError: if the class could not be resolved for any unexpected reason. """ + # Callable objects: return their own class. + if (not hasattr(m, '__name__') and hasattr(m, '__class__') and + hasattr(m, '__call__')): + if isinstance(m.__class__, six.class_types): + return m.__class__ + # Instance method and class methods: should be bound to a non-null "self". # If self is a class, then it's a class method. if hasattr(m, '__self__'): diff --git a/tensorflow/contrib/autograph/pyct/inspect_utils_test.py b/tensorflow/contrib/autograph/pyct/inspect_utils_test.py index ddca6f963b8abadd621c544a79935c69326bf65e..1a212f676a616307b41feafafda9d1d794ba3d2d 100644 --- a/tensorflow/contrib/autograph/pyct/inspect_utils_test.py +++ b/tensorflow/contrib/autograph/pyct/inspect_utils_test.py @@ -225,6 +225,53 @@ class InspectUtilsTest(test.TestCase): inspect_utils.getmethodclass(test_obj.wrap_decorated_member), LocalClass) + def test_getmethodclass_callables(self): + class TestCallable(object): + + def __call__(self): + pass + + c = TestCallable() + self.assertEqual(inspect_utils.getmethodclass(c), TestCallable) + + def test_getdefiningclass(self): + class Superclass(object): + + def foo(self): + pass + + def bar(self): + pass + + @classmethod + def class_method(cls): + pass + + class Subclass(Superclass): + + def foo(self): + pass + + def baz(self): + pass + + self.assertTrue( + inspect_utils.getdefiningclass(Subclass.foo, Subclass) is Subclass) + self.assertTrue( + inspect_utils.getdefiningclass(Subclass.bar, Subclass) is Superclass) + self.assertTrue( + inspect_utils.getdefiningclass(Subclass.baz, Subclass) is Subclass) + self.assertTrue( + inspect_utils.getdefiningclass(Subclass.class_method, Subclass) is + Superclass) + + def test_isbuiltin(self): + self.assertTrue(inspect_utils.isbuiltin(range)) + self.assertTrue(inspect_utils.isbuiltin(float)) + self.assertTrue(inspect_utils.isbuiltin(int)) + 
self.assertTrue(inspect_utils.isbuiltin(len)) + self.assertFalse(inspect_utils.isbuiltin(function_decorator)) + if __name__ == '__main__': test.main() diff --git a/tensorflow/contrib/autograph/pyct/qual_names.py b/tensorflow/contrib/autograph/pyct/qual_names.py index 4d5764a974aac542ddf4a54a9acd36f1afcb0464..583cf7ecd7bce31c55de58361ab5295abb5d6707 100644 --- a/tensorflow/contrib/autograph/pyct/qual_names.py +++ b/tensorflow/contrib/autograph/pyct/qual_names.py @@ -112,6 +112,29 @@ class QN(object): raise ValueError('Cannot get parent of simple name "%s".' % self.qn[0]) return self._parent + @property + def support_set(self): + """Returns the set of simple symbols that this QN relies on. + + This would be the smallest set of symbols necessary for the QN to + statically resolve (assuming properties and index ranges are verified + at runtime). + + Examples: + 'a.b' has only one support symbol, 'a' + 'a[i]' has two roots, 'a' and 'i' + """ + # TODO(mdan): This might be the set of Name nodes in the AST. Track those? 
+ roots = set() + if self.has_attr(): + roots.update(self.parent.support_set) + elif self.has_subscript(): + roots.update(self.parent.support_set) + roots.update(self.qn[1].support_set) + else: + roots.add(self) + return roots + def __hash__(self): return hash(self.qn + (self._has_attr, self._has_subscript)) diff --git a/tensorflow/contrib/autograph/pyct/qual_names_test.py b/tensorflow/contrib/autograph/pyct/qual_names_test.py index 103bd25aa380e9f61ecea9c5298f34df5157d629..264afd508cdb847315c486806b531dc1483ef622 100644 --- a/tensorflow/contrib/autograph/pyct/qual_names_test.py +++ b/tensorflow/contrib/autograph/pyct/qual_names_test.py @@ -154,6 +154,21 @@ class QNTest(test.TestCase): a_sub_three = QN(a, subscript=QN(qual_names.NumberLiteral(3))) self.assertEqual(a_sub_three.ast().slice.value.n, 3) + def test_support_set(self): + a = QN('a') + b = QN('b') + c = QN('c') + a_sub_b = QN(a, subscript=b) + a_dot_b = QN(a, attr='b') + a_dot_b_dot_c = QN(a_dot_b, attr='c') + a_dot_b_sub_c = QN(a_dot_b, subscript=c) + + self.assertSetEqual(a.support_set, set((a,))) + self.assertSetEqual(a_sub_b.support_set, set((a, b))) + self.assertSetEqual(a_dot_b.support_set, set((a,))) + self.assertSetEqual(a_dot_b_dot_c.support_set, set((a,))) + self.assertSetEqual(a_dot_b_sub_c.support_set, set((a, c))) + class QNResolverTest(test.TestCase): diff --git a/tensorflow/contrib/autograph/pyct/static_analysis/BUILD b/tensorflow/contrib/autograph/pyct/static_analysis/BUILD index d192bc7aabf6ea36d616ff6f2cef60fddd5973b4..83f3bafc4217649db6499566d548c1657428ad0b 100644 --- a/tensorflow/contrib/autograph/pyct/static_analysis/BUILD +++ b/tensorflow/contrib/autograph/pyct/static_analysis/BUILD @@ -34,6 +34,7 @@ py_test( name = "activity_test", srcs = ["activity_test.py"], srcs_version = "PY2AND3", + tags = ["no_windows"], deps = [ ":static_analysis", "//tensorflow/contrib/autograph/pyct", @@ -46,6 +47,7 @@ py_test( name = "live_values_test", srcs = ["live_values_test.py"], srcs_version = 
"PY2AND3", + tags = ["no_windows"], deps = [ ":static_analysis", "//tensorflow/contrib/autograph/pyct", diff --git a/tensorflow/contrib/autograph/pyct/static_analysis/activity.py b/tensorflow/contrib/autograph/pyct/static_analysis/activity.py index da6a2f6f0500ebba41b85d06dcc912aae9d68f97..2c14c2c8c23810c64446eb9e7ffc5402ce9a2298 100644 --- a/tensorflow/contrib/autograph/pyct/static_analysis/activity.py +++ b/tensorflow/contrib/autograph/pyct/static_analysis/activity.py @@ -133,18 +133,18 @@ class Scope(object): def mark_param(self, name): self.params.add(name) - def mark_creation(self, name): + def mark_creation(self, name, writes_create_symbol=False): if name.is_composite(): parent = name.parent if self.has(parent): - # This is considered mutation of the parent, not creation. - # TODO(mdan): Is that really so? - return + if not writes_create_symbol: + return else: raise ValueError('Unknown symbol "%s".' % parent) self.created.add(name) def mark_write(self, name): + """Marks the given symbol as modified in the current scope.""" self.modified.add(name) if self.isolated: self.mark_creation(name) @@ -162,23 +162,45 @@ class Scope(object): self.parent.mark_returned(name) -class ActivityAnalizer(transformer.Base): +class ActivityAnalyzer(transformer.Base): """Annotates nodes with local scope information. See Scope.""" def __init__(self, context, parent_scope): - super(ActivityAnalizer, self).__init__(context) + super(ActivityAnalyzer, self).__init__(context) self.scope = Scope(parent_scope) self._in_return_statement = False - def _track_symbol(self, node): - # This can happen when we have an attribute (or subscript) on a function - # call. 
Example: a().b + @property + def _in_constructor(self): + innermost = self.enclosing_entities[-1] + if len(self.enclosing_entities) > 1: + parent = self.enclosing_entities[-2] + return isinstance(parent, gast.ClassDef) and innermost.name == '__init__' + return False + + def _node_sets_self_attribute(self, node): + if anno.hasanno(node, anno.Basic.QN): + qn = anno.getanno(node, anno.Basic.QN) + # TODO(mdan): The 'self' argument is not guaranteed to be called 'self'. + if qn.has_attr and qn.parent.qn == ('self',): + return True + + def _track_symbol(self, + node, + composite_writes_alter_parent=False, + writes_create_symbol=False): + # A QN may be missing when we have an attribute (or subscript) on a function + # call. Example: a().b if not anno.hasanno(node, anno.Basic.QN): return qn = anno.getanno(node, anno.Basic.QN) if isinstance(node.ctx, gast.Store): self.scope.mark_write(qn) + if qn.is_composite and composite_writes_alter_parent: + self.scope.mark_write(qn.parent) + if writes_create_symbol: + self.scope.mark_creation(qn, writes_create_symbol=True) elif isinstance(node.ctx, gast.Load): self.scope.mark_read(qn) elif isinstance(node.ctx, gast.Param): @@ -207,7 +229,18 @@ class ActivityAnalizer(transformer.Base): def visit_Attribute(self, node): self.generic_visit(node) - self._track_symbol(node) + if self._in_constructor and self._node_sets_self_attribute(node): + self._track_symbol( + node, composite_writes_alter_parent=True, writes_create_symbol=True) + else: + self._track_symbol(node) + return node + + def visit_Subscript(self, node): + self.generic_visit(node) + # Subscript writes (e.g. a[b] = "value") are considered to modify + # both the element itself (a[b]) and its parent (a). 
+ self._track_symbol(node, composite_writes_alter_parent=True) return node def visit_Print(self, node): @@ -265,10 +298,10 @@ class ActivityAnalizer(transformer.Base): qn = QN(node.name) self.scope.mark_write(qn) current_scope = self.scope - fndef_scope = Scope(current_scope, isolated=True) - self.scope = fndef_scope + body_scope = Scope(current_scope, isolated=True) + self.scope = body_scope self.generic_visit(node) - anno.setanno(node, NodeAnno.BODY_SCOPE, fndef_scope) + anno.setanno(node, NodeAnno.BODY_SCOPE, body_scope) self.scope = current_scope return node @@ -282,7 +315,13 @@ class ActivityAnalizer(transformer.Base): return node def visit_If(self, node): + current_scope = self.scope + cond_scope = Scope(current_scope, isolated=False) + self.scope = cond_scope self.visit(node.test) + anno.setanno(node, NodeAnno.COND_SCOPE, cond_scope) + self.scope = current_scope + node = self._process_parallel_blocks(node, ((node.body, NodeAnno.BODY_SCOPE), (node.orelse, NodeAnno.ORELSE_SCOPE))) @@ -297,7 +336,13 @@ class ActivityAnalizer(transformer.Base): return node def visit_While(self, node): + current_scope = self.scope + cond_scope = Scope(current_scope, isolated=False) + self.scope = cond_scope self.visit(node.test) + anno.setanno(node, NodeAnno.COND_SCOPE, cond_scope) + self.scope = current_scope + node = self._process_parallel_blocks(node, ((node.body, NodeAnno.BODY_SCOPE), (node.orelse, NodeAnno.ORELSE_SCOPE))) @@ -311,4 +356,4 @@ class ActivityAnalizer(transformer.Base): def resolve(node, context, parent_scope=None): - return ActivityAnalizer(context, parent_scope).visit(node) + return ActivityAnalyzer(context, parent_scope).visit(node) diff --git a/tensorflow/contrib/autograph/pyct/static_analysis/activity_test.py b/tensorflow/contrib/autograph/pyct/static_analysis/activity_test.py index 37c28872bb9fc4f0c6f95eec8145101b7a6c83de..ef79a295bfa3940705d2f341edd4eda74d7d7068 100644 --- a/tensorflow/contrib/autograph/pyct/static_analysis/activity_test.py +++ 
b/tensorflow/contrib/autograph/pyct/static_analysis/activity_test.py @@ -108,7 +108,7 @@ class ScopeTest(test.TestCase): self.assertFalse(QN('a') in child.referenced) -class ActivityAnalizerTest(test.TestCase): +class ActivityAnalyzerTest(test.TestCase): def _parse_and_analyze(self, test_fn): node, source = parser.parse_entity(test_fn) @@ -144,10 +144,21 @@ class ActivityAnalizerTest(test.TestCase): anno.getanno(node.body[0].body[2].value, NodeAnno.IS_LOCAL)) # b in return b + def assertSymbolSetsAre(self, expected, actual, name): + expected = set(expected) + actual = set(str(s) for s in actual) + self.assertSetEqual( + expected, actual, 'for symbol set: %s\n' + ' Expected: %s\n' + ' Got: %s\n' + ' Missing: %s\n' + ' Extra: %s\n' % (name.upper(), expected, actual, + expected - actual, actual - expected)) + def assertScopeIsRmc(self, scope, used, modified, created): - self.assertItemsEqual(used, tuple(str(s) for s in scope.used)) - self.assertItemsEqual(modified, tuple(str(s) for s in scope.modified)) - self.assertItemsEqual(created, tuple(str(s) for s in scope.created)) + self.assertSymbolSetsAre(used, scope.used, 'read') + self.assertSymbolSetsAre(modified, scope.modified, 'modified') + self.assertSymbolSetsAre(created, scope.created, 'created') def test_print_statement(self): @@ -172,7 +183,7 @@ class ActivityAnalizerTest(test.TestCase): # arguments. 
self.assertScopeIsRmc(print_args_scope, ('a', 'b'), (), ()) - def test_call(self): + def test_call_args(self): def test_fn(a): b = 0 @@ -187,6 +198,57 @@ class ActivityAnalizerTest(test.TestCase): self.assertScopeIsRmc( anno.getanno(call_node, NodeAnno.ARGS_SCOPE), ('a', 'b'), (), ()) + def test_call_args_attributes(self): + + def foo(*_): + pass + + def test_fn(a): + a.c = 0 + foo(a.b, a.c) + return a.d + + node = self._parse_and_analyze(test_fn) + call_node = node.body[0].body[1].value + self.assertScopeIsRmc( + anno.getanno(call_node, NodeAnno.ARGS_SCOPE), + ('a', 'a.b', 'a.c'), + (), + (), + ) + self.assertScopeIsRmc( + anno.getanno(call_node, NodeAnno.ARGS_SCOPE).parent, + ('a', 'a.b', 'a.c', 'a.d', 'foo'), + ('a.c',), + ('a',), + ) + + def test_call_args_subscripts(self): + + def foo(*_): + pass + + def test_fn(a): + b = 1 + c = 2 + foo(a[0], a[b]) + return a[c] + + node = self._parse_and_analyze(test_fn) + call_node = node.body[0].body[2].value + self.assertScopeIsRmc( + anno.getanno(call_node, NodeAnno.ARGS_SCOPE), + ('a', 'a[0]', 'a[b]', 'b'), + (), + (), + ) + self.assertScopeIsRmc( + anno.getanno(call_node, NodeAnno.ARGS_SCOPE).parent, + ('a', 'a[0]', 'a[b]', 'a[c]', 'b', 'c', 'foo'), + ('b', 'c'), + ('a', 'b', 'c'), + ) + def test_while(self): def test_fn(a): @@ -204,6 +266,8 @@ class ActivityAnalizerTest(test.TestCase): self.assertScopeIsRmc( anno.getanno(while_node, NodeAnno.BODY_SCOPE).parent, ('a', 'b', 'c'), ('b', 'c'), ('a', 'b', 'c')) + self.assertScopeIsRmc( + anno.getanno(while_node, NodeAnno.COND_SCOPE), ('b',), (), ()) def test_for(self): @@ -251,7 +315,72 @@ class ActivityAnalizerTest(test.TestCase): anno.getanno(if_node, NodeAnno.ORELSE_SCOPE).parent, ('x', 'z', 'u'), ('x', 'y', 'z', 'u'), ('x', 'y', 'z', 'u')) - def test_nested_if_else_creation(self): + def test_if_attributes(self): + + def test_fn(a): + if a > 0: + a.b = -a.c + d = 2 * a + else: + a.b = a.c + d = 1 + return d + + node = self._parse_and_analyze(test_fn) + if_node = 
node.body[0].body[0] + self.assertScopeIsRmc( + anno.getanno(if_node, NodeAnno.BODY_SCOPE), + ('a', 'a.c'), + ('a.b', 'd'), + ('d',), + ) + self.assertScopeIsRmc( + anno.getanno(if_node, NodeAnno.ORELSE_SCOPE), + ('a', 'a.c'), + ('a.b', 'd'), + ('d',), + ) + self.assertScopeIsRmc( + anno.getanno(if_node, NodeAnno.BODY_SCOPE).parent, + ('a', 'a.c', 'd'), + ('a.b', 'd'), + ('a', 'd'), + ) + + def test_if_subscripts(self): + + def test_fn(a, b, c, e): + if a > 0: + a[b] = -a[c] + d = 2 * a + else: + a[0] = e + d = 1 + return d + + node = self._parse_and_analyze(test_fn) + if_node = node.body[0].body[0] + self.assertScopeIsRmc( + anno.getanno(if_node, NodeAnno.BODY_SCOPE), + ('a', 'b', 'c', 'a[c]'), + ('a', 'a[b]', 'd'), + ('d',), + ) + # TODO(mdan): Should subscript writes (a[0] = 1) be considered to read "a"? + self.assertScopeIsRmc( + anno.getanno(if_node, NodeAnno.ORELSE_SCOPE), + ('a', 'e'), + ('a', 'a[0]', 'd'), + ('d',), + ) + self.assertScopeIsRmc( + anno.getanno(if_node, NodeAnno.ORELSE_SCOPE).parent, + ('a', 'b', 'c', 'd', 'e', 'a[c]'), + ('a', 'd', 'a[b]', 'a[0]'), + ('a', 'b', 'c', 'd', 'e'), + ) + + def test_nested_if(self): def test_fn(b): if b > 0: @@ -270,7 +399,7 @@ class ActivityAnalizerTest(test.TestCase): anno.getanno(inner_if_node, NodeAnno.ORELSE_SCOPE), ('b',), ('a',), ('a',)) - def test_function_def(self): + def test_nested_function(self): def test_fn(a): @@ -285,44 +414,48 @@ class ActivityAnalizerTest(test.TestCase): return b, c node = self._parse_and_analyze(test_fn) - fndef_node = node.body[0].body[0] + fn_def_node = node.body[0].body[0] self.assertScopeIsRmc( - anno.getanno(fndef_node, + anno.getanno(fn_def_node, NodeAnno.BODY_SCOPE).parent, ('b', 'i', 'f', 'c', 'a'), ('f', 'b', 'c', 'i'), ('f', 'a', 'b', 'c', 'i')) self.assertScopeIsRmc( - anno.getanno(fndef_node, NodeAnno.BODY_SCOPE), ('x', 'y'), ('y',), ( + anno.getanno(fn_def_node, NodeAnno.BODY_SCOPE), ('x', 'y'), ('y',), ( 'x', 'y', )) - def test_call_with_composite_names(self): + def 
test_constructor_attributes(self): - def foo(*_): - pass + class TestClass(object): + + def __init__(self, a): + self.b = a + self.b.c = 1 + + node = self._parse_and_analyze(TestClass) + init_node = node.body[0].body[0] + self.assertScopeIsRmc( + anno.getanno(init_node, NodeAnno.BODY_SCOPE), + ('self', 'a', 'self.b'), + ('self', 'self.b', 'self.b.c'), + ('self', 'a', 'self.b'), + ) + + def test_aug_assign_subscripts(self): def test_fn(a): - foo(a.b, a.c) - if a > 0: - a.b = 2 - else: - d = 2 - d.e = a.c - f = d.e + 1 - a.c = f + a[0] += 1 node = self._parse_and_analyze(test_fn) - call_node = node.body[0].body[0].value + fn_node = node.body[0] self.assertScopeIsRmc( - anno.getanno(call_node, NodeAnno.ARGS_SCOPE), ('a', 'a.b', 'a.c'), (), - ()) - if_node = node.body[0].body[1] - self.assertScopeIsRmc( - anno.getanno(if_node, NodeAnno.BODY_SCOPE), ('a',), ('a.b',), ()) - self.assertScopeIsRmc( - anno.getanno(if_node, NodeAnno.ORELSE_SCOPE), - ('a', 'a.c', 'd', 'd.e', 'f'), ('a.c', 'd', 'd.e', 'f'), ('d', 'f')) + anno.getanno(fn_node, NodeAnno.BODY_SCOPE), + ('a',), + ('a', 'a[0]'), + ('a',), + ) if __name__ == '__main__': diff --git a/tensorflow/contrib/autograph/pyct/static_analysis/annos.py b/tensorflow/contrib/autograph/pyct/static_analysis/annos.py index 5254b83ca7c775867fc2ad5ef0a0ad93ac483ba0..b929b35b79200b0968c9c4f26b10cda28763773a 100644 --- a/tensorflow/contrib/autograph/pyct/static_analysis/annos.py +++ b/tensorflow/contrib/autograph/pyct/static_analysis/annos.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Annotations used by the static analizer.""" +"""Annotations used by the static analyzer.""" from __future__ import absolute_import from __future__ import division @@ -28,21 +28,22 @@ class NoValue(Enum): class NodeAnno(NoValue): - """Additionnal annotations used by the static analyzer. 
+ """Additional annotations used by the static analyzer. These are in addition to the basic annotations declared in anno.py. """ # Symbols # These flags are boolean. - IS_LOCAL = 'Symbol is local to the function scope being analized.' - IS_PARAM = 'Symbol is a parameter to the function being analized.' + IS_LOCAL = 'Symbol is local to the function scope being analyzed.' + IS_PARAM = 'Symbol is a parameter to the function being analyzed.' IS_MODIFIED_SINCE_ENTRY = ( 'Symbol has been explicitly replaced in the current function scope.') # Scopes # Scopes are represented by objects of type activity.Scope. ARGS_SCOPE = 'The scope for the argument list of a function call.' + COND_SCOPE = 'The scope for the test node of a conditional statement.' BODY_SCOPE = ( 'The scope for the main body of a statement (True branch for if ' 'statements, main body for loops).') diff --git a/tensorflow/contrib/autograph/pyct/static_analysis/type_info.py b/tensorflow/contrib/autograph/pyct/static_analysis/type_info.py index 203aa3c3d18ab15300bbf424adeece6e74d9c994..c00946f9c41bc68d5c638d71f356b484db1286d1 100644 --- a/tensorflow/contrib/autograph/pyct/static_analysis/type_info.py +++ b/tensorflow/contrib/autograph/pyct/static_analysis/type_info.py @@ -48,6 +48,9 @@ from tensorflow.contrib.autograph.pyct import transformer from tensorflow.python.util import tf_inspect +# TODO(mdan): Remove the duplication between this and activity.py. +# In particular, the symbol definitions we track here could as well be tracked +# there because they follow the same rules for visibility. class Scope(object): """Tracks symbol value references. 
@@ -99,20 +102,16 @@ class TypeInfoResolver(transformer.Base): def __init__(self, context): super(TypeInfoResolver, self).__init__(context) self.scope = Scope(None) - self.function_level = 0 def visit_FunctionDef(self, node): self.scope = Scope(self.scope) - self.function_level += 1 - self.generic_visit(node) - self.function_level -= 1 + node = self.generic_visit(node) self.scope = self.scope.parent return node def _visit_block(self, block): self.scope = Scope(self.scope) - for i, n in enumerate(block): - block[i] = self.generic_visit(n) + block = self.visit_block(block) self.scope = self.scope.parent return block @@ -137,7 +136,7 @@ class TypeInfoResolver(transformer.Base): def _process_function_arg(self, arg_name): str_name = str(arg_name) - if self.function_level == 1 and str_name in self.context.arg_types: + if len(self.enclosing_entities) == 1 and str_name in self.context.arg_types: # Forge a node to hold the type information, so that method calls on # it can resolve the type. type_holder = arg_name.ast() @@ -168,16 +167,8 @@ class TypeInfoResolver(transformer.Base): anno.getanno(definition, 'element_type')) return node - def _process_tuple_assignment(self, source, t): - for i, e in enumerate(t.elts): - if isinstance(e, gast.Tuple): - self._process_tuple_assignment(source, e) - else: - self.scope.setval( - anno.getanno(e, anno.Basic.QN), - gast.Subscript(source, gast.Index(i), ctx=gast.Store())) - def _process_variable_assignment(self, source, targets): + # Special case: constructors. if isinstance(source, gast.Call): func = source.func if anno.hasanno(func, 'live_val'): @@ -190,15 +181,25 @@ class TypeInfoResolver(transformer.Base): # We can have a whitelist of no-side-effects constructors. # We can also step inside the constructor and further analyze. - for t in targets: - if isinstance(t, gast.Tuple): - # need to recurse on the case of assigning nested tuples, - # ex. 
a, (b, c) = f() - self._process_tuple_assignment(source, t) - elif isinstance(t, (gast.Name, gast.Attribute)): - self.scope.setval(anno.getanno(t, anno.Basic.QN), source) + # Multiple targets mean multiple assignment. + for target in targets: + # Tuple target means unpacking. + if isinstance(target, (gast.Tuple, gast.List)): + for i, target_item in enumerate(target.elts): + # Two cases here: + # 1. Static unpacking, e.g. a, b = c, d + # 2. Dynamic unpacking, e.g. a, b = c + # The former case is optimized away. + if isinstance(source, (gast.Tuple, gast.List)): + source_item = source.elts[i] + else: + source_item = gast.Subscript(source, gast.Index(i), ctx=None) + self._process_variable_assignment(source_item, (target_item,)) + elif isinstance(target, (gast.Name, gast.Attribute)): + target_symbol = anno.getanno(target, anno.Basic.QN) + self.scope.setval(target_symbol, source) else: - raise ValueError('Dont know how to handle assignment to %s' % t) + raise ValueError('assignment target has unknown type: %s' % target) def visit_With(self, node): for wi in node.items: @@ -218,19 +219,26 @@ class TypeInfoResolver(transformer.Base): # type that it specified. if (anno.getanno(node.func, 'live_val') is self.context.type_annotation_func): - # Expecting the actual type to be the second argument. 
+ if len(node.args) != 2: raise ValueError('"%s" must have exactly two parameters' % self.context.type_annotation_func) - if not anno.hasanno(node.args[0], anno.Basic.QN): + target_arg, type_arg = node.args + if not anno.hasanno(target_arg, anno.Basic.QN): raise ValueError('the first argument of "%s" must by a symbol' % self.context.type_annotation_func) - if not anno.hasanno(node.args[1], 'live_val'): - raise ValueError( - 'the second argument of "%s" must be statically resolvable' % - self.context.type_annotation_func) - target_symbol = anno.getanno(node.args[0], anno.Basic.QN) - element_type = anno.getanno(node.args[1], 'live_val') + if isinstance(type_arg, gast.Str): + element_type = type_arg.s + elif isinstance(type_arg, gast.Num): + element_type = type_arg.n + else: + if not anno.hasanno(type_arg, 'live_val'): + raise ValueError( + 'the second argument of "%s" must be statically resolvable' % + self.context.type_annotation_func) + element_type = anno.getanno(type_arg, 'live_val') + + target_symbol = anno.getanno(target_arg, anno.Basic.QN) # Find the definition of this symbol and annotate it with the given # data type. That in turn will cause future uses of the symbol # to receive the same type annotation. 
diff --git a/tensorflow/contrib/autograph/pyct/static_analysis/type_info_test.py b/tensorflow/contrib/autograph/pyct/static_analysis/type_info_test.py index c0de4a604301b6e9f80ee83e4797b9ac7e558a48..46b7701624a43073fb7cc612d2678ab851513d91 100644 --- a/tensorflow/contrib/autograph/pyct/static_analysis/type_info_test.py +++ b/tensorflow/contrib/autograph/pyct/static_analysis/type_info_test.py @@ -196,23 +196,46 @@ class TypeInfoResolverTest(test.TestCase): f_ref = node.body[0].body[1].value self.assertEqual(anno.getanno(f_ref, 'element_type'), Foo) - def test_nested_assignment(self): + def test_nested_unpacking(self): - def test_fn(foo): - a, (b, c) = foo + class Foo(object): + pass + + class Bar(object): + pass + + def test_fn(): + a, (b, c) = (Foo(), (Bar(), Foo())) return a, b, c - node = self._parse_and_analyze(test_fn, {'foo': (1, 2, 3)}) - lhs = node.body[0].body[1].value.elts - a = lhs[0] - b = lhs[1] - c = lhs[2] - # TODO(mdan): change these once we have the live values propagating - # correctly + node = self._parse_and_analyze(test_fn, {'Foo': Foo, 'Bar': Bar}) + a, b, c = node.body[0].body[1].value.elts + self.assertEquals(Foo, anno.getanno(a, 'type')) + self.assertEquals(Bar, anno.getanno(b, 'type')) + self.assertEquals(Foo, anno.getanno(c, 'type')) self.assertFalse(anno.hasanno(a, 'live_val')) self.assertFalse(anno.hasanno(b, 'live_val')) self.assertFalse(anno.hasanno(c, 'live_val')) + def test_inner_scope(self): + + def test_fn(): + a = [] + utils.set_element_type(a, 1) + for _ in a: + b = [] + utils.set_element_type(b, 2) + return a, b + + node = self._parse_and_analyze(test_fn, {'utils': utils}) + a, b = node.body[0].body[2].body[2].value.elts + self.assertEquals(1, anno.getanno(a, 'element_type')) + self.assertEquals(2, anno.getanno(b, 'element_type')) + self.assertFalse(anno.hasanno(a, 'type')) + self.assertFalse(anno.hasanno(b, 'type')) + self.assertFalse(anno.hasanno(a, 'live_val')) + self.assertFalse(anno.hasanno(b, 'live_val')) + if __name__ == 
'__main__': test.main() diff --git a/tensorflow/contrib/autograph/pyct/transformer.py b/tensorflow/contrib/autograph/pyct/transformer.py index 35f114b6e11901a854c1d631061ae42285c0e261..4db6cc0adfad90ffc1a6bbcadfc80215688d271e 100644 --- a/tensorflow/contrib/autograph/pyct/transformer.py +++ b/tensorflow/contrib/autograph/pyct/transformer.py @@ -40,7 +40,13 @@ def try_ast_to_source(node): class Base(gast.NodeTransformer): - """Base class for specialized transformers.""" + """Base class for specialized transformers. + + Scope-local state tracking: to keep state across nodes, at the level of + (possibly nested) scopes, use enter/exit_local_scope and set/get_local. + You must call enter/exit_local_scope manually, but the transformer detects + when they are not properly paired. + """ def __init__(self, context): """Initialize the transformer. Subclasses should call this. @@ -51,6 +57,33 @@ class Base(gast.NodeTransformer): self._lineno = 0 self._col_offset = 0 self.context = context + self._enclosing_entities = [] + + # A stack that allows keeping mutable, scope-local state where scopes may be + # nested. For example, it can be used to track the usage of break + # statements in each loop, where loops may be nested. 
+ self._local_scope_state = [] + self.enter_local_scope() + + @property + def enclosing_entities(self): + return tuple(self._enclosing_entities) + + @property + def locel_scope_level(self): + return len(self._local_scope_state) + + def enter_local_scope(self): + self._local_scope_state.append({}) + + def exit_local_scope(self): + return self._local_scope_state.pop() + + def set_local(self, name, value): + self._local_scope_state[-1][name] = value + + def get_local(self, name, default=None): + return self._local_scope_state[-1].get(name, default) def debug_print(self, node): """Helper method useful for debugging.""" @@ -58,16 +91,36 @@ class Base(gast.NodeTransformer): print(pretty_printer.fmt(node)) return node + def visit_block(self, nodes): + """Helper equivalent to generic_visit, but for node lists.""" + results = [] + for node in nodes: + replacement = self.visit(node) + if replacement: + if isinstance(replacement, (list, tuple)): + results.extend(replacement) + else: + results.append(replacement) + return results + def visit(self, node): source_code = self.context.source_code source_file = self.context.source_file + did_enter_function = False + local_scope_state_size = len(self._local_scope_state) + try: + if isinstance(node, (gast.FunctionDef, gast.ClassDef, gast.Lambda)): + self._enclosing_entities.append(node) + did_enter_function = True + if source_code and hasattr(node, 'lineno'): self._lineno = node.lineno self._col_offset = node.col_offset if anno.hasanno(node, anno.Basic.SKIP_PROCESSING): return node return super(Base, self).visit(node) + except (ValueError, AttributeError, KeyError, NotImplementedError, AssertionError) as e: msg = '%s: %s\nOffending source:\n%s\n\nOccurred at node:\n%s' % ( @@ -82,3 +135,13 @@ class Base(gast.NodeTransformer): msg, (source_file, self._lineno, self._col_offset + 1, line)), sys.exc_info()[2]) + finally: + if did_enter_function: + self._enclosing_entities.pop() + + if local_scope_state_size != 
len(self._local_scope_state): + raise AssertionError( + 'Inconsistent local scope stack. Before entering node %s, the' + ' stack had length %d, after exit it has length %d. This' + ' indicates enter_local_scope and exit_local_scope are not' + ' well paired.') diff --git a/tensorflow/contrib/autograph/pyct/transformer_test.py b/tensorflow/contrib/autograph/pyct/transformer_test.py new file mode 100644 index 0000000000000000000000000000000000000000..f96b0dc377521a482d347436caa98633a0a32c8a --- /dev/null +++ b/tensorflow/contrib/autograph/pyct/transformer_test.py @@ -0,0 +1,179 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for transformer module.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.autograph.pyct import anno +from tensorflow.contrib.autograph.pyct import context +from tensorflow.contrib.autograph.pyct import parser +from tensorflow.contrib.autograph.pyct import transformer +from tensorflow.python.platform import test + + +class TransformerTest(test.TestCase): + + def _context_for_nodetesting(self): + return context.EntityContext( + namer=None, + source_code=None, + source_file=None, + namespace=None, + arg_values=None, + arg_types=None, + owner_type=None, + recursive=False) + + def test_entity_scope_tracking(self): + + class TestTransformer(transformer.Base): + + # The choice of node to assign to is arbitrary. Using Assign because it's + # easy to find in the tree. + def visit_Assign(self, node): + anno.setanno(node, 'enclosing_entities', self.enclosing_entities) + return self.generic_visit(node) + + # This will show up in the lambda function. 
+ def visit_BinOp(self, node): + anno.setanno(node, 'enclosing_entities', self.enclosing_entities) + return self.generic_visit(node) + + tr = TestTransformer(self._context_for_nodetesting()) + + def test_function(): + a = 0 + + class TestClass(object): + + def test_method(self): + b = 0 + def inner_function(x): + c = 0 + d = lambda y: (x + y) + return c, d + return b, inner_function + return a, TestClass + + node, _ = parser.parse_entity(test_function) + node = tr.visit(node) + + test_function_node = node.body[0] + test_class = test_function_node.body[1] + test_method = test_class.body[0] + inner_function = test_method.body[1] + lambda_node = inner_function.body[1].value + + a = test_function_node.body[0] + b = test_method.body[0] + c = inner_function.body[0] + lambda_expr = lambda_node.body + + self.assertEqual( + (test_function_node,), anno.getanno(a, 'enclosing_entities')) + self.assertEqual((test_function_node, test_class, test_method), + anno.getanno(b, 'enclosing_entities')) + self.assertEqual( + (test_function_node, test_class, test_method, inner_function), + anno.getanno(c, 'enclosing_entities')) + self.assertEqual((test_function_node, test_class, test_method, + inner_function, lambda_node), + anno.getanno(lambda_expr, 'enclosing_entities')) + + def test_statement_info_stack(self): + + class TestTransformer(transformer.Base): + + # Extract all string constants from the block. 
+ def visit_Str(self, node): + self.set_local('string', self.get_local('string', default='') + node.s) + return self.generic_visit(node) + + def _annotate_result(self, node): + self.enter_local_scope() + node = self.generic_visit(node) + anno.setanno(node, 'test', self.get_local('string')) + self.exit_local_scope() + return node + + def visit_While(self, node): + return self._annotate_result(node) + + def visit_For(self, node): + return self._annotate_result(node) + + tr = TestTransformer(self._context_for_nodetesting()) + + def test_function(a): + """Docstring.""" + assert a == 'This should not be counted' + for i in range(3): + _ = 'a' + if i > 2: + return 'b' + else: + _ = 'c' + while True: + raise '1' + return 'nor this' + + node, _ = parser.parse_entity(test_function) + node = tr.visit(node) + + for_node = node.body[0].body[2] + while_node = for_node.body[1].orelse[1] + + self.assertFalse(anno.hasanno(for_node, 'string')) + self.assertEqual('abc', anno.getanno(for_node, 'test')) + self.assertFalse(anno.hasanno(while_node, 'string')) + self.assertEqual('1', anno.getanno(while_node, 'test')) + + def test_statement_info_stack_checks_integrity(self): + + class TestTransformer(transformer.Base): + + def visit_If(self, node): + self.enter_local_scope() + return self.generic_visit(node) + + def visit_For(self, node): + node = self.generic_visit(node) + self.exit_local_scope() + return node + + tr = TestTransformer(self._context_for_nodetesting()) + + def no_exit(a): + if a > 0: + print(a) + return None + + node, _ = parser.parse_entity(no_exit) + with self.assertRaises(AssertionError): + tr.visit(node) + + def no_entry(a): + for _ in a: + print(a) + + node, _ = parser.parse_entity(no_entry) + with self.assertRaises(AssertionError): + tr.visit(node) + + +if __name__ == '__main__': + test.main() diff --git a/tensorflow/contrib/autograph/utils/BUILD b/tensorflow/contrib/autograph/utils/BUILD index 
b53fbb5c18f27aa4681347d965dc7322c849ec91..d3a1b9468892531cbc51bc13de66ef595f1a95f8 100644 --- a/tensorflow/contrib/autograph/utils/BUILD +++ b/tensorflow/contrib/autograph/utils/BUILD @@ -44,6 +44,7 @@ py_test( name = "builtins_test", srcs = ["builtins_test.py"], srcs_version = "PY2AND3", + tags = ["no_windows"], deps = [ ":utils", "//tensorflow/python:client_testlib", @@ -84,6 +85,7 @@ py_test( name = "py_func_test", srcs = ["py_func_test.py"], srcs_version = "PY2AND3", + tags = ["no_windows"], deps = [ ":utils", "//tensorflow/python:client_testlib", diff --git a/tensorflow/contrib/autograph/utils/__init__.py b/tensorflow/contrib/autograph/utils/__init__.py index 22898b17e98bb004b4d2aa529b58cc99fc64dbb2..817d4126d106487e1fea3e442712a69bbfccd7f3 100644 --- a/tensorflow/contrib/autograph/utils/__init__.py +++ b/tensorflow/contrib/autograph/utils/__init__.py @@ -19,8 +19,6 @@ from __future__ import division from __future__ import print_function from tensorflow.contrib.autograph.utils.builtins import dynamic_builtin -from tensorflow.contrib.autograph.utils.builtins import dynamic_dataset -from tensorflow.contrib.autograph.utils.builtins import dynamic_for_cond from tensorflow.contrib.autograph.utils.builtins import dynamic_print from tensorflow.contrib.autograph.utils.builtins import dynamic_range from tensorflow.contrib.autograph.utils.context_managers import control_dependency_on_returns @@ -28,7 +26,6 @@ from tensorflow.contrib.autograph.utils.misc import alias_tensors from tensorflow.contrib.autograph.utils.multiple_dispatch import dynamic_is from tensorflow.contrib.autograph.utils.multiple_dispatch import dynamic_is_not from tensorflow.contrib.autograph.utils.multiple_dispatch import run_cond -from tensorflow.contrib.autograph.utils.multiple_dispatch import run_while from tensorflow.contrib.autograph.utils.py_func import wrap_py_func from tensorflow.contrib.autograph.utils.tensor_list import dynamic_list_append from tensorflow.contrib.autograph.utils.testing 
import fake_tf diff --git a/tensorflow/contrib/autograph/utils/builtins.py b/tensorflow/contrib/autograph/utils/builtins.py index c6af0e4d13b8d15bebf857ff7e1129149490ee7a..211e8eaee9082dd3e4f035e4379871cd2e154a39 100644 --- a/tensorflow/contrib/autograph/utils/builtins.py +++ b/tensorflow/contrib/autograph/utils/builtins.py @@ -24,30 +24,21 @@ import six from tensorflow.contrib.autograph.utils import py_func from tensorflow.contrib.autograph.utils import type_check -from tensorflow.python.data.ops import dataset_ops from tensorflow.python.framework import tensor_util from tensorflow.python.ops import array_ops -from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import logging_ops from tensorflow.python.ops import math_ops -from tensorflow.python.util import tf_inspect def dynamic_builtin(f, *args, **kwargs): """Converts a builtin function call inline.""" - # Some built-ins may be objects. - if not tf_inspect.isbuiltin(f) and f not in (range,): - return f(*args, **kwargs) - if f is len: return dynamic_len(*args, **kwargs) if six.PY2 and f is xrange: return dynamic_range(*args, **kwargs) if f is range: return dynamic_range(*args, **kwargs) - - raise NotImplementedError( - 'The "%s" builtin is not yet supported.' % f.__name__) + raise ValueError('%s is not supported' % f) def dynamic_len(list_or_tensor): @@ -86,7 +77,7 @@ def is_tf_print_compatible(value): def dynamic_print(*values): - """Implementartion of print using dynamic dispatch. + """Implementation of print using dynamic dispatch. The function attempts to use tf.Print if all the values are compatible. Otherwise, it will fall back to py_func. @@ -100,75 +91,15 @@ def dynamic_print(*values): if all(map(is_tf_print_compatible, values)): return logging_ops.Print(1, values) - def flushed_print(*vals): + def print_wrapper(*vals): + if six.PY3: + # TensorFlow doesn't seem to generate Unicode when passing strings to + # py_func. 
This causes the print to add a "b'" wrapper to the output, + # which is probably never what you want. + vals = tuple(v.decode() if isinstance(v, bytes) else v for v in vals) print(*vals) + # The flush helps avoid garbled output in IPython. sys.stdout.flush() return py_func.wrap_py_func( - flushed_print, None, values, use_dummy_return=True) - - -def dynamic_dataset(iterated): - """Implementartion of smart tf.data.Dataset epoch wrapping. - - The function checks if the input is a tf.data.Dataset and if so then wraps it - so that for each element it returns it also returns the current epoch the - dataset iteration is in, for two epochs. If the input is not a - tf.data.Dataset then it just returns the input. - - Args: - iterated: The iterable or tf.data.Dataset that is being iterated over. - Returns: - Either just the untouched input, or in the case of input being a - tf.data.Dataset then it returns a wrapped tf.data.Dataset where for each - element it returns it also returns the current epoch the dataset iteration - is in. - """ - if not isinstance(iterated, dataset_ops.Dataset): - return iterated - - def epoch_dataset_number_helper(i): - return dataset_ops.Dataset.zip( - (dataset_ops.Dataset.from_tensors(i).repeat(), iterated)) - - epoch_numbers = dataset_ops.Dataset.range(2) - return epoch_numbers.flat_map(epoch_dataset_number_helper) - - -def dynamic_for_cond(iteration, iterated): - """Implementartion of smart while-loop condition using dynamic dispatch. - - The function checks if it is iterating over a tf.data.Dataset or not, and in - the case it is not then it simply returns if we are still in range of the - iterated and the next element. If it is iterating over a dataset then it only - iterates for a single epoch. - - Args: - iteration: The current iteration of the loop. - iterated: The iterable or tf.data.Dataset that is being iterated over. - Returns: - A tuple of a bool that indicates whether the loop should continue, and the - next element in iterated. 
- """ - # TODO(znado): Clean up. - # TODO(znado): This won't work for unpacked iterates. Fix. - if isinstance(iterated, dataset_ops.Dataset): - curr_epoch, next_elem = iterated.make_one_shot_iterator().get_next() - return math_ops.less(curr_epoch, 1), next_elem - elif tensor_util.is_tensor(iterated): - if iterated.shape.ndims > 1: - elem_shape = array_ops.shape(iterated)[1:] - else: - elem_shape = () - if iterated.shape.ndims == 0 or iterated.shape[0] == 0: - return False, array_ops.zeros(elem_shape, iterated.dtype) - return control_flow_ops.cond( - math_ops.less(iteration, dynamic_len(iterated)), - lambda: (True, iterated[iteration]), - lambda: (False, array_ops.zeros(elem_shape, iterated.dtype))) - elif hasattr(iterated, '__len__'): - if iteration < len(iterated): - return True, iterated[iteration] - return False, None - else: - raise NotImplementedError('Python iterators not yet supported.') + print_wrapper, None, values, use_dummy_return=True) diff --git a/tensorflow/contrib/autograph/utils/builtins_test.py b/tensorflow/contrib/autograph/utils/builtins_test.py index d9f7913d89a5471c76eb7ae484674bd7a1853ac9..163e6984079fea5c3b3d9aeda0ec8048d651686f 100644 --- a/tensorflow/contrib/autograph/utils/builtins_test.py +++ b/tensorflow/contrib/autograph/utils/builtins_test.py @@ -76,8 +76,9 @@ class BuiltinsTest(test.TestCase): def range(x): # pylint:disable=redefined-builtin return x - # Functions that just have the names of builtins are ignored. - self.assertEqual(builtins.dynamic_builtin(range, 1), 1) + # Functions that just have the names of builtins are rejected. 
+ with self.assertRaises(ValueError): + self.assertEqual(builtins.dynamic_builtin(range, 1), 1) if six.PY2: self.assertListEqual( list(builtins.dynamic_builtin(xrange, 3)), [0, 1, 2]) diff --git a/tensorflow/contrib/autograph/utils/multiple_dispatch.py b/tensorflow/contrib/autograph/utils/multiple_dispatch.py index 47049255f31113a0c7b2f5a1269593afdbbc9b19..70eef5676f61bcd978ea53260f0b86a817f2bd7c 100644 --- a/tensorflow/contrib/autograph/utils/multiple_dispatch.py +++ b/tensorflow/contrib/autograph/utils/multiple_dispatch.py @@ -18,8 +18,6 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import six - from tensorflow.contrib.autograph.utils.type_check import is_tensor from tensorflow.python.ops import control_flow_ops @@ -66,42 +64,3 @@ def py_cond(condition, true_fn, false_fn): if len(results) == 1: return results[0] return results - - -def run_while(cond_fn, body_fn, init_args): - """Type-dependent functional while loop. - - Args: - cond_fn: A Python callable implementing the stop conditions of the loop. - body_fn: A Python callable implementing the body of the loop. - init_args: The initial values of the arguments that will be passed to both - cond_fn and body_fn. - - Returns: - result: A list of values with the same shape and type as init_args. If any - of the init_args, or any variables closed-over in cond_fn are Tensors, - tf.while_loop will be used, otherwise a Python while loop will be ran. - - Raises: - ValueError: if init_args is not a tuple or list with one or more elements. 
- """ - if not isinstance(init_args, (tuple, list)) or not init_args: - raise ValueError( - 'init_args must be a non-empty list or tuple, found %s' % init_args) - - # TODO(alexbw): statically determine all active variables in cond_fn, - # and pass them directly - closure_vars = tuple( - [c.cell_contents for c in six.get_function_closure(cond_fn) or []]) - possibly_tensors = tuple(init_args) + closure_vars - if is_tensor(*possibly_tensors): - return control_flow_ops.while_loop(cond_fn, body_fn, init_args) - else: - return py_while_loop(cond_fn, body_fn, init_args) - - -def py_while_loop(cond_fn, body_fn, init_args): - state = init_args - while cond_fn(*state): - state = body_fn(*state) - return state diff --git a/tensorflow/contrib/autograph/utils/multiple_dispatch_test.py b/tensorflow/contrib/autograph/utils/multiple_dispatch_test.py index e6a41bb4166e8cfc8c703685f56eb90a1b5f63b4..f72f8e94a0df815f7d517e2b81ffc86c5c545f07 100644 --- a/tensorflow/contrib/autograph/utils/multiple_dispatch_test.py +++ b/tensorflow/contrib/autograph/utils/multiple_dispatch_test.py @@ -70,29 +70,6 @@ class MultipleDispatchTest(test.TestCase): out = multiple_dispatch.run_cond(constant(False), true_fn, false_fn) self.assertEqual(sess.run(out), 3) - def test_run_while_python(self): - cond_fn = lambda x, t, s: x > t - body_fn = lambda x, t, s: (x * s, t, s) - - x, _, _ = multiple_dispatch.run_while(cond_fn, body_fn, [3.0, 1.0, 0.5]) - self.assertEqual(x, 0.75) - - x, _, _ = multiple_dispatch.run_while(cond_fn, body_fn, [3.0, 4.0, 0.5]) - self.assertEqual(x, 3.0) - - def test_run_while_tf(self): - cond_fn = lambda x, t, s: x > t - body_fn = lambda x, t, s: (x * s, t, s) - - with Session() as sess: - x, _, _ = multiple_dispatch.run_while(cond_fn, body_fn, - [constant(3.0), 1.0, 0.5]) - self.assertEqual(sess.run(x), 0.75) - - x, _, _ = multiple_dispatch.run_while(cond_fn, body_fn, - [constant(3.0), 4.0, 0.5]) - self.assertEqual(sess.run(x), 3.0) - if __name__ == '__main__': test.main() diff 
--git a/tensorflow/contrib/bayesflow/python/ops/monte_carlo_impl.py b/tensorflow/contrib/bayesflow/python/ops/monte_carlo_impl.py index d193a8459d00b83580509c8de25d5f7801b195fe..032b859d469ee5039e08e4af4c2f4ebf35c2ff19 100644 --- a/tensorflow/contrib/bayesflow/python/ops/monte_carlo_impl.py +++ b/tensorflow/contrib/bayesflow/python/ops/monte_carlo_impl.py @@ -44,15 +44,13 @@ def expectation_importance_sampler(f, n=None, seed=None, name='expectation_importance_sampler'): - r"""Monte Carlo estimate of `\\(E_p[f(Z)] = E_q[f(Z) p(Z) / q(Z)]\\)`. + r"""Monte Carlo estimate of \\(E_p[f(Z)] = E_q[f(Z) p(Z) / q(Z)]\\). - With `\\(p(z) := exp^{log_p(z)}\\)`, this `Op` returns + With \\(p(z) := exp^{log_p(z)}\\), this `Op` returns - ``` \\(n^{-1} sum_{i=1}^n [ f(z_i) p(z_i) / q(z_i) ], z_i ~ q,\\) \\(\approx E_q[ f(Z) p(Z) / q(Z) ]\\) \\(= E_p[f(Z)]\\) - ``` This integral is done in log-space with max-subtraction to better handle the often extreme values that `f(z) p(z) / q(z)` can take on. @@ -121,14 +119,12 @@ def expectation_importance_sampler_logspace( name='expectation_importance_sampler_logspace'): r"""Importance sampling with a positive function, in log-space. - With `\\(p(z) := exp^{log_p(z)}\\)`, and `\\(f(z) = exp{log_f(z)}\\)`, + With \\(p(z) := exp^{log_p(z)}\\), and \\(f(z) = exp{log_f(z)}\\), this `Op` returns - ``` \\(Log[ n^{-1} sum_{i=1}^n [ f(z_i) p(z_i) / q(z_i) ] ], z_i ~ q,\\) \\(\approx Log[ E_q[ f(Z) p(Z) / q(Z) ] ]\\) \\(= Log[E_p[f(Z)]]\\) - ``` This integral is done in log-space with max-subtraction to better handle the often extreme values that `f(z) p(z) / q(z)` can take on. @@ -196,13 +192,11 @@ def _logspace_mean(log_values): def expectation(f, samples, log_prob=None, use_reparametrization=True, axis=0, keep_dims=False, name=None): - """Computes the Monte-Carlo approximation of `\\(E_p[f(X)]\\)`. + """Computes the Monte-Carlo approximation of \\(E_p[f(X)]\\). 
This function computes the Monte-Carlo approximation of an expectation, i.e., - ```none \\(E_p[f(X)] \approx= m^{-1} sum_i^m f(x_j), x_j\ ~iid\ p(X)\\) - ``` where: @@ -216,8 +210,8 @@ def expectation(f, samples, log_prob=None, use_reparametrization=True, parameterless distribution (e.g., `Normal(Y; m, s) <=> Y = sX + m, X ~ Normal(0,1)`), we can swap gradient and expectation, i.e., - `grad[ Avg{ \\(s_i : i=1...n\\) } ] = Avg{ grad[\\(s_i\\)] : i=1...n }` where - `S_n = Avg{\\(s_i\\)}` and `\\(s_i = f(x_i), x_i ~ p\\)`. + grad[ Avg{ \\(s_i : i=1...n\\) } ] = Avg{ grad[\\(s_i\\)] : i=1...n } where + S_n = Avg{\\(s_i\\)}` and `\\(s_i = f(x_i), x_i ~ p\\). However, if p is not reparameterized, TensorFlow's gradient will be incorrect since the chain-rule stops at samples of non-reparameterized distributions. @@ -296,7 +290,7 @@ def expectation(f, samples, log_prob=None, use_reparametrization=True, Args: f: Python callable which can return `f(samples)`. samples: `Tensor` of samples used to form the Monte-Carlo approximation of - `\\(E_p[f(X)]\\)`. A batch of samples should be indexed by `axis` + \\(E_p[f(X)]\\). A batch of samples should be indexed by `axis` dimensions. log_prob: Python callable which can return `log_prob(samples)`. Must correspond to the natural-logarithm of the pdf/pmf of each sample. Only @@ -317,7 +311,7 @@ def expectation(f, samples, log_prob=None, use_reparametrization=True, Returns: approx_expectation: `Tensor` corresponding to the Monte-Carlo approximation - of `\\(E_p[f(X)]\\)`. + of \\(E_p[f(X)]\\). Raises: ValueError: if `f` is not a Python `callable`. 
@@ -329,7 +323,7 @@ def expectation(f, samples, log_prob=None, use_reparametrization=True, if not callable(f): raise ValueError('`f` must be a callable function.') if use_reparametrization: - return math_ops.reduce_mean(f(samples), axis=axis, keep_dims=keep_dims) + return math_ops.reduce_mean(f(samples), axis=axis, keepdims=keep_dims) else: if not callable(log_prob): raise ValueError('`log_prob` must be a callable function.') @@ -349,7 +343,7 @@ def expectation(f, samples, log_prob=None, use_reparametrization=True, # "Is there a floating point value of x, for which x-x == 0 is false?" # http://stackoverflow.com/q/2686644 fx += stop(fx) * (logpx - stop(logpx)) # Add zeros_like(logpx). - return math_ops.reduce_mean(fx, axis=axis, keep_dims=keep_dims) + return math_ops.reduce_mean(fx, axis=axis, keepdims=keep_dims) def _sample_mean(values): diff --git a/tensorflow/contrib/boosted_trees/estimator_batch/BUILD b/tensorflow/contrib/boosted_trees/estimator_batch/BUILD index 17e20c4b315bab8852c90788567a2f2f92119f40..8cff1a3bb1d11aff6a264636291a7149b40de516 100644 --- a/tensorflow/contrib/boosted_trees/estimator_batch/BUILD +++ b/tensorflow/contrib/boosted_trees/estimator_batch/BUILD @@ -28,12 +28,13 @@ py_library( srcs = ["model.py"], srcs_version = "PY2AND3", deps = [ + ":estimator_utils", ":trainer_hooks", "//tensorflow/contrib/boosted_trees:gbdt_batch", "//tensorflow/contrib/boosted_trees:model_ops_py", "//tensorflow/python:framework_ops", "//tensorflow/python:state_ops", - "//tensorflow/python:training", + "//tensorflow/python:training_util", ], ) @@ -51,6 +52,18 @@ py_library( ], ) +py_library( + name = "estimator_utils", + srcs = ["estimator_utils.py"], + srcs_version = "PY2AND3", + deps = [ + "//tensorflow/contrib/learn", + "//tensorflow/core:protos_all_py", + "//tensorflow/python:control_flow_ops", + "//tensorflow/python:framework_ops", + ], +) + py_test( name = "trainer_hooks_test", size = "small", @@ -118,6 +131,7 @@ py_library( srcs = ["estimator.py"], 
srcs_version = "PY2AND3", deps = [ + ":estimator_utils", ":model", "//tensorflow/contrib/boosted_trees:losses", "//tensorflow/contrib/learn", @@ -130,6 +144,7 @@ py_library( srcs = ["dnn_tree_combined_estimator.py"], srcs_version = "PY2AND3", deps = [ + ":estimator_utils", ":trainer_hooks", "//tensorflow/contrib/boosted_trees:gbdt_batch", "//tensorflow/contrib/boosted_trees:model_ops_py", @@ -159,3 +174,22 @@ py_test( "//tensorflow/python:framework_for_generated_wrappers", ], ) + +py_test( + name = "estimator_test", + size = "medium", + srcs = ["estimator_test.py"], + srcs_version = "PY2AND3", + tags = [ + "no_gpu", + "no_pip_gpu", + "notsan", + ], + deps = [ + ":estimator", + "//tensorflow/contrib/boosted_trees:gbdt_batch", + "//tensorflow/contrib/layers:layers_py", + "//tensorflow/python:array_ops", + "//tensorflow/python:framework_for_generated_wrappers", + ], +) diff --git a/tensorflow/contrib/boosted_trees/estimator_batch/custom_export_strategy.py b/tensorflow/contrib/boosted_trees/estimator_batch/custom_export_strategy.py index d9b0d89a03dce40d34f76bb1262d26bb587a2dc7..62f1f4122b05b56a708823df4246d618bd3fa5d4 100644 --- a/tensorflow/contrib/boosted_trees/estimator_batch/custom_export_strategy.py +++ b/tensorflow/contrib/boosted_trees/estimator_batch/custom_export_strategy.py @@ -39,7 +39,8 @@ _SPARSE_FLOAT_FEATURE_NAME_TEMPLATE = "%s_%d" def make_custom_export_strategy(name, convert_fn, feature_columns, - export_input_fn): + export_input_fn, + use_core_columns=False): """Makes custom exporter of GTFlow tree format. 
Args: @@ -58,7 +59,7 @@ def make_custom_export_strategy(name, input_fn = export_input_fn() (sorted_feature_names, dense_floats, sparse_float_indices, _, _, sparse_int_indices, _, _) = gbdt_batch.extract_features( - input_fn.features, feature_columns) + input_fn.features, feature_columns, use_core_columns) def export_fn(estimator, export_dir, checkpoint_path=None, eval_result=None): """A wrapper to export to SavedModel, and convert it to other formats.""" diff --git a/tensorflow/contrib/boosted_trees/estimator_batch/dnn_tree_combined_estimator.py b/tensorflow/contrib/boosted_trees/estimator_batch/dnn_tree_combined_estimator.py index 2e7b8cba05b89feaac3f47e13d26e7ae37a7b0ae..9994c84ebdb930eea0818188225488eb5eca84eb 100644 --- a/tensorflow/contrib/boosted_trees/estimator_batch/dnn_tree_combined_estimator.py +++ b/tensorflow/contrib/boosted_trees/estimator_batch/dnn_tree_combined_estimator.py @@ -19,25 +19,20 @@ logits of the DNN. The input layer of the DNN (including the embeddings learned over sparse features) can optionally be provided to the boosted trees as an additional input feature. 
""" - from __future__ import absolute_import from __future__ import division from __future__ import print_function import six from tensorflow.contrib import layers +from tensorflow.contrib.boosted_trees.estimator_batch import estimator_utils from tensorflow.contrib.boosted_trees.estimator_batch import trainer_hooks from tensorflow.contrib.boosted_trees.python.ops import model_ops from tensorflow.contrib.boosted_trees.python.training.functions import gbdt_batch from tensorflow.contrib.layers.python.layers import optimizers -from tensorflow.contrib.learn.python.learn.estimators import constants from tensorflow.contrib.learn.python.learn.estimators import estimator from tensorflow.contrib.learn.python.learn.estimators import head as head_lib from tensorflow.contrib.learn.python.learn.estimators import model_fn -from tensorflow.contrib.learn.python.learn.estimators import model_fn as contrib_model_fn_lib -from tensorflow.contrib.learn.python.learn.estimators import prediction_key -from tensorflow.python.estimator import model_fn as model_fn_lib -from tensorflow.python.estimator.export import export_output from tensorflow.python.feature_column import feature_column as feature_column_lib from tensorflow.python.framework import ops from tensorflow.python.ops import control_flow_ops @@ -48,56 +43,8 @@ from tensorflow.python.ops import variable_scope from tensorflow.python.summary import summary from tensorflow.python.training import training_util - _DNN_LEARNING_RATE = 0.001 -_CORE_MODE_TO_CONTRIB_MODE_ = { - model_fn_lib.ModeKeys.TRAIN: contrib_model_fn_lib.ModeKeys.TRAIN, - model_fn_lib.ModeKeys.EVAL: contrib_model_fn_lib.ModeKeys.EVAL, - model_fn_lib.ModeKeys.PREDICT: contrib_model_fn_lib.ModeKeys.INFER -} - - -def _core_mode_to_contrib_mode(mode): - return _CORE_MODE_TO_CONTRIB_MODE_[mode] - - -def _export_outputs_to_output_alternatives(export_outputs): - """Converts EstimatorSpec.export_outputs to output_alternatives. 
- - Args: - export_outputs: export_outputs created by create_estimator_spec. - Returns: - converted output_alternatives. - """ - output = dict() - if export_outputs is not None: - for key, value in export_outputs.items(): - if isinstance(value, export_output.ClassificationOutput): - exported_predictions = { - prediction_key.PredictionKey.SCORES: value.scores, - prediction_key.PredictionKey.CLASSES: value.classes - } - output[key] = (constants.ProblemType.CLASSIFICATION, - exported_predictions) - return output - return None - - -def _estimator_spec_to_model_fn_ops(estimator_spec, is_regression): - alternatives = [] - if not is_regression: - _export_outputs_to_output_alternatives(estimator_spec.export_outputs) - - return model_fn.ModelFnOps( - mode=_core_mode_to_contrib_mode(estimator_spec.mode), - predictions=estimator_spec.predictions, - loss=estimator_spec.loss, - train_op=estimator_spec.train_op, - eval_metric_ops=estimator_spec.eval_metric_ops, - output_alternatives=alternatives) - - def _get_optimizer(optimizer): if callable(optimizer): return optimizer() @@ -128,8 +75,7 @@ def _dnn_tree_combined_model_fn(features, dnn_steps_to_train=10000, tree_feature_columns=None, tree_center_bias=False, - use_core_versions=False, - is_regression=False): + use_core_versions=False): """DNN and GBDT combined model_fn. Args: @@ -169,7 +115,6 @@ def _dnn_tree_combined_model_fn(features, first fitting the bias. use_core_versions: Whether feature columns and loss are from the core (as opposed to contrib) version of tensorflow. - is_regression: Whether the problem is regression or not. Returns: A `ModelFnOps` object. 
@@ -305,8 +250,8 @@ def _dnn_tree_combined_model_fn(features, labels=labels, train_op_fn=_dnn_train_op_fn, logits=dnn_logits) - dnn_train_op = _estimator_spec_to_model_fn_ops(dnn_train_op, - is_regression).train_op + dnn_train_op = estimator_utils.estimator_spec_to_model_fn_ops( + dnn_train_op).train_op tree_train_op = head.create_estimator_spec( features=tree_features, @@ -314,10 +259,10 @@ def _dnn_tree_combined_model_fn(features, labels=labels, train_op_fn=_tree_train_op_fn, logits=tree_train_logits) - tree_train_op = _estimator_spec_to_model_fn_ops(tree_train_op, - is_regression).train_op + tree_train_op = estimator_utils.estimator_spec_to_model_fn_ops( + tree_train_op).train_op - model_fn_ops = _estimator_spec_to_model_fn_ops(model_fn_ops, is_regression) + model_fn_ops = estimator_utils.estimator_spec_to_model_fn_ops(model_fn_ops) else: model_fn_ops = head.create_model_fn_ops( features=features, @@ -529,26 +474,12 @@ class DNNBoostedTreeCombinedRegressor(estimator.Estimator): def _model_fn(features, labels, mode, config): return _dnn_tree_combined_model_fn( - features, - labels, - mode, - head, - dnn_hidden_units, - dnn_feature_columns, - tree_learner_config, - num_trees, - tree_examples_per_layer, - config, - dnn_optimizer, - dnn_activation_fn, - dnn_dropout, - dnn_input_layer_partitioner, - dnn_input_layer_to_tree, - dnn_steps_to_train, - tree_feature_columns, - tree_center_bias, - use_core_versions, - is_regression=True) + features, labels, mode, head, dnn_hidden_units, dnn_feature_columns, + tree_learner_config, num_trees, tree_examples_per_layer, config, + dnn_optimizer, dnn_activation_fn, dnn_dropout, + dnn_input_layer_partitioner, dnn_input_layer_to_tree, + dnn_steps_to_train, tree_feature_columns, tree_center_bias, + use_core_versions) super(DNNBoostedTreeCombinedRegressor, self).__init__( model_fn=_model_fn, model_dir=model_dir, diff --git a/tensorflow/contrib/boosted_trees/estimator_batch/estimator.py 
b/tensorflow/contrib/boosted_trees/estimator_batch/estimator.py index 70454aa6dbdb19297028a3f80822719bef5a0f72..89d0d611d2905492cec09e033b8cbc238ec7fac6 100644 --- a/tensorflow/contrib/boosted_trees/estimator_batch/estimator.py +++ b/tensorflow/contrib/boosted_trees/estimator_batch/estimator.py @@ -40,7 +40,8 @@ class GradientBoostedDecisionTreeClassifier(estimator.Estimator): label_keys=None, feature_engineering_fn=None, logits_modifier_function=None, - center_bias=True): + center_bias=True, + use_core_libs=False): """Initializes a GradientBoostedDecisionTreeClassifier estimator instance. Args: @@ -63,7 +64,8 @@ class GradientBoostedDecisionTreeClassifier(estimator.Estimator): logits_modifier_function: A modifier function for the logits. center_bias: Whether a separate tree should be created for first fitting the bias. - + use_core_libs: Whether feature columns and loss are from the core (as + opposed to contrib) version of tensorflow. Raises: ValueError: If learner_config is not valid. """ @@ -99,6 +101,7 @@ class GradientBoostedDecisionTreeClassifier(estimator.Estimator): 'examples_per_layer': examples_per_layer, 'center_bias': center_bias, 'logits_modifier_function': logits_modifier_function, + 'use_core_libs': use_core_libs, }, model_dir=model_dir, config=config, @@ -120,7 +123,8 @@ class GradientBoostedDecisionTreeRegressor(estimator.Estimator): config=None, feature_engineering_fn=None, logits_modifier_function=None, - center_bias=True): + center_bias=True, + use_core_libs=False): """Initializes a GradientBoostedDecisionTreeRegressor estimator instance. Args: @@ -145,6 +149,8 @@ class GradientBoostedDecisionTreeRegressor(estimator.Estimator): logits_modifier_function: A modifier function for the logits. center_bias: Whether a separate tree should be created for first fitting the bias. + use_core_libs: Whether feature columns and loss are from the core (as + opposed to contrib) version of tensorflow. 
""" head = head_lib.regression_head( label_name=label_name, @@ -166,6 +172,7 @@ class GradientBoostedDecisionTreeRegressor(estimator.Estimator): 'examples_per_layer': examples_per_layer, 'logits_modifier_function': logits_modifier_function, 'center_bias': center_bias, + 'use_core_libs': use_core_libs, }, model_dir=model_dir, config=config, @@ -189,7 +196,8 @@ class GradientBoostedDecisionTreeEstimator(estimator.Estimator): config=None, feature_engineering_fn=None, logits_modifier_function=None, - center_bias=True): + center_bias=True, + use_core_libs=False): """Initializes a GradientBoostedDecisionTreeEstimator estimator instance. Args: @@ -210,6 +218,8 @@ class GradientBoostedDecisionTreeEstimator(estimator.Estimator): logits_modifier_function: A modifier function for the logits. center_bias: Whether a separate tree should be created for first fitting the bias. + use_core_libs: Whether feature columns and loss are from the core (as + opposed to contrib) version of tensorflow. """ super(GradientBoostedDecisionTreeEstimator, self).__init__( model_fn=model.model_builder, @@ -222,6 +232,7 @@ class GradientBoostedDecisionTreeEstimator(estimator.Estimator): 'examples_per_layer': examples_per_layer, 'logits_modifier_function': logits_modifier_function, 'center_bias': center_bias, + 'use_core_libs': use_core_libs, }, model_dir=model_dir, config=config, diff --git a/tensorflow/contrib/boosted_trees/estimator_batch/estimator_test.py b/tensorflow/contrib/boosted_trees/estimator_batch/estimator_test.py new file mode 100644 index 0000000000000000000000000000000000000000..0d58317bd59331cfcde0e12aeb3a3a03fc45d89b --- /dev/null +++ b/tensorflow/contrib/boosted_trees/estimator_batch/estimator_test.py @@ -0,0 +1,138 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for GBDT estimator.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +import tempfile +from tensorflow.contrib.boosted_trees.estimator_batch import estimator +from tensorflow.contrib.boosted_trees.proto import learner_pb2 +from tensorflow.contrib.layers.python.layers import feature_column as contrib_feature_column +from tensorflow.contrib.learn.python.learn.estimators import run_config +from tensorflow.python.estimator.canned import head as head_lib +from tensorflow.python.feature_column import feature_column_lib as core_feature_column +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import test_util +from tensorflow.python.ops.losses import losses +from tensorflow.python.platform import gfile +from tensorflow.python.platform import googletest + + +def _train_input_fn(): + features = {"x": constant_op.constant([[2.], [1.], [1.]])} + label = constant_op.constant([[1], [0], [0]], dtype=dtypes.int32) + return features, label + + +def _eval_input_fn(): + features = {"x": constant_op.constant([[1.], [2.], [2.]])} + label = constant_op.constant([[0], [1], [1]], dtype=dtypes.int32) + return features, label + + +class BoostedTreeEstimatorTest(test_util.TensorFlowTestCase): + + def setUp(self): + self._export_dir_base = tempfile.mkdtemp() + "export/" + gfile.MkDir(self._export_dir_base) + + def 
testFitAndEvaluateDontThrowException(self): + learner_config = learner_pb2.LearnerConfig() + learner_config.num_classes = 2 + learner_config.constraints.max_tree_depth = 1 + model_dir = tempfile.mkdtemp() + config = run_config.RunConfig() + + classifier = estimator.GradientBoostedDecisionTreeClassifier( + learner_config=learner_config, + num_trees=1, + examples_per_layer=3, + model_dir=model_dir, + config=config, + feature_columns=[contrib_feature_column.real_valued_column("x")]) + + classifier.fit(input_fn=_train_input_fn, steps=15) + classifier.evaluate(input_fn=_eval_input_fn, steps=1) + classifier.export(self._export_dir_base) + + def testFitAndEvaluateDontThrowExceptionWithCoreForEstimator(self): + learner_config = learner_pb2.LearnerConfig() + learner_config.num_classes = 2 + learner_config.constraints.max_tree_depth = 1 + model_dir = tempfile.mkdtemp() + config = run_config.RunConfig() + + # Use core head + head_fn = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss( + loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE) + + model = estimator.GradientBoostedDecisionTreeEstimator( + head=head_fn, + learner_config=learner_config, + num_trees=1, + examples_per_layer=3, + model_dir=model_dir, + config=config, + feature_columns=[core_feature_column.numeric_column("x")], + use_core_libs=True) + + model.fit(input_fn=_train_input_fn, steps=15) + model.evaluate(input_fn=_eval_input_fn, steps=1) + model.export(self._export_dir_base) + + def testFitAndEvaluateDontThrowExceptionWithCoreForClassifier(self): + learner_config = learner_pb2.LearnerConfig() + learner_config.num_classes = 2 + learner_config.constraints.max_tree_depth = 1 + model_dir = tempfile.mkdtemp() + config = run_config.RunConfig() + + classifier = estimator.GradientBoostedDecisionTreeClassifier( + learner_config=learner_config, + num_trees=1, + examples_per_layer=3, + model_dir=model_dir, + config=config, + feature_columns=[core_feature_column.numeric_column("x")], + use_core_libs=True) + + 
classifier.fit(input_fn=_train_input_fn, steps=15) + classifier.evaluate(input_fn=_eval_input_fn, steps=1) + classifier.export(self._export_dir_base) + + def testFitAndEvaluateDontThrowExceptionWithCoreForRegressor(self): + learner_config = learner_pb2.LearnerConfig() + learner_config.num_classes = 2 + learner_config.constraints.max_tree_depth = 1 + model_dir = tempfile.mkdtemp() + config = run_config.RunConfig() + + regressor = estimator.GradientBoostedDecisionTreeRegressor( + learner_config=learner_config, + num_trees=1, + examples_per_layer=3, + model_dir=model_dir, + config=config, + feature_columns=[core_feature_column.numeric_column("x")], + use_core_libs=True) + + regressor.fit(input_fn=_train_input_fn, steps=15) + regressor.evaluate(input_fn=_eval_input_fn, steps=1) + regressor.export(self._export_dir_base) + + +if __name__ == "__main__": + googletest.main() diff --git a/tensorflow/contrib/boosted_trees/estimator_batch/estimator_utils.py b/tensorflow/contrib/boosted_trees/estimator_batch/estimator_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..48a7f85eada8c72de83b814af2f00e97a62a073e --- /dev/null +++ b/tensorflow/contrib/boosted_trees/estimator_batch/estimator_utils.py @@ -0,0 +1,74 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Utilities for converting between core and contrib feature columns.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.learn.python.learn.estimators import constants +from tensorflow.contrib.learn.python.learn.estimators import model_fn +from tensorflow.contrib.learn.python.learn.estimators import model_fn as contrib_model_fn_lib +from tensorflow.contrib.learn.python.learn.estimators import prediction_key +from tensorflow.python.estimator import model_fn as model_fn_lib +from tensorflow.python.estimator.export import export_output + +_CORE_MODE_TO_CONTRIB_MODE_ = { + model_fn_lib.ModeKeys.TRAIN: contrib_model_fn_lib.ModeKeys.TRAIN, + model_fn_lib.ModeKeys.EVAL: contrib_model_fn_lib.ModeKeys.EVAL, + model_fn_lib.ModeKeys.PREDICT: contrib_model_fn_lib.ModeKeys.INFER +} + + +def _core_mode_to_contrib_mode(mode): + return _CORE_MODE_TO_CONTRIB_MODE_[mode] + + +def _export_outputs_to_output_alternatives(export_outputs): + """Converts EstimatorSpec.export_outputs to output_alternatives. + + Args: + export_outputs: export_outputs created by create_estimator_spec. + Returns: + converted output_alternatives. 
+ """ + output = dict() + if export_outputs is not None: + for key, value in export_outputs.items(): + if isinstance(value, export_output.ClassificationOutput): + exported_predictions = { + prediction_key.PredictionKey.SCORES: value.scores, + prediction_key.PredictionKey.CLASSES: value.classes + } + output[key] = (constants.ProblemType.CLASSIFICATION, + exported_predictions) + return output + return None + + +def estimator_spec_to_model_fn_ops(estimator_spec, export_alternatives=False): + if export_alternatives: + alternatives = _export_outputs_to_output_alternatives( + estimator_spec.export_outputs) + else: + alternatives = [] + + return model_fn.ModelFnOps( + mode=_core_mode_to_contrib_mode(estimator_spec.mode), + predictions=estimator_spec.predictions, + loss=estimator_spec.loss, + train_op=estimator_spec.train_op, + eval_metric_ops=estimator_spec.eval_metric_ops, + output_alternatives=alternatives) diff --git a/tensorflow/contrib/boosted_trees/estimator_batch/model.py b/tensorflow/contrib/boosted_trees/estimator_batch/model.py index c6455a7ea3d18eb358edee034cee58b2bed21024..15ab6d814522ab1dee58dcd71246354fc4d8a483 100644 --- a/tensorflow/contrib/boosted_trees/estimator_batch/model.py +++ b/tensorflow/contrib/boosted_trees/estimator_batch/model.py @@ -20,6 +20,7 @@ from __future__ import print_function import copy +from tensorflow.contrib.boosted_trees.estimator_batch import estimator_utils from tensorflow.contrib.boosted_trees.estimator_batch import trainer_hooks from tensorflow.contrib.boosted_trees.python.ops import model_ops from tensorflow.contrib.boosted_trees.python.training.functions import gbdt_batch @@ -60,6 +61,7 @@ def model_builder(features, labels, mode, params, config): feature_columns = params["feature_columns"] weight_column_name = params["weight_column_name"] num_trees = params["num_trees"] + use_core_libs = params["use_core_libs"] logits_modifier_function = params["logits_modifier_function"] if features is None: raise ValueError("At least one 
feature must be specified.") @@ -93,7 +95,8 @@ def model_builder(features, labels, mode, params, config): learner_config=learner_config, feature_columns=feature_columns, logits_dimension=head.logits_dimension, - features=training_features) + features=training_features, + use_core_columns=use_core_libs) with ops.name_scope("gbdt", "gbdt_optimizer"): predictions_dict = gbdt_model.predict(mode) logits = predictions_dict["predictions"] @@ -108,12 +111,22 @@ def model_builder(features, labels, mode, params, config): update_op = state_ops.assign_add(global_step, 1).op return update_op - model_fn_ops = head.create_model_fn_ops( - features=features, - mode=mode, - labels=labels, - train_op_fn=_train_op_fn, - logits=logits) + create_estimator_spec_op = getattr(head, "create_estimator_spec", None) + if use_core_libs and callable(create_estimator_spec_op): + model_fn_ops = head.create_estimator_spec( + features=features, + mode=mode, + labels=labels, + train_op_fn=_train_op_fn, + logits=logits) + model_fn_ops = estimator_utils.estimator_spec_to_model_fn_ops(model_fn_ops) + else: + model_fn_ops = head.create_model_fn_ops( + features=features, + mode=mode, + labels=labels, + train_op_fn=_train_op_fn, + logits=logits) if num_trees: if center_bias: num_trees += 1 diff --git a/tensorflow/contrib/boosted_trees/lib/quantiles/weighted_quantiles_stream_test.cc b/tensorflow/contrib/boosted_trees/lib/quantiles/weighted_quantiles_stream_test.cc index 4481c0d0e4400acd93c9a277de185db7aaf9bcb0..67ac9bf387ae9b3ca29e610c2c4138c28302ca33 100644 --- a/tensorflow/contrib/boosted_trees/lib/quantiles/weighted_quantiles_stream_test.cc +++ b/tensorflow/contrib/boosted_trees/lib/quantiles/weighted_quantiles_stream_test.cc @@ -138,6 +138,12 @@ void GenerateOneValue(int32 worker_id, int64 max_elements, double *total_weight, stream->Finalize(); } +void GenerateOneZeroWeightedValue(int32 worker_id, int64 max_elements, + double *total_weight, Stream *stream) { + stream->PushEntry(10, 0); + 
stream->Finalize(); +} + TEST(WeightedQuantilesStreamTest, OneValue) { const double eps = 0.01; const int64 max_elements = 1 << 16; @@ -145,6 +151,13 @@ TEST(WeightedQuantilesStreamTest, OneValue) { {10.0, 10.0, 10.0, 10.0, 10.0}, 1e-2); } +TEST(WeightedQuantilesStreamTest, OneZeroWeightValue) { + const double eps = 0.01; + const int64 max_elements = 1 << 16; + TestSingleWorkerStreams(eps, max_elements, GenerateOneZeroWeightedValue, {}, + 1e-2); +} + TEST(WeightedQuantilesStreamTest, FixedUniform) { const double eps = 0.01; const int64 max_elements = 1 << 16; diff --git a/tensorflow/contrib/boosted_trees/lib/quantiles/weighted_quantiles_summary.h b/tensorflow/contrib/boosted_trees/lib/quantiles/weighted_quantiles_summary.h index aec232f3cbb096f0aa51e4362a821882391f8027..7576856dc3a6d0b6681ee9745c875cf46d1e2960 100644 --- a/tensorflow/contrib/boosted_trees/lib/quantiles/weighted_quantiles_summary.h +++ b/tensorflow/contrib/boosted_trees/lib/quantiles/weighted_quantiles_summary.h @@ -235,6 +235,11 @@ class WeightedQuantilesSummary { // The resulting boundaries are guaranteed to both contain at least // num_boundaries unique elements and maintain approximation bounds. std::vector GenerateBoundaries(int64 num_boundaries) const { + std::vector output; + if (entries_.empty()) { + return output; + } + // Generate soft compressed summary. WeightedQuantilesSummary compressed_summary; @@ -246,7 +251,6 @@ class WeightedQuantilesSummary { compressed_summary.Compress(num_boundaries, compression_eps); // Return boundaries. - std::vector output; output.reserve(compressed_summary.entries_.size()); for (const auto& entry : compressed_summary.entries_) { output.push_back(entry.value); @@ -260,6 +264,9 @@ class WeightedQuantilesSummary { // full rank queries O(nlogn). 
std::vector GenerateQuantiles(int64 num_quantiles) const { std::vector output; + if (entries_.empty()) { + return output; + } num_quantiles = std::max(num_quantiles, 2LL); output.reserve(num_quantiles + 1); diff --git a/tensorflow/contrib/boosted_trees/lib/utils/batch_features.h b/tensorflow/contrib/boosted_trees/lib/utils/batch_features.h index da5e7448519cb7f4092f7bbbe1b526271008ec22..a3b1b013e3a40116f74d6ed2df78d87ed3a11ac7 100644 --- a/tensorflow/contrib/boosted_trees/lib/utils/batch_features.h +++ b/tensorflow/contrib/boosted_trees/lib/utils/batch_features.h @@ -48,9 +48,9 @@ class BatchFeatures { Status GetFeatureColumnSizes(int64* const num_dense_float_features, int64* const num_sparse_float_features, int64* const num_sparse_int_features) const { - QCHECK_NE(num_dense_float_features, nullptr); - QCHECK_NE(num_sparse_float_features, nullptr); - QCHECK_NE(num_sparse_int_features, nullptr); + QCHECK_NE(num_dense_float_features, static_cast(nullptr)); + QCHECK_NE(num_sparse_float_features, static_cast(nullptr)); + QCHECK_NE(num_sparse_int_features, static_cast(nullptr)); *num_dense_float_features = dense_float_feature_columns_.size(); *num_sparse_float_features = sparse_float_feature_columns_.size(); *num_sparse_int_features = sparse_int_feature_columns_.size(); diff --git a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py index 85b909e4f2556c520a5bffe46d5954683d9dda5a..4bde7f3e33d6f8b295cd35cb32bbbccecf8a2b87 100644 --- a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py +++ b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py @@ -23,7 +23,6 @@ import copy from tensorflow.contrib import learn from tensorflow.contrib import stateless - from tensorflow.contrib.boosted_trees.lib.learner.batch import categorical_split_handler from tensorflow.contrib.boosted_trees.lib.learner.batch import ordinal_split_handler from 
tensorflow.contrib.boosted_trees.proto import learner_pb2 @@ -141,7 +140,7 @@ class _OpRoundRobinStrategy(object): return task -def extract_features(features, feature_columns): +def extract_features(features, feature_columns, use_core_columns): """Extracts columns from a dictionary of features. Args: @@ -174,7 +173,11 @@ def extract_features(features, feature_columns): transformed_features = collections.OrderedDict() for fc in feature_columns: # pylint: disable=protected-access - if isinstance(fc, feature_column_lib._EmbeddingColumn): + if use_core_columns: + # pylint: disable=protected-access + tensor = fc_core._transform_features(features, [fc])[fc] + transformed_features[fc.name] = tensor + elif isinstance(fc, feature_column_lib._EmbeddingColumn): # pylint: enable=protected-access transformed_features[fc.name] = fc_core.input_layer( features, [fc], @@ -265,7 +268,8 @@ class GradientBoostedDecisionTreeModel(object): learner_config, features, logits_dimension, - feature_columns=None): + feature_columns=None, + use_core_columns=False): """Construct a new GradientBoostedDecisionTreeModel function. 
Args: @@ -338,8 +342,9 @@ class GradientBoostedDecisionTreeModel(object): if not features: raise ValueError("Features dictionary must be specified.") (fc_names, dense_floats, sparse_float_indices, sparse_float_values, - sparse_float_shapes, sparse_int_indices, sparse_int_values, - sparse_int_shapes) = extract_features(features, self._feature_columns) + sparse_float_shapes, sparse_int_indices, + sparse_int_values, sparse_int_shapes) = extract_features( + features, self._feature_columns, use_core_columns) logging.info("Active Feature Columns: " + str(fc_names)) self._fc_names = fc_names self._dense_floats = dense_floats diff --git a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch_test.py b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch_test.py index 6411f57a5419123e799af9231a04fce8ae7724d4..f9c22283b7f5136777bfa60a12c94974adfbd245 100644 --- a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch_test.py +++ b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch_test.py @@ -27,9 +27,11 @@ from tensorflow.contrib.boosted_trees.python.ops import model_ops from tensorflow.contrib.boosted_trees.python.training.functions import gbdt_batch from tensorflow.contrib.boosted_trees.python.utils import losses +from tensorflow.python.feature_column import feature_column_lib as core_feature_column from tensorflow.contrib.layers.python.layers import feature_column as feature_column_lib from tensorflow.contrib.learn.python.learn.estimators import model_fn + from tensorflow.python.framework import dtypes from tensorflow.python.framework import sparse_tensor from tensorflow.python.framework import test_util @@ -43,7 +45,7 @@ from tensorflow.python.platform import googletest def _squared_loss(label, unused_weights, predictions): """Unweighted loss implementation.""" loss = math_ops.reduce_sum( - math_ops.square(predictions - label), 1, keep_dims=True) + math_ops.square(predictions - label), 1, keepdims=True) 
return loss @@ -99,7 +101,8 @@ class GbdtTest(test_util.TensorFlowTestCase): array_ops.zeros([2], dtypes.int64)) (fc_names, dense_floats, sparse_float_indices, sparse_float_values, sparse_float_shapes, sparse_int_indices, sparse_int_values, - sparse_int_shapes) = (gbdt_batch.extract_features(features, None)) + sparse_int_shapes) = ( + gbdt_batch.extract_features(features, None, use_core_columns=False)) self.assertEqual(len(fc_names), 3) self.assertAllEqual(fc_names, ["dense_float", "sparse_float", "sparse_int"]) @@ -148,8 +151,9 @@ class GbdtTest(test_util.TensorFlowTestCase): "sparse_categorical", hash_bucket_size=1000000)) (fc_names, dense_floats, sparse_float_indices, sparse_float_values, sparse_float_shapes, sparse_int_indices, sparse_int_values, - sparse_int_shapes) = (gbdt_batch.extract_features( - features, feature_columns)) + sparse_int_shapes) = ( + gbdt_batch.extract_features( + features, feature_columns, use_core_columns=False)) self.assertEqual(len(fc_names), 3) self.assertAllEqual(fc_names, ["dense_float", "sparse_float", "sparse_categorical"]) @@ -174,6 +178,41 @@ class GbdtTest(test_util.TensorFlowTestCase): self.assertAllEqual(sparse_int_shapes[0].eval(), features["sparse_categorical"].dense_shape.eval()) + def testExtractFeaturesFromCoreFeatureColumns(self): + """Tests feature extraction when using core columns.""" + with self.test_session(): + features = {} + # Sparse float column does not exist in core, so only dense numeric and + # categorical. 
+ features["dense_float"] = array_ops.zeros([2, 1], dtypes.float32) + features["sparse_categorical"] = sparse_tensor.SparseTensor( + array_ops.zeros([2, 2], dtypes.int64), + array_ops.zeros([2], dtypes.string), array_ops.zeros([2], + dtypes.int64)) + + feature_columns = set() + feature_columns.add(core_feature_column.numeric_column("dense_float")) + feature_columns.add( + core_feature_column.categorical_column_with_hash_bucket( + "sparse_categorical", hash_bucket_size=1000000)) + (fc_names, dense_floats, _, _, _, sparse_int_indices, sparse_int_values, + sparse_int_shapes) = ( + gbdt_batch.extract_features( + features, feature_columns, use_core_columns=True)) + self.assertEqual(len(fc_names), 2) + self.assertAllEqual(fc_names, ["dense_float", "sparse_categorical"]) + self.assertEqual(len(dense_floats), 1) + self.assertEqual(len(sparse_int_indices), 1) + self.assertEqual(len(sparse_int_values), 1) + self.assertEqual(len(sparse_int_shapes), 1) + self.assertAllEqual(dense_floats[0].eval(), + features["dense_float"].eval()) + self.assertAllEqual(sparse_int_indices[0].eval(), + features["sparse_categorical"].indices.eval()) + self.assertAllEqual(sparse_int_values[0].eval(), [397263, 397263]) + self.assertAllEqual(sparse_int_shapes[0].eval(), + features["sparse_categorical"].dense_shape.eval()) + def testTrainFnChiefNoBiasCentering(self): """Tests the train function running on chief without bias centering.""" with self.test_session() as sess: diff --git a/tensorflow/contrib/checkpoint/README.md b/tensorflow/contrib/checkpoint/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d35c5bae3b702c0fea5194e5e653660e319e38c5 --- /dev/null +++ b/tensorflow/contrib/checkpoint/README.md @@ -0,0 +1,2 @@ +Tools for working with object-based checkpoints produced by +`tf.train.Checkpoint`. 
diff --git a/tensorflow/python/ops/distributions/bijectors.py b/tensorflow/contrib/checkpoint/__init__.py similarity index 63% rename from tensorflow/python/ops/distributions/bijectors.py rename to tensorflow/contrib/checkpoint/__init__.py index 69c3a5d4c0ba86586ccb6e55e71d898b1bf7c035..1192cc44a17823f69db28947308a8b839a83e57e 100644 --- a/tensorflow/python/ops/distributions/bijectors.py +++ b/tensorflow/contrib/checkpoint/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,20 +12,21 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Core module for TensorFlow distribution bijectors.""" +"""Tools for working with object-based checkpoints. 
+ + +For creating and managing dependencies: +@@dot_graph_from_checkpoint +@@split_dependency +""" + from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.contrib.checkpoint.python.split_dependency import split_dependency +from tensorflow.contrib.checkpoint.python.visualize import dot_graph_from_checkpoint -# go/tf-wildcard-import -# pylint: disable=wildcard-import,unused-import -from tensorflow.python.ops.distributions.bijector import Bijector -from tensorflow.python.ops.distributions.identity_bijector import Identity - -# pylint: enable=wildcard-import,unused-import from tensorflow.python.util.all_util import remove_undocumented -_allowed_symbols = ["Bijector", "Identity"] - -remove_undocumented(__name__, _allowed_symbols) +remove_undocumented(module_name=__name__) diff --git a/tensorflow/contrib/checkpoint/python/BUILD b/tensorflow/contrib/checkpoint/python/BUILD new file mode 100644 index 0000000000000000000000000000000000000000..a5681ffa61d07ef29d0a0862db9736a210c8e26e --- /dev/null +++ b/tensorflow/contrib/checkpoint/python/BUILD @@ -0,0 +1,61 @@ +licenses(["notice"]) # Apache 2.0 + +package(default_visibility = ["//tensorflow:internal"]) + +load("//tensorflow:tensorflow.bzl", "py_test") + +py_library( + name = "checkpoint", + srcs_version = "PY2AND3", + deps = [ + ":split_dependency", + ":visualize", + ], +) + +py_library( + name = "split_dependency", + srcs = ["split_dependency.py"], + srcs_version = "PY2AND3", + visibility = ["//tensorflow:internal"], + deps = [ + "//tensorflow/python:control_flow_ops", + "//tensorflow/python:training", + ], +) + +py_test( + name = "split_dependency_test", + srcs = ["split_dependency_test.py"], + deps = [ + ":split_dependency", + "//tensorflow/python:array_ops", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:resource_variable_ops", + "//tensorflow/python:training", + "//tensorflow/python/eager:test", + ], +) + +py_library( + name = 
"visualize", + srcs = ["visualize.py"], + srcs_version = "PY2AND3", + visibility = ["//tensorflow:internal"], + deps = [ + "//tensorflow/python:pywrap_tensorflow", + ], +) + +py_test( + name = "visualize_test", + srcs = ["visualize_test.py"], + deps = [ + ":visualize", + "//tensorflow/python:array_ops", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:resource_variable_ops", + "//tensorflow/python:training", + "//tensorflow/python/eager:test", + ], +) diff --git a/tensorflow/contrib/checkpoint/python/split_dependency.py b/tensorflow/contrib/checkpoint/python/split_dependency.py new file mode 100644 index 0000000000000000000000000000000000000000..3aec8c96e90440d6da00d95cffc34bd53ec7164f --- /dev/null +++ b/tensorflow/contrib/checkpoint/python/split_dependency.py @@ -0,0 +1,136 @@ +"""Utility for creating multiple dependencies with synchronized save/restore.""" +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import functools + +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.training import checkpointable as checkpointable +from tensorflow.python.training import saver as saver_lib + + +class _CallbackSaveable(saver_lib.BaseSaverBuilder.SaveableObject): + """Wraps save and restore callbacks as a `SaveableObject`.""" + + def __init__(self, name, dtype, save_callback, restore_callback): + self._restore_callback = restore_callback + spec = saver_lib.BaseSaverBuilder.SaveSpec( + tensor=save_callback, + slice_spec="", + name=name, + dtype=dtype) + super(_CallbackSaveable, self).__init__( + save_callback, [spec], name) + + def restore(self, restored_tensors, restored_shapes): + """Restore the same value into both variables.""" + tensor, = restored_tensors + return self._restore_callback(tensor) + + +class _SplitDependency(checkpointable.CheckpointableBase): + """Looks like a regular variable while synchronizing save/restores.""" + + def __init__(self, save_buffer, restore_buffer, name, dtype, num_components, + fill_save_buffer_fn, consume_restore_buffer_fn): + self._save_buffer = save_buffer + self._restore_buffer = restore_buffer + self._name = name + self._dtype = dtype + self._num_components = num_components + self._fill_save_buffer_fn = fill_save_buffer_fn + self._consume_restore_buffer_fn = consume_restore_buffer_fn + + def _save(self): + """Pull from the shared buffer, populating it if necessary.""" + if self._name not in self._save_buffer: + if self._save_buffer: + raise AssertionError( + ("Split dependency %s (%s) unsynchronized. 
Split dependencies must " + "be saved together.") % (self._name, self)) + self._fill_save_buffer_fn(self._save_buffer) + return self._save_buffer.pop(self._name) + + def _restore(self, tensor): + """Push into the shared buffer, flushing it if necessary.""" + if self._name in self._restore_buffer: + raise AssertionError( + ("Split dependency %s (%s) unsynchronized. Split dependencies must " + "be restored together.") % (self._name, self)) + self._restore_buffer[self._name] = tensor + if len(self._restore_buffer) == self._num_components: + op = self._consume_restore_buffer_fn(self._restore_buffer) + self._restore_buffer.clear() + return op + else: + return control_flow_ops.no_op() + + def _gather_saveables_for_checkpoint(self): + """Looks to Checkpointable like a regular variable.""" + return { + checkpointable.VARIABLE_VALUE_KEY: + functools.partial(_CallbackSaveable, + dtype=self._dtype, + save_callback=self._save, + restore_callback=self._restore) + } + + +def split_dependency(component_names, component_dtypes, + fill_save_buffer_fn, consume_restore_buffer_fn): + """Creates multiple dependencies with a synchronized save/restore. + + Useful when a single op produces `Tensor`s which should each be saved under + different objects, or when `Tensor`s saved with many different objects need to + be restored together as inputs to a single op (i.e. an object which uses a + single fused op may be swapped out for a subgraph of objects, and these two + programs are checkpoint compatible). + + Args: + component_names: A sequence of names for the split + dependencies. `fill_save_buffer_fn` must add these keys to the dictionary + it is passed, and `consume_restore_buffer_fn` will receive a dictionary + with these keys. + component_dtypes: Data types for the `Tensor`s being saved and restored, a + sequence corresponding to `component_names`. + fill_save_buffer_fn: A function which takes an empty dictionary as an + argument and adds `Tensor`s with `component_names` as keys. 
These + `Tensor`s will be saved as if they were individual variables. + consume_restore_buffer_fn: A function which takes a dictionary with + `component_names` as keys mapping to restored individual `Tensor`s and + returns a restore op (or if executing eagerly, runs the restoration and + may return `None`). + + Returns: + A dictionary mapping from names to Checkpointable objects. If one is + reachable from an object as a dependency, the others should be too; adding + dependencies on some but not all of the objects will result in errors. + """ + save_buffer = {} + restore_buffer = {} + split_dependencies = {} + for name, dtype in zip(component_names, component_dtypes): + split_dependencies[name] = _SplitDependency( + save_buffer=save_buffer, + restore_buffer=restore_buffer, + name=name, + dtype=dtype, + num_components=len(component_names), + fill_save_buffer_fn=fill_save_buffer_fn, + consume_restore_buffer_fn=consume_restore_buffer_fn) + return split_dependencies diff --git a/tensorflow/contrib/checkpoint/python/split_dependency_test.py b/tensorflow/contrib/checkpoint/python/split_dependency_test.py new file mode 100644 index 0000000000000000000000000000000000000000..f1d9d19b047ee69281cf8bdba38a28dc87947e38 --- /dev/null +++ b/tensorflow/contrib/checkpoint/python/split_dependency_test.py @@ -0,0 +1,112 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os + +from tensorflow.contrib.checkpoint.python import split_dependency +from tensorflow.python.eager import test +from tensorflow.python.framework import test_util +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import resource_variable_ops +from tensorflow.python.training import checkpointable +from tensorflow.python.training import checkpointable_utils + + +def _split_variable_closure(variable): + def _fill_save_buffer_fn(save_buffer): + save_buffer["first_half"] = variable[:2] + save_buffer["second_half"] = variable[2:] + return _fill_save_buffer_fn + + +def _combine_variable_closure(variable): + def _consume_restore_buffer_fn(restore_buffer): + return variable.assign( + array_ops.concat([restore_buffer["first_half"], + restore_buffer["second_half"]], + axis=0)) + return _consume_restore_buffer_fn + + +class SaveTensorSlicesAsDeps(checkpointable.CheckpointableBase): + + def __init__(self): + self.combined = resource_variable_ops.ResourceVariable([0., 0., 0., 0.]) + split_dependencies = split_dependency.split_dependency( + component_names=("first_half", "second_half"), + component_dtypes=(self.combined.dtype,) * 2, + fill_save_buffer_fn=_split_variable_closure( + self.combined), + consume_restore_buffer_fn=_combine_variable_closure( + self.combined)) + for name, dep in split_dependencies.items(): + self._track_checkpointable(dep, name=name) + + +class HasRegularDeps(checkpointable.Checkpointable): + + def __init__(self): + self.first_half = resource_variable_ops.ResourceVariable([0., 0.]) + self.second_half = resource_variable_ops.ResourceVariable([0., 0.]) + + +class OnlyOneDep(checkpointable.Checkpointable): + + def __init__(self): + self.first_half = resource_variable_ops.ResourceVariable([0., 0.]) + + +class 
SplitTests(test.TestCase): + + @test_util.run_in_graph_and_eager_modes() + def testSaveRestoreSplitDep(self): + save_checkpoint = checkpointable_utils.Checkpoint( + dep=SaveTensorSlicesAsDeps()) + self.evaluate(save_checkpoint.dep.combined.assign([1., 2., 3., 4.])) + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + save_path = save_checkpoint.save(checkpoint_prefix) + + regular_deps = HasRegularDeps() + regular_restore_checkpoint = checkpointable_utils.Checkpoint( + dep=regular_deps) + regular_restore_checkpoint.restore( + save_path).assert_consumed().run_restore_ops() + self.assertAllEqual([1., 2.], self.evaluate(regular_deps.first_half)) + self.assertAllEqual([3., 4.], self.evaluate(regular_deps.second_half)) + + one_dep = OnlyOneDep() + one_dep_restore_checkpoint = checkpointable_utils.Checkpoint(dep=one_dep) + status = one_dep_restore_checkpoint.restore(save_path) + with self.assertRaises(AssertionError): + # Missing the second dependency. + status.assert_consumed() + status.run_restore_ops() + self.assertAllEqual([1., 2.], self.evaluate(one_dep.first_half)) + + restore_checkpoint = checkpointable_utils.Checkpoint() + status = restore_checkpoint.restore(save_path) + restore_checkpoint.dep = SaveTensorSlicesAsDeps() + status.assert_consumed().run_restore_ops() + self.assertAllEqual( + [1., 2., 3., 4.], + self.evaluate(restore_checkpoint.dep.combined)) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/checkpoint/python/visualize.py b/tensorflow/contrib/checkpoint/python/visualize.py new file mode 100644 index 0000000000000000000000000000000000000000..86fbdb41d2c37803f2bd71b5aa2f72845c87d448 --- /dev/null +++ b/tensorflow/contrib/checkpoint/python/visualize.py @@ -0,0 +1,111 @@ +"""Utilities for visualizing dependency graphs.""" +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.core.protobuf import checkpointable_object_graph_pb2 +from tensorflow.python import pywrap_tensorflow +from tensorflow.python.framework import errors_impl +from tensorflow.python.training import checkpointable + + +def dot_graph_from_checkpoint(save_path): + r"""Visualizes an object-based checkpoint (from `tf.train.Checkpoint`). + + Useful for inspecting checkpoints and debugging loading issues. + + Example usage from Python (requires pydot): + ```python + import tensorflow as tf + import pydot + + dot_string = tf.contrib.checkpoint.dot_graph_from_checkpoint('/path/to/ckpt') + parsed, = pydot.graph_from_dot_data(dot_string) + parsed.write_svg('/tmp/tensorflow/visualized_checkpoint.svg') + ``` + + Example command line usage: + ```sh + python -c "import tensorflow as tf;\ + print(tf.contrib.checkpoint.dot_graph_from_checkpoint('/path/to/ckpt'))"\ + | dot -Tsvg > /tmp/tensorflow/checkpoint_viz.svg + ``` + + Args: + save_path: The checkpoint prefix, as returned by `tf.train.Checkpoint.save` + or `tf.train.latest_checkpoint`. + Returns: + A graph in DOT format as a string. 
+ """ + reader = pywrap_tensorflow.NewCheckpointReader(save_path) + try: + object_graph_string = reader.get_tensor( + checkpointable.OBJECT_GRAPH_PROTO_KEY) + except errors_impl.NotFoundError: + raise ValueError( + ('The specified checkpoint "%s" does not appear to be object-based (it ' + 'is missing the key "%s"). Likely it was created with a name-based ' + 'saver and does not contain an object dependency graph.') % ( + save_path, checkpointable.OBJECT_GRAPH_PROTO_KEY)) + shape_map = reader.get_variable_to_shape_map() + dtype_map = reader.get_variable_to_dtype_map() + object_graph = ( + checkpointable_object_graph_pb2.CheckpointableObjectGraph()) + object_graph.ParseFromString(object_graph_string) + graph = 'digraph {\n' + def _escape(name): + return name.replace('"', '\\"') + slot_ids = set() + for node in object_graph.nodes: + for slot_reference in node.slot_variables: + slot_ids.add(slot_reference.slot_variable_node_id) + for node_id, node in enumerate(object_graph.nodes): + if (len(node.attributes) == 1 + and node.attributes[0].name == checkpointable.VARIABLE_VALUE_KEY): + if node_id in slot_ids: + color = 'orange' + tooltip_prefix = 'Slot variable' + else: + color = 'blue' + tooltip_prefix = 'Variable' + attribute = node.attributes[0] + graph += ('N_%d [shape=point label="" color=%s width=.25' + ' tooltip="%s %s shape=%s %s"]\n') % ( + node_id, + color, + tooltip_prefix, + _escape(attribute.full_name), + shape_map[attribute.checkpoint_key], + dtype_map[attribute.checkpoint_key].name) + elif node.slot_variables: + graph += ('N_%d [shape=point label="" width=.25 color=red,' + 'tooltip="Optimizer"]\n') % node_id + else: + graph += 'N_%d [shape=point label="" width=.25]\n' % node_id + for reference in node.children: + graph += 'N_%d -> N_%d [label="%s"]\n' % ( + node_id, reference.node_id, _escape(reference.local_name)) + for slot_reference in node.slot_variables: + graph += 'N_%d -> N_%d [label="%s" style=dotted]\n' % ( + node_id, + 
slot_reference.slot_variable_node_id, + _escape(slot_reference.slot_name)) + graph += 'N_%d -> N_%d [style=dotted]\n' % ( + slot_reference.original_variable_node_id, + slot_reference.slot_variable_node_id) + graph += '}\n' + return graph diff --git a/tensorflow/contrib/checkpoint/python/visualize_test.py b/tensorflow/contrib/checkpoint/python/visualize_test.py new file mode 100644 index 0000000000000000000000000000000000000000..1d9ab789235cb964521315b4864563f89745ae75 --- /dev/null +++ b/tensorflow/contrib/checkpoint/python/visualize_test.py @@ -0,0 +1,97 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import functools +import os + +from tensorflow.contrib.checkpoint.python import visualize + +from tensorflow.python.eager import context +from tensorflow.python.eager import test +from tensorflow.python.framework import constant_op +from tensorflow.python.keras._impl.keras.engine import training +from tensorflow.python.keras._impl.keras.layers import core +from tensorflow.python.ops import resource_variable_ops +from tensorflow.python.training import adam +from tensorflow.python.training import checkpointable_utils + +try: + import pydot # pylint: disable=g-import-not-at-top +except ImportError: + pydot = None + + +class MyModel(training.Model): + """A concrete Model for testing.""" + + def __init__(self): + super(MyModel, self).__init__() + self._named_dense = core.Dense(1, use_bias=True) + self._second = core.Dense(1, use_bias=False) + + def call(self, values): + ret = self._second(self._named_dense(values)) + return ret + + +class DotGraphTests(test.TestCase): + + def testMakeDotGraph(self): + with context.eager_mode(): + input_value = constant_op.constant([[3.]]) + model = MyModel() + optimizer = adam.AdamOptimizer(0.001) + optimizer_step = resource_variable_ops.ResourceVariable(12) + save_checkpoint = checkpointable_utils.Checkpoint( + optimizer=optimizer, model=model, optimizer_step=optimizer_step) + optimizer.minimize(functools.partial(model, input_value)) + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, 'ckpt') + save_path = save_checkpoint.save(checkpoint_prefix) + prefix = save_checkpoint.save(save_path) + + dot_graph_string = visualize.dot_graph_from_checkpoint(prefix) + + # The remainder of this test is more-or-less optional since it's so + # dependent on pydot/platform/Python versions. 
+ if pydot is None: + self.skipTest('pydot is required for the remainder of this test.') + try: + parsed, = pydot.graph_from_dot_data(dot_graph_string) + except NameError as e: + if "name 'dot_parser' is not defined" in str(e): + self.skipTest("pydot isn't working") + else: + raise + # Check that the graph isn't completely trivial + self.assertEqual( + '"model"', + parsed.obj_dict['edges'][('N_0', 'N_1')][0]['attributes']['label']) + image_path = os.path.join(self.get_temp_dir(), 'saved.svg') + try: + parsed.write_svg(image_path) + except Exception as e: # pylint: disable=broad-except + # For some reason PyDot's "dot not available" error is an Exception, not + # something more specific. + if '"dot" not found in path' in str(e): + self.skipTest("pydot won't save SVGs (dot not available)") + else: + raise + +if __name__ == '__main__': + test.main() diff --git a/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py b/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py index 95c5c920aa2ccf92d8aa6aa179102fe379f0236c..1403483d287041b02dfbf538f7e7ddee11662f47 100644 --- a/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py +++ b/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py @@ -61,11 +61,13 @@ class TPUClusterResolver(ClusterResolver): return False return True - def _inGke(self): + @staticmethod + def _inGke(): """When running in GKE, the environment variable will be set.""" return _GKE_ENV_VARIABLE in os.environ - def _gkeMaster(self): + @staticmethod + def _gkeMaster(): return os.environ[_GKE_ENV_VARIABLE].split(',')[0] def __init__(self, @@ -119,8 +121,9 @@ class TPUClusterResolver(ClusterResolver): 'Using multiple TPUs in a single session is not yet implemented') tpu = tpu[0] + in_gke = self._inGke() # When using GKE with Cloud TPUs, the env variable will be set. 
- if tpu is None and self._inGke(): + if tpu is None and in_gke: tpu = self._gkeMaster() self._tpu = compat.as_bytes(tpu) # self._tpu is always bytes @@ -158,7 +161,8 @@ class TPUClusterResolver(ClusterResolver): self._service = service self._coordinator_name = coordinator_name - if coordinator_name and not coordinator_address and should_resolve: + if coordinator_name and not coordinator_address and (should_resolve or + in_gke): self._start_local_server() else: self._coordinator_address = coordinator_address @@ -204,31 +208,50 @@ class TPUClusterResolver(ClusterResolver): Raises: RuntimeError: If the provided TPU is not healthy. """ - if not self._shouldResolve(): - return server_lib.ClusterSpec({}) - - full_name = 'projects/%s/locations/%s/nodes/%s' % ( - self._project, self._zone, compat.as_text(self._tpu)) - request = self._service.projects().locations().nodes().get(name=full_name) - response = request.execute() - - if 'health' in response and response['health'] != 'HEALTHY': - raise RuntimeError('TPU "%s" is unhealthy: "%s"' % (self._tpu, - response['health'])) - - if 'networkEndpoints' in response: - worker_list = [ - '%s:%s' % (endpoint['ipAddress'], endpoint['port']) - for endpoint in response['networkEndpoints'] - ] + ############################################################################ + # There are 5 potential cases this code must handle: + # 1. [Normal case.] We should resolve the TPU name to a set of tasks, and + # a. Create a ClusterSpec that includes the coordinator job + # b. Create a ClusterSpec without the coordinator job. + # 2. [GKE / No API Access.] We should not resolve the TPU name to a set of + # tasks and + # a. Create a ClusterSpec with the coordinator + # b. Create a ClusterSpec without the coordinator + # 3. [Other (legacy non-gRPC).] We should return an empty ClusterSpec. + ############################################################################ + + if self._shouldResolve(): + # Case 1. 
+ full_name = 'projects/%s/locations/%s/nodes/%s' % ( + self._project, self._zone, compat.as_text(self._tpu)) + request = self._service.projects().locations().nodes().get(name=full_name) + response = request.execute() + + if 'health' in response and response['health'] != 'HEALTHY': + raise RuntimeError('TPU "%s" is unhealthy: "%s"' % (self._tpu, + response['health'])) + + if 'networkEndpoints' in response: + worker_list = [ + '%s:%s' % (endpoint['ipAddress'], endpoint['port']) + for endpoint in response['networkEndpoints'] + ] + else: + # Fall back to the deprecated response format + instance_url = '%s:%s' % (response['ipAddress'], response['port']) + worker_list = [instance_url] + + cluster_spec = {self._job_name: worker_list} else: - # Fall back to the deprecated response format - instance_url = '%s:%s' % (response['ipAddress'], response['port']) - worker_list = [instance_url] - - cluster_spec = {self._job_name: worker_list} + if not self._tpu.startswith(compat.as_bytes('grpc://')): + # Case 3. + return None + # Case 2. 
+ cluster_spec = {self._job_name: [self._tpu[len( + compat.as_bytes('grpc://')):]]} if self._coordinator_address: + # {1, 2}.a cluster_spec[self._coordinator_name] = [self._coordinator_address] return server_lib.ClusterSpec(cluster_spec) diff --git a/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver_test.py b/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver_test.py index e1e3e6867a24b917885a9ab7e780df55742ec0f9..5b3f9be5a11237f9dceebefa1db294efaf7e482d 100644 --- a/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver_test.py +++ b/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver_test.py @@ -356,20 +356,15 @@ class TPUClusterResolverTest(test.TestCase): tpu_cluster_resolver = TPUClusterResolver(tpu='/bns/foo/bar') self.assertEqual( compat.as_bytes('/bns/foo/bar'), tpu_cluster_resolver.master()) - self.assertEqual( - server_lib.ClusterSpec({}), tpu_cluster_resolver.cluster_spec()) + self.assertEqual(None, tpu_cluster_resolver.cluster_spec()) def testGkeEnvironment(self): os.environ['KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS'] = 'grpc://10.120.27.5:8470' self.assertTrue('KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS' in os.environ) - tpu_cluster_resolver = TPUClusterResolver() - self.assertTrue(tpu_cluster_resolver._inGke()) - self.assertEqual( - compat.as_bytes('grpc://10.120.27.5:8470'), - compat.as_bytes(tpu_cluster_resolver._gkeMaster())) + self.assertTrue(TPUClusterResolver._inGke()) self.assertEqual( compat.as_bytes('grpc://10.120.27.5:8470'), - compat.as_bytes(tpu_cluster_resolver.get_master())) + compat.as_bytes(TPUClusterResolver._gkeMaster())) del os.environ['KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS'] diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt index 23b31ae1dcc83d8a7152354ac147de9ada320429..5f38a8e5c755cd11a568e437b3ade665d983461f 100644 --- a/tensorflow/contrib/cmake/CMakeLists.txt +++ b/tensorflow/contrib/cmake/CMakeLists.txt @@ -31,10 +31,14 @@ 
option(tensorflow_BUILD_PYTHON_TESTS "Build python unit tests " OFF) option(tensorflow_BUILD_MORE_PYTHON_TESTS "Build more python unit tests for contrib packages" OFF) option(tensorflow_BUILD_SHARED_LIB "Build TensorFlow as a shared library" OFF) option(tensorflow_OPTIMIZE_FOR_NATIVE_ARCH "Enable compiler optimizations for the native processor architecture (if available)" ON) -option(tensorflow_WIN_CPU_SIMD_OPTIONS "Enables CPU SIMD instructions") option(tensorflow_ENABLE_SNAPPY_SUPPORT "Enable SNAPPY compression support" ON) option(tensorflow_DISABLE_EIGEN_FORCEINLINE "Disable forceinline, to speed up build on windows." OFF) +# SIMD, MKL and MKLDNN options +option(tensorflow_WIN_CPU_SIMD_OPTIONS "Enables CPU SIMD instructions" OFF) +option(tensorflow_ENABLE_MKL_SUPPORT "Enable Intel MKL support" OFF) +option(tensorflow_ENABLE_MKLDNN_SUPPORT "Enable Intel MKLDNN support, requires MKL enabled" OFF) + # GPU, CUDA and cuDNN options option(tensorflow_ENABLE_GPU "Enable GPU support" OFF) set(tensorflow_CUDA_VERSION "9.0" CACHE STRING "CUDA version to build against") @@ -124,8 +128,16 @@ endif() add_definitions(-DEIGEN_AVOID_STL_ARRAY) if(WIN32) - add_definitions(-DNOMINMAX -D_WIN32_WINNT=0x0A00 -DLANG_CXX11 -DCOMPILER_MSVC) - add_definitions(-DWIN32 -DOS_WIN -D_MBCS -DWIN64 -DWIN32_LEAN_AND_MEAN -DNOGDI -DPLATFORM_WINDOWS) + if(CMAKE_SIZEOF_VOID_P EQUAL 8) + # 64 bits + add_definitions(-DWIN64) + elseif(CMAKE_SIZEOF_VOID_P EQUAL 4) + # 32 bits + # temporary fix for #18241 + add_definitions(-DEIGEN_DEFAULT_DENSE_INDEX_TYPE=std::int64_t) + endif() + add_definitions(-DNOMINMAX -D_WIN32_WINNT=0x0A00 -DLANG_CXX11) + add_definitions(-DWIN32 -DOS_WIN -D_MBCS -DWIN32_LEAN_AND_MEAN -DNOGDI -DPLATFORM_WINDOWS) add_definitions(-DTENSORFLOW_USE_EIGEN_THREADPOOL -DEIGEN_HAS_C99_MATH) add_definitions(-DTF_COMPILE_LIBRARY) add_definitions(/bigobj /nologo /EHsc /GF /MP /Gm-) @@ -162,12 +174,21 @@ endif() # MSVC SIMD instructions if (tensorflow_WIN_CPU_SIMD_OPTIONS) + 
include(CheckCXXCompilerFlag) + if (tensorflow_ENABLE_MKL_SUPPORT) + add_definitions(-DINTEL_MKL -DEIGEN_USE_VML) + if (NOT tensorflow_ENABLE_MKLDNN_SUPPORT) + add_definitions(-DINTEL_MKL_ML) + endif() + endif() + CHECK_CXX_COMPILER_FLAG("-fopenmp" COMPILER_OPT_OPENMP_SUPPORT) + if (COMPILER_OPT_OPENMP_SUPPORT) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp") + endif() if (WIN32) - CHECK_CXX_COMPILER_FLAG("${tensorflow_WIN_CPU_SIMD_OPTIONS}" COMPILER_OPT_WIN_CPU_SIMD_SUPPORTED) + CHECK_CXX_COMPILER_FLAG(${tensorflow_WIN_CPU_SIMD_OPTIONS} COMPILER_OPT_WIN_CPU_SIMD_SUPPORTED) if(COMPILER_OPT_WIN_CPU_SIMD_SUPPORTED) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${tensorflow_WIN_CPU_SIMD_OPTIONS}") - else() - message(FATAL_ERROR "${tensorflow_WIN_CPU_SIMD_OPTIONS} not supported") endif() endif() endif() @@ -193,6 +214,7 @@ include(protobuf) include(re2) include(cub) include(sqlite) +include(double_conversion) if (tensorflow_BUILD_CC_TESTS) include(googletest) endif() @@ -213,6 +235,7 @@ set(tensorflow_EXTERNAL_LIBRARIES ${protobuf_STATIC_LIBRARIES} ${re2_STATIC_LIBRARIES} ${sqlite_STATIC_LIBRARIES} + ${double_conversion_STATIC_LIBRARIES} ) if (systemlib_ZLIB) @@ -240,6 +263,7 @@ set(tensorflow_EXTERNAL_DEPENDENCIES fft2d re2 sqlite_copy_headers_to_destination + double_conversion ) include_directories( @@ -262,6 +286,7 @@ include_directories( ${PROTOBUF_INCLUDE_DIRS} ${re2_INCLUDE_DIR} ${sqlite_INCLUDE_DIR} + ${double_conversion_INCLUDE_DIR} ) if(tensorflow_ENABLE_SSL_SUPPORT) @@ -298,6 +323,43 @@ if(HAIKU) list(APPEND tensorflow_EXTERNAL_LIBRARIES network) endif() +if (tensorflow_ENABLE_MKL_SUPPORT) + if (WIN32) + find_path(MKL_HOME_PLATFORM mkl + PATHS ${MKL_HOME} ${MKL_HOME}/../ ${MKL_HOME}/../../ + PATH_SUFFIXES windows) + set(MKL_INCLUDE_DIRS ${MKL_HOME_PLATFORM}/mkl/include) + set(MKL_LINK_DIRS + ${MKL_HOME_PLATFORM}/mkl/lib/intel64 + ${MKL_HOME_PLATFORM}/tbb/lib/intel64/vc_mt + ${MKL_HOME_PLATFORM}/compiler/lib/intel64 + ${MKL_HOME_PLATFORM}/mkl/tools/builder/lib) 
+ set(MKL_REDIST_DLL_DIRS + ${MKL_HOME_PLATFORM}/redist/intel64/mkl + ${MKL_HOME_PLATFORM}/redist/intel64/tbb/vc_mt + ${MKL_HOME_PLATFORM}/redist/intel64/compiler) + list(APPEND tensorflow_EXTERNAL_LIBRARIES + mkl_intel_lp64_dll mkl_sequential_dll mkl_core_dll mkl_rt mkl_cdll_intel64) + endif() + if (UNIX) + # Fix me: complete the path on linux + find_path(MKL_HOME_PLATFORM mkl + HINTS ${MKL_HOME} ${MKL_HOME}/../ ${MKL_HOME}/../../ + PATH_SUFFIXES linux) + set(MKL_INCLUDE_DIRS ${MKL_HOME_PLATFORM}/mkl/include) + set(MKL_LINK_DIRS) # incompleted + set(MKL_REDIST_SO_DIRS) # incompleted + endif() + include_directories(${MKL_INCLUDE_DIRS}) + link_directories(${MKL_LINK_DIRS}) + if (tensorflow_ENABLE_MKLDNN_SUPPORT) + include(mkldnn) + list(APPEND tensorflow_EXTERNAL_LIBRARIES ${mkldnn_STATIC_LIBRARIES}) + list(APPEND tensorflow_EXTERNAL_DEPENDENCIES mkldnn) + include_directories(${mkldnn_INCLUDE_DIRS}) + endif() +endif (tensorflow_ENABLE_MKL_SUPPORT) + if (tensorflow_ENABLE_GPU) if (NOT WIN32) # Default install paths for cuda libraries in Linux diff --git a/tensorflow/contrib/cmake/README.md b/tensorflow/contrib/cmake/README.md index fe83bb32046cd75328c92a74cdb4fdb6ce44560e..0b79f718d4823a987e02804f59a432ee46d0ada3 100644 --- a/tensorflow/contrib/cmake/README.md +++ b/tensorflow/contrib/cmake/README.md @@ -128,6 +128,18 @@ Step-by-step Windows build D:\local\cuda\bin ``` + * When building with MKL support after installing [MKL](https://software.intel.com/en-us/mkl) from INTEL, append its bin directories to your PATH environment variable. + + In case TensorFlow fails to find the MKL dll's during initialization, check your PATH environment variable. + It should contain the directory of the MKL dlls. 
For example: + + ``` + D:\Tools\IntelSWTools\compilers_and_libraries\windows\redist\intel64\mkl + D:\Tools\IntelSWTools\compilers_and_libraries\windows\redist\intel64\compiler + D:\Tools\IntelSWTools\compilers_and_libraries\windows\redist\intel64\tbb\vc_mt + ``` + + * We assume that `cmake` and `git` are installed and in your `%PATH%`. If for example `cmake` is not in your path and it is installed in `C:\Program Files (x86)\CMake\bin\cmake.exe`, you can add this directory @@ -166,7 +178,15 @@ Step-by-step Windows build More? -Dtensorflow_ENABLE_GPU=ON ^ More? -DCUDNN_HOME="D:\...\cudnn" ``` + To build with MKL support add "^" at the end of the last line above following with: + + ``` + More? -Dtensorflow_ENABLE_MKL_SUPPORT=ON ^ + More? -DMKL_HOME="D:\...\compilers_and_libraries" + ``` + To enable SIMD instructions with MSVC, as AVX and SSE, define it as follows: + ``` More? -Dtensorflow_WIN_CPU_SIMD_OPTIONS=/arch:AVX ``` @@ -226,6 +246,7 @@ Step-by-step Windows build ``` ctest -C RelWithDebInfo ``` + * `-Dtensorflow_BUILD_MORE_PYTHON_TESTS=(ON|OFF)`. Defaults to `OFF`. This enables python tests on serveral major packages. This option is only valid if this and tensorflow_BUILD_PYTHON_TESTS are both set as `ON`. After building the python wheel, you need to install the new wheel before running the tests. @@ -234,6 +255,12 @@ Step-by-step Windows build ctest -C RelWithDebInfo ``` + * `-Dtensorflow_ENABLE_MKL_SUPPORT=(ON|OFF)`. Defaults to `OFF`. Include MKL support. If MKL is enabled you need to install the [Intel Math Kernal Library](https://software.intel.com/en-us/mkl). + CMake will expect the location of MKL in -MKL_HOME=path_you_install_mkl. + + * `-Dtensorflow_ENABLE_MKLDNN_SUPPORT=(ON|OFF)`. Defaults to `OFF`. Include MKL DNN support. MKL DNN is [Intel(R) Math Kernel Library for Deep Neural Networks (Intel(R) MKL-DNN)](https://github.com/intel/mkl-dnn). You have to add `-Dtensorflow_ENABLE_MKL_SUPPORT=ON` before including MKL DNN support. + + 4. 
Invoke MSBuild to build TensorFlow. To build the C++ example program, which will be created as a `.exe` @@ -251,6 +278,7 @@ Step-by-step Windows build D:\...\build> MSBuild /p:Configuration=Release tf_python_build_pip_package.vcxproj ``` + Linux Continuous Integration build ================================== diff --git a/tensorflow/contrib/cmake/external/double_conversion.cmake b/tensorflow/contrib/cmake/external/double_conversion.cmake new file mode 100644 index 0000000000000000000000000000000000000000..527ccdc8d887cb4c2e7d2412c99a8bc682568472 --- /dev/null +++ b/tensorflow/contrib/cmake/external/double_conversion.cmake @@ -0,0 +1,54 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +include (ExternalProject) + +set(double_conversion_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/double_conversion/src/double_conversion) +set(double_conversion_URL https://github.com/google/double-conversion.git) +set(double_conversion_TAG 5664746) +set(double_conversion_BUILD ${double_conversion_INCLUDE_DIR}) +set(double_conversion_LIBRARIES ${double_conversion_BUILD}/double-conversion/libdouble-conversion.so) +set(double_conversion_INCLUDES ${double_conversion_BUILD}) + +if(WIN32) + set(double_conversion_STATIC_LIBRARIES ${double_conversion_BUILD}/double-conversion/$(Configuration)/double-conversion.lib) +else() + set(double_conversion_STATIC_LIBRARIES ${double_conversion_BUILD}/double-conversion/libdouble-conversion.a) +endif() + +set(double_conversion_HEADERS + "${double_conversion_INCLUDE_DIR}/double-conversion/bignum-dtoa.h" + "${double_conversion_INCLUDE_DIR}/double-conversion/cached-powers.h" + "${double_conversion_INCLUDE_DIR}/double-conversion/double-conversion.h" + "${double_conversion_INCLUDE_DIR}/double-conversion/fixed-dtoa.h" + "${double_conversion_INCLUDE_DIR}/double-conversion/strtod.h" + "${double_conversion_INCLUDE_DIR}/double-conversion/bignum.h" + "${double_conversion_INCLUDE_DIR}/double-conversion/diy-fp.h" + "${double_conversion_INCLUDE_DIR}/double-conversion/fast-dtoa.h" + "${double_conversion_INCLUDE_DIR}/double-conversion/ieee.h" + "${double_conversion_INCLUDE_DIR}/double-conversion/utils.h" +) + +ExternalProject_Add(double_conversion + PREFIX double_conversion + GIT_REPOSITORY ${double_conversion_URL} + GIT_TAG ${double_conversion_TAG} + DOWNLOAD_DIR "${DOWNLOAD_LOCATION}" + BUILD_IN_SOURCE 1 + INSTALL_COMMAND "" + CMAKE_CACHE_ARGS + -DCMAKE_BUILD_TYPE:STRING=Release + -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON +) diff --git a/tensorflow/contrib/cmake/external/gemmlowp.cmake b/tensorflow/contrib/cmake/external/gemmlowp.cmake 
index a235442dc5c0a07e249653381436eeae81575883..cdaa6b73b93666d272faacb869e8272561a2c74c 100644 --- a/tensorflow/contrib/cmake/external/gemmlowp.cmake +++ b/tensorflow/contrib/cmake/external/gemmlowp.cmake @@ -14,8 +14,8 @@ # ============================================================================== include (ExternalProject) -set(gemmlowp_URL https://github.com/google/gemmlowp/archive/6a2a90822e8546fc2bfa7044de0faf1c1cb4862f.zip) -set(gemmlowp_HASH SHA256=3447948d219f3270383766bbe08942888c0eb4e0ca6663c0e0548502ec5bb77d) +set(gemmlowp_URL https://github.com/google/gemmlowp/archive/38ebac7b059e84692f53e5938f97a9943c120d98.zip) +set(gemmlowp_HASH SHA256=b87faa7294dfcc5d678f22a59d2c01ca94ea1e2a3b488c38a95a67889ed0a658) set(gemmlowp_BUILD ${CMAKE_CURRENT_BINARY_DIR}/gemmlowp/src/gemmlowp) set(gemmlowp_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/gemmlowp/src/gemmlowp) diff --git a/tensorflow/contrib/cmake/external/grpc.cmake b/tensorflow/contrib/cmake/external/grpc.cmake index abfc69243eb1d65e29a718a659738afd3c7750d9..693dc7cd673233b889b35a3f3170b57581da9a9f 100644 --- a/tensorflow/contrib/cmake/external/grpc.cmake +++ b/tensorflow/contrib/cmake/external/grpc.cmake @@ -17,7 +17,7 @@ include (ExternalProject) set(GRPC_INCLUDE_DIRS ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/include) set(GRPC_URL https://github.com/grpc/grpc.git) set(GRPC_BUILD ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc) -set(GRPC_TAG bd6bdf93279a39a8cd92978fd7c9d14eccd98fc2) +set(GRPC_TAG d184fa229d75d336aedea0041bd59cb93e7e267f) if(WIN32) if(${CMAKE_GENERATOR} MATCHES "Visual Studio.*") @@ -35,6 +35,7 @@ else() set(grpc_STATIC_LIBRARIES ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/libgrpc++_unsecure.a ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/libgrpc_unsecure.a + ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/libaddress_sorting.a ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/third_party/cares/cares/lib/libcares.a ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/libgpr.a) endif() diff --git 
a/tensorflow/contrib/cmake/external/mkldnn.cmake b/tensorflow/contrib/cmake/external/mkldnn.cmake new file mode 100644 index 0000000000000000000000000000000000000000..a639fdee367f060d4c8a79267803da6ffe3dc503 --- /dev/null +++ b/tensorflow/contrib/cmake/external/mkldnn.cmake @@ -0,0 +1,44 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +include (ExternalProject) + +set(mkldnn_INCLUDE_DIRS ${CMAKE_CURRENT_BINARY_DIR}/mkldnn/src/mkldnn/include) +set(mkldnn_URL https://github.com/01org/mkl-dnn.git) +set(mkldnn_BUILD ${CMAKE_CURRENT_BINARY_DIR}/mkldnn/src/mkldnn/src) +set(mkldnn_TAG 3063b2e4c943983f6bf5f2fb9a490d4a998cd291) + +if(WIN32) + if(${CMAKE_GENERATOR} MATCHES "Visual Studio.*") + set(mkldnn_STATIC_LIBRARIES ${CMAKE_CURRENT_BINARY_DIR}/mkldnn/src/mkldnn/src/Release/mkldnn.lib) + else() + set(mkldnn_STATIC_LIBRARIES ${CMAKE_CURRENT_BINARY_DIR}/mkldnn/src/mkldnn/src/mkldnn.lib) + endif() +else() + set(mkldnn_STATIC_LIBRARIES ${CMAKE_CURRENT_BINARY_DIR}/mkldnn/src/mkldnn/src/libmkldnn.a) +endif() + +ExternalProject_Add(mkldnn + PREFIX mkldnn + GIT_REPOSITORY ${mkldnn_URL} + GIT_TAG ${mkldnn_TAG} + DOWNLOAD_DIR "${DOWNLOAD_LOCATION}" + BUILD_IN_SOURCE 1 + BUILD_BYPRODUCTS ${mkldnn_STATIC_LIBRARIES} + INSTALL_COMMAND "" + CMAKE_CACHE_ARGS + -DCMAKE_BUILD_TYPE:STRING=Release + -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF + 
-DMKLINC:STRING=${MKL_INCLUDE_DIRS} +) diff --git a/tensorflow/contrib/cmake/external/png.cmake b/tensorflow/contrib/cmake/external/png.cmake index 6cd66a65990e7a2b963b52b310061b551752cd4d..ad2af01bc002555ce48f8b9bfb7d8d724a1a7dc8 100644 --- a/tensorflow/contrib/cmake/external/png.cmake +++ b/tensorflow/contrib/cmake/external/png.cmake @@ -15,32 +15,33 @@ include (ExternalProject) set(png_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/external/png_archive) -set(png_URL https://storage.googleapis.com/libpng-public-archive/libpng-1.2.53.tar.gz) -set(png_HASH SHA256=e05c9056d7f323088fd7824d8c6acc03a4a758c4b4916715924edc5dd3223a72) +set(png_URL https://mirror.bazel.build/github.com/glennrp/libpng/archive/v1.6.34.tar.gz) +set(png_HASH SHA256=e45ce5f68b1d80e2cb9a2b601605b374bdf51e1798ef1c2c2bd62131dfcf9eef) set(png_BUILD ${CMAKE_BINARY_DIR}/png/src/png) set(png_INSTALL ${CMAKE_BINARY_DIR}/png/install) if(WIN32) if(${CMAKE_GENERATOR} MATCHES "Visual Studio.*") set(png_STATIC_LIBRARIES - debug ${CMAKE_BINARY_DIR}/png/install/lib/libpng12_staticd.lib - optimized ${CMAKE_BINARY_DIR}/png/install/lib/libpng12_static.lib) + debug ${CMAKE_BINARY_DIR}/png/install/lib/libpng16_staticd.lib + optimized ${CMAKE_BINARY_DIR}/png/install/lib/libpng16_static.lib) else() if(CMAKE_BUILD_TYPE EQUAL Debug) set(png_STATIC_LIBRARIES - ${CMAKE_BINARY_DIR}/png/install/lib/libpng12_staticd.lib) + ${CMAKE_BINARY_DIR}/png/install/lib/libpng16_staticd.lib) else() set(png_STATIC_LIBRARIES - ${CMAKE_BINARY_DIR}/png/install/lib/libpng12_static.lib) + ${CMAKE_BINARY_DIR}/png/install/lib/libpng16_static.lib) endif() endif() else() - set(png_STATIC_LIBRARIES ${CMAKE_BINARY_DIR}/png/install/lib/libpng12.a) + set(png_STATIC_LIBRARIES ${CMAKE_BINARY_DIR}/png/install/lib/libpng16.a) endif() set(png_HEADERS - "${png_INSTALL}/include/libpng12/png.h" - "${png_INSTALL}/include/libpng12/pngconf.h" + "${png_INSTALL}/include/libpng16/png.h" + "${png_INSTALL}/include/libpng16/pngconf.h" + 
"${png_INSTALL}/include/libpng16/pnglibconf.h" ) ExternalProject_Add(png diff --git a/tensorflow/contrib/cmake/external/sqlite.cmake b/tensorflow/contrib/cmake/external/sqlite.cmake index 57c4ae76517e4d7247093edd5e5bd95a83258d87..7f835d2d519273a6d52d12f92ed585a4ddbeb973 100644 --- a/tensorflow/contrib/cmake/external/sqlite.cmake +++ b/tensorflow/contrib/cmake/external/sqlite.cmake @@ -15,8 +15,8 @@ include (ExternalProject) set(sqlite_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/external/sqlite) -set(sqlite_URL https://mirror.bazel.build/www.sqlite.org/2017/sqlite-amalgamation-3200000.zip) -set(sqlite_HASH SHA256=208780b3616f9de0aeb50822b7a8f5482f6515193859e91ed61637be6ad74fd4) +set(sqlite_URL https://mirror.bazel.build/www.sqlite.org/2018/sqlite-amalgamation-3230100.zip) +set(sqlite_HASH SHA256=4239a1f69e5721d07d9a374eb84d594225229e54be4ee628da2995f4315d8dfc) set(sqlite_BUILD ${CMAKE_CURRENT_BINARY_DIR}/sqlite/src/sqlite) set(sqlite_INSTALL ${CMAKE_CURRENT_BINARY_DIR}/sqlite/install) diff --git a/tensorflow/contrib/cmake/python_modules.txt b/tensorflow/contrib/cmake/python_modules.txt index f273c7e5508e10407d013acd7adc08c732322841..fbcdf7e753de45519203c5747b1b2217c36527f0 100644 --- a/tensorflow/contrib/cmake/python_modules.txt +++ b/tensorflow/contrib/cmake/python_modules.txt @@ -104,6 +104,8 @@ tensorflow/python/user_ops tensorflow/python/util tensorflow/python/util/protobuf tensorflow/tools +tensorflow/tools/api +tensorflow/tools/api/generator tensorflow/tools/graph_transforms tensorflow/contrib tensorflow/contrib/all_reduce @@ -128,6 +130,8 @@ tensorflow/contrib/boosted_trees/ops tensorflow/contrib/boosted_trees/proto tensorflow/contrib/boosted_trees/python tensorflow/contrib/boosted_trees/python/ops +tensorflow/contrib/checkpoint +tensorflow/contrib/checkpoint/python tensorflow/contrib/cloud tensorflow/contrib/cloud/kernels tensorflow/contrib/cloud/ops @@ -168,7 +172,6 @@ tensorflow/contrib/distributions/python tensorflow/contrib/distributions/python/ops 
tensorflow/contrib/distributions/python/ops/bijectors tensorflow/contrib/eager -tensorflow/contrib/eager/proto tensorflow/contrib/eager/python tensorflow/contrib/estimator tensorflow/contrib/estimator/python @@ -355,6 +358,9 @@ tensorflow/contrib/periodic_resample tensorflow/contrib/periodic_resample/python tensorflow/contrib/periodic_resample/python/ops tensorflow/contrib/predictor +tensorflow/contrib/proto +tensorflow/contrib/proto/python +tensorflow/contrib/proto/python/ops tensorflow/contrib/quantization tensorflow/contrib/quantization/python tensorflow/contrib/quantize @@ -363,6 +369,10 @@ tensorflow/contrib/receptive_field tensorflow/contrib/receptive_field/python tensorflow/contrib/receptive_field/python/util tensorflow/contrib/receptive_field/python/util/examples +tensorflow/contrib/recurrent +tensorflow/contrib/recurrent/python +tensorflow/contrib/recurrent/python/ops +tensorflow/contrib/recurrent/python/kernel_tests tensorflow/contrib/reduce_slice_ops tensorflow/contrib/reduce_slice_ops/kernels tensorflow/contrib/reduce_slice_ops/ops @@ -383,6 +393,9 @@ tensorflow/contrib/rnn/ops tensorflow/contrib/rnn/python tensorflow/contrib/rnn/python/kernel_tests tensorflow/contrib/rnn/python/ops +tensorflow/contrib/rpc +tensorflow/contrib/rpc/python +tensorflow/contrib/rpc/python/ops tensorflow/contrib/saved_model tensorflow/contrib/saved_model/python tensorflow/contrib/saved_model/python/saved_model diff --git a/tensorflow/contrib/cmake/python_protos.txt b/tensorflow/contrib/cmake/python_protos.txt index 0c80d529af5230ed6d36b265e12ee4b749a14ec4..d63c41db844af243f0c6600b1565635ac9b91cac 100644 --- a/tensorflow/contrib/cmake/python_protos.txt +++ b/tensorflow/contrib/cmake/python_protos.txt @@ -5,7 +5,6 @@ tensorflow/python tensorflow/contrib/boosted_trees/proto tensorflow/contrib/cloud/kernels tensorflow/contrib/decision_trees/proto -tensorflow/contrib/eager/proto tensorflow/contrib/gdr tensorflow/contrib/lite/toco tensorflow/contrib/mpi diff --git 
a/tensorflow/contrib/cmake/tf_core_framework.cmake b/tensorflow/contrib/cmake/tf_core_framework.cmake index a1c320347fe60f87806736befc677541a93e7e93..b47c32f1c48b3d42fe5b4ba115cc2a511b7ee5f4 100644 --- a/tensorflow/contrib/cmake/tf_core_framework.cmake +++ b/tensorflow/contrib/cmake/tf_core_framework.cmake @@ -276,7 +276,7 @@ add_custom_command(OUTPUT __force_rebuild COMMAND ${CMAKE_COMMAND} -E echo) add_custom_command(OUTPUT ${VERSION_INFO_CC} COMMAND ${PYTHON_EXECUTABLE} ${tensorflow_source_dir}/tensorflow/tools/git/gen_git_source.py - --raw_generate ${VERSION_INFO_CC} + ARGS --raw_generate ${VERSION_INFO_CC} --source_dir ${tensorflow_source_dir} --git_tag_override=${GIT_TAG_OVERRIDE} DEPENDS __force_rebuild) set(tf_version_srcs ${tensorflow_source_dir}/tensorflow/core/util/version_info.cc) @@ -341,9 +341,3 @@ add_dependencies(tf_core_framework tf_core_lib proto_text ) - -if(WIN32) - # Cmake > 3.6 will quote this as -D"__VERSION__=\"MSVC\"" which nvcc fails on. - # Instead of defining this global, limit it to tf_core_framework where its used. 
- target_compile_definitions(tf_core_framework PRIVATE __VERSION__="MSVC") -endif() diff --git a/tensorflow/contrib/cmake/tf_core_ops.cmake b/tensorflow/contrib/cmake/tf_core_ops.cmake index 092a48bc6b63503be39343a1f936875082490b3e..e558691de4b74988031f7b2204aad92e8c7af68b 100644 --- a/tensorflow/contrib/cmake/tf_core_ops.cmake +++ b/tensorflow/contrib/cmake/tf_core_ops.cmake @@ -25,6 +25,8 @@ set(tf_op_lib_names "cudnn_rnn_ops" "data_flow_ops" "dataset_ops" + "decode_proto_ops" + "encode_proto_ops" "functional_ops" "image_ops" "io_ops" @@ -40,6 +42,7 @@ set(tf_op_lib_names "random_ops" "remote_fused_graph_ops" "resource_variable_ops" + "rpc_ops" "script_ops" "sdca_ops" "set_ops" diff --git a/tensorflow/contrib/cmake/tf_python.cmake b/tensorflow/contrib/cmake/tf_python.cmake index b7763079242c9c205751a302be3f309b2eb68a14..c4bdb69d828b269e6246777e74c3756ba1c4b96f 100755 --- a/tensorflow/contrib/cmake/tf_python.cmake +++ b/tensorflow/contrib/cmake/tf_python.cmake @@ -330,6 +330,10 @@ GENERATE_PYTHON_OP_LIB("ctc_ops") GENERATE_PYTHON_OP_LIB("cudnn_rnn_ops") GENERATE_PYTHON_OP_LIB("data_flow_ops") GENERATE_PYTHON_OP_LIB("dataset_ops") +GENERATE_PYTHON_OP_LIB("decode_proto_ops" + DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/proto/python/ops/gen_decode_proto_op.py) +GENERATE_PYTHON_OP_LIB("encode_proto_ops" + DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/proto/python/ops/gen_encode_proto_op.py) GENERATE_PYTHON_OP_LIB("image_ops") GENERATE_PYTHON_OP_LIB("io_ops") GENERATE_PYTHON_OP_LIB("linalg_ops") @@ -343,6 +347,8 @@ GENERATE_PYTHON_OP_LIB("random_ops") GENERATE_PYTHON_OP_LIB("remote_fused_graph_ops" DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/remote_fused_graph/pylib/python/ops/gen_remote_fused_graph_ops.py) GENERATE_PYTHON_OP_LIB("resource_variable_ops") +GENERATE_PYTHON_OP_LIB("rpc_ops" + DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/rpc/python/ops/gen_rpc_op.py) 
GENERATE_PYTHON_OP_LIB("script_ops") GENERATE_PYTHON_OP_LIB("sdca_ops") GENERATE_PYTHON_OP_LIB("set_ops") @@ -474,6 +480,8 @@ set (pywrap_tensorflow_internal_src "${tensorflow_source_dir}/tensorflow/python/lib/core/ndarray_tensor_bridge.cc" "${tensorflow_source_dir}/tensorflow/python/lib/core/py_func.h" "${tensorflow_source_dir}/tensorflow/python/lib/core/py_func.cc" + "${tensorflow_source_dir}/tensorflow/python/lib/core/py_exception_registry.h" + "${tensorflow_source_dir}/tensorflow/python/lib/core/py_exception_registry.cc" "${tensorflow_source_dir}/tensorflow/python/lib/core/py_seq_tensor.h" "${tensorflow_source_dir}/tensorflow/python/lib/core/py_seq_tensor.cc" "${tensorflow_source_dir}/tensorflow/python/lib/core/py_util.h" @@ -546,12 +554,13 @@ if(WIN32) set(pywrap_tensorflow_deffile "${CMAKE_CURRENT_BINARY_DIR}/pywrap_tensorflow.def") endif() set_source_files_properties(${pywrap_tensorflow_deffile} PROPERTIES GENERATED TRUE) - + math(EXPR tensorflow_target_bitness "${CMAKE_SIZEOF_VOID_P}*8") add_custom_command(TARGET pywrap_tensorflow_internal_static POST_BUILD COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/tools/create_def_file.py --input "${pywrap_tensorflow_internal_static_dependencies}" --output "${pywrap_tensorflow_deffile}" --target _pywrap_tensorflow_internal.pyd + --bitness "${tensorflow_target_bitness}" BYPRODUCTS ${pywrap_tensorflow_deffile} # Required for Ninja ) endif(WIN32) @@ -581,6 +590,12 @@ add_library(pywrap_tensorflow_internal SHARED ${pywrap_tensorflow_deffile} ) +# There is a bug in GCC 5 resulting in undefined reference to a __cpu_model function when +# linking to the tensorflow library. Adding the following libraries fixes it. 
+if(CMAKE_COMPILER_IS_GNUCC AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 5.0) + target_link_libraries(pywrap_tensorflow_internal PRIVATE gcc_s gcc) +endif() + if(WIN32) add_dependencies(pywrap_tensorflow_internal pywrap_tensorflow_internal_static) endif(WIN32) @@ -684,6 +699,77 @@ AddUserOps(TARGET _beam_search_ops DEPENDS pywrap_tensorflow_internal tf_python_ops DISTCOPY ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/seq2seq/python/ops/) +if(WIN32) + if(${CMAKE_GENERATOR} MATCHES "Visual Studio.*") + add_custom_command(TARGET pywrap_tensorflow_internal POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_BINARY_DIR}/$(Configuration)/pywrap_tensorflow_internal.dll + ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/python/_pywrap_tensorflow_internal.pyd + COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_BINARY_DIR}/$(Configuration)/pywrap_tensorflow_internal.lib + ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/python/) + else() + add_custom_command(TARGET pywrap_tensorflow_internal POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_BINARY_DIR}/pywrap_tensorflow_internal.dll + ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/python/_pywrap_tensorflow_internal.pyd + COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_BINARY_DIR}/pywrap_tensorflow_internal.lib + ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/python/) + endif() +else() + add_custom_command(TARGET pywrap_tensorflow_internal POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_BINARY_DIR}/libpywrap_tensorflow_internal.so + ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/python/_pywrap_tensorflow_internal.so) +endif() + + +######################################################## +# Generate API __init__.py files. +######################################################## + +# Parse tensorflow/tools/api/generator/BUILD to get list of generated files. 
+FILE(READ ${tensorflow_source_dir}/tensorflow/tools/api/generator/BUILD api_generator_BUILD_text) +STRING(REGEX MATCH "# BEGIN GENERATED FILES.*# END GENERATED FILES" api_init_files_text ${api_generator_BUILD_text}) +string(REPLACE "# BEGIN GENERATED FILES" "" api_init_files_text ${api_init_files_text}) +string(REPLACE "# END GENERATED FILES" "" api_init_files_text ${api_init_files_text}) +string(REPLACE "," ";" api_init_files_list ${api_init_files_text}) + +set(api_init_files "") +foreach(api_init_file ${api_init_files_list}) + string(STRIP "${api_init_file}" api_init_file) + if(api_init_file) + string(REPLACE "\"" "" api_init_file "${api_init_file}") # Remove quotes + list(APPEND api_init_files "${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/tools/api/generator/${api_init_file}") + endif() +endforeach(api_init_file) +set(api_init_list_file "${tensorflow_source_dir}/api_init_files_list.txt") +file(WRITE "${api_init_list_file}" "${api_init_files}") + +# Run create_python_api.py to generate __init__.py files. +add_custom_command( + OUTPUT ${api_init_files} + DEPENDS tf_python_ops tf_python_copy_scripts_to_destination pywrap_tensorflow_internal tf_python_touchup_modules tf_extension_ops + + # tensorflow/__init__.py depends on files generated in this step. So, remove it while + # this step is running since the files aren't there yet. + COMMAND ${CMAKE_COMMAND} -E rename ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/__init__.py + ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/final.__init__.py + COMMAND ${CMAKE_COMMAND} -E touch ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/__init__.py + + # Run create_python_api.py to generate API init files. + COMMAND ${CMAKE_COMMAND} -E env PYTHONPATH=${CMAKE_CURRENT_BINARY_DIR}/tf_python ${PYTHON_EXECUTABLE} + "${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/tools/api/generator/create_python_api.py" "${api_init_list_file}" + + # Re-add tensorflow/__init__.py back. 
+ COMMAND ${CMAKE_COMMAND} -E remove -f ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/__init__.py + COMMAND ${CMAKE_COMMAND} -E rename ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/final.__init__.py + ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/__init__.py + + COMMENT "Generating __init__.py files for Python API." + WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/tf_python" +) + +add_custom_target(tf_python_api SOURCES ${api_init_files}) +add_dependencies(tf_python_api tf_python_ops) + + ############################################################ # Build a PIP package containing the TensorFlow runtime. ############################################################ @@ -693,6 +779,7 @@ add_dependencies(tf_python_build_pip_package tf_python_copy_scripts_to_destination tf_python_touchup_modules tf_python_ops + tf_python_api tf_extension_ops) # Fix-up Python files that were not included by the add_python_module() macros. @@ -705,25 +792,6 @@ add_custom_command(TARGET tf_python_copy_scripts_to_destination PRE_BUILD COMMAND ${CMAKE_COMMAND} -E copy ${tensorflow_source_dir}/tensorflow/contrib/testing/python/framework/util_test.py ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/testing/python/framework/) -if(WIN32) - if(${CMAKE_GENERATOR} MATCHES "Visual Studio.*") - add_custom_command(TARGET tf_python_build_pip_package POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_BINARY_DIR}/$(Configuration)/pywrap_tensorflow_internal.dll - ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/python/_pywrap_tensorflow_internal.pyd - COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_BINARY_DIR}/$(Configuration)/pywrap_tensorflow_internal.lib - ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/python/) - else() - add_custom_command(TARGET tf_python_build_pip_package POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_BINARY_DIR}/pywrap_tensorflow_internal.dll - ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/python/_pywrap_tensorflow_internal.pyd - 
COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_BINARY_DIR}/pywrap_tensorflow_internal.lib - ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/python/) - endif() -else() - add_custom_command(TARGET tf_python_build_pip_package POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_BINARY_DIR}/libpywrap_tensorflow_internal.so - ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/python/_pywrap_tensorflow_internal.so) -endif() add_custom_command(TARGET tf_python_build_pip_package POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy ${tensorflow_source_dir}/tensorflow/tools/pip_package/README ${CMAKE_CURRENT_BINARY_DIR}/tf_python/) diff --git a/tensorflow/contrib/cmake/tf_shared_lib.cmake b/tensorflow/contrib/cmake/tf_shared_lib.cmake index 9738bbeb9aebaeb67495127528e26634887d392c..38f40452b533fdc0dba6ac686a0ff43a2ef13cb8 100644 --- a/tensorflow/contrib/cmake/tf_shared_lib.cmake +++ b/tensorflow/contrib/cmake/tf_shared_lib.cmake @@ -52,12 +52,13 @@ if(WIN32) set(tensorflow_deffile "${CMAKE_CURRENT_BINARY_DIR}/tensorflow.def") endif() set_source_files_properties(${tensorflow_deffile} PROPERTIES GENERATED TRUE) - + math(EXPR tensorflow_target_bitness "${CMAKE_SIZEOF_VOID_P}*8") add_custom_command(TARGET tensorflow_static POST_BUILD COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/tools/create_def_file.py --input "${tensorflow_static_dependencies}" --output "${tensorflow_deffile}" --target tensorflow.dll + --bitness "${tensorflow_target_bitness}" ) endif(WIN32) diff --git a/tensorflow/contrib/cmake/tf_stream_executor.cmake b/tensorflow/contrib/cmake/tf_stream_executor.cmake index 91ca33f4c4d5f6c822f45b0676e6e46d2e4c2860..af48ef1fd40456162fee8b1e2c3ca45ecdb58830 100644 --- a/tensorflow/contrib/cmake/tf_stream_executor.cmake +++ b/tensorflow/contrib/cmake/tf_stream_executor.cmake @@ -65,6 +65,12 @@ if (tensorflow_ENABLE_GPU) file(GLOB tf_stream_executor_gpu_srcs "${tensorflow_source_dir}/tensorflow/stream_executor/cuda/*.cc" ) + if (NOT tensorflow_BUILD_CC_TESTS) + 
file(GLOB tf_stream_executor_gpu_tests + "${tensorflow_source_dir}/tensorflow/stream_executor/cuda/*_test.cc" + ) + list(REMOVE_ITEM tf_stream_executor_gpu_srcs ${tf_stream_executor_gpu_tests}) + endif() list(APPEND tf_stream_executor_srcs ${tf_stream_executor_gpu_srcs}) endif() diff --git a/tensorflow/contrib/cmake/tools/create_def_file.py b/tensorflow/contrib/cmake/tools/create_def_file.py index 53c2285699a6ca94e1e6b147080338b507f4d768..cffe069aa352f8a6f2c436bc70b62f54e2336ac6 100644 --- a/tensorflow/contrib/cmake/tools/create_def_file.py +++ b/tensorflow/contrib/cmake/tools/create_def_file.py @@ -63,7 +63,7 @@ INCLUDE_RE = re.compile(r"^(TF_\w*)$|" r"^(TFE_\w*)$|" r"tensorflow::|" r"functor::|" - r"nsync_|" + r"\?nsync_|" r"perftools::gputools") # We want to identify data members explicitly in the DEF file, so that no one @@ -87,6 +87,7 @@ def get_args(): required=True) parser.add_argument("--output", help="output deffile", required=True) parser.add_argument("--target", help="name of the target", required=True) + parser.add_argument("--bitness", help="build target bitness", required=True) args = parser.parse_args() return args @@ -125,7 +126,10 @@ def main(): # Header for the def file. def_fp.write("LIBRARY " + args.target + "\n") def_fp.write("EXPORTS\n") - def_fp.write("\t ??1OpDef@tensorflow@@UEAA@XZ\n") + if args.bitness == "64": + def_fp.write("\t??1OpDef@tensorflow@@UEAA@XZ\n") + else: + def_fp.write("\t??1OpDef@tensorflow@@UAE@XZ\n") # Each symbols returned by undname matches the same position in candidates. # We compare on undname but use the decorated name from candidates. 
diff --git a/tensorflow/contrib/coder/BUILD b/tensorflow/contrib/coder/BUILD index ce12e38248785987e51befa47d04143e235554fe..9ca4ce8a9c765677865f77ea4982ad8613ce334c 100644 --- a/tensorflow/contrib/coder/BUILD +++ b/tensorflow/contrib/coder/BUILD @@ -92,6 +92,34 @@ tf_cc_test( ], ) +tf_kernel_library( + name = "pmf_to_cdf_op", + srcs = ["kernels/pmf_to_cdf_op.cc"], + visibility = ["//visibility:public"], + deps = [ + ":coder_ops_op_lib", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + ], +) + +tf_cc_test( + name = "pmf_to_cdf_op_test", + size = "small", + srcs = ["kernels/pmf_to_cdf_op_test.cc"], + deps = [ + ":pmf_to_cdf_op", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + "//tensorflow/core/kernels:ops_testutil", + ], +) + cc_library( name = "all_ops", deps = [":coder_ops_op_lib"], @@ -99,12 +127,16 @@ cc_library( cc_library( name = "all_kernels", - deps = [":range_coder_ops"], + deps = [ + ":pmf_to_cdf_op", + ":range_coder_ops", + ], ) tf_custom_op_library( name = "python/ops/_coder_ops.so", srcs = [ + "kernels/pmf_to_cdf_op.cc", "kernels/range_coder.cc", "kernels/range_coder.h", "kernels/range_coder_ops.cc", diff --git a/tensorflow/contrib/coder/kernels/pmf_to_cdf_op.cc b/tensorflow/contrib/coder/kernels/pmf_to_cdf_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..bd5272ee6f20ac3537a2e378225ede5ee90782c5 --- /dev/null +++ b/tensorflow/contrib/coder/kernels/pmf_to_cdf_op.cc @@ -0,0 +1,196 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#define EIGEN_USE_THREADS + +#include +#include +#include +#include +#include + +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/framework/tensor_types.h" +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/lib/core/threadpool.h" +#include "tensorflow/core/lib/gtl/array_slice.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/macros.h" +#include "tensorflow/core/platform/types.h" + +namespace tensorflow { +namespace { +using errors::InvalidArgument; + +class PmfToCdfOp : public OpKernel { + public: + explicit PmfToCdfOp(OpKernelConstruction* context) : OpKernel(context) { + OP_REQUIRES_OK(context, context->GetAttr("precision", &precision_)); + OP_REQUIRES( + context, 0 < precision_ && precision_ <= 16, + InvalidArgument("`precision` must be in [1, 16]: ", precision_)); + } + + void Compute(OpKernelContext* context) override { + const Tensor& pmf_tensor = context->input(0); + + TensorShape shape = pmf_tensor.shape(); + OP_REQUIRES(context, TensorShapeUtils::IsVectorOrHigher(shape), + InvalidArgument("`pmf` should be at least 1-D.")); + OP_REQUIRES( + context, shape.dim_size(shape.dims() - 1) > 1, + InvalidArgument("`pmf` size should be at least 2 in the last axis.")); + shape.set_dim(shape.dims() - 1, shape.dim_size(shape.dims() - 1) + 1); + + Tensor* cdf_tensor; + OP_REQUIRES_OK(context, 
context->allocate_output(0, shape, &cdf_tensor)); + + auto pmf = pmf_tensor.flat_inner_dims(); + auto cdf = cdf_tensor->flat_inner_dims(); + CHECK_EQ(pmf.dimension(0), cdf.dimension(0)); + CHECK_EQ(pmf.dimension(1) + 1, cdf.dimension(1)); + + const double n = pmf.dimension(1); + const int64 cost_per_unit = static_cast(50.0 * n * std::log2(n)); + thread::ThreadPool* thread_pool = + context->device()->tensorflow_cpu_worker_threads()->workers; + thread_pool->ParallelFor( + pmf.dimension(0), cost_per_unit, + [this, pmf, &cdf](int64 start, int64 limit) { + const gtl::ArraySlice::size_type pmf_size = pmf.dimension(1); + for (int64 i = start; i < limit; ++i) { + cdf(i, 0) = 0; + PerShard({&pmf(i, 0), pmf_size}, {&cdf(i, 1), pmf_size}); + } + }); + } + + private: + struct PenaltyItem { + PenaltyItem(int32* p, double mass) : pointer(p), mass(mass) { + penalty = ComputeNextPenalty(); + } + + void Decrease() { + CHECK_GT(*pointer, 1); + --*pointer; + penalty = ComputeNextPenalty(); + } + + friend bool operator<(const PenaltyItem& lhs, const PenaltyItem& rhs) { + return lhs.penalty < rhs.penalty; + } + + double ComputeNextPenalty() { + if (*pointer <= 1) { + return std::numeric_limits::infinity(); + } + return mass * (std::log2(*pointer) - std::log2(*pointer - 1)); + } + + int32* pointer; + double mass; + double penalty; + }; + + struct GainItem { + GainItem(int32* p, double mass) : pointer(p), mass(mass) { + gain = ComputeNextGain(); + } + + void Increase() { + CHECK_GT(*pointer, 0); + ++*pointer; + gain = ComputeNextGain(); + } + + friend bool operator>(const GainItem& lhs, const GainItem& rhs) { + return lhs.gain > rhs.gain; + } + + double ComputeNextGain() { + // Never increment zero value to non-zero value. 
+ if (*pointer < 1) { + return -std::numeric_limits::infinity(); + } + return mass * (std::log2(*pointer + 1) - std::log2(*pointer)); + } + + int32* pointer; + double mass; + double gain; + }; + + void PerShard(gtl::ArraySlice pmf, + gtl::MutableArraySlice cdf) const { + CHECK_EQ(pmf.size(), cdf.size()); + + const int32 normalizer = 1 << precision_; + std::transform(pmf.begin(), pmf.end(), cdf.begin(), + [normalizer](float mass) { + int32 value = std::rint(mass * normalizer); + // NOTE: Consider checking if mass > 0. + value = std::max(value, 1); + return value; + }); + + int32 sum = std::accumulate(cdf.begin(), cdf.end(), 0); + if (sum > normalizer) { + std::vector queue; + queue.reserve(cdf.size()); + for (int i = 0; i < cdf.size(); ++i) { + queue.emplace_back(&cdf[i], pmf[i]); + } + + std::sort(queue.begin(), queue.end()); + while (sum-- > normalizer) { + queue[0].Decrease(); + // Performs a linear search because this find_if is likely to return + // iterator very close to the begin. + auto iter = std::find_if( + std::next(queue.begin()), queue.end(), + [&queue](const PenaltyItem& rhs) { return queue[0] < rhs; }); + std::rotate(queue.begin(), std::next(queue.begin()), iter); + } + } else if (sum < normalizer) { + std::vector queue; + queue.reserve(cdf.size()); + for (int i = 0; i < cdf.size(); ++i) { + queue.emplace_back(&cdf[i], pmf[i]); + } + + std::sort(queue.begin(), queue.end(), std::greater()); + while (sum++ < normalizer) { + queue[0].Increase(); + // Performs a linear search because this find_if is likely to return + // iterator very close to the begin. 
+ auto iter = std::find_if( + std::next(queue.begin()), queue.end(), + [&queue](const GainItem& rhs) { return queue[0] > rhs; }); + std::rotate(queue.begin(), std::next(queue.begin()), iter); + } + } + std::partial_sum(cdf.begin(), cdf.end(), cdf.begin()); + } + + int precision_; +}; + +REGISTER_KERNEL_BUILDER(Name("PmfToQuantizedCdf").Device(DEVICE_CPU), + PmfToCdfOp); +} // namespace +} // namespace tensorflow diff --git a/tensorflow/contrib/coder/kernels/pmf_to_cdf_op_test.cc b/tensorflow/contrib/coder/kernels/pmf_to_cdf_op_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..3408f6b519a33fbb8f23d19c16bc7138fc34c121 --- /dev/null +++ b/tensorflow/contrib/coder/kernels/pmf_to_cdf_op_test.cc @@ -0,0 +1,142 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include +#include + +#include "tensorflow/core/framework/fake_input.h" +#include "tensorflow/core/framework/node_def.pb.h" +#include "tensorflow/core/framework/node_def_builder.h" +#include "tensorflow/core/framework/shape_inference_testutil.h" +#include "tensorflow/core/framework/tensor_types.h" +#include "tensorflow/core/framework/types.pb.h" +#include "tensorflow/core/kernels/ops_testutil.h" +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/lib/gtl/array_slice.h" +#include "tensorflow/core/lib/random/philox_random.h" +#include "tensorflow/core/lib/random/random.h" +#include "tensorflow/core/lib/random/simple_philox.h" +#include "tensorflow/core/platform/test.h" + +namespace tensorflow { +namespace { +class PmfToQuantizedCdfOpTest : public OpsTestBase { + protected: + void SetupOp(int precision, Tensor* input) { + TF_ASSERT_OK(NodeDefBuilder("pmf_to_cdf", "PmfToQuantizedCdf") + .Input(FakeInput(DT_FLOAT)) + .Attr("precision", precision) + .Finalize(node_def())); + TF_ASSERT_OK(InitOp()); + + inputs_.clear(); + inputs_.emplace_back(input); + } + + void GenerateData(random::SimplePhilox* rand, + gtl::MutableArraySlice slice) { + constexpr float minimum = std::numeric_limits::epsilon(); + float sum = 0; + for (float& value : slice) { + value = std::max(rand->RandFloat(), minimum); + sum += value; + } + for (float& value : slice) { + value /= sum; + } + } + + void Verify(int precision, const Tensor& pmf_tensor, + const Tensor& cdf_tensor) { + ASSERT_EQ(pmf_tensor.dims(), cdf_tensor.dims()); + const int n = pmf_tensor.dims(); + + for (int i = 0; i < n - 1; ++i) { + EXPECT_EQ(pmf_tensor.dim_size(i), cdf_tensor.dim_size(i)); + } + + auto pmf = pmf_tensor.flat_inner_dims(); + auto cdf = cdf_tensor.flat_inner_dims(); + EXPECT_EQ(pmf.dimension(1) + 1, cdf.dimension(1)); + + const int normalizer = 1 << precision; + for (int i = 0; i < pmf.dimension(0); ++i) 
{ + EXPECT_EQ(0, cdf(i, 0)); + + TTypes::UnalignedConstVec cdf_slice(&cdf(i, 0), cdf.dimension(1)); + + for (int j = 1; j < cdf_slice.size(); ++j) { + const int32 diff = cdf_slice(j) - cdf_slice(j - 1); + EXPECT_GT(diff, 0); + } + + EXPECT_EQ(cdf_slice(cdf_slice.size() - 1), normalizer); + } + } +}; + +TEST_F(PmfToQuantizedCdfOpTest, UnderSum) { + Tensor pmf(DT_FLOAT, {1, 10, 1, 32}); + auto matrix = pmf.flat_inner_dims(); + const std::size_t n = matrix.dimension(1); + + random::PhiloxRandom gen(random::New64(), random::New64()); + random::SimplePhilox rand(&gen); + for (int64 i = 0; i < matrix.dimension(0); ++i) { + GenerateData(&rand, {&matrix(i, 0), n}); + } + + pmf.flat() = pmf.flat() * 0.85f; + + constexpr int kPrecision = 10; + SetupOp(kPrecision, &pmf); + TF_ASSERT_OK(RunOpKernel()); + + Verify(kPrecision, pmf, *GetOutput(0)); +} + +TEST_F(PmfToQuantizedCdfOpTest, OverSum) { + Tensor pmf(DT_FLOAT, {10, 1, 1, 100}); + auto matrix = pmf.flat_inner_dims(); + + // Half of each PMF is filled with zeros. The op will round up zeros to ones, + // post quantization. These round ups are likely to make the sum over + // normalizer value. 
+ matrix.setZero(); + const std::size_t n = matrix.dimension(1) / 2; + + random::PhiloxRandom gen(random::New64(), random::New64()); + random::SimplePhilox rand(&gen); + for (int64 i = 0; i < matrix.dimension(0); ++i) { + GenerateData(&rand, {&matrix(i, 0), n}); + } + + constexpr int kPrecision = 7; + SetupOp(kPrecision, &pmf); + TF_ASSERT_OK(RunOpKernel()); + + Verify(kPrecision, pmf, *GetOutput(0)); +} + +TEST_F(PmfToQuantizedCdfOpTest, ShapeFn) { + ShapeInferenceTestOp op("PmfToQuantizedCdf"); + + INFER_OK(op, "?", "?"); + INFER_OK(op, "[3]", "[4]"); + INFER_OK(op, "[3,4]", "[d0_0,5]"); + INFER_OK(op, "[3,4,5]", "[d0_0,d0_1,6]"); +} +} // namespace +} // namespace tensorflow diff --git a/tensorflow/contrib/coder/ops/coder_ops.cc b/tensorflow/contrib/coder/ops/coder_ops.cc index 9056d1a6963d7be92f499db31385fb6afe2dc515..a185e07913f84a813d76a8c63741bd22a832c8b9 100644 --- a/tensorflow/contrib/coder/ops/coder_ops.cc +++ b/tensorflow/contrib/coder/ops/coder_ops.cc @@ -19,6 +19,7 @@ limitations under the License. #include "tensorflow/core/lib/core/status.h" namespace tensorflow { +using shape_inference::DimensionHandle; using shape_inference::InferenceContext; using shape_inference::ShapeHandle; @@ -76,7 +77,7 @@ are incorrect. For this reason, the range coder uses integer arithmetics and avoids using any floating point operations internally, and `cdf` should contain integers representing quantized probability mass rather than floating points. -data: An int32 tensor. +data: An int16 tensor. cdf: An int32 tensor representing the CDF's of `data`. Each integer is divided by `2^precision` to represent a fraction. encoded: A range-coded scalar string. @@ -111,9 +112,38 @@ potential performance issues, the decoder does not return error status. encoded: A scalar string tensor from RangeEncode. shape: An int32 1-D tensor representing the shape of the data encoded by RangeEncode. -decoded: An int32 tensor with shape equal to `shape`. 
+decoded: An int16 tensor with shape equal to `shape`. precision: The number of bits for probability quantization. Must be <= 16, and must match the precision used by RangeEncode that produced `encoded`. )doc"); + +REGISTER_OP("PmfToQuantizedCdf") + .Input("pmf: float") + .Output("cdf: int32") + .Attr("precision: int >= 1") + .SetShapeFn([] (InferenceContext* c) { + ShapeHandle in; + TF_RETURN_IF_ERROR(c->WithRankAtLeast(c->input(0), 1, &in)); + DimensionHandle last; + TF_RETURN_IF_ERROR(c->Add(c->Dim(in, -1), 1, &last)); + ShapeHandle out; + TF_RETURN_IF_ERROR(c->ReplaceDim(in, -1, last, &out)); + c->set_output(0, out); + return Status::OK(); + }) + .Doc(R"doc( +Converts PMF to quantized CDF. This op uses floating-point operations +internally. Therefore the quantized output may not be consistent across multiple +platforms. For entropy encoders and decoders to have the same quantized CDF on +different platforms, the quantized CDF should be produced once and saved, then +the saved quantized CDF should be used everywhere. + +After quantization, if PMF does not sum to 2^precision, then some values of PMF +are increased or decreased to adjust the sum to equal to 2^precision. + +Note that the input PMF is pre-quantization. The input PMF is not normalized +by this op prior to quantization. Therefore the user is responsible for +normalizing PMF if necessary. 
+)doc"); // clang-format on } // namespace tensorflow diff --git a/tensorflow/contrib/copy_graph/python/util/copy_elements.py b/tensorflow/contrib/copy_graph/python/util/copy_elements.py index b806799202bff4f2f6dbf717fbeea74a04b8cd6e..102bc460fdadb0ad5dc9a2960b8655c55357108e 100644 --- a/tensorflow/contrib/copy_graph/python/util/copy_elements.py +++ b/tensorflow/contrib/copy_graph/python/util/copy_elements.py @@ -201,7 +201,7 @@ def copy_op_to_graph(org_instance, to_graph, variables, scope=''): #An instance of tensorflow.core.framework.node_def_pb2.NodeDef, it #stores String-based info such as name, device and type of the op. #Unique to every Operation instance. - new_node_def = deepcopy(op._node_def) + new_node_def = deepcopy(op.node_def) #Change the name new_node_def.name = new_name @@ -211,7 +211,7 @@ def copy_op_to_graph(org_instance, to_graph, variables, scope=''): #Make a copy of the op_def too. #Its unique to every _type_ of Operation. - op_def = deepcopy(op._op_def) + op_def = deepcopy(op.op_def) #Initialize a new Operation instance new_op = ops.Operation(new_node_def, to_graph, new_inputs, output_types, diff --git a/tensorflow/contrib/crf/python/kernel_tests/crf_test.py b/tensorflow/contrib/crf/python/kernel_tests/crf_test.py index 721dc4d0801d1f0e116921888e3851a95e0b72b0..a5e065b93a23c3dd2838d81e7cf537dec226f4f9 100644 --- a/tensorflow/contrib/crf/python/kernel_tests/crf_test.py +++ b/tensorflow/contrib/crf/python/kernel_tests/crf_test.py @@ -281,6 +281,21 @@ class CrfTest(test.TestCase): self.assertEqual(list(tf_actual_max_sequence[:sequence_lengths]), expected_max_sequence[:sequence_lengths]) + def testCrfDecodeZeroSeqLength(self): + """ + Test that crf_decode works when sequence_length contains one or more zeros. 
+ """ + with self.test_session() as sess: + inputs = constant_op.constant(np.ones([2, 10, 5], + dtype=np.float32)) + transition_params = constant_op.constant(np.ones([5, 5], + dtype=np.float32)) + sequence_lengths = constant_op.constant(np.zeros([2], + dtype=np.int32)) + values = crf.crf_decode(inputs, transition_params, sequence_lengths) + tags, scores = sess.run(values) + self.assertEqual(len(tags.shape), 2) + self.assertEqual(len(scores.shape), 1) if __name__ == "__main__": test.main() diff --git a/tensorflow/contrib/crf/python/ops/crf.py b/tensorflow/contrib/crf/python/ops/crf.py index 1233c8f251c404c57d9e2b38993e7a386b1e6ceb..e37c029cebf30eba59c560bc00ed73d2eea86213 100644 --- a/tensorflow/contrib/crf/python/ops/crf.py +++ b/tensorflow/contrib/crf/python/ops/crf.py @@ -479,15 +479,17 @@ def crf_decode(potentials, transition_params, sequence_length): initial_state = array_ops.slice(potentials, [0, 0, 0], [-1, 1, -1]) initial_state = array_ops.squeeze(initial_state, axis=[1]) # [B, O] inputs = array_ops.slice(potentials, [0, 1, 0], [-1, -1, -1]) # [B, T-1, O] + # sequence length is not allowed to be less than zero + sequence_length_less_one = math_ops.maximum(0, sequence_length - 1) backpointers, last_score = rnn.dynamic_rnn( # [B, T - 1, O], [B, O] crf_fwd_cell, inputs=inputs, - sequence_length=sequence_length - 1, + sequence_length=sequence_length_less_one, initial_state=initial_state, time_major=False, dtype=dtypes.int32) backpointers = gen_array_ops.reverse_sequence( # [B, T - 1, O] - backpointers, sequence_length - 1, seq_dim=1) + backpointers, sequence_length_less_one, seq_dim=1) # Computes backward decoding. Extract tag indices from backpointers. 
crf_bwd_cell = CrfDecodeBackwardRnnCell(num_tags) @@ -497,7 +499,7 @@ def crf_decode(potentials, transition_params, sequence_length): decode_tags, _ = rnn.dynamic_rnn( # [B, T - 1, 1] crf_bwd_cell, inputs=backpointers, - sequence_length=sequence_length - 1, + sequence_length=sequence_length_less_one, initial_state=initial_state, time_major=False, dtype=dtypes.int32) diff --git a/tensorflow/contrib/cudnn_rnn/BUILD b/tensorflow/contrib/cudnn_rnn/BUILD index 8b5d13f72555516babc4250fd934c55adc3d1b8b..aeefa3cee62281c74388765ea5e2cbc7f16ff927 100644 --- a/tensorflow/contrib/cudnn_rnn/BUILD +++ b/tensorflow/contrib/cudnn_rnn/BUILD @@ -25,6 +25,7 @@ tf_custom_op_py_library( srcs_version = "PY2AND3", visibility = ["//visibility:public"], deps = [ + "//tensorflow/contrib/checkpoint/python:split_dependency", "//tensorflow/contrib/util:util_py", "//tensorflow/python:array_ops", "//tensorflow/python:control_flow_ops", diff --git a/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_test.py b/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_test.py index 9897c31a98e0b335c18a84825fc518ed1fc310a2..6fb56b0858786662546ecab425b1a2564fbd9a64 100644 --- a/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_test.py +++ b/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_test.py @@ -19,6 +19,7 @@ from __future__ import print_function import argparse import collections +import functools import itertools import os import sys @@ -34,7 +35,7 @@ from tensorflow.python.eager import context from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import random_seed -from tensorflow.python.framework.test_util import TensorFlowTestCase +from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import gen_nn_ops @@ -53,6 +54,7 @@ from tensorflow.python.platform import test from 
tensorflow.python.platform import tf_logging as logging from tensorflow.python.training import adagrad from tensorflow.python.training import adam +from tensorflow.python.training import checkpointable_utils from tensorflow.python.training import gradient_descent from tensorflow.python.training import momentum from tensorflow.python.training import rmsprop @@ -265,7 +267,7 @@ def _CreateCudnnCompatibleCanonicalRNN(rnn, inputs, is_bidi=False, scope=None): return outputs, (output_state_fw, output_state_bw) -class CudnnRNNTestBasic(TensorFlowTestCase): +class CudnnRNNTestBasic(test_util.TensorFlowTestCase): @unittest.skipUnless(test.is_built_with_cuda(), "Test only applicable when running on GPUs") @@ -467,7 +469,7 @@ class CudnnRNNTestBasic(TensorFlowTestCase): # TODO(jamesqin): Transform to parameterized test after it is included in the # TF open source codebase. -class CudnnRNNTestSaveRestore(TensorFlowTestCase): +class CudnnRNNTestSaveRestore(test_util.TensorFlowTestCase): def _CompareWeights(self, lhs, rhs): self.assertEqual(len(lhs), len(rhs)) @@ -701,9 +703,146 @@ class CudnnRNNTestSaveRestore(TensorFlowTestCase): self._TestSaveRestoreHelper(CUDNN_RNN_RELU) +class CudnnRNNTestSaveRestoreCheckpointable(test_util.TensorFlowTestCase): + + def _VerifyCheckpoint( + self, checkpoint_path, compatible_cell_fn, cudnn_cell_fn, + num_layers, input_size, expected_variable_values, num_applications=3): + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + with ops.device("gpu:0"): + cudnn_layer = cudnn_cell_fn() + cudnn_checkpoint = checkpointable_utils.Checkpoint(cell=cudnn_layer) + status = cudnn_checkpoint.restore(checkpoint_path) + inputs = 3. 
* array_ops.ones([num_applications, num_layers, input_size], + dtype=dtypes.float32) + cudnn_output, _ = cudnn_layer(inputs) + status.assert_consumed().run_restore_ops() + second_save_path = cudnn_checkpoint.save(checkpoint_prefix) + restore_layer = compatible_cell_fn() + restore_layer_checkpoint = checkpointable_utils.Checkpoint( + cell=restore_layer) + status = restore_layer_checkpoint.restore(second_save_path) + current_state = restore_layer.zero_state(1, dtypes.float32) + for _ in range(num_applications): + restore_layer_output, current_state = restore_layer( + inputs=3. * array_ops.ones([1, input_size]), + state=current_state) + status.assert_consumed().run_restore_ops() + self.assertTrue(restore_layer.variables) + for variable, expected_value in zip( + restore_layer.variables, expected_variable_values): + self.assertAllClose(expected_value, self.evaluate(variable)) + self.assertAllClose(self.evaluate(restore_layer_output), + self.evaluate(cudnn_output)[-1, -1:, ...]) + + def _CheckpointableSingleCellUnidirectionalTestTemplate( + self, single_cell_fn, cudnn_cell_fn): + # Single-layer cuDNN cells with object-based checkpointing should be + # checkpoint compatible with either single CudnnCompatible cells or + # MultiRnnCells with one cell. 
+ input_size = 3 + save_cell_layer = single_cell_fn() + save_cell_layer( + inputs=array_ops.ones([1, input_size]), + state=save_cell_layer.zero_state(1, dtypes.float32)) + self.assertTrue(save_cell_layer.variables) + expected_values = [] + np.random.seed(10) + for variable in save_cell_layer.variables: + value = np.random.normal(size=variable.shape) + expected_values.append(value) + self.evaluate(variable.assign(value)) + save_checkpoint = checkpointable_utils.Checkpoint(cell=save_cell_layer) + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + first_save_path = save_checkpoint.save(checkpoint_prefix) + self._VerifyCheckpoint( + checkpoint_path=first_save_path, + compatible_cell_fn= + lambda: rnn_cell_impl.MultiRNNCell([single_cell_fn()]), + cudnn_cell_fn=cudnn_cell_fn, + num_layers=1, + expected_variable_values=expected_values, + input_size=input_size) + + @unittest.skipUnless(test.is_built_with_cuda(), + "Test only applicable when running on GPUs") + @test_util.run_in_graph_and_eager_modes() + def testLSTMCheckpointableSingleLayer(self): + num_units = 2 + direction = CUDNN_RNN_UNIDIRECTION + self._CheckpointableSingleCellUnidirectionalTestTemplate( + single_cell_fn=functools.partial( + cudnn_rnn_ops.CudnnCompatibleLSTMCell, num_units=num_units), + cudnn_cell_fn=functools.partial( + cudnn_rnn.CudnnLSTM, num_layers=1, num_units=num_units, + direction=direction, name="awesome_lstm")) + + @unittest.skipUnless(test.is_built_with_cuda(), + "Test only applicable when running on GPUs") + @test_util.run_in_graph_and_eager_modes() + def testGRUCheckpointableSingleLayer(self): + num_units = 2 + direction = CUDNN_RNN_UNIDIRECTION + with self.assertRaises(NotImplementedError): + # TODO(allenl): Implement object-based saving for GRUs and other cells. 
+ self._CheckpointableSingleCellUnidirectionalTestTemplate( + single_cell_fn=functools.partial( + cudnn_rnn_ops.CudnnCompatibleGRUCell, num_units=num_units), + cudnn_cell_fn=functools.partial( + cudnn_rnn.CudnnGRU, num_layers=1, num_units=num_units, + direction=direction, name="awesome_gru")) + + def _CheckpointableMultiLayerTestTemplate( + self, single_cell_fn, cudnn_cell_fn, num_layers): + + def _MultiCellFn(): + return rnn_cell_impl.MultiRNNCell( + [single_cell_fn() for _ in range(num_layers)]) + input_size = 3 + save_graph = ops.Graph() + with save_graph.as_default(), self.test_session(graph=save_graph): + save_layer = _MultiCellFn() + save_layer(inputs=array_ops.ones([1, input_size]), + state=save_layer.zero_state(1, dtypes.float32)) + self.assertTrue(save_layer.variables) + expected_values = [] + np.random.seed(10) + for variable in save_layer.variables: + value = np.random.normal(size=variable.shape) + expected_values.append(value) + self.evaluate(variable.assign(value)) + save_checkpoint = checkpointable_utils.Checkpoint(cell=save_layer) + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + first_save_path = save_checkpoint.save(checkpoint_prefix) + self._VerifyCheckpoint( + checkpoint_path=first_save_path, + compatible_cell_fn=_MultiCellFn, cudnn_cell_fn=cudnn_cell_fn, + num_layers=num_layers, + expected_variable_values=expected_values, + input_size=input_size) + + @unittest.skipUnless(test.is_built_with_cuda(), + "Test only applicable when running on GPUs") + @test_util.run_in_graph_and_eager_modes() + def testCudnnCompatibleLSTMCheckpointablMultiLayer(self): + num_units = 2 + num_layers = 3 + direction = CUDNN_RNN_UNIDIRECTION + self._CheckpointableMultiLayerTestTemplate( + single_cell_fn=functools.partial( + cudnn_rnn_ops.CudnnCompatibleLSTMCell, num_units=num_units), + cudnn_cell_fn=functools.partial( + cudnn_rnn.CudnnLSTM, num_layers=num_layers, num_units=num_units, + direction=direction, 
name="awesome_lstm"), + num_layers=num_layers) + + # TODO(jamesqin): Transform to parameterized test after it is included in the # TF open source codebase. -class CudnnRNNTestCompatibleRNNCells(TensorFlowTestCase): +class CudnnRNNTestCompatibleRNNCells(test_util.TensorFlowTestCase): @unittest.skipUnless(test.is_built_with_cuda(), "Test only applicable when running on GPUs") @@ -884,7 +1023,7 @@ class CudnnRNNTestCompatibleRNNCells(TensorFlowTestCase): rtol=2e-5) -class CudnnRNNTestParamsSize(TensorFlowTestCase): +class CudnnRNNTestParamsSize(test_util.TensorFlowTestCase): def _TestOpaqueParamsSize(self, rnn_mode, num_layers, num_units, input_size, dtype, direction): @@ -931,7 +1070,7 @@ class CudnnRNNTestParamsSize(TensorFlowTestCase): dtype, direction) -class CudnnRNNTestTraining(TensorFlowTestCase): +class CudnnRNNTestTraining(test_util.TensorFlowTestCase): def _ComputeNumericGrad(self, sess, y, x, delta=1e-4, step=1): """Compute the numeric gradient of y wrt to x. diff --git a/tensorflow/contrib/cudnn_rnn/python/layers/cudnn_rnn.py b/tensorflow/contrib/cudnn_rnn/python/layers/cudnn_rnn.py index 36fba917a8f56c26fd5b4c3468d1d980a8ba2ba5..d58198faf353aab68430d2fa153a18de359112de 100644 --- a/tensorflow/contrib/cudnn_rnn/python/layers/cudnn_rnn.py +++ b/tensorflow/contrib/cudnn_rnn/python/layers/cudnn_rnn.py @@ -142,6 +142,9 @@ class _CudnnRNN(base_layer.Layer): """ # pylint:enable=line-too-long + # TODO(allenl): Document object-based saving and checkpoint compatibility once + # it's implemented for more cuDNN Layers. + # The following are constants defined by subclasses. # Type of RNN cell. _rnn_mode = None @@ -355,7 +358,8 @@ class _CudnnRNN(base_layer.Layer): "CUDA/CuDNN generations.") # Initialize opaque params with a tensor. 
self.kernel = vs.get_variable( - "opaque_kernel", initializer=opaque_params_t, validate_shape=False) + "opaque_kernel", dtype=self._plain_dtype, + initializer=opaque_params_t, validate_shape=False) # Create saveable in the outer scope of the cudnn subgraph, such that # alternative subgraph with platform-independent rnn cells can load the # checkpoints directly. @@ -363,6 +367,11 @@ class _CudnnRNN(base_layer.Layer): self._create_saveable() self.built = True + def _gather_saveables_for_checkpoint(self): + raise NotImplementedError( + "This cell does not yet support object-based saving. File a feature " + "request if this limitation bothers you.") + def call(self, inputs, initial_state=None, training=True): """Runs the forward step for the RNN model. @@ -499,6 +508,8 @@ class _CudnnRNN(base_layer.Layer): direction=self.direction, scope=vs.get_variable_scope(), name="%s_saveable" % self.trainable_variables[0].name.split(":")[0]) + self._saveable._add_checkpointable_dependencies( # pylint: disable=protected-access + checkpointable=self, dtype=self._plain_dtype) ops.add_to_collection(ops.GraphKeys.SAVEABLE_OBJECTS, self._saveable) @@ -521,6 +532,16 @@ class CudnnLSTM(_CudnnRNN): return ([self.num_layers * self.num_dirs, batch_size, self.num_units], [self.num_layers * self.num_dirs, batch_size, self.num_units]) + @property + def _gather_saveables_for_checkpoint(self): + if self._direction == CUDNN_RNN_UNIDIRECTION: + # Skip one inheritance level to avoid NotImplementedError. + return super(_CudnnRNN, self)._gather_saveables_for_checkpoint + else: + raise NotImplementedError( + "Object-based saving does not currently support bidirectional LSTM " + "cells. 
File a feature request if this limitation bothers you.") + class _CudnnRNNNoInputC(_CudnnRNN): """Abstract simple CudnnRNN layer without input_c.""" diff --git a/tensorflow/contrib/cudnn_rnn/python/ops/cudnn_rnn_ops.py b/tensorflow/contrib/cudnn_rnn/python/ops/cudnn_rnn_ops.py index 2ac94424061a07e5727a98642aa855222c0afb81..a1ede4471ef24adbfb7db32ee7ea276f33ed20a9 100644 --- a/tensorflow/contrib/cudnn_rnn/python/ops/cudnn_rnn_ops.py +++ b/tensorflow/contrib/cudnn_rnn/python/ops/cudnn_rnn_ops.py @@ -17,12 +17,13 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.contrib.checkpoint.python import split_dependency from tensorflow.contrib.rnn.python.ops import lstm_ops from tensorflow.python.framework import common_shapes from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import random_seed -from tensorflow.python.layers import base as base_layer +from tensorflow.python.keras._impl.keras.engine import base_layer from tensorflow.python.ops import array_ops from tensorflow.python.ops import gen_cudnn_rnn_ops from tensorflow.python.ops import init_ops @@ -31,6 +32,7 @@ from tensorflow.python.ops import nn_ops from tensorflow.python.ops import rnn_cell_impl from tensorflow.python.ops import state_ops from tensorflow.python.ops import variable_scope as vs +from tensorflow.python.training import checkpointable as checkpointable_lib from tensorflow.python.training import saver CUDNN_RNN_UNIDIRECTION = "unidirectional" @@ -266,13 +268,16 @@ class CudnnOpaqueParamsSaveable(saver.BaseSaverBuilder.SaveableObject): # instead of having the master pull all slices and then save them. 
slice_spec = "" params = weights + biases - param_names = weight_names + bias_names + self._weight_names = weight_names + self._bias_names = bias_names + self._param_names = weight_names + bias_names + prefixed_param_names = weight_names + bias_names if self._scope: - param_names = ["%s/%s" % (self._scope, pn) for pn in param_names] - + prefixed_param_names = [ + "%s/%s" % (self._scope, pn) for pn in prefixed_param_names] specs = [ saver.BaseSaverBuilder.SaveSpec(param, slice_spec, param_name) - for param, param_name in zip(params, param_names) + for param, param_name in zip(params, prefixed_param_names) ] super(CudnnOpaqueParamsSaveable, self).__init__( array_ops.identity(self._variables), specs, name) @@ -285,6 +290,45 @@ class CudnnOpaqueParamsSaveable(saver.BaseSaverBuilder.SaveableObject): return state_ops.assign( self._variables, opaque_params, validate_shape=False) + def _checkpointable_save(self, save_buffer): + weights, biases = self._OpaqueParamsToCanonical() + with ops.device("gpu:0"): + (weights, _), (biases, _) = self._TransformCanonical( + weights, biases) + for name, tensor in zip(self._param_names, weights + biases): + save_buffer[name] = array_ops.identity(tensor) + + def _checkpointable_restore(self, restore_buffer): + tensors = [array_ops.identity(restore_buffer[name]) + for name in self._param_names] + return self.restore( + restored_tensors=tensors, + restored_shapes=None # Unused + ) + + def _add_checkpointable_dependencies(self, checkpointable, dtype): + """Add canonical weight dependencies to `checkpointable`. + + When saving or restoring, converts to or from the opaque buffer + format. Weights are saved and loaded in the configuration expected by + cuDNN-compatible cells. + + Args: + checkpointable: An object inheriting from `CheckpointableBase` to add + dependencies to (typically the cuDNN `Layer`). + dtype: The dtype for the canonical parameter Tensors. 
+ """ + split_dependencies = split_dependency.split_dependency( + component_names=self._param_names, + component_dtypes=(dtype,) * len(self._param_names), + fill_save_buffer_fn=self._checkpointable_save, + consume_restore_buffer_fn=self._checkpointable_restore) + self._checkpointable_track_params(checkpointable, split_dependencies) + + def _checkpointable_track_params(self, checkpointable, params): + """Tracks parameters in a canonical configuration.""" + return # NotImplementedError raised by the Layer. + def _TFCanonicalNamePrefix(self, layer, is_fwd=True): if self._direction == CUDNN_RNN_UNIDIRECTION: return "rnn/multi_rnn_cell/cell_%d/%s" % (layer, self._rnn_cell_name) @@ -480,10 +524,7 @@ class CudnnLSTMSaveable(CudnnOpaqueParamsSaveable): _rnn_mode = CUDNN_LSTM _num_params_per_layer = CUDNN_LSTM_PARAMS_PER_LAYER - # pylint:disable=protected-access - _rnn_cell_name = base_layer._to_snake_case(CudnnCompatibleLSTMCell.__name__) - - # pylint:enable=protected-access + _rnn_cell_name = base_layer.to_snake_case(CudnnCompatibleLSTMCell.__name__) def _cudnn_to_tf_gate_params(self, *cu_gate_order): i_g, f_g, c_g, o_g = cu_gate_order @@ -574,6 +615,29 @@ class CudnnLSTMSaveable(CudnnOpaqueParamsSaveable): tf_biases.append(b) tf_bias_names.append(prefix + "/bias") + def _checkpointable_track_params(self, checkpointable, params): + """Track parameters for compatibility with CudnnCompatibleLSTMCell.""" + biases = [] + weights = [] + for name in self._weight_names: + weights.append(params[name]) + for name in self._bias_names: + biases.append(params[name]) + assert len(params) == len(weights) + len(biases) + if len(weights) == 1 and len(biases) == 1: + # For single-layer cells, allow substituting a cell with no MultiRNNCell + # wrapping. 
+ kernel, = weights # pylint: disable=unbalanced-tuple-unpacking + bias, = biases # pylint: disable=unbalanced-tuple-unpacking + checkpointable._track_checkpointable(kernel, name="kernel") # pylint: disable=protected-access + checkpointable._track_checkpointable(bias, name="bias") # pylint: disable=protected-access + assert len(biases) == len(weights) + for cell_index, (bias, kernel) in enumerate(zip(biases, weights)): + cell = checkpointable_lib.Checkpointable() + checkpointable._track_checkpointable(cell, name="cell-%d" % cell_index) # pylint: disable=protected-access + cell.bias = bias + cell.kernel = kernel + class CudnnGRUSaveable(CudnnOpaqueParamsSaveable): """SaveableObject implementation handling Cudnn GRU opaque params.""" @@ -581,10 +645,7 @@ class CudnnGRUSaveable(CudnnOpaqueParamsSaveable): _rnn_mode = CUDNN_GRU _num_params_per_layer = CUDNN_GRU_PARAMS_PER_LAYER - # pylint:disable=protected-access - _rnn_cell_name = base_layer._to_snake_case(CudnnCompatibleGRUCell.__name__) - - # pylint:enable=protected-access + _rnn_cell_name = base_layer.to_snake_case(CudnnCompatibleGRUCell.__name__) def _cudnn_to_tf_weights(self, *cu_weights): r"""Stitching cudnn canonical weights to generate tf canonical weights.""" @@ -663,11 +724,7 @@ class CudnnGRUSaveable(CudnnOpaqueParamsSaveable): class CudnnRNNSimpleSaveable(CudnnLSTMSaveable): """SaveableObject implementation handling Cudnn RNN Tanh opaque params.""" - # pylint:disable=protected-access - _rnn_cell_name = base_layer._to_snake_case( - rnn_cell_impl.BasicRNNCell.__name__) - - # pylint:enable=protected-access + _rnn_cell_name = base_layer.to_snake_case(rnn_cell_impl.BasicRNNCell.__name__) def _cudnn_to_tf_weights(self, *cu_weights): r"""Stitching cudnn canonical weights to generate tf canonical weights.""" @@ -1583,31 +1640,6 @@ class CudnnRNNRelu(_CudnnRNNNoInputC): _NUM_PARAMS_PER_LAYER = CUDNN_RNN_RELU_PARAMS_PER_LAYER -@ops.RegisterGradient("CudnnRNN") -def _cudnn_rnn_backward(op, *grad): - if not 
op.get_attr("is_training"): - raise ValueError( - "CudnnRNN must set is_training to True to be used in gradients") - return gen_cudnn_rnn_ops.cudnn_rnn_backprop( - input=op.inputs[0], - input_h=op.inputs[1], - input_c=op.inputs[2], - params=op.inputs[3], - output=op.outputs[0], - output_h=op.outputs[1], - output_c=op.outputs[2], - output_backprop=grad[0], - output_h_backprop=grad[1], - output_c_backprop=grad[2], - reserve_space=op.outputs[3], - dropout=op.get_attr("dropout"), - seed=op.get_attr("seed"), - seed2=op.get_attr("seed2"), - rnn_mode=op.get_attr("rnn_mode"), - input_mode=op.get_attr("input_mode"), - direction=op.get_attr("direction")) - - ops.RegisterShape("CudnnRNNParamsSize")(common_shapes.call_cpp_shape_fn) ops.RegisterShape("CudnnRNNParamsToCanonical")(common_shapes.call_cpp_shape_fn) ops.RegisterShape("CudnnRNNCanonicalToParams")(common_shapes.call_cpp_shape_fn) diff --git a/tensorflow/contrib/data/BUILD b/tensorflow/contrib/data/BUILD index 35312f06b33d62e1841c9bf580e0836d4b5ebc14..8bdbba83ef6a8541158d956e36caf6a9be435c5b 100644 --- a/tensorflow/contrib/data/BUILD +++ b/tensorflow/contrib/data/BUILD @@ -8,6 +8,7 @@ load( "//tensorflow:tensorflow.bzl", "tf_custom_op_library", "tf_gen_op_libs", + "if_not_windows", ) load( "//tensorflow/core:platform/default/build_config_root.bzl", @@ -21,22 +22,21 @@ py_library( deps = [ "//tensorflow/contrib/data/python/ops:dataset_ops", "//tensorflow/contrib/data/python/ops:iterator_ops", - "//tensorflow/contrib/data/python/ops:prefetching_ops", - "//tensorflow/contrib/data/python/ops:readers", - "//tensorflow/contrib/data/python/ops:shuffle_ops", - "//tensorflow/contrib/data/python/ops:transformation_ops", - "//tensorflow/python:parsing_ops", "//tensorflow/python:util", - "//tensorflow/python/data/ops:iterator_ops", ], ) +cc_library( + name = "lib_proto_parsing_for_dataset_ops", + deps = if_not_windows(["//tensorflow/core:lib_proto_parsing"]), +) + tf_custom_op_library( name = "_dataset_ops.so", srcs = 
["ops/dataset_ops.cc"], deps = ["//tensorflow/contrib/data/kernels:dataset_kernels"] + if_static( - extra_deps = ["//tensorflow/core:lib_proto_parsing"], + extra_deps = [":lib_proto_parsing_for_dataset_ops"], otherwise = [], ), ) diff --git a/tensorflow/contrib/data/__init__.py b/tensorflow/contrib/data/__init__.py index 17048314a4d182ebd21c5cd0bf4323817bf4863c..077cbba9d2ae41a83f6c358a63ae27aec5741e2c 100644 --- a/tensorflow/contrib/data/__init__.py +++ b/tensorflow/contrib/data/__init__.py @@ -25,6 +25,7 @@ See the @{$datasets$Importing Data} Programmer's Guide for an overview. @@Counter @@SqlDataset +@@assert_element_shape @@batch_and_drop_remainder @@bucket_by_sequence_length @@dense_to_sparse_batch @@ -40,6 +41,7 @@ See the @{$datasets$Importing Data} Programmer's Guide for an overview. @@prefetch_to_device @@read_batch_features @@rejection_resample +@@sample_from_datasets @@scan @@shuffle_and_repeat @@sliding_window_batch @@ -55,6 +57,7 @@ from __future__ import print_function # pylint: disable=unused-import +from tensorflow.contrib.data.python.ops.batching import assert_element_shape from tensorflow.contrib.data.python.ops.batching import batch_and_drop_remainder from tensorflow.contrib.data.python.ops.batching import dense_to_sparse_batch from tensorflow.contrib.data.python.ops.batching import map_and_batch @@ -67,6 +70,7 @@ from tensorflow.contrib.data.python.ops.get_single_element import get_single_ele from tensorflow.contrib.data.python.ops.grouping import bucket_by_sequence_length from tensorflow.contrib.data.python.ops.grouping import group_by_window from tensorflow.contrib.data.python.ops.interleave_ops import parallel_interleave +from tensorflow.contrib.data.python.ops.interleave_ops import sample_from_datasets from tensorflow.contrib.data.python.ops.interleave_ops import sloppy_interleave from tensorflow.contrib.data.python.ops.iterator_ops import make_saveable_from_iterator from tensorflow.contrib.data.python.ops.prefetching_ops import 
prefetch_to_device @@ -78,8 +82,6 @@ from tensorflow.contrib.data.python.ops.resampling import rejection_resample from tensorflow.contrib.data.python.ops.scan_ops import scan from tensorflow.contrib.data.python.ops.shuffle_ops import shuffle_and_repeat from tensorflow.contrib.data.python.ops.sliding import sliding_window_batch -from tensorflow.python.data.ops.iterator_ops import Iterator -from tensorflow.python.ops.parsing_ops import parse_single_example_v2 as parse_single_example # pylint: enable=unused-import from tensorflow.python.util.all_util import remove_undocumented diff --git a/tensorflow/contrib/data/kernels/BUILD b/tensorflow/contrib/data/kernels/BUILD index 83ada6fb67dcbff595a38ce9e8609bdd1219b075..c56910c7833d4c54fa8db27cd061b404013f3f54 100644 --- a/tensorflow/contrib/data/kernels/BUILD +++ b/tensorflow/contrib/data/kernels/BUILD @@ -18,6 +18,17 @@ cc_library( alwayslink = 1, ) +cc_library( + name = "directed_interleave_dataset_op", + srcs = ["directed_interleave_dataset_op.cc"], + deps = [ + "//tensorflow/core:framework_headers_lib", + "//third_party/eigen3", + "@protobuf_archive//:protobuf_headers", + ], + alwayslink = 1, +) + cc_library( name = "ignore_errors_dataset_op", srcs = ["ignore_errors_dataset_op.cc"], @@ -52,6 +63,7 @@ cc_library( cc_library( name = "dataset_kernels", deps = [ + ":directed_interleave_dataset_op", ":ignore_errors_dataset_op", ":prefetching_kernels", ":threadpool_dataset_op", diff --git a/tensorflow/contrib/data/kernels/directed_interleave_dataset_op.cc b/tensorflow/contrib/data/kernels/directed_interleave_dataset_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..48d3734162525ffc6ace076e4f0523c1d0cae511 --- /dev/null +++ b/tensorflow/contrib/data/kernels/directed_interleave_dataset_op.cc @@ -0,0 +1,274 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/core/framework/dataset.h" +#include "tensorflow/core/framework/partial_tensor_shape.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/lib/hash/hash.h" + +namespace tensorflow { + +namespace { + +// See documentation in ../ops/dataset_ops.cc for a high-level +// description of the following op. + +class DirectedInterleaveDatasetOp : public DatasetOpKernel { + public: + explicit DirectedInterleaveDatasetOp(OpKernelConstruction* ctx) + : DatasetOpKernel(ctx) {} + + void MakeDataset(OpKernelContext* ctx, DatasetBase** output) override { + DatasetBase* selector_input; + OP_REQUIRES_OK(ctx, + GetDatasetFromVariantTensor(ctx->input(0), &selector_input)); + + OP_REQUIRES( + ctx, + selector_input->output_dtypes().size() == 1 && + selector_input->output_dtypes()[0] == DT_INT64 && + selector_input->output_shapes().size() == 1 && + selector_input->output_shapes()[0].IsCompatibleWith( + PartialTensorShape({})), + errors::InvalidArgument( + "The selector input must be a dataset of scalar int64 elements.")); + + std::vector data_inputs; + for (size_t i = 1; i < ctx->num_inputs(); ++i) { + DatasetBase* input; + OP_REQUIRES_OK(ctx, GetDatasetFromVariantTensor(ctx->input(i), &input)); + data_inputs.push_back(input); + + OP_REQUIRES( + ctx, data_inputs[0]->output_dtypes() == input->output_dtypes(), + errors::InvalidArgument( + "All inputs must have the same output_dtypes. 
First input " + "has types ", + DataTypeVectorString(data_inputs[0]->output_dtypes()), + ", and input ", i - 1, " has types ", + DataTypeVectorString(input->output_dtypes()))); + } + *output = new Dataset(ctx, selector_input, std::move(data_inputs)); + } + + private: + class Dataset : public GraphDatasetBase { + public: + Dataset(OpKernelContext* ctx, const DatasetBase* selector_input, + std::vector data_inputs) + : GraphDatasetBase(ctx), + selector_input_(selector_input), + data_inputs_(std::move(data_inputs)) { + selector_input_->Ref(); + + output_shapes_ = data_inputs_[0]->output_shapes(); + data_inputs_[0]->Ref(); + for (size_t i = 1; i < data_inputs_.size(); ++i) { + const DatasetBase* data_input = data_inputs_[i]; + data_input->Ref(); + for (size_t j = 0; j < output_shapes_.size(); ++j) { + output_shapes_[j] = MostSpecificCompatibleShape( + output_shapes_[j], data_input->output_shapes()[j]); + } + } + } + + ~Dataset() override { + selector_input_->Unref(); + for (DatasetBase* data_input : data_inputs_) { + data_input->Unref(); + } + } + + std::unique_ptr MakeIterator( + const string& prefix) const override { + return std::unique_ptr(new Iterator( + {this, strings::StrCat(prefix, "::DirectedInterleave")})); + } + + const DataTypeVector& output_dtypes() const override { + return data_inputs_[0]->output_dtypes(); + } + + const std::vector& output_shapes() const override { + return output_shapes_; + } + + string DebugString() override { + return strings::StrCat("DirectedInterleaveDatasetOp::Dataset"); + } + + protected: + Status AsGraphDefInternal(OpKernelContext* ctx, DatasetGraphDefBuilder* b, + Node** output) const override { + Node* selector_input_node; + TF_RETURN_IF_ERROR( + b->AddParentDataset(ctx, selector_input_, &selector_input_node)); + std::vector data_input_nodes(data_inputs_.size()); + for (size_t i = 0; i < data_inputs_.size(); ++i) { + TF_RETURN_IF_ERROR( + b->AddParentDataset(ctx, data_inputs_[i], &data_input_nodes[i])); + } + 
TF_RETURN_IF_ERROR(b->AddDataset(this, {{0, selector_input_node}}, + {{1, data_input_nodes}}, {}, output)); + return Status::OK(); + } + + private: + class Iterator : public DatasetIterator { + public: + explicit Iterator(const Params& params) + : DatasetIterator(params), + selector_input_impl_(params.dataset->selector_input_->MakeIterator( + params.prefix + ".selector")), + num_active_inputs_(params.dataset->data_inputs_.size()) { + data_input_impls_.reserve(params.dataset->data_inputs_.size()); + for (size_t i = 0; i < params.dataset->data_inputs_.size(); ++i) { + const DatasetBase* data_input = params.dataset->data_inputs_[i]; + data_input_impls_.push_back(data_input->MakeIterator( + strings::StrCat(params.prefix, "[", i, "]"))); + } + } + + Status GetNextInternal(IteratorContext* ctx, + std::vector* out_tensors, + bool* end_of_sequence) override { + mutex_lock l(mu_); + if (!selector_input_impl_) { + *end_of_sequence = true; + return Status::OK(); + } + + while (true) { + std::vector selector_result; + *end_of_sequence = false; + TF_RETURN_IF_ERROR(selector_input_impl_->GetNext( + ctx, &selector_result, end_of_sequence)); + if (*end_of_sequence) { + selector_input_impl_.reset(); + for (auto& data_input_impl : data_input_impls_) { + data_input_impl.reset(); + } + return Status::OK(); + } + + int64 selected_input = selector_result[0].scalar()(); + if (selected_input < 0 || selected_input >= data_input_impls_.size()) { + return errors::InvalidArgument( + "Selector index out of range: ", selected_input, + " >= ", data_input_impls_.size()); + } + + if (data_input_impls_[selected_input]) { + bool end_of_selected_input = false; + TF_RETURN_IF_ERROR(data_input_impls_[selected_input]->GetNext( + ctx, out_tensors, &end_of_selected_input)); + + if (!end_of_selected_input) { + return Status::OK(); + } + + data_input_impls_[selected_input].reset(); + --num_active_inputs_; + + if (num_active_inputs_ == 0) { + selector_input_impl_.reset(); + *end_of_sequence = true; + return 
Status::OK(); + } + } + + LOG(WARNING) << "DirectedInterleave selected an exhausted input: " + << selected_input; + } + } + + protected: + Status SaveInternal(IteratorStateWriter* writer) override { + mutex_lock l(mu_); + if (selector_input_impl_) { + TF_RETURN_IF_ERROR(SaveParent(writer, selector_input_impl_)); + } else { + TF_RETURN_IF_ERROR( + writer->WriteScalar(full_name("selector_input_impl_empty"), "")); + } + for (size_t i = 0; i < data_input_impls_.size(); ++i) { + const auto& data_input_impl = data_input_impls_[i]; + if (data_input_impl) { + TF_RETURN_IF_ERROR(SaveParent(writer, data_input_impl)); + } else { + TF_RETURN_IF_ERROR(writer->WriteScalar( + full_name(strings::StrCat("data_input_impl_empty[", i, "]")), + "")); + } + } + return Status::OK(); + } + + Status RestoreInternal(IteratorContext* ctx, + IteratorStateReader* reader) override { + mutex_lock l(mu_); + if (!reader->Contains(full_name("selector_input_impl_empty"))) { + TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, selector_input_impl_)); + } else { + selector_input_impl_.reset(); + } + for (size_t i = 0; i < data_input_impls_.size(); ++i) { + if (!reader->Contains(full_name( + strings::StrCat("data_input_impl_empty[", i, "]")))) { + TF_RETURN_IF_ERROR( + RestoreParent(ctx, reader, data_input_impls_[i])); + } else { + data_input_impls_[i].reset(); + } + } + return Status::OK(); + } + + private: + mutex mu_; + std::unique_ptr selector_input_impl_ GUARDED_BY(mu_); + std::vector> data_input_impls_ + GUARDED_BY(mu_); + int64 num_active_inputs_ GUARDED_BY(mu_); + }; + + static PartialTensorShape MostSpecificCompatibleShape( + const PartialTensorShape& ts1, const PartialTensorShape& ts2) { + PartialTensorShape output_tensorshape; + if (ts1.dims() != ts2.dims() || ts1.unknown_rank() || ts2.unknown_rank()) + return output_tensorshape; + auto dims1 = ts1.dim_sizes(); + auto dims2 = ts2.dim_sizes(); + for (int d = 0; d < ts1.dims(); d++) { + if (dims1[d] == dims2[d]) + 
output_tensorshape.Concatenate(dims1[d]); + else + output_tensorshape.Concatenate(-1); + } + return output_tensorshape; + } + + const DatasetBase* const selector_input_; + const std::vector data_inputs_; + std::vector output_shapes_; + }; +}; + +REGISTER_KERNEL_BUILDER(Name("DirectedInterleaveDataset").Device(DEVICE_CPU), + DirectedInterleaveDatasetOp); + +} // namespace + +} // namespace tensorflow diff --git a/tensorflow/contrib/data/ops/dataset_ops.cc b/tensorflow/contrib/data/ops/dataset_ops.cc index cf0a8bbccb5813c799e7e6db91d73e2ecf4107f8..137deb63527f0bdde7da8d5be83ed038f430e581 100644 --- a/tensorflow/contrib/data/ops/dataset_ops.cc +++ b/tensorflow/contrib/data/ops/dataset_ops.cc @@ -17,6 +17,23 @@ limitations under the License. namespace tensorflow { +REGISTER_OP("DirectedInterleaveDataset") + .Input("selector_input_dataset: variant") + .Input("data_input_datasets: N * variant") + .Output("handle: variant") + .Attr("output_types: list(type) >= 1") + .Attr("output_shapes: list(shape) >= 1") + .Attr("N: int >= 1") + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +A substitute for `InterleaveDataset` on a fixed list of `N` datasets. + +selector_input_dataset: A dataset of scalar `DT_INT64` elements that determines + which of the `N` data inputs should produce the next output element. +data_input_datasets: `N` datasets with the same type that will be interleaved + according to the values of `selector_input_dataset`. 
+)doc"); + REGISTER_OP("IgnoreErrorsDataset") .Input("input_dataset: variant") .Output("handle: variant") diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD index c8699e0d5ac865176da567dfcc0e4e30eb491449..d59dd17aea42618075e69516bcfa4ee2b9eafc81 100644 --- a/tensorflow/contrib/data/python/kernel_tests/BUILD +++ b/tensorflow/contrib/data/python/kernel_tests/BUILD @@ -4,24 +4,24 @@ licenses(["notice"]) # Apache 2.0 exports_files(["LICENSE"]) -load("//tensorflow:tensorflow.bzl", "py_test", "tf_py_test") +load("//tensorflow:tensorflow.bzl", "cuda_py_test", "py_test", "tf_py_test") py_test( name = "batch_dataset_op_test", - size = "small", + size = "medium", srcs = ["batch_dataset_op_test.py"], srcs_version = "PY2AND3", tags = ["no_pip"], deps = [ ":dataset_serialization_test", - "//tensorflow/contrib/data/python/ops:dataset_ops", - "//tensorflow/contrib/data/python/ops:transformation_ops", + "//tensorflow/contrib/data/python/ops:batching", "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", "//tensorflow/python:constant_op", "//tensorflow/python:dtypes", "//tensorflow/python:errors", "//tensorflow/python:math_ops", + "//tensorflow/python:script_ops", "//tensorflow/python:sparse_tensor", "//tensorflow/python:string_ops", "//tensorflow/python:tensor_shape", @@ -37,8 +37,7 @@ py_test( srcs_version = "PY2AND3", deps = [ ":dataset_serialization_test", - "//tensorflow/contrib/data/python/ops:dataset_ops", - "//tensorflow/contrib/data/python/ops:transformation_ops", + "//tensorflow/contrib/data/python/ops:grouping", "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", "//tensorflow/python:constant_op", @@ -59,10 +58,10 @@ py_test( srcs_version = "PY2AND3", deps = [ ":dataset_serialization_test", - "//tensorflow/contrib/data/python/ops:dataset_ops", "//tensorflow/python:client_testlib", "//tensorflow/python:errors", "//tensorflow/python:tensor_shape", + 
"//tensorflow/python/data/ops:dataset_ops", "//tensorflow/python/data/util:nest", "//third_party/py/numpy", ], @@ -79,8 +78,7 @@ py_test( ], deps = [ ":dataset_serialization_test", - "//tensorflow/contrib/data/python/ops:dataset_ops", - "//tensorflow/contrib/data/python/ops:transformation_ops", + "//tensorflow/contrib/data/python/ops:batching", "//tensorflow/core:protos_all_py", "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -124,16 +122,19 @@ py_test( size = "small", srcs = ["filter_dataset_op_test.py"], srcs_version = "PY2AND3", - tags = ["no_pip"], + tags = [ + "no_pip", + "optonly", + ], deps = [ ":dataset_serialization_test", - "//tensorflow/contrib/data/python/ops:dataset_ops", "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", "//tensorflow/python:dtypes", "//tensorflow/python:errors", "//tensorflow/python:functional_ops", "//tensorflow/python:math_ops", + "//tensorflow/python/data/ops:dataset_ops", "//third_party/py/numpy", ], ) @@ -145,7 +146,7 @@ tf_py_test( additional_deps = [ ":dataset_serialization_test", "//third_party/py/numpy", - "//tensorflow/contrib/data/python/ops:dataset_ops", + "//tensorflow/python/data/ops:dataset_ops", "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", "//tensorflow/python:constant_op", @@ -175,8 +176,7 @@ py_test( ], deps = [ ":dataset_serialization_test", - "//tensorflow/contrib/data/python/ops:dataset_ops", - "//tensorflow/contrib/data/python/ops:transformation_ops", + "//tensorflow/contrib/data/python/ops:interleave_ops", "//tensorflow/python:array_ops", "//tensorflow/python:client", "//tensorflow/python:client_testlib", @@ -187,6 +187,7 @@ py_test( "//tensorflow/python:sparse_ops", "//tensorflow/python:sparse_tensor", "//tensorflow/python:training", + "//tensorflow/python/data/ops:dataset_ops", "//third_party/py/numpy", ], ) @@ -197,7 +198,8 @@ tf_py_test( srcs = ["get_single_element_test.py"], additional_deps = [ "//third_party/py/numpy", - 
"//tensorflow/contrib/data/python/ops:dataset_ops", + "//tensorflow/contrib/data/python/ops:get_single_element", + "//tensorflow/python/data/ops:dataset_ops", "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", "//tensorflow/python:constant_op", @@ -212,11 +214,14 @@ py_test( size = "medium", srcs = ["map_dataset_op_test.py"], srcs_version = "PY2AND3", - tags = ["no_pip"], + tags = [ + "no_pip", + "noasan", # times out + "optonly", + ], deps = [ ":dataset_serialization_test", - "//tensorflow/contrib/data/python/ops:dataset_ops", - "//tensorflow/contrib/data/python/ops:transformation_ops", + "//tensorflow/contrib/data/python/ops:error_ops", "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", "//tensorflow/python:constant_op", @@ -261,8 +266,8 @@ py_test( srcs_version = "PY2AND3", deps = [ ":dataset_serialization_test", - "//tensorflow/contrib/data/python/ops:dataset_ops", - "//tensorflow/contrib/data/python/ops:transformation_ops", + "//tensorflow/contrib/data/python/ops:counter", + "//tensorflow/contrib/data/python/ops:enumerate_ops", "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", "//tensorflow/python:constant_op", @@ -274,6 +279,7 @@ py_test( "//tensorflow/python:parsing_ops", "//tensorflow/python:tensor_shape", "//tensorflow/python:variables", + "//tensorflow/python/data/ops:dataset_ops", ], ) @@ -307,14 +313,17 @@ py_test( srcs = ["resample_test.py"], shard_count = 2, srcs_version = "PY2AND3", - tags = ["noasan"], + tags = [ + "noasan", + "optonly", + ], deps = [ - "//tensorflow/contrib/data/python/ops:dataset_ops", - "//tensorflow/contrib/data/python/ops:transformation_ops", + "//tensorflow/contrib/data/python/ops:resampling", "//tensorflow/python:client_testlib", "//tensorflow/python:errors", "//tensorflow/python:string_ops", "//tensorflow/python:util", + "//tensorflow/python/data/ops:dataset_ops", "//third_party/py/numpy", ], ) @@ -327,13 +336,14 @@ py_test( tags = ["no_pip"], deps = [ 
":dataset_serialization_test", - "//tensorflow/contrib/data/python/ops:transformation_ops", + "//tensorflow/contrib/data/python/ops:scan_ops", "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", "//tensorflow/python:constant_op", "//tensorflow/python:dtypes", "//tensorflow/python:errors", "//tensorflow/python/data/ops:dataset_ops", + "//tensorflow/python/eager:context", "//third_party/py/numpy", ], ) @@ -346,11 +356,11 @@ py_test( tags = ["no_pip"], deps = [ ":dataset_serialization_test", - "//tensorflow/contrib/data/python/ops:dataset_ops", "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", "//tensorflow/python:dtypes", "//tensorflow/python:errors", + "//tensorflow/python/data/ops:dataset_ops", "//third_party/py/numpy", ], ) @@ -378,7 +388,6 @@ py_test( tags = ["no_pip"], deps = [ ":dataset_serialization_test", - "//tensorflow/contrib/data/python/ops:dataset_ops", "//tensorflow/contrib/data/python/ops:shuffle_ops", "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -415,10 +424,10 @@ py_test( tags = ["no_pip"], deps = [ ":dataset_serialization_test", - "//tensorflow/contrib/data/python/ops:dataset_ops", - "//tensorflow/contrib/data/python/ops:transformation_ops", + "//tensorflow/contrib/data/python/ops:stats_ops", "//tensorflow/python:client_testlib", "//tensorflow/python:errors", + "//tensorflow/python/data/ops:dataset_ops", ], ) @@ -429,10 +438,11 @@ py_test( srcs_version = "PY2AND3", tags = ["no_pip"], deps = [ - "//tensorflow/contrib/data/python/ops:dataset_ops", - "//tensorflow/contrib/data/python/ops:transformation_ops", + "//tensorflow/contrib/data/python/ops:threadpool", + "//tensorflow/contrib/data/python/ops:unique", "//tensorflow/python:client_testlib", "//tensorflow/python:errors", + "//tensorflow/python/data/ops:dataset_ops", ], ) @@ -444,13 +454,13 @@ py_test( tags = ["no_pip"], deps = [ ":dataset_serialization_test", - "//tensorflow/contrib/data/python/ops:dataset_ops", - 
"//tensorflow/contrib/data/python/ops:transformation_ops", + "//tensorflow/contrib/data/python/ops:unique", "//tensorflow/contrib/stateless", "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", "//tensorflow/python:dtypes", "//tensorflow/python:errors", + "//tensorflow/python/data/ops:dataset_ops", "//third_party/py/numpy", ], ) @@ -463,21 +473,20 @@ py_test( tags = ["no_pip"], deps = [ ":dataset_serialization_test", - "//tensorflow/contrib/data/python/ops:dataset_ops", "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", "//tensorflow/python:dtypes", "//tensorflow/python:errors", + "//tensorflow/python/data/ops:dataset_ops", "//third_party/py/numpy", ], ) -py_test( +cuda_py_test( name = "prefetching_ops_test", size = "small", srcs = ["prefetching_ops_test.py"], - srcs_version = "PY2AND3", - deps = [ + additional_deps = [ "//tensorflow/contrib/data/python/ops:prefetching_ops", "//tensorflow/core:protos_all_py", "//tensorflow/python:client_testlib", @@ -497,8 +506,8 @@ tf_py_test( size = "small", srcs = ["slide_dataset_op_test.py"], additional_deps = [ - "//tensorflow/contrib/data/python/ops:dataset_ops", - "//tensorflow/contrib/data/python/ops:transformation_ops", + "//tensorflow/python/data/ops:dataset_ops", + "//tensorflow/contrib/data/python/ops:sliding", "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", "//tensorflow/python:dtypes", @@ -508,3 +517,23 @@ tf_py_test( "//third_party/py/numpy", ], ) + +tf_py_test( + name = "writer_ops_test", + size = "small", + srcs = ["writer_ops_test.py"], + additional_deps = [ + "//tensorflow/contrib/data/python/ops:writers", + "//tensorflow/python:array_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:dataset_ops_gen", + "//tensorflow/python:dtypes", + "//tensorflow/python:errors", + "//tensorflow/python:framework_ops", + "//tensorflow/python:io_ops", + "//tensorflow/python:lib", + "//tensorflow/python:tensor_shape", + "//tensorflow/python:util", + 
"//tensorflow/python/data/ops:readers", + ], +) diff --git a/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py index 75482f67da11401305b7b342cd5c971da71a4f3c..a4a0ce79b6013d8813f2d8d294168ea8189d53ef 100644 --- a/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py @@ -18,20 +18,26 @@ from __future__ import division from __future__ import print_function import math +import time import numpy as np from tensorflow.contrib.data.python.kernel_tests import dataset_serialization_test_base from tensorflow.contrib.data.python.ops import batching +from tensorflow.python.client import session from tensorflow.python.data.ops import dataset_ops from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors +from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor +from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops +from tensorflow.python.ops import script_ops from tensorflow.python.ops import string_ops from tensorflow.python.platform import test +from tensorflow.python.util import compat class BatchDatasetTest(test.TestCase): @@ -149,6 +155,69 @@ class BatchDatasetTest(test.TestCase): with self.assertRaises(errors.OutOfRangeError): sess.run(op) + def testUnbatchDatasetWithStrings(self): + data = tuple([math_ops.range(10) for _ in range(3)]) + data = dataset_ops.Dataset.from_tensor_slices(data) + data = data.map(lambda x, y, z: (x, string_ops.as_string(y), z)) + expected_types = (dtypes.int32, dtypes.string, dtypes.int32) + data = data.batch(2) + self.assertEqual(expected_types, data.output_types) + data = data.apply(batching.unbatch()) + self.assertEqual(expected_types, data.output_types) 
+ + iterator = data.make_one_shot_iterator() + op = iterator.get_next() + + with self.test_session() as sess: + for i in range(10): + self.assertEqual((i, compat.as_bytes(str(i)), i), sess.run(op)) + + with self.assertRaises(errors.OutOfRangeError): + sess.run(op) + + def testUnbatchDatasetWithSparseTensor(self): + st = sparse_tensor.SparseTensorValue( + indices=[[i, i] for i in range(10)], + values=list(range(10)), + dense_shape=[10, 10]) + data = dataset_ops.Dataset.from_tensors(st) + data = data.apply(batching.unbatch()) + data = data.batch(5) + data = data.apply(batching.unbatch()) + iterator = data.make_one_shot_iterator() + next_element = iterator.get_next() + + with self.test_session() as sess: + for i in range(10): + st_row = sess.run(next_element) + self.assertEqual([i], st_row.indices) + self.assertEqual([i], st_row.values) + self.assertEqual([10], st_row.dense_shape) + with self.assertRaises(errors.OutOfRangeError): + sess.run(next_element) + + def testUnbatchDatasetWithDenseAndSparseTensor(self): + st = sparse_tensor.SparseTensorValue( + indices=[[i, i] for i in range(10)], + values=list(range(10)), + dense_shape=[10, 10]) + data = dataset_ops.Dataset.from_tensors((list(range(10)), st)) + data = data.apply(batching.unbatch()) + data = data.batch(5) + data = data.apply(batching.unbatch()) + iterator = data.make_one_shot_iterator() + next_element = iterator.get_next() + + with self.test_session() as sess: + for i in range(10): + dense_elem, st_row = sess.run(next_element) + self.assertEqual(i, dense_elem) + self.assertEqual([i], st_row.indices) + self.assertEqual([i], st_row.values) + self.assertEqual([10], st_row.dense_shape) + with self.assertRaises(errors.OutOfRangeError): + sess.run(next_element) + def testUnbatchSingleElementTupleDataset(self): data = tuple([(math_ops.range(10),) for _ in range(3)]) data = dataset_ops.Dataset.from_tensor_slices(data) @@ -189,6 +258,53 @@ class BatchDatasetTest(test.TestCase): with 
self.assertRaises(errors.OutOfRangeError): sess.run(op) + def testUnbatchEmpty(self): + data = dataset_ops.Dataset.from_tensors( + (constant_op.constant([]), constant_op.constant([], shape=[0, 4]), + constant_op.constant([], shape=[0, 4, 0]))) + data = data.apply(batching.unbatch()) + iterator = data.make_one_shot_iterator() + next_element = iterator.get_next() + + with self.test_session() as sess: + with self.assertRaises(errors.OutOfRangeError): + sess.run(next_element) + + def testUnbatchStaticShapeMismatch(self): + data = dataset_ops.Dataset.from_tensors((np.arange(7), np.arange(8), + np.arange(9))) + with self.assertRaises(ValueError): + data.apply(batching.unbatch()) + + def testUnbatchDynamicShapeMismatch(self): + ph1 = array_ops.placeholder(dtypes.int32, shape=[None]) + ph2 = array_ops.placeholder(dtypes.int32, shape=None) + data = dataset_ops.Dataset.from_tensors((ph1, ph2)) + data = data.apply(batching.unbatch()) + iterator = data.make_initializable_iterator() + next_element = iterator.get_next() + + with self.test_session() as sess: + # Mismatch in the 0th dimension. + sess.run( + iterator.initializer, + feed_dict={ + ph1: np.arange(7).astype(np.int32), + ph2: np.arange(8).astype(np.int32) + }) + with self.assertRaises(errors.InvalidArgumentError): + print(sess.run(next_element)) + + # No 0th dimension (i.e. scalar value) for one component. 
+ sess.run( + iterator.initializer, + feed_dict={ + ph1: np.arange(7).astype(np.int32), + ph2: 7 + }) + with self.assertRaises(errors.InvalidArgumentError): + print(sess.run(next_element)) + def testBatchAndDropRemainder(self): components = (np.arange(7), np.array([[1, 2, 3]]) * np.arange(7)[:, np.newaxis], @@ -543,6 +659,59 @@ class BatchDatasetSerializationTest( self.run_core_tests(self._build_dataset_nested_sparse, None, 1) +class UnbatchDatasetSerializationTest( + dataset_serialization_test_base.DatasetSerializationTestBase): + + def build_dataset(self, multiplier=15.0, tensor_slice_len=2, batch_size=2): + components = ( + np.arange(tensor_slice_len), + np.array([[1, 2, 3]]) * np.arange(tensor_slice_len)[:, np.newaxis], + np.array(multiplier) * np.arange(tensor_slice_len)) + + return dataset_ops.Dataset.from_tensor_slices(components).batch( + batch_size).apply(batching.unbatch()) + + def testCore(self): + tensor_slice_len = 8 + batch_size = 2 + num_outputs = tensor_slice_len + self.run_core_tests( + lambda: self.build_dataset(15.0, tensor_slice_len, batch_size), + lambda: self.build_dataset(20.0, tensor_slice_len, batch_size), + num_outputs) + + +class MapAndBatchDatasetSerializationTest( + dataset_serialization_test_base.DatasetSerializationTestBase): + + def testSerializationCore(self): + range_size = 11 + num_repeats = 2 + batch_size = 5 + total_outputs = range_size * num_repeats + num_outputs_drop_remainder = total_outputs // batch_size + num_outputs_keep_remainder = int(math.ceil(total_outputs / batch_size)) + num_parallel_batches = 2 + + def build_ds(range_start, drop_remainder=False): + + def _map_fn(x): + return math_ops.square(x) + + return dataset_ops.Dataset.range( + range_start, range_start + range_size).repeat(num_repeats).apply( + batching.map_and_batch( + map_func=_map_fn, + batch_size=batch_size, + num_parallel_batches=num_parallel_batches, + drop_remainder=drop_remainder)) + + self.run_core_tests(lambda: build_ds(10), lambda: build_ds(15), + 
num_outputs_keep_remainder) + self.run_core_tests(lambda: build_ds(10, True), lambda: build_ds(15, True), + num_outputs_drop_remainder) + + class PaddedBatchDatasetSerializationTest( dataset_serialization_test_base.DatasetSerializationTestBase): @@ -579,5 +748,149 @@ class PaddedBatchDatasetSerializationTest( lambda: build_dataset(seq_lens2), 8) +class RestructuredDatasetTest(test.TestCase): + + def test_assert_element_shape(self): + + def create_unknown_shape_dataset(x): + return script_ops.py_func( + lambda _: ( # pylint: disable=g-long-lambda + np.ones(2, dtype=np.float32), + np.zeros((3, 4), dtype=np.int32)), + [x], + [dtypes.float32, dtypes.int32]) + + dataset = dataset_ops.Dataset.range(5).map(create_unknown_shape_dataset) + unknown_shapes = (tensor_shape.TensorShape(None), + tensor_shape.TensorShape(None)) + self.assertEqual(unknown_shapes, dataset.output_shapes) + + expected_shapes = (tensor_shape.TensorShape(2), + tensor_shape.TensorShape((3, 4))) + result = dataset.apply(batching.assert_element_shape(expected_shapes)) + self.assertEqual(expected_shapes, result.output_shapes) + + iterator = result.make_initializable_iterator() + init_op = iterator.initializer + get_next = iterator.get_next() + with self.test_session() as sess: + sess.run(init_op) + for _ in range(5): + sess.run(get_next) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + + def test_assert_wrong_element_shape(self): + + def create_dataset(_): + return (array_ops.ones(2, dtype=dtypes.float32), + array_ops.zeros((3, 4), dtype=dtypes.int32)) + + dataset = dataset_ops.Dataset.range(3).map(create_dataset) + wrong_shapes = (tensor_shape.TensorShape(2), + tensor_shape.TensorShape((3, 10))) + with self.assertRaises(ValueError): + dataset.apply(batching.assert_element_shape(wrong_shapes)) + + def test_assert_wrong_element_shape_on_unknown_shape_dataset(self): + + def create_unknown_shape_dataset(x): + return script_ops.py_func( + lambda _: ( # pylint: disable=g-long-lambda + 
np.ones(2, dtype=np.float32), + np.zeros((3, 4), dtype=np.int32)), + [x], + [dtypes.float32, dtypes.int32]) + + dataset = dataset_ops.Dataset.range(3).map(create_unknown_shape_dataset) + unknown_shapes = (tensor_shape.TensorShape(None), + tensor_shape.TensorShape(None)) + self.assertEqual(unknown_shapes, dataset.output_shapes) + + wrong_shapes = (tensor_shape.TensorShape(2), + tensor_shape.TensorShape((3, 10))) + iterator = ( + dataset.apply(batching.assert_element_shape(wrong_shapes)) + .make_initializable_iterator()) + init_op = iterator.initializer + get_next = iterator.get_next() + with self.test_session() as sess: + sess.run(init_op) + with self.assertRaises(errors.InvalidArgumentError): + sess.run(get_next) + + +class UnbatchDatasetBenchmark(test.Benchmark): + + def benchmarkNativeUnbatch(self): + batch_sizes = [1, 2, 5, 10, 20, 50] + elems_per_trial = 10000 + with ops.Graph().as_default(): + dataset = dataset_ops.Dataset.from_tensors("element").repeat(None) + batch_size_placeholder = array_ops.placeholder(dtypes.int64, shape=[]) + dataset = dataset.batch(batch_size_placeholder) + dataset = dataset.apply(batching.unbatch()) + dataset = dataset.skip(elems_per_trial) + iterator = dataset.make_initializable_iterator() + next_element = iterator.get_next() + + with session.Session() as sess: + for batch_size in batch_sizes: + deltas = [] + for _ in range(5): + sess.run( + iterator.initializer, + feed_dict={batch_size_placeholder: batch_size}) + start = time.time() + sess.run(next_element.op) + end = time.time() + deltas.append((end - start) / elems_per_trial) + + median_wall_time = np.median(deltas) + print("Unbatch (native) batch size: %d Median wall time per element:" + " %f microseconds" % (batch_size, median_wall_time * 1e6)) + self.report_benchmark( + iters=10000, + wall_time=median_wall_time, + name="benchmark_unbatch_dataset_native_batch_size_%d" % + batch_size) + + # Include a benchmark of the previous `unbatch()` implementation that uses + # a composition 
of more primitive ops. Eventually we'd hope to generate code + # that is as good in both cases. + def benchmarkOldUnbatchImplementation(self): + batch_sizes = [1, 2, 5, 10, 20, 50] + elems_per_trial = 10000 + with ops.Graph().as_default(): + dataset = dataset_ops.Dataset.from_tensors("element").repeat(None) + batch_size_placeholder = array_ops.placeholder(dtypes.int64, shape=[]) + dataset = dataset.batch(batch_size_placeholder) + dataset = dataset.flat_map(dataset_ops.Dataset.from_tensor_slices) + dataset = dataset.skip(elems_per_trial) + iterator = dataset.make_initializable_iterator() + next_element = iterator.get_next() + + with session.Session() as sess: + for batch_size in batch_sizes: + deltas = [] + for _ in range(5): + sess.run( + iterator.initializer, + feed_dict={batch_size_placeholder: batch_size}) + start = time.time() + sess.run(next_element.op) + end = time.time() + deltas.append((end - start) / elems_per_trial) + + median_wall_time = np.median(deltas) + print("Unbatch (unfused) batch size: %d Median wall time per element:" + " %f microseconds" % (batch_size, median_wall_time * 1e6)) + self.report_benchmark( + iters=10000, + wall_time=median_wall_time, + name="benchmark_unbatch_dataset_unfused_batch_size_%d" % + batch_size) + + if __name__ == "__main__": test.main() diff --git a/tensorflow/contrib/data/python/kernel_tests/bucketing_test.py b/tensorflow/contrib/data/python/kernel_tests/bucketing_test.py index 6002cc73c8b41c2f20beaf0158af813807e58c90..55a56b83a8efba899c6b296264d766839a824da5 100644 --- a/tensorflow/contrib/data/python/kernel_tests/bucketing_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/bucketing_test.py @@ -61,7 +61,7 @@ class GroupByWindowTest(test.TestCase): self.assertEqual(len(components), sum(counts)) num_full_batches = len([c for c in counts if c == 4]) - self.assertGreaterEqual(num_full_batches, 23) + self.assertGreaterEqual(num_full_batches, 24) self.assertTrue(all(c == 4 for c in counts[:num_full_batches])) def 
testImmediateOutput(self): diff --git a/tensorflow/contrib/data/python/kernel_tests/dataset_serialization_test_base.py b/tensorflow/contrib/data/python/kernel_tests/dataset_serialization_test_base.py index dbc35097ddda9f0375060d43aeb43efa8107f929..78ecce8f7daaf84002ae78d8d77820755b967d89 100644 --- a/tensorflow/contrib/data/python/kernel_tests/dataset_serialization_test_base.py +++ b/tensorflow/contrib/data/python/kernel_tests/dataset_serialization_test_base.py @@ -163,7 +163,7 @@ class DatasetSerializationTestBase(test.TestCase): num_outputs, sparse_tensors=False, verify_exhausted=True): - """Verifies that restoring into an already initilized iterator works. + """Verifies that restoring into an already initialized iterator works. Args: ds_fn: See `run_core_tests`. diff --git a/tensorflow/contrib/data/python/kernel_tests/interleave_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/interleave_dataset_op_test.py index 256ad8d94dc1a7c2b26df3f1ebf8e8e321882c15..43aa4b1bd02791ff304a990c0bbe8e45534c0c77 100644 --- a/tensorflow/contrib/data/python/kernel_tests/interleave_dataset_op_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/interleave_dataset_op_test.py @@ -30,6 +30,7 @@ from tensorflow.contrib.data.python.ops import interleave_ops from tensorflow.python.data.ops import dataset_ops from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors +from tensorflow.python.framework import random_seed from tensorflow.python.framework import sparse_tensor from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops @@ -94,6 +95,76 @@ class InterleaveDatasetSerializationTest( self.run_core_tests(_build_dataset, None, 20) +class ParallelInterleaveDatasetSerializationTest( + dataset_serialization_test_base.DatasetSerializationTestBase): + + def setUp(self): + self.input_values = np.array([4, 5, 6], dtype=np.int64) + self.num_repeats = 2 + self.num_outputs = np.sum(self.input_values) * 2 + + def 
_build_ds(self, cycle_length, block_length, sloppy=False): + return (dataset_ops.Dataset.from_tensor_slices( + self.input_values).repeat(self.num_repeats).apply( + interleave_ops.parallel_interleave( + lambda x: dataset_ops.Dataset.range(10 * x, 11 * x), + cycle_length, block_length, sloppy))) + + def testSerializationCore(self): + # cycle_length > 1, block_length > 1 + cycle_length = 2 + block_length = 3 + self.run_core_tests( + lambda: self._build_ds(cycle_length, block_length), + lambda: self._build_ds(cycle_length * 2, block_length * 1), + self.num_outputs) + # cycle_length = 1 + cycle_length = 1 + block_length = 3 + self.run_core_tests(lambda: self._build_ds(cycle_length, block_length), + None, self.num_outputs) + # block_length = 1 + cycle_length = 2 + block_length = 1 + self.run_core_tests(lambda: self._build_ds(cycle_length, block_length), + None, self.num_outputs) + + def testSerializationWithSloppy(self): + break_points = self.gen_break_points(self.num_outputs, 10) + expected_outputs = np.repeat( + np.concatenate([np.arange(10 * x, 11 * x) for x in self.input_values]), + self.num_repeats).tolist() + + def run_test(cycle_length, block_length): + actual = self.gen_outputs( + lambda: self._build_ds(cycle_length, block_length, True), + break_points, self.num_outputs) + self.assertSequenceEqual(sorted(actual), expected_outputs) + + # cycle_length > 1, block_length > 1 + run_test(2, 3) + # cycle_length = 1 + run_test(1, 3) + # block_length = 1 + run_test(2, 1) + + def testSparseCore(self): + + def _map_fn(i): + return sparse_tensor.SparseTensorValue( + indices=[[0, 0], [1, 1]], values=(i * [1, -1]), dense_shape=[2, 2]) + + def _interleave_fn(x): + return dataset_ops.Dataset.from_tensor_slices( + sparse_ops.sparse_to_dense(x.indices, x.dense_shape, x.values)) + + def _build_dataset(): + return dataset_ops.Dataset.range(10).map(_map_fn).apply( + interleave_ops.parallel_interleave(_interleave_fn, 1)) + + self.run_core_tests(_build_dataset, None, 20) + + class 
ParallelInterleaveDatasetTest(test.TestCase): def setUp(self): @@ -338,7 +409,7 @@ class ParallelInterleaveDatasetTest(test.TestCase): def _testTwoThreadsNoContentionWithRaces(self, sloppy=False): """Tests where all the workers race in producing elements. - Note: this is in contrast with the prevous test which carefully sequences + Note: this is in contrast with the previous test which carefully sequences the execution of the map functions. Args: @@ -424,7 +495,7 @@ class ParallelInterleaveDatasetTest(test.TestCase): def _testTwoThreadsNoContentionWithRacesAndBlocking(self, sloppy=False): """Tests where all the workers race in producing elements. - Note: this is in contrast with the prevous test which carefully sequences + Note: this is in contrast with the previous test which carefully sequences the execution of the map functions. @@ -836,5 +907,114 @@ class ParallelInterleaveDatasetTest(test.TestCase): sess.run(self.next_element) +class DirectedInterleaveDatasetTest(test.TestCase): + + def testBasic(self): + selector_dataset = dataset_ops.Dataset.range(10).repeat(100) + input_datasets = [ + dataset_ops.Dataset.from_tensors(i).repeat(100) for i in range(10) + ] + dataset = interleave_ops.DirectedInterleaveDataset(selector_dataset, + input_datasets) + iterator = dataset.make_initializable_iterator() + next_element = iterator.get_next() + + with self.test_session() as sess: + sess.run(iterator.initializer) + for _ in range(100): + for i in range(10): + self.assertEqual(i, sess.run(next_element)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(next_element) + + def _normalize(self, vec): + return vec / vec.sum() + + def _chi2(self, expected, actual): + actual = np.asarray(actual) + expected = np.asarray(expected) + diff = actual - expected + chi2 = np.sum(diff * diff / expected, axis=0) + return chi2 + + def _testSampleFromDatasetsHelper(self, weights, num_datasets, num_samples): + # Create a dataset that samples each integer in `[0, num_datasets)` + # 
with probability given by `weights[i]`. + dataset = interleave_ops.sample_from_datasets([ + dataset_ops.Dataset.from_tensors(i).repeat(None) + for i in range(num_datasets) + ], weights) + dataset = dataset.take(num_samples) + iterator = dataset.make_one_shot_iterator() + next_element = iterator.get_next() + + with self.test_session() as sess: + freqs = np.zeros([num_datasets]) + for _ in range(num_samples): + freqs[sess.run(next_element)] += 1 + with self.assertRaises(errors.OutOfRangeError): + sess.run(next_element) + + return freqs + + def testSampleFromDatasets(self): + random_seed.set_random_seed(1619) + num_samples = 10000 + rand_probs = self._normalize(np.random.random_sample((15,))) + + # Use chi-squared test to assert that the observed distribution matches the + # expected distribution. Based on the implementation in + # "tensorflow/python/kernel_tests/multinomial_op_test.py". + for probs in [[.85, .05, .1], rand_probs]: + probs = np.asarray(probs) + classes = len(probs) + freqs = self._testSampleFromDatasetsHelper(probs, classes, num_samples) + self.assertLess(self._chi2(probs, freqs / num_samples), 1e-3) + + # Also check that `weights` as a dataset samples correctly. 
+ probs_ds = dataset_ops.Dataset.from_tensors(probs).repeat() + freqs = self._testSampleFromDatasetsHelper(probs_ds, classes, num_samples) + self.assertLess(self._chi2(probs, freqs / num_samples), 1e-3) + + def testErrors(self): + with self.assertRaisesRegexp(ValueError, + r"vector of length `len\(datasets\)`"): + interleave_ops.sample_from_datasets( + [dataset_ops.Dataset.range(10), + dataset_ops.Dataset.range(20)], + weights=[0.25, 0.25, 0.25, 0.25]) + + with self.assertRaisesRegexp(TypeError, "`tf.float32` or `tf.float64`"): + interleave_ops.sample_from_datasets( + [dataset_ops.Dataset.range(10), + dataset_ops.Dataset.range(20)], + weights=[1, 1]) + + with self.assertRaisesRegexp(TypeError, "must have the same type"): + interleave_ops.sample_from_datasets([ + dataset_ops.Dataset.from_tensors(0), + dataset_ops.Dataset.from_tensors(0.0) + ]) + + +class SampleFromDatasetsSerializationTest( + dataset_serialization_test_base.DatasetSerializationTestBase): + + def _build_dataset(self, probs, num_samples): + dataset = interleave_ops.sample_from_datasets( + [ + dataset_ops.Dataset.from_tensors(i).repeat(None) + for i in range(len(probs)) + ], + probs, + seed=1813) + return dataset.take(num_samples) + + def testSerializationCore(self): + self.run_core_tests( + lambda: self._build_dataset([0.5, 0.5], 100), + lambda: self._build_dataset([0.25, 0.25, 0.25, 0.25], 1000), 100) + + if __name__ == "__main__": test.main() diff --git a/tensorflow/contrib/data/python/kernel_tests/prefetching_ops_test.py b/tensorflow/contrib/data/python/kernel_tests/prefetching_ops_test.py index 676959a900ba7c4cce4580845a3f885081779267..b08132cd72254326d965907a1fdafb8a820926a1 100644 --- a/tensorflow/contrib/data/python/kernel_tests/prefetching_ops_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/prefetching_ops_test.py @@ -28,12 +28,13 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors from tensorflow.python.framework import function from 
tensorflow.python.framework import ops +from tensorflow.python.framework import sparse_tensor from tensorflow.python.framework import test_util from tensorflow.python.ops import resource_variable_ops from tensorflow.python.platform import test -class StagingAreaOpsTest(test.TestCase): +class PrefetchingKernelsOpsTest(test.TestCase): def setUp(self): self._event = threading.Event() @@ -200,6 +201,9 @@ class StagingAreaOpsTest(test.TestCase): sess.run(destroy_op) + +class PrefetchToDeviceTest(test.TestCase): + def testPrefetchToDevice(self): host_dataset = dataset_ops.Dataset.range(10) device_dataset = host_dataset.apply( @@ -231,6 +235,74 @@ class StagingAreaOpsTest(test.TestCase): with self.assertRaises(errors.OutOfRangeError): sess.run(next_element) + def testPrefetchDictToDevice(self): + host_dataset = dataset_ops.Dataset.range(10).map(lambda x: {"a": x}) + device_dataset = host_dataset.apply( + prefetching_ops.prefetch_to_device("/cpu:1")) + + # NOTE(mrry): This device block creates the "host" dataset and iterator on + # /cpu:0, and ensures that the prefetching is across devices. In typical use + # this would not be necessary, because the GPU device would not support any + # of the dataset-related ops. 
+ with ops.device("/cpu:0"): + iterator = device_dataset.make_one_shot_iterator() + + self.assertEqual(host_dataset.output_types, device_dataset.output_types) + self.assertEqual(host_dataset.output_types, iterator.output_types) + self.assertEqual(host_dataset.output_shapes, device_dataset.output_shapes) + self.assertEqual(host_dataset.output_shapes, iterator.output_shapes) + self.assertEqual(host_dataset.output_classes, device_dataset.output_classes) + self.assertEqual(host_dataset.output_classes, iterator.output_classes) + + next_element = iterator.get_next() + self.assertEqual(dtypes.int64, next_element["a"].dtype) + self.assertEqual([], next_element["a"].shape) + + worker_config = config_pb2.ConfigProto() + worker_config.device_count["CPU"] = 2 + with self.test_session(config=worker_config) as sess: + for i in range(10): + self.assertEqual({"a": i}, sess.run(next_element)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(next_element) + + def testPrefetchSparseTensorsToDevice(self): + def make_tensor(i): + return sparse_tensor.SparseTensorValue( + indices=[[0, 0]], values=(i*[1]), dense_shape=[2, 2]) + host_dataset = dataset_ops.Dataset.range(10).map(make_tensor) + + device_dataset = host_dataset.apply( + prefetching_ops.prefetch_to_device("/cpu:1")) + + # NOTE(mrry): This device block creates the "host" dataset and iterator on + # /cpu:0, and ensures that the prefetching is across devices. In typical use + # this would not be necessary, because the GPU device would not support any + # of the dataset-related ops. 
+ with ops.device("/cpu:0"): + iterator = device_dataset.make_one_shot_iterator() + + self.assertEqual(host_dataset.output_types, device_dataset.output_types) + self.assertEqual(host_dataset.output_types, iterator.output_types) + self.assertEqual(host_dataset.output_shapes, device_dataset.output_shapes) + self.assertEqual(host_dataset.output_shapes, iterator.output_shapes) + self.assertEqual(host_dataset.output_classes, device_dataset.output_classes) + self.assertEqual(host_dataset.output_classes, iterator.output_classes) + + next_element = iterator.get_next() + self.assertEqual(dtypes.int64, next_element.dtype) + + worker_config = config_pb2.ConfigProto() + worker_config.device_count["CPU"] = 2 + with self.test_session(config=worker_config) as sess: + for i in range(10): + actual = sess.run(next_element) + self.assertAllEqual([i], actual.values) + self.assertAllEqual([[0, 0]], actual.indices) + self.assertAllEqual([2, 2], actual.dense_shape) + with self.assertRaises(errors.OutOfRangeError): + sess.run(next_element) + def testPrefetchToDeviceGpu(self): if not test_util.is_gpu_available(): self.skipTest("No GPU available") @@ -248,5 +320,62 @@ class StagingAreaOpsTest(test.TestCase): with self.assertRaises(errors.OutOfRangeError): sess.run(next_element) + def testPrefetchToDeviceWithReInit(self): + host_dataset = dataset_ops.Dataset.range(10) + device_dataset = host_dataset.apply( + prefetching_ops.prefetch_to_device("/cpu:1")) + + # NOTE(mrry): This device block creates the "host" dataset and iterator on + # /cpu:0, and ensures that the prefetching is across devices. In typical use + # this would not be necessary, because the GPU device would not support any + # of the dataset-related ops. 
+ with ops.device("/cpu:0"): + iterator = device_dataset.make_initializable_iterator() + + self.assertEqual(host_dataset.output_types, device_dataset.output_types) + self.assertEqual(host_dataset.output_types, iterator.output_types) + self.assertEqual(host_dataset.output_shapes, device_dataset.output_shapes) + self.assertEqual(host_dataset.output_shapes, iterator.output_shapes) + self.assertEqual(host_dataset.output_classes, device_dataset.output_classes) + self.assertEqual(host_dataset.output_classes, iterator.output_classes) + + next_element = iterator.get_next() + self.assertEqual(dtypes.int64, next_element.dtype) + self.assertEqual([], next_element.shape) + + worker_config = config_pb2.ConfigProto() + worker_config.device_count["CPU"] = 2 + with self.test_session(config=worker_config) as sess: + sess.run(iterator.initializer) + for i in range(5): + self.assertEqual(i, sess.run(next_element)) + sess.run(iterator.initializer) + for i in range(10): + self.assertEqual(i, sess.run(next_element)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(next_element) + + def testPrefetchToDeviceGpuWithReInit(self): + if not test_util.is_gpu_available(): + self.skipTest("No GPU available") + + host_dataset = dataset_ops.Dataset.range(10) + device_dataset = host_dataset.apply( + prefetching_ops.prefetch_to_device("/gpu:0")) + + iterator = device_dataset.make_initializable_iterator() + next_element = iterator.get_next() + + with self.test_session() as sess: + sess.run(iterator.initializer) + for i in range(5): + self.assertEqual(i, sess.run(next_element)) + sess.run(iterator.initializer) + for i in range(10): + self.assertEqual(i, sess.run(next_element)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(next_element) + + if __name__ == "__main__": test.main() diff --git a/tensorflow/contrib/data/python/kernel_tests/reader_dataset_ops_test.py b/tensorflow/contrib/data/python/kernel_tests/reader_dataset_ops_test.py index 
6ee1b572f121a9a40dfd638f7a858d5f1176ea3c..1075302bae96ca2e0111efbacdf5e919ea76897d 100644 --- a/tensorflow/contrib/data/python/kernel_tests/reader_dataset_ops_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/reader_dataset_ops_test.py @@ -271,7 +271,8 @@ class ReadBatchFeaturesTest(test.TestCase): reader_num_threads=1, parser_num_threads=1, shuffle=False, - shuffle_seed=None): + shuffle_seed=None, + drop_final_batch=False): self.filenames = filenames self.num_epochs = num_epochs self.batch_size = batch_size @@ -289,22 +290,25 @@ class ReadBatchFeaturesTest(test.TestCase): shuffle=shuffle, shuffle_seed=shuffle_seed, reader_num_threads=reader_num_threads, - parser_num_threads=parser_num_threads).make_one_shot_iterator( + parser_num_threads=parser_num_threads, + drop_final_batch=drop_final_batch).make_one_shot_iterator( ).get_next() def _record(self, f, r): - example = example_pb2.Example(features=feature_pb2.Features( - feature={ - "file": - feature_pb2.Feature(int64_list=feature_pb2.Int64List( - value=[f])), - "record": - feature_pb2.Feature(int64_list=feature_pb2.Int64List( - value=[r])), - "keywords": - feature_pb2.Feature(bytes_list=feature_pb2.BytesList( - value=self._get_keywords(f, r))) - })) + example = example_pb2.Example( + features=feature_pb2.Features( + feature={ + "file": + feature_pb2.Feature( + int64_list=feature_pb2.Int64List(value=[f])), + "record": + feature_pb2.Feature( + int64_list=feature_pb2.Int64List(value=[r])), + "keywords": + feature_pb2.Feature( + bytes_list=feature_pb2.BytesList( + value=self._get_keywords(f, r))) + })) return example.SerializeToString() def _get_keywords(self, f, r): @@ -372,8 +376,8 @@ class ReadBatchFeaturesTest(test.TestCase): record_batch.append(r) keywords = self._get_keywords(f, r) keywords_batch_values.extend(keywords) - keywords_batch_indices.extend([[batch_index, i] - for i in range(len(keywords))]) + keywords_batch_indices.extend( + [[batch_index, i] for i in range(len(keywords))]) batch_index += 1 
keywords_batch_max_len = max(keywords_batch_max_len, len(keywords)) if len(file_batch) == batch_size: @@ -473,9 +477,10 @@ class ReadBatchFeaturesTest(test.TestCase): "file": parsing_ops.FixedLenFeature([], dtypes.int64), "record": parsing_ops.FixedLenFeature([], dtypes.int64), } - dataset = (core_readers.TFRecordDataset(self.test_filenames) - .map(lambda x: parsing_ops.parse_single_example(x, features)) - .repeat(10).batch(2)) + dataset = ( + core_readers.TFRecordDataset(self.test_filenames) + .map(lambda x: parsing_ops.parse_single_example(x, features)) + .repeat(10).batch(2)) iterator = dataset.make_initializable_iterator() init_op = iterator.initializer next_element = iterator.get_next() @@ -559,6 +564,20 @@ class ReadBatchFeaturesTest(test.TestCase): with self.assertRaises(errors.OutOfRangeError): self._next_actual_batch(sess) + def testDropFinalBatch(self): + for batch_size in [1, 2]: + for num_epochs in [1, 10]: + with ops.Graph().as_default(): + # Basic test: read from file 0. + self.outputs = self._read_batch_features( + filenames=self.test_filenames[0], + num_epochs=num_epochs, + batch_size=batch_size, + drop_final_batch=True) + for _, tensor in self.outputs.items(): + if isinstance(tensor, ops.Tensor): # Guard against SparseTensor. 
+ self.assertEqual(tensor.shape[0], batch_size) + class MakeCsvDatasetTest(test.TestCase): @@ -591,20 +610,25 @@ class MakeCsvDatasetTest(test.TestCase): "record %d" % recordno if recordno % 2 == 1 else "", ] - def _csv_record(self, fileno, recordno): - return ",".join(str(v) for v in self._csv_values(fileno, recordno)) + def _write_file(self, filename, rows): + for i in range(len(rows)): + if isinstance(rows[i], list): + rows[i] = ",".join(str(v) if v is not None else "" for v in rows[i]) + fn = os.path.join(self.get_temp_dir(), filename) + f = open(fn, "w") + f.write("\n".join(rows)) + f.close() + return fn def _create_file(self, fileno, header=True, comment=True): - fn = os.path.join(self.get_temp_dir(), "csv_file%d.csv" % fileno) - f = open(fn, "w") + rows = [] if header: - f.write(",".join(self.COLUMNS) + "\n") + rows.append(self.COLUMNS) for recno in range(self._num_records): - f.write(self._csv_record(fileno, recno) + "\n") + rows.append(self._csv_values(fileno, recno)) if comment: - f.write("# Some comment goes here. Should be ignored!\n") - f.close() - return fn + rows.append("# Some comment goes here. 
Ignore me.") + return self._write_file("csv_file%d.csv" % fileno, rows) def _create_files(self): filenames = [] @@ -618,6 +642,7 @@ class MakeCsvDatasetTest(test.TestCase): defaults, column_names=COLUMNS, label_name=LABEL, + select_cols=None, batch_size=1, num_epochs=1, shuffle=False, @@ -640,6 +665,7 @@ class MakeCsvDatasetTest(test.TestCase): comment=comment, na_value=na_value, default_float_type=default_float_type, + select_columns=select_cols, ) def _next_actual_batch(self, file_indices, batch_size, num_epochs, defaults): @@ -696,7 +722,7 @@ class MakeCsvDatasetTest(test.TestCase): with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) - def test_make_csv_dataset(self): + def testMakeCSVDataset(self): defaults = self.DEFAULTS with ops.Graph().as_default() as g: @@ -723,7 +749,7 @@ class MakeCsvDatasetTest(test.TestCase): self._verify_records( sess, dataset, range(self._num_files), batch_size=2, num_epochs=10) - def test_make_csv_dataset_with_bad_columns(self): + def testMakeCSVDataset_withBadColumns(self): """Tests that exception is raised when input is malformed. """ dupe_columns = self.COLUMNS[:-1] + self.COLUMNS[:1] @@ -739,7 +765,7 @@ class MakeCsvDatasetTest(test.TestCase): self._make_csv_dataset( self._test_filenames, defaults, label_name="not_a_real_label") - def test_make_csv_dataset_with_no_label(self): + def testMakeCSVDataset_withNoLabel(self): """Tests that CSV datasets can be created when no label is specified. """ defaults = self.DEFAULTS @@ -760,7 +786,7 @@ class MakeCsvDatasetTest(test.TestCase): num_epochs=10, label_name=None) - def test_make_csv_dataset_with_no_comments(self): + def testMakeCSVDataset_withNoComments(self): """Tests that datasets can be created from CSV files with no header line. 
""" defaults = self.DEFAULTS @@ -783,7 +809,7 @@ class MakeCsvDatasetTest(test.TestCase): num_epochs=10, ) - def test_make_csv_dataset_with_no_header(self): + def testMakeCSVDataset_withNoHeader(self): """Tests that datasets can be created from CSV files with no header line. """ defaults = self.DEFAULTS @@ -806,7 +832,7 @@ class MakeCsvDatasetTest(test.TestCase): num_epochs=10, ) - def test_make_csv_dataset_with_types(self): + def testMakeCSVDataset_withTypes(self): """Tests that defaults can be a dtype instead of a Tensor for required vals. """ defaults = [d for d in self.COLUMN_TYPES[:-1]] @@ -816,7 +842,7 @@ class MakeCsvDatasetTest(test.TestCase): dataset = self._make_csv_dataset(self._test_filenames, defaults) self._verify_records(sess, dataset, range(self._num_files)) - def test_make_csv_dataset_with_no_col_names(self): + def testMakeCSVDataset_withNoColNames(self): """Tests that datasets can be created when column names are not specified. In that case, we should infer the column names from the header lines. @@ -835,7 +861,17 @@ class MakeCsvDatasetTest(test.TestCase): self._verify_records( sess, dataset, range(self._num_files), batch_size=2, num_epochs=10) - def test_make_csv_dataset_type_inference(self): + def testMakeCSVDataset_withTypeInferenceMismatch(self): + # Test that error is thrown when num fields doesn't match columns + with self.assertRaises(ValueError): + self._make_csv_dataset( + self._test_filenames, + column_names=self.COLUMNS + ["extra_name"], + defaults=None, + batch_size=2, + num_epochs=10) + + def testMakeCSVDataset_withTypeInference(self): """Tests that datasets can be created when no defaults are specified. In that case, we should infer the types from the first N records. 
@@ -859,19 +895,16 @@ class MakeCsvDatasetTest(test.TestCase): dtypes.int32, dtypes.int64, dtypes.float32, dtypes.float32, dtypes.string, dtypes.string ] - rows = [[0, 0, 0, "NAN", "", "a"], [1, 2**31 + 1, 2**64, 123, "NAN", ""], + col_names = ["col%d" % i for i in range(len(expected_dtypes))] + rows = [[None, None, None, "NAN", "", + "a"], [1, 2**31 + 1, 2**64, 123, "NAN", ""], ['"123"', 2, 2**64, 123.4, "NAN", '"cd,efg"']] expected = [[0, 0, 0, 0, "", "a"], [1, 2**31 + 1, 2**64, 123, "", ""], [123, 2, 2**64, 123.4, "", "cd,efg"]] for row in expected: row[-1] = row[-1].encode("utf-8") # py3 expects byte strings row[-2] = row[-2].encode("utf-8") # py3 expects byte strings - col_names = ["col%d" % i for i in range(len(expected_dtypes))] - with open(fn, "w") as f: - f.write(",".join(col_names)) - f.write("\n") - for row in rows: - f.write(",".join([str(v) if v else "" for v in row]) + "\n") + self._write_file("file.csv", [col_names] + rows) with ops.Graph().as_default() as g: with self.test_session(graph=g) as sess: @@ -879,8 +912,6 @@ class MakeCsvDatasetTest(test.TestCase): fn, defaults=None, column_names=None, - batch_size=1, - num_epochs=1, label_name=None, na_value="NAN", default_float_type=dtypes.float32, @@ -903,8 +934,6 @@ class MakeCsvDatasetTest(test.TestCase): fn, defaults=None, column_names=None, - batch_size=1, - num_epochs=1, label_name=None, na_value="NAN", default_float_type=dtypes.float64, @@ -912,11 +941,99 @@ class MakeCsvDatasetTest(test.TestCase): features = dataset.make_one_shot_iterator().get_next() # Check that types match for i in range(len(expected_dtypes)): - assert features["col%d" % i].dtype == expected_dtypes[i] + self.assertAllEqual(features["col%d" % i].dtype, expected_dtypes[i]) for i in range(len(rows)): - assert sess.run(features) == dict(zip(col_names, expected[i])) + self.assertAllEqual( + sess.run(features), dict(zip(col_names, expected[i]))) + + def testMakeCSVDataset_withSelectColsError(self): + data = [[0, 1, 2, 3, 4], [5, 6, 
7, 8, 9]] + col_names = ["col%d" % i for i in range(5)] + fn = self._write_file("file.csv", [col_names] + data) + with self.assertRaises(ValueError): + # Mismatch in number of defaults and number of columns selected, + # should raise an error + self._make_csv_dataset( + fn, + defaults=[[0]] * 5, + column_names=col_names, + label_name=None, + select_cols=[1, 3]) + with self.assertRaises(ValueError): + # Invalid column name should raise an error + self._make_csv_dataset( + fn, + defaults=[[0]], + column_names=col_names, + label_name=None, + select_cols=["invalid_col_name"]) + + def testMakeCSVDataset_withSelectCols(self): + data = [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]] + col_names = ["col%d" % i for i in range(5)] + fn = self._write_file("file.csv", [col_names] + data) + # If select_cols is specified, should only yield a subset of columns + with ops.Graph().as_default() as g: + with self.test_session(graph=g) as sess: + dataset = self._make_csv_dataset( + fn, + defaults=[[0], [0]], + column_names=col_names, + label_name=None, + select_cols=[1, 3]) + expected = [[1, 3], [6, 8]] + features = dataset.make_one_shot_iterator().get_next() + for i in range(len(data)): + self.assertAllEqual( + sess.run(features), + dict(zip([col_names[1], col_names[3]], expected[i]))) + # Can still do default inference with select_cols + with ops.Graph().as_default() as g: + with self.test_session(graph=g) as sess: + dataset = self._make_csv_dataset( + fn, + defaults=None, + column_names=col_names, + label_name=None, + select_cols=[1, 3]) + expected = [[1, 3], [6, 8]] + features = dataset.make_one_shot_iterator().get_next() + for i in range(len(data)): + self.assertAllEqual( + sess.run(features), + dict(zip([col_names[1], col_names[3]], expected[i]))) + # Can still do column name inference + with ops.Graph().as_default() as g: + with self.test_session(graph=g) as sess: + dataset = self._make_csv_dataset( + fn, + defaults=None, + column_names=None, + label_name=None, + select_cols=[1, 3]) + 
expected = [[1, 3], [6, 8]] + features = dataset.make_one_shot_iterator().get_next() + for i in range(len(data)): + self.assertAllEqual( + sess.run(features), + dict(zip([col_names[1], col_names[3]], expected[i]))) + # Can specify column names instead of indices + with ops.Graph().as_default() as g: + with self.test_session(graph=g) as sess: + dataset = self._make_csv_dataset( + fn, + defaults=None, + column_names=None, + label_name=None, + select_cols=[col_names[1], col_names[3]]) + expected = [[1, 3], [6, 8]] + features = dataset.make_one_shot_iterator().get_next() + for i in range(len(data)): + self.assertAllEqual( + sess.run(features), + dict(zip([col_names[1], col_names[3]], expected[i]))) - def test_make_csv_dataset_with_shuffle(self): + def testMakeCSVDataset_withShuffle(self): total_records = self._num_files * self._num_records defaults = self.DEFAULTS for batch_size in [1, 2]: diff --git a/tensorflow/contrib/data/python/kernel_tests/scan_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/scan_dataset_op_test.py index e0494736b72ae52f586cb80d42a5c1e50ac17a61..1a97a84b2cba13e82c8af9c4c8ee413ee8264a5e 100644 --- a/tensorflow/contrib/data/python/kernel_tests/scan_dataset_op_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/scan_dataset_op_test.py @@ -24,9 +24,11 @@ import numpy as np from tensorflow.contrib.data.python.kernel_tests import dataset_serialization_test_base from tensorflow.contrib.data.python.ops import scan_ops from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.eager import context from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors +from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.platform import test @@ -57,19 +59,24 @@ class ScanDatasetTest(test.TestCase): with self.assertRaises(errors.OutOfRangeError): sess.run(next_element) + 
@test_util.run_in_graph_and_eager_modes() def testFibonacci(self): iterator = dataset_ops.Dataset.from_tensors(1).repeat(None).apply( scan_ops.scan([0, 1], lambda a, _: ([a[1], a[0] + a[1]], a[1])) ).make_one_shot_iterator() - next_element = iterator.get_next() - with self.test_session() as sess: - self.assertEqual(1, sess.run(next_element)) - self.assertEqual(1, sess.run(next_element)) - self.assertEqual(2, sess.run(next_element)) - self.assertEqual(3, sess.run(next_element)) - self.assertEqual(5, sess.run(next_element)) - self.assertEqual(8, sess.run(next_element)) + if context.executing_eagerly(): + next_element = iterator.get_next + else: + get_next = iterator.get_next() + next_element = lambda: get_next + + self.assertEqual(1, self.evaluate(next_element())) + self.assertEqual(1, self.evaluate(next_element())) + self.assertEqual(2, self.evaluate(next_element())) + self.assertEqual(3, self.evaluate(next_element())) + self.assertEqual(5, self.evaluate(next_element())) + self.assertEqual(8, self.evaluate(next_element())) def testChangingStateShape(self): # Test the fixed-point shape invariant calculations: start with diff --git a/tensorflow/contrib/data/python/kernel_tests/sequence_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/sequence_dataset_op_test.py index b044ff17757e6972ddefa7456db943f14c74bee8..d0cb203a3afd2775756c8542a1e86faedc5cee53 100644 --- a/tensorflow/contrib/data/python/kernel_tests/sequence_dataset_op_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/sequence_dataset_op_test.py @@ -47,6 +47,11 @@ class SequenceDatasetSerializationTest( # Skip nothing self.run_core_tests(lambda: self._build_skip_dataset(0), None, 10) + def testInvalidSkip(self): + with self.assertRaisesRegexp(ValueError, + 'Shape must be rank 0 but is rank 1'): + self.run_core_tests(lambda: self._build_skip_dataset([1, 2]), None, 0) + def _build_take_dataset(self, count): components = (np.arange(10),) return 
dataset_ops.Dataset.from_tensor_slices(components).take(count) @@ -69,6 +74,11 @@ class SequenceDatasetSerializationTest( # Take nothing self.run_core_tests(lambda: self._build_take_dataset(0), None, 0) + def testInvalidTake(self): + with self.assertRaisesRegexp(ValueError, + 'Shape must be rank 0 but is rank 1'): + self.run_core_tests(lambda: self._build_take_dataset([1, 2]), None, 0) + def _build_repeat_dataset(self, count, take_count=3): components = (np.arange(10),) return dataset_ops.Dataset.from_tensor_slices(components).take( diff --git a/tensorflow/contrib/data/python/kernel_tests/stats_dataset_ops_test.py b/tensorflow/contrib/data/python/kernel_tests/stats_dataset_ops_test.py index 07bdf920446e953c2a1abaf495d2e9e1256106fd..5c74ed6ae7210e8e22efb6e8fdb773397459ce1e 100644 --- a/tensorflow/contrib/data/python/kernel_tests/stats_dataset_ops_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/stats_dataset_ops_test.py @@ -50,17 +50,17 @@ class StatsDatasetTest(test.TestCase): self.fail("Expected tag %r not found in summary %r" % (tag, summary_proto)) def testBytesProduced(self): + stats_aggregator = stats_ops.StatsAggregator() dataset = dataset_ops.Dataset.range(100).map( lambda x: array_ops.tile([x], ops.convert_to_tensor([x]))).apply( - stats_ops.bytes_produced_stats("bytes_produced")) + stats_ops.bytes_produced_stats("bytes_produced")).apply( + stats_ops.set_stats_aggregator(stats_aggregator)) iterator = dataset.make_initializable_iterator() - stats_aggregator = stats_ops.StatsAggregator() - stats_aggregator_subscriber = stats_aggregator.subscribe(iterator) next_element = iterator.get_next() summary_t = stats_aggregator.get_summary() with self.test_session() as sess: - sess.run([iterator.initializer, stats_aggregator_subscriber]) + sess.run(iterator.initializer) expected_sum = 0.0 for i in range(100): self.assertAllEqual( @@ -76,16 +76,16 @@ class StatsDatasetTest(test.TestCase): self._assertSummaryHasSum(summary_str, "bytes_produced", expected_sum) 
def testLatencyStats(self): + stats_aggregator = stats_ops.StatsAggregator() dataset = dataset_ops.Dataset.range(100).apply( - stats_ops.latency_stats("record_latency")) + stats_ops.latency_stats("record_latency")).apply( + stats_ops.set_stats_aggregator(stats_aggregator)) iterator = dataset.make_initializable_iterator() - stats_aggregator = stats_ops.StatsAggregator() - stats_aggregator_subscriber = stats_aggregator.subscribe(iterator) next_element = iterator.get_next() summary_t = stats_aggregator.get_summary() with self.test_session() as sess: - sess.run([iterator.initializer, stats_aggregator_subscriber]) + sess.run(iterator.initializer) for i in range(100): self.assertEqual(i, sess.run(next_element)) self._assertSummaryHasCount( @@ -95,16 +95,15 @@ class StatsDatasetTest(test.TestCase): self._assertSummaryHasCount(sess.run(summary_t), "record_latency", 100.0) def testReinitialize(self): + stats_aggregator = stats_ops.StatsAggregator() dataset = dataset_ops.Dataset.range(100).apply( - stats_ops.latency_stats("record_latency")) + stats_ops.latency_stats("record_latency")).apply( + stats_ops.set_stats_aggregator(stats_aggregator)) iterator = dataset.make_initializable_iterator() - stats_aggregator = stats_ops.StatsAggregator() - stats_aggregator_subscriber = stats_aggregator.subscribe(iterator) next_element = iterator.get_next() summary_t = stats_aggregator.get_summary() with self.test_session() as sess: - sess.run(stats_aggregator_subscriber) for j in range(5): sess.run(iterator.initializer) for i in range(100): @@ -130,17 +129,17 @@ class StatsDatasetTest(test.TestCase): sess.run(next_element) def testMultipleTags(self): + stats_aggregator = stats_ops.StatsAggregator() dataset = dataset_ops.Dataset.range(100).apply( stats_ops.latency_stats("record_latency")).apply( - stats_ops.latency_stats("record_latency_2")) + stats_ops.latency_stats("record_latency_2")).apply( + stats_ops.set_stats_aggregator(stats_aggregator)) iterator = 
dataset.make_initializable_iterator() - stats_aggregator = stats_ops.StatsAggregator() - stats_aggregator_subscriber = stats_aggregator.subscribe(iterator) next_element = iterator.get_next() summary_t = stats_aggregator.get_summary() with self.test_session() as sess: - sess.run([iterator.initializer, stats_aggregator_subscriber]) + sess.run(iterator.initializer) for i in range(100): self.assertEqual(i, sess.run(next_element)) self._assertSummaryHasCount( @@ -154,17 +153,17 @@ class StatsDatasetTest(test.TestCase): sess.run(summary_t), "record_latency_2", 100.0) def testRepeatedTags(self): + stats_aggregator = stats_ops.StatsAggregator() dataset = dataset_ops.Dataset.range(100).apply( stats_ops.latency_stats("record_latency")).apply( - stats_ops.latency_stats("record_latency")) + stats_ops.latency_stats("record_latency")).apply( + stats_ops.set_stats_aggregator(stats_aggregator)) iterator = dataset.make_initializable_iterator() - stats_aggregator = stats_ops.StatsAggregator() - stats_aggregator_subscriber = stats_aggregator.subscribe(iterator) next_element = iterator.get_next() summary_t = stats_aggregator.get_summary() with self.test_session() as sess: - sess.run([iterator.initializer, stats_aggregator_subscriber]) + sess.run(iterator.initializer) for i in range(100): self.assertEqual(i, sess.run(next_element)) self._assertSummaryHasCount( @@ -174,19 +173,17 @@ class StatsDatasetTest(test.TestCase): self._assertSummaryHasCount(sess.run(summary_t), "record_latency", 200.0) def testMultipleIteratorsSameAggregator(self): + stats_aggregator = stats_ops.StatsAggregator() dataset = dataset_ops.Dataset.range(100).apply( - stats_ops.latency_stats("record_latency")) + stats_ops.latency_stats("record_latency")).apply( + stats_ops.set_stats_aggregator(stats_aggregator)) iterator_0 = dataset.make_initializable_iterator() iterator_1 = dataset.make_initializable_iterator() - stats_aggregator = stats_ops.StatsAggregator() - stats_aggregator_subscribers = 
[stats_aggregator.subscribe(iterator_0), - stats_aggregator.subscribe(iterator_1)] next_element = iterator_0.get_next() + iterator_1.get_next() summary_t = stats_aggregator.get_summary() with self.test_session() as sess: - sess.run([iterator_0.initializer, iterator_1.initializer, - stats_aggregator_subscribers]) + sess.run([iterator_0.initializer, iterator_1.initializer]) for i in range(100): self.assertEqual(i * 2, sess.run(next_element)) self._assertSummaryHasCount( @@ -195,20 +192,6 @@ class StatsDatasetTest(test.TestCase): sess.run(next_element) self._assertSummaryHasCount(sess.run(summary_t), "record_latency", 200.0) - def testMultipleStatsAggregatorsSameIteratorFail(self): - dataset = dataset_ops.Dataset.range(100).apply( - stats_ops.latency_stats("record_latency")) - iterator = dataset.make_initializable_iterator() - stats_aggregator_0 = stats_ops.StatsAggregator() - stats_aggregator_1 = stats_ops.StatsAggregator() - - with self.test_session() as sess: - sess.run(stats_aggregator_0.subscribe(iterator)) - # TODO(mrry): Consider making this allowable (and also allowing - # aggregators to unsubscribe). 
- with self.assertRaises(errors.FailedPreconditionError): - sess.run(stats_aggregator_1.subscribe(iterator)) - class StatsDatasetSerializationTest( dataset_serialization_test_base.DatasetSerializationTestBase): @@ -218,6 +201,14 @@ class StatsDatasetSerializationTest( lambda x: array_ops.tile([x], ops.convert_to_tensor([x]))).apply( stats_ops.bytes_produced_stats("bytes_produced")) + def test_bytes_produced_stats_invalid_tag_shape(self): + with self.assertRaisesRegexp( + ValueError, 'Shape must be rank 0 but is rank 1'): + self.run_core_tests( + lambda: dataset_ops.Dataset.range(100).apply( + stats_ops.bytes_produced_stats(["bytes_produced"])), + None, 100) + def testBytesStatsDatasetSaveableCore(self): num_outputs = 100 self.run_core_tests( @@ -235,6 +226,14 @@ class StatsDatasetSerializationTest( return dataset_ops.Dataset.range(num_elements).apply( stats_ops.latency_stats(tag1)).apply(stats_ops.latency_stats(tag2)) + def test_latency_stats_invalid_tag_shape(self): + with self.assertRaisesRegexp( + ValueError, 'Shape must be rank 0 but is rank 1'): + self.run_core_tests( + lambda: dataset_ops.Dataset.range(100).apply( + stats_ops.latency_stats(["record_latency", "record_latency_2"])), + None, 100) + def testLatencyStatsDatasetSaveableCore(self): num_outputs = 100 @@ -253,5 +252,9 @@ class StatsDatasetSerializationTest( None, num_outputs) +# TODO(shivaniagrawal): Can not checkpoint input_pipeline with the +# transformation `stats_ops.set_stats_aggregator`, since we don't support +# serializing StatsAggregator yet. + if __name__ == "__main__": test.main() diff --git a/tensorflow/contrib/data/python/kernel_tests/writer_ops_test.py b/tensorflow/contrib/data/python/kernel_tests/writer_ops_test.py new file mode 100644 index 0000000000000000000000000000000000000000..c603ecc5ab27a711557376246b093fd5f80f8aec --- /dev/null +++ b/tensorflow/contrib/data/python/kernel_tests/writer_ops_test.py @@ -0,0 +1,117 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for the experimental input pipeline ops.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os + +from tensorflow.contrib.data.python.ops import writers +from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.data.ops import readers +from tensorflow.python.framework import dtypes +from tensorflow.python.lib.io import python_io +from tensorflow.python.lib.io import tf_record +from tensorflow.python.ops import array_ops +from tensorflow.python.platform import test +from tensorflow.python.util import compat + + +class TFRecordWriterTest(test.TestCase): + + def setUp(self): + super(TFRecordWriterTest, self).setUp() + self._num_records = 7 + self.filename = array_ops.placeholder(dtypes.string, shape=[]) + self.compression_type = array_ops.placeholder_with_default("", shape=[]) + + input_dataset = readers.TFRecordDataset([self.filename], + self.compression_type) + self.writer = writers.TFRecordWriter( + self._outputFilename(), self.compression_type).write(input_dataset) + + def _record(self, i): + return compat.as_bytes("Record %d" % (i)) + + def _createFile(self, options=None): + filename = self._inputFilename() + writer = python_io.TFRecordWriter(filename, options) + for i in range(self._num_records): + writer.write(self._record(i)) + 
writer.close() + return filename + + def _inputFilename(self): + return os.path.join(self.get_temp_dir(), "tf_record.in.txt") + + def _outputFilename(self): + return os.path.join(self.get_temp_dir(), "tf_record.out.txt") + + def testWrite(self): + with self.test_session() as sess: + sess.run( + self.writer, feed_dict={ + self.filename: self._createFile(), + }) + for i, r in enumerate(tf_record.tf_record_iterator(self._outputFilename())): + self.assertAllEqual(self._record(i), r) + + def testWriteZLIB(self): + options = tf_record.TFRecordOptions(tf_record.TFRecordCompressionType.ZLIB) + with self.test_session() as sess: + sess.run( + self.writer, + feed_dict={ + self.filename: self._createFile(options), + self.compression_type: "ZLIB", + }) + for i, r in enumerate( + tf_record.tf_record_iterator(self._outputFilename(), options=options)): + self.assertAllEqual(self._record(i), r) + + def testWriteGZIP(self): + options = tf_record.TFRecordOptions(tf_record.TFRecordCompressionType.GZIP) + with self.test_session() as sess: + sess.run( + self.writer, + feed_dict={ + self.filename: self._createFile(options), + self.compression_type: "GZIP", + }) + for i, r in enumerate( + tf_record.tf_record_iterator(self._outputFilename(), options=options)): + self.assertAllEqual(self._record(i), r) + + def testFailDataset(self): + with self.assertRaises(TypeError): + writers.TFRecordWriter(self._outputFilename(), + self.compression_type).write("whoops") + + def testFailDType(self): + input_dataset = dataset_ops.Dataset.from_tensors(10) + with self.assertRaises(TypeError): + writers.TFRecordWriter(self._outputFilename(), + self.compression_type).write(input_dataset) + + def testFailShape(self): + input_dataset = dataset_ops.Dataset.from_tensors([["hello"], ["world"]]) + with self.assertRaises(TypeError): + writers.TFRecordWriter(self._outputFilename(), + self.compression_type).write(input_dataset) + + +if __name__ == "__main__": + test.main() diff --git 
a/tensorflow/contrib/data/python/ops/BUILD b/tensorflow/contrib/data/python/ops/BUILD index 236792bb98ef950963af54a9b5582b0a929f01f8..5b04c5316cfbb7577b3f8b3b6d364fc665d14c21 100644 --- a/tensorflow/contrib/data/python/ops/BUILD +++ b/tensorflow/contrib/data/python/ops/BUILD @@ -12,18 +12,26 @@ load( load("//tensorflow:tensorflow.bzl", "tf_custom_op_py_library") py_library( - name = "dataset_ops", - srcs = [ - "counter.py", - "get_single_element.py", + name = "counter", + srcs = ["counter.py"], + srcs_version = "PY2AND3", + deps = [ + ":scan_ops", + "//tensorflow/python:dtypes", + "//tensorflow/python:framework_ops", + "//tensorflow/python/data/ops:dataset_ops", ], +) + +py_library( + name = "get_single_element", + srcs = ["get_single_element.py"], srcs_version = "PY2AND3", deps = [ - ":transformation_ops", "//tensorflow/python:dataset_ops_gen", - "//tensorflow/python:util", "//tensorflow/python/data/ops:dataset_ops", "//tensorflow/python/data/util:nest", + "//tensorflow/python/data/util:sparse", ], ) @@ -66,7 +74,8 @@ py_library( ], srcs_version = "PY2AND3", deps = [ - ":dataset_ops", + ":batching", + ":interleave_ops", ":shuffle_ops", "//tensorflow/python:constant_op", "//tensorflow/python:dataset_ops_gen", @@ -94,50 +103,192 @@ py_library( ], srcs_version = "PY2AND3", deps = [ - ":random_ops", - ":transformation_ops", "//tensorflow/python/data/ops:dataset_ops", ], ) py_library( - name = "transformation_ops", - srcs = [ - "batching.py", - "enumerate_ops.py", - "error_ops.py", - "grouping.py", - "interleave_ops.py", - "resampling.py", - "scan_ops.py", - "sliding.py", - "stats_ops.py", - "threadpool.py", - "unique.py", + name = "batching", + srcs = ["batching.py"], + srcs_version = "PY2AND3", + deps = [ + "//tensorflow/contrib/framework:framework_py", + "//tensorflow/python:array_ops", + "//tensorflow/python:dataset_ops_gen", + "//tensorflow/python:dtypes", + "//tensorflow/python:framework_ops", + "//tensorflow/python:math_ops", + 
"//tensorflow/python:tensor_shape", + "//tensorflow/python:tensor_util", + "//tensorflow/python/data/ops:dataset_ops", + "//tensorflow/python/data/util:nest", + "//tensorflow/python/data/util:sparse", + ], +) + +py_library( + name = "enumerate_ops", + srcs = ["enumerate_ops.py"], + srcs_version = "PY2AND3", + deps = [ + "//tensorflow/python:dtypes", + "//tensorflow/python/data/ops:dataset_ops", + ], +) + +py_library( + name = "error_ops", + srcs = ["error_ops.py"], + srcs_version = "PY2AND3", + deps = [ + ":contrib_op_loader", + ":gen_dataset_ops", + "//tensorflow/python/data/ops:dataset_ops", + "//tensorflow/python/data/util:nest", + "//tensorflow/python/data/util:sparse", ], +) + +py_library( + name = "grouping", + srcs = ["grouping.py"], + srcs_version = "PY2AND3", + deps = [ + "//tensorflow/python:array_ops", + "//tensorflow/python:check_ops", + "//tensorflow/python:dtypes", + "//tensorflow/python:framework_ops", + "//tensorflow/python:function", + "//tensorflow/python:math_ops", + "//tensorflow/python:tensor_shape", + "//tensorflow/python/data/ops:dataset_ops", + "//tensorflow/python/data/util:nest", + "//tensorflow/python/data/util:sparse", + ], +) + +py_library( + name = "interleave_ops", + srcs = ["interleave_ops.py"], srcs_version = "PY2AND3", deps = [ ":contrib_op_loader", ":gen_dataset_ops", + ":random_ops", + "//tensorflow/contrib/stateless", + "//tensorflow/python:array_ops", + "//tensorflow/python:dtypes", + "//tensorflow/python:framework_ops", + "//tensorflow/python:math_ops", + "//tensorflow/python:util", + "//tensorflow/python/data/ops:readers", + "//tensorflow/python/data/util:nest", + "//tensorflow/python/data/util:sparse", + ], +) + +py_library( + name = "resampling", + srcs = ["resampling.py"], + srcs_version = "PY2AND3", + deps = [ + ":batching", + ":scan_ops", "//tensorflow/python:array_ops", "//tensorflow/python:control_flow_ops", - "//tensorflow/python:dataset_ops_gen", "//tensorflow/python:dtypes", "//tensorflow/python:framework_ops", - 
"//tensorflow/python:function", "//tensorflow/python:logging_ops", "//tensorflow/python:math_ops", "//tensorflow/python:random_ops", + "//tensorflow/python/data/ops:dataset_ops", + ], +) + +py_library( + name = "scan_ops", + srcs = ["scan_ops.py"], + srcs_version = "PY2AND3", + deps = [ + "//tensorflow/python:dataset_ops_gen", + "//tensorflow/python:framework_ops", + "//tensorflow/python:function", + "//tensorflow/python/data/ops:dataset_ops", + "//tensorflow/python/data/util:nest", + "//tensorflow/python/data/util:sparse", + ], +) + +py_library( + name = "sliding", + srcs = ["sliding.py"], + srcs_version = "PY2AND3", + deps = [ + "//tensorflow/python:dataset_ops_gen", + "//tensorflow/python:dtypes", + "//tensorflow/python:framework_ops", + "//tensorflow/python:function", + "//tensorflow/python/data/ops:dataset_ops", + "//tensorflow/python/data/util:nest", + "//tensorflow/python/data/util:sparse", + ], +) + +py_library( + name = "stats_ops", + srcs = ["stats_ops.py"], + srcs_version = "PY2AND3", + deps = [ + "//tensorflow/python:dataset_ops_gen", + "//tensorflow/python:dtypes", + "//tensorflow/python:framework_ops", + "//tensorflow/python/data/ops:dataset_ops", + "//tensorflow/python/data/ops:iterator_ops", + "//tensorflow/python/data/util:nest", + "//tensorflow/python/data/util:sparse", + ], +) + +py_library( + name = "threadpool", + srcs = ["threadpool.py"], + srcs_version = "PY2AND3", + deps = [ + ":contrib_op_loader", + ":gen_dataset_ops", "//tensorflow/python:resource_variable_ops", - "//tensorflow/python:tensor_shape", - "//tensorflow/python:tensor_util", - "//tensorflow/python:util", "//tensorflow/python/data/ops:dataset_ops", - "//tensorflow/python/data/ops:readers", - "//tensorflow/python/data/util:convert", "//tensorflow/python/data/util:nest", "//tensorflow/python/data/util:sparse", - "//third_party/py/numpy", + "//tensorflow/python/eager:context", + ], +) + +py_library( + name = "unique", + srcs = [ + "unique.py", + ], + srcs_version = "PY2AND3", + deps 
= [ + ":contrib_op_loader", + ":gen_dataset_ops", + "//tensorflow/python:dtypes", + "//tensorflow/python/data/ops:dataset_ops", + "//tensorflow/python/data/util:nest", + "//tensorflow/python/data/util:sparse", + ], +) + +py_library( + name = "writers", + srcs = [ + "writers.py", + ], + srcs_version = "PY2AND3", + deps = [ + "//tensorflow/python:dtypes", + "//tensorflow/python/data/ops:dataset_ops", ], ) @@ -183,3 +334,30 @@ py_library( "//tensorflow/python/data/util:sparse", ], ) + +py_library( + name = "dataset_ops", + deps = [ + ":batching", + ":counter", + ":enumerate_ops", + ":error_ops", + ":get_single_element", + ":grouping", + ":interleave_ops", + ":prefetching_ops", + ":readers", + ":resampling", + ":scan_ops", + ":shuffle_ops", + ":sliding", + ":stats_ops", + ":threadpool", + ":unique", + ":writers", + "//tensorflow/python:dataset_ops_gen", + "//tensorflow/python:util", + "//tensorflow/python/data/ops:dataset_ops", + "//tensorflow/python/data/util:nest", + ], +) diff --git a/tensorflow/contrib/data/python/ops/batching.py b/tensorflow/contrib/data/python/ops/batching.py index a212adf6cf580267f9f1e6959bef95f04a4ad782..2152bcde84aae6b0c2b368e43750aafab3a04bf2 100644 --- a/tensorflow/contrib/data/python/ops/batching.py +++ b/tensorflow/contrib/data/python/ops/batching.py @@ -17,6 +17,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.contrib.framework import with_shape from tensorflow.python.data.ops import dataset_ops from tensorflow.python.data.util import nest from tensorflow.python.data.util import sparse @@ -79,28 +80,98 @@ def dense_to_sparse_batch(batch_size, row_shape): return _apply_fn +class UnbatchDataset(dataset_ops.Dataset): + """A dataset that splits the elements of its input into multiple elements.""" + + def __init__(self, input_dataset): + """See `unbatch()` for more details.""" + super(UnbatchDataset, self).__init__() + flat_shapes = 
nest.flatten(input_dataset.output_shapes) + if any(s.ndims == 0 for s in flat_shapes): + raise ValueError("Cannot unbatch an input with scalar components.") + known_batch_dim = tensor_shape.Dimension(None) + for s in flat_shapes: + try: + known_batch_dim = known_batch_dim.merge_with(s[0]) + except ValueError: + raise ValueError("Cannot unbatch an input whose components have " + "different batch sizes.") + self._input_dataset = input_dataset + + def _as_variant_tensor(self): + return gen_dataset_ops.unbatch_dataset( + self._input_dataset._as_variant_tensor(), # pylint: disable=protected-access + output_shapes=nest.flatten( + sparse.as_dense_shapes(self.output_shapes, self.output_classes)), + output_types=nest.flatten( + sparse.as_dense_types(self.output_types, self.output_classes))) + + @property + def output_classes(self): + return self._input_dataset.output_classes + + @property + def output_shapes(self): + return nest.map_structure(lambda s: s[1:], + self._input_dataset.output_shapes) + + @property + def output_types(self): + return self._input_dataset.output_types + + def unbatch(): - """A Transformation which splits the elements of a dataset. + """Splits elements of a dataset into multiple elements on the batch dimension. For example, if elements of the dataset are shaped `[B, a0, a1, ...]`, - where `B` may vary from element to element, then for each element in - the dataset, the unbatched dataset will contain `B` consecutive elements + where `B` may vary for each input element, then for each element in the + dataset, the unbatched dataset will contain `B` consecutive elements of shape `[a0, a1, ...]`. + ```python + # NOTE: The following example uses `{ ... }` to represent the contents + # of a dataset. + a = { ['a', 'b', 'c'], ['a', 'b'], ['a', 'b', 'c', 'd'] } + + a.apply(tf.contrib.data.unbatch()) == { + 'a', 'b', 'c', 'a', 'b', 'a', 'b', 'c', 'd'} + ``` + Returns: A `Dataset` transformation function, which can be passed to @{tf.data.Dataset.apply}. 
""" def _apply_fn(dataset): - - def unbatch_map(arg, *rest): + """Function from `Dataset` to `Dataset` that applies the transformation.""" + if not sparse.any_sparse(dataset.output_classes): + return UnbatchDataset(dataset) + + # NOTE(mrry): We must ensure that any SparseTensors in `dataset` + # are normalized to the rank-1 dense representation, so that the + # sparse-oblivious unbatching logic will slice them + # appropriately. This leads to a somewhat inefficient re-encoding step + # for all SparseTensor components. + # TODO(mrry): Consider optimizing this in future + # if it turns out to be a bottleneck. + def normalize(arg, *rest): if rest: - return dataset_ops.Dataset.from_tensor_slices((arg,) + rest) + return sparse.serialize_many_sparse_tensors((arg,) + rest) else: - return dataset_ops.Dataset.from_tensor_slices(arg) + return sparse.serialize_many_sparse_tensors(arg) - return dataset.flat_map(map_func=unbatch_map) + normalized_dataset = dataset.map(normalize) + + # NOTE(mrry): Our `map()` has lost information about the sparseness + # of any SparseTensor components, so re-apply the structure of the + # original dataset. + restructured_dataset = _RestructuredDataset( + normalized_dataset, + dataset.output_types, + dataset.output_shapes, + dataset.output_classes, + allow_unsafe_cast=True) + return UnbatchDataset(restructured_dataset) return _apply_fn @@ -264,7 +335,8 @@ class _RestructuredDataset(dataset_ops.Dataset): dataset, output_types, output_shapes=None, - output_classes=None): + output_classes=None, + allow_unsafe_cast=False): """Creates a new dataset with the given output types and shapes. The given `dataset` must have a structure that is convertible: @@ -282,6 +354,10 @@ class _RestructuredDataset(dataset_ops.Dataset): If omitted, the shapes will be inherited from `dataset`. output_classes: (Optional.) A nested structure of class types. If omitted, the class types will be inherited from `dataset`. + allow_unsafe_cast: (Optional.) 
If `True`, the caller may switch the + reported output types and shapes of the restructured dataset, e.g. to + switch a sparse tensor represented as `tf.variant` to its user-visible + type and shape. Raises: ValueError: If either `output_types` or `output_shapes` is not compatible @@ -290,14 +366,15 @@ class _RestructuredDataset(dataset_ops.Dataset): super(_RestructuredDataset, self).__init__() self._dataset = dataset - # Validate that the types are compatible. - output_types = nest.map_structure(dtypes.as_dtype, output_types) - flat_original_types = nest.flatten(dataset.output_types) - flat_new_types = nest.flatten(output_types) - if flat_original_types != flat_new_types: - raise ValueError( - "Dataset with output types %r cannot be restructured to have output " - "types %r" % (dataset.output_types, output_types)) + if not allow_unsafe_cast: + # Validate that the types are compatible. + output_types = nest.map_structure(dtypes.as_dtype, output_types) + flat_original_types = nest.flatten(dataset.output_types) + flat_new_types = nest.flatten(output_types) + if flat_original_types != flat_new_types: + raise ValueError( + "Dataset with output types %r cannot be restructured to have " + "output types %r" % (dataset.output_types, output_types)) self._output_types = output_types @@ -307,18 +384,19 @@ class _RestructuredDataset(dataset_ops.Dataset): nest.flatten( dataset.output_shapes)) else: - # Validate that the shapes are compatible. 
- nest.assert_same_structure(output_types, output_shapes) - flat_original_shapes = nest.flatten(dataset.output_shapes) - flat_new_shapes = nest.flatten_up_to(output_types, output_shapes) - - for original_shape, new_shape in zip(flat_original_shapes, - flat_new_shapes): - if not original_shape.is_compatible_with(new_shape): - raise ValueError( - "Dataset with output shapes %r cannot be restructured to have " - "incompatible output shapes %r" % (dataset.output_shapes, - output_shapes)) + if not allow_unsafe_cast: + # Validate that the shapes are compatible. + nest.assert_same_structure(output_types, output_shapes) + flat_original_shapes = nest.flatten(dataset.output_shapes) + flat_new_shapes = nest.flatten_up_to(output_types, output_shapes) + + for original_shape, new_shape in zip(flat_original_shapes, + flat_new_shapes): + if not original_shape.is_compatible_with(new_shape): + raise ValueError( + "Dataset with output shapes %r cannot be restructured to have " + "incompatible output shapes %r" % (dataset.output_shapes, + output_shapes)) self._output_shapes = nest.map_structure_up_to( output_types, tensor_shape.as_shape, output_shapes) if output_classes is None: @@ -345,6 +423,46 @@ class _RestructuredDataset(dataset_ops.Dataset): return self._output_shapes +def assert_element_shape(expected_shapes): + """Assert the shape of this `Dataset`. + + ```python + shapes = [tf.TensorShape([16, 256]), tf.TensorShape(None)] + result = dataset.apply(tf.contrib.data.assert_element_shape(shapes)) + print(result.output_shapes) # ==> "((16, 256), )" + ``` + + If the dataset shapes and `expected_shapes` are fully defined, assert that + they match. Otherwise, add an assert op that will validate the shapes when + the tensors are evaluated, and set the shapes on the tensors. + + Args: + expected_shapes: A nested structure of `tf.TensorShape` objects. 
+ + Returns: + A `Dataset` transformation function, which can be passed to + @{tf.data.Dataset.apply} + """ + + def _check_shape(*elements): + flatten_tensors = nest.flatten(elements) + flatten_shapes = nest.flatten(expected_shapes) + checked_tensors = [ + with_shape(shape, tensor) + for shape, tensor in zip(flatten_shapes, flatten_tensors) + ] + return nest.pack_sequence_as(elements, checked_tensors) + + def _apply_fn(dataset): + return _RestructuredDataset( + dataset.map(_check_shape), + dataset.output_types, + output_shapes=expected_shapes, + output_classes=dataset.output_classes) + + return _apply_fn + + class _MapAndBatchDataset(dataset_ops.MapDataset): """A `Dataset` that maps a function over a batch of elements.""" diff --git a/tensorflow/contrib/data/python/ops/grouping.py b/tensorflow/contrib/data/python/ops/grouping.py index 36591c055ae8f2c54981525ffcc3df128a990a61..0531f9cbb9da6e6df85fa46940ab1661ad742eb4 100644 --- a/tensorflow/contrib/data/python/ops/grouping.py +++ b/tensorflow/contrib/data/python/ops/grouping.py @@ -108,7 +108,7 @@ def bucket_by_sequence_length(element_length_func, fraction of padding in a batch which increases training step efficiency. Args: - element_length_func: function from element in `Dataset` to `tf.int64`, + element_length_func: function from element in `Dataset` to `tf.int32`, determines the length of the element, which will determine the bucket it goes into. bucket_boundaries: `list`, upper length boundaries of the buckets. 
diff --git a/tensorflow/contrib/data/python/ops/interleave_ops.py b/tensorflow/contrib/data/python/ops/interleave_ops.py index 91f19da02d4a479820782822475d9121125fc38e..812a50ecbf105393f7e422edbbdf5c87311d72c1 100644 --- a/tensorflow/contrib/data/python/ops/interleave_ops.py +++ b/tensorflow/contrib/data/python/ops/interleave_ops.py @@ -17,7 +17,18 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.contrib import stateless +from tensorflow.contrib.data.python.ops import contrib_op_loader # pylint: disable=unused-import +from tensorflow.contrib.data.python.ops import gen_dataset_ops +from tensorflow.contrib.data.python.ops import random_ops +from tensorflow.python.data.ops import dataset_ops from tensorflow.python.data.ops import readers +from tensorflow.python.data.util import nest +from tensorflow.python.data.util import sparse +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import math_ops from tensorflow.python.util import deprecation @@ -140,3 +151,92 @@ def sloppy_interleave(map_func, cycle_length, block_length=1): prefetch_input_elements=None) return _apply_fn + + +class DirectedInterleaveDataset(dataset_ops.Dataset): + """A substitute for `Dataset.interleave()` on a fixed list of datasets.""" + + def __init__(self, selector_input, data_inputs): + self._selector_input = selector_input + self._data_inputs = list(data_inputs) + + for data_input in data_inputs[1:]: + if (data_input.output_types != data_inputs[0].output_types or + data_input.output_classes != data_inputs[0].output_classes): + raise TypeError("All datasets must have the same type.") + + def _as_variant_tensor(self): + # pylint: disable=protected-access + return gen_dataset_ops.directed_interleave_dataset( + self._selector_input._as_variant_tensor(), + [data_input._as_variant_tensor() for data_input in 
self._data_inputs], + output_shapes=nest.flatten( + sparse.as_dense_shapes(self.output_shapes, self.output_classes)), + output_types=nest.flatten( + sparse.as_dense_types(self.output_types, self.output_classes))) + # pylint: enable=protected-access + + @property + def output_classes(self): + return self._data_inputs[0].output_classes + + @property + def output_shapes(self): + ret = self._data_inputs[0].output_shapes + for data_input in self._data_inputs[1:]: + ret = nest.pack_sequence_as(ret, [ + ts1.most_specific_compatible_shape(ts2) for (ts1, ts2) in zip( + nest.flatten(ret), nest.flatten(data_input.output_shapes)) + ]) + return ret + + @property + def output_types(self): + return self._data_inputs[0].output_types + + +def sample_from_datasets(datasets, weights=None, seed=None): + """Samples elements at random from the datasets in `datasets`. + + Args: + datasets: A list of @{tf.data.Dataset} objects with compatible structure. + weights: (Optional.) A list of `len(datasets)` floating-point values where + `weights[i]` represents the probability with which an element should be + sampled from `datasets[i]`, or a @{tf.data.Dataset} object where each + element is such a list. Defaults to a uniform distribution across + `datasets`. + seed: (Optional.) A `tf.int64` scalar `tf.Tensor`, representing the + random seed that will be used to create the distribution. See + @{tf.set_random_seed} for behavior. + + Returns: + A dataset that interleaves elements from `datasets` at random, according to + `weights` if provided, otherwise with uniform probability. + + Raises: + TypeError: If the `datasets` or `weights` arguments have the wrong type. + ValueError: If the `weights` argument is specified and does not match the + length of the `datasets` element. 
+ """ + num_datasets = len(datasets) + if weights is None: + weights = dataset_ops.Dataset.from_tensors([1.0] * num_datasets).repeat() + elif not isinstance(weights, dataset_ops.Dataset): + weights = ops.convert_to_tensor(weights, name="weights") + if weights.dtype not in (dtypes.float32, dtypes.float64): + raise TypeError("`weights` must be convertible to a tensor of " + "`tf.float32` or `tf.float64` elements.") + if not weights.shape.is_compatible_with([num_datasets]): + raise ValueError("`weights` must be a vector of length `len(datasets)`.") + weights = dataset_ops.Dataset.from_tensors(weights).repeat() + + # The `stateless_multinomial()` op expects log-probabilities, as opposed to + # weights. + logits_ds = weights.map(lambda *p: math_ops.log(p, name="logits")) + def select_dataset(logits, seed): + return array_ops.squeeze( + stateless.stateless_multinomial(logits, 1, seed=seed), axis=[0, 1]) + selector_input = dataset_ops.Dataset.zip( + (logits_ds, random_ops.RandomDataset(seed).batch(2))).map(select_dataset) + + return DirectedInterleaveDataset(selector_input, datasets) diff --git a/tensorflow/contrib/data/python/ops/prefetching_ops.py b/tensorflow/contrib/data/python/ops/prefetching_ops.py index 98651bb568b7da710da97c504a4c0db5e43527af..e4c9f8b58a2a4390004b0ad318163526b443d44f 100644 --- a/tensorflow/contrib/data/python/ops/prefetching_ops.py +++ b/tensorflow/contrib/data/python/ops/prefetching_ops.py @@ -25,9 +25,11 @@ from tensorflow.python.data.ops import dataset_ops from tensorflow.python.data.ops import iterator_ops from tensorflow.python.data.util import nest from tensorflow.python.data.util import sparse +from tensorflow.python.eager import context from tensorflow.python.framework import dtypes from tensorflow.python.framework import function from tensorflow.python.framework import ops +from tensorflow.python.ops import gen_dataset_ops as core_gen_dataset_ops # TODO(rohanj): Add a python class that constructs resource in the __init__ @@ -67,28 +69,67 
@@ def function_buffering_resource_reset(function_buffer_resource, name=None): # pylint: disable=protected-access class _PrefetchToDeviceIterator(object): - """A replacement for @{tf.data.Iterator} that prefetches to another device.""" + """A replacement for @{tf.data.Iterator} that prefetches to another device. - def __init__(self, input_dataset, device, buffer_size): + Args: + input_dataset: The input dataset + one_shot: If true, we make a one shot iterator that's already initialized. + device: A fully specified device string where we want to prefetch to + buffer_size: Size of the prefetching buffer. + shared_name: (Optional.) If non-empty, the returned iterator will be + shared under the given name across multiple sessions that share the + same devices (e.g. when using a remote server). + + Returns: + An Iterator type object. + """ + + def __init__(self, + input_dataset, + one_shot, + device, + buffer_size, + shared_name=None): self._input_dataset = input_dataset self._get_next_call_count = 0 - input_iterator = input_dataset.make_one_shot_iterator() - input_iterator_handle = input_iterator.string_handle() + self._one_shot = one_shot + if shared_name is None: + shared_name = "" + + if self._one_shot: + self._input_iterator = input_dataset.make_one_shot_iterator() + else: + self._input_iterator = iterator_ops.Iterator.from_structure( + self._input_dataset.output_types, self._input_dataset.output_shapes, + shared_name, self._input_dataset.output_classes) + input_iterator_handle = self._input_iterator.string_handle() @function.Defun(dtypes.string) def _prefetch_fn(handle): + """Prefetches one element from `input_iterator`.""" remote_iterator = iterator_ops.Iterator.from_string_handle( - handle, input_iterator.output_types, input_iterator.output_shapes, - input_iterator.output_classes) - return remote_iterator.get_next() + handle, self._input_iterator.output_types, + self._input_iterator.output_shapes, + self._input_iterator.output_classes) + ret = 
remote_iterator.get_next() + return nest.flatten(sparse.serialize_sparse_tensors(ret)) + + iterator_device = gen_dataset_ops.iterator_get_device( + self._input_iterator._iterator_resource) with ops.device(device): self._buffering_resource = function_buffering_resource( f=_prefetch_fn, - target_device=gen_dataset_ops.iterator_get_device( - input_iterator._iterator_resource), + target_device=iterator_device, string_arg=input_iterator_handle, - buffer_size=buffer_size) + buffer_size=buffer_size, + shared_name=shared_name) + + if not self._one_shot: + reset_op = function_buffering_resource_reset(self._buffering_resource) + with ops.control_dependencies([reset_op]): + self._initializer = self._input_iterator.make_initializer( + self._input_dataset) def get_next(self, name=None): """See @{tf.data.Iterator.get_next}.""" @@ -112,6 +153,12 @@ class _PrefetchToDeviceIterator(object): return ret + @property + def initializer(self): + if self._one_shot: + raise NotImplementedError("Can't initialize a one_shot_iterator") + return self._initializer + @property def output_classes(self): return self._input_dataset.output_classes @@ -123,6 +170,68 @@ class _PrefetchToDeviceIterator(object): @property def output_types(self): return self._input_dataset.output_types + + +class _PrefetchToDeviceEagerIterator(iterator_ops.EagerIterator): + """A replacement for @{tf.data.Iterator} that prefetches to another device. + + Args: + input_dataset: The input dataset + one_shot: If true, we make a one shot iterator that's already initialized. + device: A fully specified device string where we want to prefetch to + buffer_size: Size of the prefetching buffer. + shared_name: (Optional.) If non-empty, the returned iterator will be + shared under the given name across multiple sessions that share the + same devices (e.g. when using a remote server). + + Returns: + An Iterator type object. 
+ """ + + def __init__(self, + input_dataset, + device, + buffer_size): + with ops.device("/device:CPU:0"): + super(_PrefetchToDeviceEagerIterator, self).__init__(input_dataset) + input_iterator_handle = core_gen_dataset_ops.iterator_to_string_handle( + self._resource) + + self._device = device + + @function.Defun(dtypes.string) + def _prefetch_fn(handle): + """Prefetches one element from `input_iterator`.""" + remote_iterator = iterator_ops.Iterator.from_string_handle( + handle, self.output_types, self.output_shapes, self.output_classes) + ret = remote_iterator.get_next() + return nest.flatten(sparse.serialize_sparse_tensors(ret)) + + _prefetch_fn.add_to_graph(None) + + with ops.device(device): + self._buffering_resource = function_buffering_resource( + f=_prefetch_fn, + target_device=gen_dataset_ops.iterator_get_device(self._resource), + string_arg=input_iterator_handle, + buffer_size=buffer_size, + shared_name=iterator_ops._generate_shared_name( + "function_buffer_resource")) + + def _next_internal(self): + """Returns a nested structure of `tf.Tensor`s containing the next element. + """ + # This runs in sync mode as iterators use an error status to communicate + # that there is no more data to iterate over. + # TODO(b/77291417): Fix + with context.execution_mode(context.SYNC): + with ops.device(self._device): + ret = gen_dataset_ops.function_buffering_resource_get_next( + function_buffer_resource=self._buffering_resource, + output_types=self._flat_output_types) + return sparse.deserialize_sparse_tensors( + nest.pack_sequence_as(self._output_types, ret), self._output_types, + self._output_shapes, self._output_classes) # pylint: enable=protected-access @@ -134,14 +243,45 @@ class _PrefetchToDeviceDataset(dataset_ops.Dataset): self._device = device self._buffer_size = buffer_size if buffer_size is not None else 1 + # The static analysis cannot tell that the eager iterator's superclass has + # a `next()` method. 
+ # pylint: disable=non-iterator-returned + def __iter__(self): + """Creates an `Iterator` for enumerating the elements of this dataset. + + The returned iterator implements the Python iterator protocol and therefore + can only be used in eager mode. + + Returns: + An `Iterator` over the elements of this dataset. + + Raises: + RuntimeError: If eager execution is not enabled. + """ + if context.executing_eagerly(): + return _PrefetchToDeviceEagerIterator(self._input_dataset, self._device, + self._buffer_size) + else: + raise RuntimeError("dataset.__iter__() is only supported when eager " + "execution is enabled.") + # pylint: enable=non-iterator-returned + def make_one_shot_iterator(self): - return _PrefetchToDeviceIterator(self._input_dataset, self._device, - self._buffer_size) + if context.executing_eagerly(): + return _PrefetchToDeviceEagerIterator(self._input_dataset, self._device, + self._buffer_size) + else: + return _PrefetchToDeviceIterator(self._input_dataset, one_shot=True, + device=self._device, + buffer_size=self._buffer_size) def make_initializable_iterator(self, shared_name=None): - raise NotImplementedError("`prefetch_to_device()` is not currently " - "compatible with initializable iterators. Use " - "`make_one_shot_iterator()` instead.") + return _PrefetchToDeviceIterator( + self._input_dataset, + one_shot=False, + device=self._device, + buffer_size=self._buffer_size, + shared_name=shared_name) def _as_variant_tensor(self): # TODO(mrry): Raise this error earlier (e.g. 
when one of the Dataset diff --git a/tensorflow/contrib/data/python/ops/readers.py b/tensorflow/contrib/data/python/ops/readers.py index 9a48aa02fba4813fc670364bda7f91c0ce091a45..bbb808fbd7730002e48cab47fa8d0fe09e2124d2 100644 --- a/tensorflow/contrib/data/python/ops/readers.py +++ b/tensorflow/contrib/data/python/ops/readers.py @@ -124,18 +124,21 @@ def _next_csv_row(filenames, num_cols, field_delim, use_quote_delim, header, def _infer_column_defaults(filenames, num_cols, field_delim, use_quote_delim, na_value, header, comment, float_dtype, - rows_for_inference): + num_rows_for_inference, select_columns): """Infers column types from the first N valid CSV records of files.""" - inferred_types = [None] * num_cols + if select_columns is None: + select_columns = range(num_cols) + inferred_types = [None] * len(select_columns) - for rows_read, csv_row in enumerate( + for i, csv_row in enumerate( _next_csv_row(filenames, num_cols, field_delim, use_quote_delim, header, comment)): - if rows_for_inference is not None and rows_read >= rows_for_inference: + if num_rows_for_inference is not None and i >= num_rows_for_inference: break - for i, str_val in enumerate(csv_row): - inferred_types[i] = _infer_type(str_val, na_value, inferred_types[i], - float_dtype) + + for j, col_index in enumerate(select_columns): + inferred_types[j] = _infer_type(csv_row[col_index], na_value, + inferred_types[j], float_dtype) # Replace None's with a default type inferred_types = [t or dtypes.string for t in inferred_types] @@ -153,21 +156,55 @@ def _infer_column_names(filenames, field_delim, use_quote_delim): "quoting": csv.QUOTE_MINIMAL if use_quote_delim else csv.QUOTE_NONE } with file_io.FileIO(filenames[0], "r") as f: - column_names = next(csv.reader(f, **csv_kwargs)) + try: + column_names = next(csv.reader(f, **csv_kwargs)) + except StopIteration: + raise ValueError(("Received StopIteration when reading the header line " + "of %s. 
Empty file?") % filenames[0]) for name in filenames[1:]: with file_io.FileIO(name, "r") as f: - if next(csv.reader(f, **csv_kwargs)) != column_names: - raise ValueError("Files have different column names in the header row.") + try: + if next(csv.reader(f, **csv_kwargs)) != column_names: + raise ValueError( + "Files have different column names in the header row.") + except StopIteration: + raise ValueError(("Received StopIteration when reading the header line " + "of %s. Empty file?") % filenames[0]) return column_names +def _get_sorted_col_indices(select_columns, column_names): + """Transforms select_columns argument into sorted column indices.""" + names_to_indices = {n: i for i, n in enumerate(column_names)} + num_cols = len(column_names) + for i, v in enumerate(select_columns): + if isinstance(v, int): + if v < 0 or v >= num_cols: + raise ValueError( + "Column index %d specified in select_columns out of valid range." % + v) + continue + if v not in names_to_indices: + raise ValueError( + "Value '%s' specified in select_columns not a valid column index or " + "name." % v) + select_columns[i] = names_to_indices[v] + + # Sort and ensure there are no duplicates + result = sorted(set(select_columns)) + if len(result) != len(select_columns): + raise ValueError("select_columns contains duplicate columns") + return result + + def make_csv_dataset( file_pattern, batch_size, column_names=None, column_defaults=None, label_name=None, + select_columns=None, field_delim=",", use_quote_delim=True, na_value="", @@ -201,20 +238,32 @@ def make_csv_dataset( provided, infers the column names from the first row of the records. These names will be the keys of the features dict of each dataset element. column_defaults: A optional list of default values for the CSV fields. One - item per column of the input record. Each item in the list is either a - valid CSV dtype (float32, float64, int32, int64, or string), or a + item per selected column of the input record. 
Each item in the list is + either a valid CSV dtype (float32, float64, int32, int64, or string), or a `Tensor` with one of the aforementioned types. The tensor can either be a scalar default value (if the column is optional), or an empty tensor (if the column is required). If a dtype is provided instead of a tensor, the column is also treated as required. If this list is not provided, tries to infer types based on reading the first num_rows_for_inference rows of files specified, and assumes all columns are optional, defaulting to `0` - for numeric values and `""` for string values. + for numeric values and `""` for string values. If both this and + `select_columns` are specified, these must have the same lengths, and + `column_defaults` is assumed to be sorted in order of increasing column + index. label_name: A optional string corresponding to the label column. If provided, the data for this column is returned as a separate `Tensor` from the features dictionary, so that the dataset complies with the format expected by a `tf.Estimator.train` or `tf.Estimator.evaluate` input function. + select_columns: An optional list of integer indices or string column + names, that specifies a subset of columns of CSV data to select. If + column names are provided, these must correspond to names provided in + `column_names` or inferred from the file header lines. When this argument + is specified, only a subset of CSV columns will be parsed and returned, + corresponding to the columns specified. Using this results in faster + parsing and lower memory usage. If both this and `column_defaults` are + specified, these must have the same lengths, and `column_defaults` is + assumed to be sorted in order of increasing column index. field_delim: An optional `string`. Defaults to `","`. Char delimiter to separate fields in a record. use_quote_delim: An optional bool. Defaults to `True`. 
If false, treats @@ -279,6 +328,9 @@ def make_csv_dataset( if len(column_names) != len(set(column_names)): raise ValueError("Cannot have duplicate column names.") + if select_columns is not None: + select_columns = _get_sorted_col_indices(select_columns, column_names) + if column_defaults is not None: column_defaults = [ constant_op.constant([], dtype=x) if x in _ACCEPTABLE_CSV_TYPES else x @@ -289,7 +341,17 @@ def make_csv_dataset( # construction time column_defaults = _infer_column_defaults( filenames, len(column_names), field_delim, use_quote_delim, na_value, - header, comment, default_float_type, num_rows_for_inference) + header, comment, default_float_type, num_rows_for_inference, + select_columns) + + if select_columns is not None and len(column_defaults) != len(select_columns): + raise ValueError( + "If specified, column_defaults and select_columns must have same " + "length." + ) + if select_columns is not None and len(column_names) > len(select_columns): + # Pick the relevant subset of column names + column_names = [column_names[i] for i in select_columns] if label_name is not None and label_name not in column_names: raise ValueError("`label_name` provided must be one of the columns.") @@ -322,6 +384,7 @@ def make_csv_dataset( field_delim=field_delim, use_quote_delim=use_quote_delim, na_value=na_value, + select_cols=select_columns, ) features = dict(zip(column_names, columns)) if label_name is not None: @@ -370,7 +433,8 @@ def make_batched_features_dataset(file_pattern, prefetch_buffer_size=1, reader_num_threads=1, parser_num_threads=2, - sloppy_ordering=False): + sloppy_ordering=False, + drop_final_batch=False): """Returns a `Dataset` of feature dictionaries from `Example` protos. Example: @@ -443,6 +507,9 @@ def make_batched_features_dataset(file_pattern, produced is deterministic prior to shuffling (elements are still randomized if `shuffle=True`. Note that if the seed is set, then order of elements after shuffling is deterministic). 
Defaults to `False`. + drop_final_batch: If `True`, and the batch size does not evenly divide the + input dataset size, the final smaller batch will be dropped. Defaults to + `False`. Returns: A dataset of `dict` elements. Each `dict` maps feature keys to @@ -481,7 +548,10 @@ def make_batched_features_dataset(file_pattern, elif shuffle: dataset = dataset.shuffle(shuffle_buffer_size, shuffle_seed) - dataset = dataset.batch(batch_size) + if drop_final_batch: + dataset = dataset.apply(batching.batch_and_drop_remainder(batch_size)) + else: + dataset = dataset.batch(batch_size) # Parse `Example` tensors to a dictionary of `Feature` tensors. dataset = dataset.map( diff --git a/tensorflow/contrib/data/python/ops/resampling.py b/tensorflow/contrib/data/python/ops/resampling.py index b465397437adbdfaf865efb8ed2f80e57f48fcab..a182dddd38d23d096979eebb8de29f07573833dd 100644 --- a/tensorflow/contrib/data/python/ops/resampling.py +++ b/tensorflow/contrib/data/python/ops/resampling.py @@ -110,7 +110,6 @@ def rejection_resample(class_func, target_dist, initial_dist=None, seed=None): .filter(lambda _1, p, _2: random_ops.random_uniform([], seed=seed) < p)) return filtered_ds.map(lambda class_value, _, data: (class_value, data)) - return _apply_fn diff --git a/tensorflow/contrib/data/python/ops/scan_ops.py b/tensorflow/contrib/data/python/ops/scan_ops.py index 1c88366273f5d186509454188e02350d4ea9f66b..60ef7efba4bb2bc281bc624ec3f58117ffa9a824 100644 --- a/tensorflow/contrib/data/python/ops/scan_ops.py +++ b/tensorflow/contrib/data/python/ops/scan_ops.py @@ -57,7 +57,7 @@ class _ScanDataset(dataset_ops.Dataset): self._output_shapes = None self._output_types = None - # Iteratively rerun the scan function until reaching a fixed pont on + # Iteratively rerun the scan function until reaching a fixed point on # `self._state_shapes`. 
need_to_rerun = True while need_to_rerun: @@ -144,6 +144,7 @@ class _ScanDataset(dataset_ops.Dataset): weakened_state_shapes) self._scan_func = tf_scan_func + self._scan_func.add_to_graph(ops.get_default_graph()) def _as_variant_tensor(self): input_t = self._input_dataset._as_variant_tensor() # pylint: disable=protected-access diff --git a/tensorflow/contrib/data/python/ops/stats_ops.py b/tensorflow/contrib/data/python/ops/stats_ops.py index b5cf0fcfe91ebc22444302fca5d488a278ef2994..d39172039683fe8f333572bb1dbf12abfab65113 100644 --- a/tensorflow/contrib/data/python/ops/stats_ops.py +++ b/tensorflow/contrib/data/python/ops/stats_ops.py @@ -18,7 +18,6 @@ from __future__ import division from __future__ import print_function from tensorflow.python.data.ops import dataset_ops -from tensorflow.python.data.ops import iterator_ops from tensorflow.python.data.util import nest from tensorflow.python.data.util import sparse from tensorflow.python.framework import dtypes @@ -85,25 +84,53 @@ class StatsAggregator(object): """ return gen_dataset_ops.stats_aggregator_summary(self._resource) - def subscribe(self, iterator): - """Returns a @{tf.Operation} to associate this aggregator with `iterator`. - Note: Each @{tf.data.Iterator} can be associated with at most one - `StatsAggregator`. After running the operation that this function - returns, all statistics recorded in the iteration of `iterator` - will be stored in `stats_aggregator`. +class _SetStatsAggregatorDataset(dataset_ops.Dataset): + """A `Dataset` that acts as an identity, and sets given stats_aggregator.""" - Args: - iterator: A @{tf.data.Iterator} object. + def __init__(self, input_dataset, stats_aggregator): + super(_SetStatsAggregatorDataset, self).__init__() + self._input_dataset = input_dataset + self._stats_aggregator = stats_aggregator - Returns: - A @{tf.Operation} that, when run, associates this aggregator with - `iterator`. 
- """ - if not isinstance(iterator, iterator_ops.Iterator): - raise TypeError("`iterator` must be a `tf.data.Iterator` object.") - return gen_dataset_ops.iterator_set_stats_aggregator( - iterator._iterator_resource, self._resource) # pylint: disable=protected-access + def _as_variant_tensor(self): + return gen_dataset_ops.set_stats_aggregator_dataset( + self._input_dataset._as_variant_tensor(), # pylint: disable=protected-access + self._stats_aggregator._resource, # pylint: disable=protected-access + output_types=nest.flatten( + sparse.as_dense_types(self.output_types, self.output_classes)), + output_shapes=nest.flatten( + sparse.as_dense_shapes(self.output_shapes, self.output_classes))) + + @property + def output_shapes(self): + return self._input_dataset.output_shapes + + @property + def output_types(self): + return self._input_dataset.output_types + + @property + def output_classes(self): + return self._input_dataset.output_classes + + +# TODO(shivaniagrawal): Expose these methods in `tf.contrib.data`. +def set_stats_aggregator(stats_aggregator): + """Set the given stats_aggregator for aggregating the input dataset stats. + + Args: + stats_aggregator: A `StatsAggregator` object. + + Returns: + A `Dataset` transformation function, which can be passed to + @{tf.data.Dataset.apply}. + """ + + def _apply_fn(dataset): + return _SetStatsAggregatorDataset(dataset, stats_aggregator) + + return _apply_fn def bytes_produced_stats(tag): diff --git a/tensorflow/contrib/data/python/ops/writers.py b/tensorflow/contrib/data/python/ops/writers.py new file mode 100644 index 0000000000000000000000000000000000000000..f53bd3f7383950d6cfdb35e12811fb1daf24b320 --- /dev/null +++ b/tensorflow/contrib/data/python/ops/writers.py @@ -0,0 +1,58 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Python wrappers for tf.data writers.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.data.util import convert +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_shape +from tensorflow.python.ops import gen_dataset_ops + + +class TFRecordWriter(object): + """Writes data to a TFRecord file.""" + + def __init__(self, filename, compression_type=None): + self._filename = ops.convert_to_tensor( + filename, dtypes.string, name="filename") + self._compression_type = convert.optional_param_to_tensor( + "compression_type", + compression_type, + argument_default="", + argument_dtype=dtypes.string) + + def write(self, dataset): + """Returns a @{tf.Operation} to write a dataset to a file. + + Args: + dataset: a @{tf.data.Dataset} whose elements are to be written to a file + + Returns: + A @{tf.Operation} that, when run, writes contents of `dataset` to a file. 
+ """ + if not isinstance(dataset, dataset_ops.Dataset): + raise TypeError("`dataset` must be a `tf.data.Dataset` object.") + if (dataset.output_types != dtypes.string or + dataset.output_shapes != tensor_shape.scalar()): + raise TypeError( + "`dataset` must produce scalar `DT_STRING` tensors whereas it " + "produces shape {0} and types {1}".format(dataset.output_shapes, + dataset.output_types)) + return gen_dataset_ops.dataset_to_tf_record( + dataset._as_variant_tensor(), self._filename, self._compression_type) # pylint: disable=protected-access diff --git a/tensorflow/contrib/distribute/README.md b/tensorflow/contrib/distribute/README.md index 4af51bec1a0687fb33c8d43f4e41b180288536a0..44a4481021c380e72b535cf0aca39df2bf04d3b7 100644 --- a/tensorflow/contrib/distribute/README.md +++ b/tensorflow/contrib/distribute/README.md @@ -77,7 +77,7 @@ parameter of `Estimator`. ```python distribution = tf.contrib.distribute.MirroredStrategy() -config = tf.estimator.RunConfig(distribute=distribution) +config = tf.estimator.RunConfig(train_distribute=distribution) classifier = tf.estimator.Estimator(model_fn=model_fn, config=config) classifier.train(input_fn=input_fn) ``` @@ -116,8 +116,9 @@ in the input function gives a solid boost in performance. When using ## Caveats This feature is in early stages and there are a lot of improvements forthcoming: -* Metrics are not yet supported during distributed training. -* Summaries are currently computed in every tower. +* Metrics are not yet supported during distributed training. They are still +supported during the evaluation. +* Summaries are only computed in the first tower in `MirroredStrategy`. * Evaluation is not yet distributed. * Eager support is in the works; performance can be more challenging with eager execution. @@ -129,10 +130,6 @@ effective batch size will be `num_gpus * batch_size`. Therefore, consider adjusting your learning rate or batch size according to the number of GPUs. 
We are working on addressing this limitation by splitting each batch across GPUs instead. -* Dictionaries inside dataset in the input are not supported when prefetching -on GPUs is turned on. (If you need to use dictionaries in the dataset, turn off -prefetching on GPUs by passing param `prefetch_on_device=False` to -`MirroredStrategy`) * PartitionedVariables are not supported yet. ## What's next? diff --git a/tensorflow/contrib/distribute/python/BUILD b/tensorflow/contrib/distribute/python/BUILD index 78b2b0054aa95701ad192b4fb9a0727ce287de4b..c2834d822664b9d60690c5d5dd527bbbd01a106f 100644 --- a/tensorflow/contrib/distribute/python/BUILD +++ b/tensorflow/contrib/distribute/python/BUILD @@ -22,11 +22,13 @@ py_library( visibility = ["//tensorflow:internal"], deps = [ ":prefetching_ops_v2", - "//tensorflow/contrib/data/python/ops:transformation_ops", + "//tensorflow/contrib/data/python/ops:batching", "//tensorflow/contrib/eager/python:datasets", "//tensorflow/python:array_ops", "//tensorflow/python:checkpointable", "//tensorflow/python:control_flow_ops", + "//tensorflow/python:device_util", + "//tensorflow/python:distribute", "//tensorflow/python:framework_ops", "//tensorflow/python:training", "//tensorflow/python:util", @@ -51,6 +53,7 @@ cuda_py_test( "//tensorflow/python:training", "//tensorflow/python:variable_scope", "//tensorflow/python/eager:context", + "//tensorflow/python:device_util", "//tensorflow/python/eager:test", "//tensorflow/python/estimator:model_fn", ], @@ -66,6 +69,8 @@ py_library( ":values", "//tensorflow/python:array_ops", "//tensorflow/python:device", + "//tensorflow/python:device_util", + "//tensorflow/python:distribute", "//tensorflow/python:framework_ops", "//tensorflow/python:pywrap_tensorflow", "//tensorflow/python:training", @@ -84,9 +89,9 @@ py_library( ":values", "//tensorflow/contrib/eager/python:datasets", "//tensorflow/python:array_ops", + "//tensorflow/python:distribute", "//tensorflow/python:framework_ops", 
"//tensorflow/python:math_ops", - "//tensorflow/python:training", "//tensorflow/python/eager:context", "@six_archive//:six", ], @@ -104,6 +109,7 @@ py_library( "//tensorflow/core:protos_all_py", "//tensorflow/python:array_ops", "//tensorflow/python:constant_op", + "//tensorflow/python:distribute", "//tensorflow/python:framework_ops", "//tensorflow/python:layers", "//tensorflow/python:training", @@ -125,6 +131,7 @@ py_library( deps = [ ":mirrored_strategy", ":one_device_strategy", + ":tpu_strategy", "//tensorflow/contrib/optimizer_v2:training", "//tensorflow/python:framework_ops", "//tensorflow/python:training", @@ -156,8 +163,8 @@ py_test( deps = [ ":mirrored_strategy", ":strategy_test_lib", + "//tensorflow/python:distribute", "//tensorflow/python:framework_test_lib", - "//tensorflow/python:training", "//tensorflow/python:variable_scope", "//tensorflow/python/eager:context", "//tensorflow/python/eager:test", @@ -186,10 +193,10 @@ cuda_py_test( ":mirrored_strategy", ":values", ":strategy_test_lib", + "//tensorflow/python:distribute", "//tensorflow/core:protos_all_py", "//tensorflow/python:constant_op", "//tensorflow/python:layers", - "//tensorflow/python:training", "//tensorflow/python:variable_scope", "//tensorflow/python:array_ops", "//tensorflow/python:framework_test_lib", @@ -219,21 +226,50 @@ py_library( ], ) -cuda_py_test( - name = "minimize_loss_test", +py_library( + name = "tpu_strategy", + srcs = ["tpu_strategy.py"], + visibility = ["//tensorflow:internal"], + deps = [ + ":one_device_strategy", + ":values", + "//tensorflow/contrib/tpu", + "//tensorflow/contrib/tpu:tpu_py", + "//tensorflow/python:constant_op", + "//tensorflow/python:control_flow_ops", + "//tensorflow/python:framework_ops", + "//tensorflow/python:util", + ], +) + +py_library( + name = "minimize_loss_test_lib", + testonly = 1, srcs = ["minimize_loss_test.py"], - additional_deps = [ + deps = [ ":combinations", + ":mirrored_strategy", ":single_loss_example", - 
"@absl_py//absl/testing:parameterized", - "//third_party/py/numpy", + "//tensorflow/contrib/tpu:tpu_lib", "//tensorflow/python:control_flow_ops", + "//tensorflow/python:framework_ops", "//tensorflow/python:math_ops", + "//tensorflow/python:variable_scope", "//tensorflow/python:variables", "//tensorflow/python/data/ops:dataset_ops", "//tensorflow/python/eager:context", "//tensorflow/python/eager:test", "//tensorflow/python/ops/losses", + "//third_party/py/numpy", + "@absl_py//absl/testing:parameterized", + ], +) + +cuda_py_test( + name = "minimize_loss_test", + srcs = ["minimize_loss_test.py"], + additional_deps = [ + ":minimize_loss_test_lib", ], tags = [ "multi_and_single_gpu", @@ -291,6 +327,7 @@ py_library( srcs = ["single_loss_example.py"], deps = [ ":step_fn", + "//tensorflow/contrib/data/python/ops:batching", "//tensorflow/python:array_ops", "//tensorflow/python:constant_op", "//tensorflow/python:layers", diff --git a/tensorflow/contrib/distribute/python/combinations.py b/tensorflow/contrib/distribute/python/combinations.py index 02b1e7ef9fcd4767c59898bd343e712e285e67d5..946310aa6fc2101d75e86d3ff2e9f3284e6c6625 100644 --- a/tensorflow/contrib/distribute/python/combinations.py +++ b/tensorflow/contrib/distribute/python/combinations.py @@ -45,6 +45,7 @@ from absl.testing import parameterized from tensorflow.contrib.distribute.python import mirrored_strategy from tensorflow.contrib.distribute.python import one_device_strategy +from tensorflow.contrib.distribute.python import tpu_strategy from tensorflow.contrib.optimizer_v2 import adam as adam_v2 from tensorflow.contrib.optimizer_v2 import gradient_descent as gradient_descent_v2 from tensorflow.python.eager import context @@ -55,6 +56,7 @@ from tensorflow.python.util import tf_inspect GPU_TEST = "test_gpu" in sys.argv[0] +TPU_TEST = "test_tpu" in sys.argv[0] def generate(combinations): @@ -108,6 +110,11 @@ def generate(combinations): if "distribution" in kwargs: distribution = kwargs["distribution"] 
kwargs["distribution"] = distribution.strategy + if distribution.required_tpu and not TPU_TEST: + self.skipTest("Test requires a TPU, but it's not available.") + if not distribution.required_tpu and TPU_TEST: + self.skipTest("Test that doesn't require a TPU.") + if not distribution.required_gpus: if GPU_TEST: self.skipTest("Test that doesn't require GPUs.") @@ -232,10 +239,12 @@ class NamedObject(object): class NamedDistribution(object): """Translates DistributionStrategy and its data into a good name.""" - def __init__(self, name, distribution, required_gpus): + def __init__(self, name, distribution, required_gpus=None, + required_tpu=False): self._distribution = distribution self._name = name self._required_gpus = required_gpus + self._required_tpu = required_tpu def __repr__(self): return self._name @@ -248,10 +257,16 @@ class NamedDistribution(object): def required_gpus(self): return self._required_gpus + @property + def required_tpu(self): + return self._required_tpu + one_device_strategy = NamedDistribution( "OneDeviceCPU", one_device_strategy.OneDeviceStrategy("/cpu:0"), None) +tpu_strategy = NamedDistribution( + "TPU", tpu_strategy.TPUStrategy(), required_tpu=True) mirrored_strategy_with_gpu_and_cpu = NamedDistribution( "MirroredCPUAndGPU", mirrored_strategy.MirroredStrategy(["/gpu:0", "/cpu:0"]), 1) diff --git a/tensorflow/contrib/distribute/python/cross_tower_ops.py b/tensorflow/contrib/distribute/python/cross_tower_ops.py index bbe5e877d59518056db3fea251cdae0ed854d0e4..cff717db80f0bdd377b3c9c7e8ca3578ff273930 100644 --- a/tensorflow/contrib/distribute/python/cross_tower_ops.py +++ b/tensorflow/contrib/distribute/python/cross_tower_ops.py @@ -488,7 +488,8 @@ class AllReduceCrossTowerOps(CrossTowerOps): "agg_small_grads_max_group = %d", len(per_device_values), self.all_reduce_alg, self.agg_small_grads_max_bytes, self.agg_small_grads_max_group) - tensor_packer = AggregateSmallTensorPacker(100, 10) + tensor_packer = AggregateSmallTensorPacker( + 
self.agg_small_grads_max_bytes, self.agg_small_grads_max_group) device_grad_packs = tensor_packer.pack(grouped) else: logging.info( diff --git a/tensorflow/contrib/distribute/python/estimator_integration_test.py b/tensorflow/contrib/distribute/python/estimator_integration_test.py index 9be186a7241690816898d225b63542f4e81dd44c..c5a520ab5aeafb932092ebbbaaf07480cf40403b 100644 --- a/tensorflow/contrib/distribute/python/estimator_integration_test.py +++ b/tensorflow/contrib/distribute/python/estimator_integration_test.py @@ -61,7 +61,7 @@ class DNNLinearCombinedClassifierIntegrationTest(test.TestCase, mode=['graph'], distribution=[ combinations.one_device_strategy, - combinations.mirrored_strategy_without_prefetch + combinations.mirrored_strategy_with_gpu_and_cpu ])) def test_complete_flow_with_mode(self, distribution): label_dimension = 2 @@ -95,7 +95,7 @@ class DNNLinearCombinedClassifierIntegrationTest(test.TestCase, # TODO(isaprykin): Work around the colocate_with error. dnn_optimizer=adagrad.AdagradOptimizer(0.001), linear_optimizer=adagrad.AdagradOptimizer(0.001), - config=run_config.RunConfig(distribute=distribution)) + config=run_config.RunConfig(train_distribute=distribution)) num_steps = 10 estimator.train(train_input_fn, steps=num_steps) diff --git a/tensorflow/contrib/distribute/python/examples/simple_estimator_example.py b/tensorflow/contrib/distribute/python/examples/simple_estimator_example.py index 5d6e02b4b9625cfc4b0d9c87719c6ecf6b57671f..00c25c7a2482a559c8b94ff3be86c4961dfb439f 100644 --- a/tensorflow/contrib/distribute/python/examples/simple_estimator_example.py +++ b/tensorflow/contrib/distribute/python/examples/simple_estimator_example.py @@ -59,7 +59,7 @@ def build_model_fn_optimizer(): def main(_): distribution = tf.contrib.distribute.MirroredStrategy( ["/device:GPU:0", "/device:GPU:1"]) - config = tf.estimator.RunConfig(distribute=distribution) + config = tf.estimator.RunConfig(train_distribute=distribution) def input_fn(): features = 
tf.data.Dataset.from_tensors([[1.]]).repeat(10) diff --git a/tensorflow/contrib/distribute/python/examples/simple_tfkeras_example.py b/tensorflow/contrib/distribute/python/examples/simple_tfkeras_example.py index e714255f699accdfa0761fe24529a6912f04c986..b87224251ca3844fc81c6f32a893d2c71664a955 100644 --- a/tensorflow/contrib/distribute/python/examples/simple_tfkeras_example.py +++ b/tensorflow/contrib/distribute/python/examples/simple_tfkeras_example.py @@ -41,7 +41,7 @@ def main(args): strategy = tf.contrib.distribute.MirroredStrategy( ['/device:GPU:0', '/device:GPU:1']) - config = tf.estimator.RunConfig(distribute=strategy) + config = tf.estimator.RunConfig(train_distribute=strategy) optimizer = tf.train.GradientDescentOptimizer(0.2) model = tf.keras.Sequential() diff --git a/tensorflow/contrib/distribute/python/minimize_loss_test.py b/tensorflow/contrib/distribute/python/minimize_loss_test.py index 0fa90df79bbcd621fe7b7d0da04256b7a59d5bfe..e134fe34e10be402f028db986b8cbf14222db07f 100644 --- a/tensorflow/contrib/distribute/python/minimize_loss_test.py +++ b/tensorflow/contrib/distribute/python/minimize_loss_test.py @@ -25,6 +25,7 @@ from tensorflow.contrib.distribute.python import combinations from tensorflow.contrib.distribute.python import mirrored_strategy from tensorflow.contrib.distribute.python.single_loss_example import batchnorm_example from tensorflow.contrib.distribute.python.single_loss_example import minimize_loss_example +from tensorflow.contrib.tpu.python.tpu import tpu from tensorflow.python.data.ops import dataset_ops from tensorflow.python.eager import context from tensorflow.python.eager import test @@ -42,16 +43,27 @@ class MinimizeLossStepTest(test.TestCase, parameterized.TestCase): combinations.times( combinations.distributions_and_v1_optimizers(), combinations.combine(mode=["graph"], use_callable_loss=[True, False]) - + combinations.combine(mode=["eager"], use_callable_loss=[True]))) - def testTrainNetwork(self, distribution, optimizer_fn, 
- use_callable_loss=True): + + combinations.combine(mode=["eager"], use_callable_loss=[True]), + combinations.combine(is_tpu=[False])) + + combinations.combine( + distribution=[combinations.tpu_strategy], + optimizer_fn=[combinations.adam_optimizer_v1_fn], + mode=["graph"], + use_callable_loss=[False], + is_tpu=[True])) + def testTrainNetwork(self, distribution, optimizer_fn, use_callable_loss, + is_tpu): with distribution.scope(): - model_fn, dataset, layer = minimize_loss_example( - optimizer_fn, - use_bias=True, - use_callable_loss=use_callable_loss) + model_fn, dataset_fn, layer = minimize_loss_example( + optimizer_fn, use_bias=True, use_callable_loss=use_callable_loss) - iterator = distribution.distribute_dataset(dataset) + # TODO(isaprykin): Eliminate `is_tpu`. Probably add a + # `DistributionStrategy.create_monitor` so that each DistributionStrategy + # could influence its training loop. That method would return an instance + # of Monitor. TPUMonitor would execute tpu.initialize_system() and + # tpu.shutdown_system(). 
+ iterator = distribution.distribute_dataset( + dataset_fn).make_one_shot_iterator() def run_step(): return distribution.group( @@ -60,6 +72,8 @@ class MinimizeLossStepTest(test.TestCase, parameterized.TestCase): if not context.executing_eagerly(): with self.test_session() as sess: + if is_tpu: + sess.run(tpu.initialize_system()) run_step = sess.make_callable(run_step()) self.evaluate(variables_lib.global_variables_initializer()) @@ -70,6 +84,10 @@ class MinimizeLossStepTest(test.TestCase, parameterized.TestCase): weights.append(self.evaluate(distribution.fetch(layer.kernel))) biases.append(self.evaluate(distribution.fetch(layer.bias))) + if is_tpu: + with self.test_session() as sess: + sess.run(tpu.shutdown_system()) + error = abs(numpy.add(numpy.squeeze(weights), numpy.squeeze(biases)) - 1) is_not_increasing = all(y <= x for x, y in zip(error, error[1:])) self.assertTrue(is_not_increasing) @@ -78,8 +96,17 @@ class MinimizeLossStepTest(test.TestCase, parameterized.TestCase): combinations.times( combinations.distributions_and_v1_optimizers() + combinations.distributions_and_v2_optimizers(), - combinations.combine(mode=["graph", "eager"]))) - def testOptimizerInsideModelFn(self, distribution, optimizer_fn): + combinations.combine(mode=["graph", "eager"], is_tpu=[False])) + + combinations.combine( + distribution=[combinations.tpu_strategy], + optimizer_fn=[ + combinations.adam_optimizer_v1_fn, + combinations.gradient_descent_optimizer_v1_fn + ], + mode=["graph"], + is_tpu=[True])) + + def testOptimizerInsideModelFn(self, distribution, optimizer_fn, is_tpu): created_variables = [] trainable_variables = [] @@ -94,13 +121,14 @@ class MinimizeLossStepTest(test.TestCase, parameterized.TestCase): # `distribution.scope`. 
with variable_scope.variable_creator_scope( appending_creator), distribution.scope(): - model_fn, dataset, layer = minimize_loss_example( + model_fn, dataset_fn, layer = minimize_loss_example( optimizer_fn, use_bias=True, use_callable_loss=True, create_optimizer_inside_model_fn=True) - iterator = distribution.distribute_dataset(dataset) + iterator = distribution.distribute_dataset( + dataset_fn).make_one_shot_iterator() def run_step(): return distribution.group( @@ -109,11 +137,17 @@ class MinimizeLossStepTest(test.TestCase, parameterized.TestCase): if not context.executing_eagerly(): with self.test_session() as sess: + if is_tpu: + sess.run(tpu.initialize_system()) run_step = sess.make_callable(run_step()) self.evaluate(variables_lib.global_variables_initializer()) run_step() + if is_tpu: + with self.test_session() as sess: + sess.run(tpu.shutdown_system()) + def get_expected_variables(optimizer_fn, num_parameter_devices): variables_map = { "GradientDescent": ["dense/kernel", "dense/bias"], @@ -147,7 +181,7 @@ class MinimizeLossStepTest(test.TestCase, parameterized.TestCase): """Verifies that moving mean updates are reduced across towers.""" with distribution.scope(): num_towers = len(distribution.worker_devices) - model_fn, dataset, batchnorm = batchnorm_example( + model_fn, dataset_fn, batchnorm = batchnorm_example( optimizer_fn, batch_per_epoch=num_towers, momentum=momentum, @@ -158,7 +192,8 @@ class MinimizeLossStepTest(test.TestCase, parameterized.TestCase): # on each device. 
if isinstance(distribution, mirrored_strategy.MirroredStrategy): distribution._prefetch_on_device = False - iterator = distribution.distribute_dataset(dataset) + iterator = distribution.distribute_dataset( + dataset_fn).make_one_shot_iterator() def run_step(): return control_flow_ops.group( @@ -230,10 +265,13 @@ class MinimizeLossStepTest(test.TestCase, parameterized.TestCase): else: return optimizer.minimize(loss_fn()) - features = dataset_ops.Dataset.from_tensors([[2.], [7.]]) - labels = dataset_ops.Dataset.from_tensors([[6.], [21.]]) - dataset = dataset_ops.Dataset.zip((features, labels)).repeat() - iterator = distribution.distribute_dataset(dataset) + def dataset_fn(): + features = dataset_ops.Dataset.from_tensors([[2.], [7.]]) + labels = dataset_ops.Dataset.from_tensors([[6.], [21.]]) + return dataset_ops.Dataset.zip((features, labels)).repeat() + + iterator = distribution.distribute_dataset( + dataset_fn).make_one_shot_iterator() def run_step(): return distribution.group( diff --git a/tensorflow/contrib/distribute/python/mirrored_strategy.py b/tensorflow/contrib/distribute/python/mirrored_strategy.py index eb0edb3a11df7788991ca14f957494d87593a449..6efd578a775da7bf326826289bd5bd50a57be892 100644 --- a/tensorflow/contrib/distribute/python/mirrored_strategy.py +++ b/tensorflow/contrib/distribute/python/mirrored_strategy.py @@ -140,10 +140,10 @@ class MirroredStrategy(distribute_lib.DistributionStrategy): g.add_to_collections(collections, result) return result - def distribute_dataset(self, dataset): - per_device_dataset = values.PerDeviceDataset( - dataset, self._devices, self._prefetch_on_device) - return per_device_dataset.make_one_shot_iterator() + def distribute_dataset(self, dataset_fn): + return values.PerDeviceDataset( + self._call_dataset_fn(dataset_fn), self._devices, + self._prefetch_on_device) def _broadcast(self, tensor, destinations): # TODO(josh11b): In eager mode, use one thread per device, or async mode. 
diff --git a/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py b/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py index 9e9f06da8e2ed185c2c32f79a5a4f5407165fb1d..6c5c055070c0fc88ed8f3a459e3f346596f077a6 100644 --- a/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py +++ b/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py @@ -247,8 +247,9 @@ class MirroredStrategyVariableCreationTest(test.TestCase): dist = mirrored_strategy.MirroredStrategy( ["/device:GPU:0", "/device:CPU:0"]) - features = dataset_ops.Dataset.from_tensors([[1.]]).repeat(10) - features = dist.distribute_dataset(features).get_next() + features = dist.distribute_dataset( + lambda: dataset_ops.Dataset.from_tensors([[1.]]).repeat(10) + ).make_one_shot_iterator().get_next() with dist.scope(): result = dist.call_for_each_tower( diff --git a/tensorflow/contrib/distribute/python/monitor.py b/tensorflow/contrib/distribute/python/monitor.py index fe80bb4df50379042654a010459e23d2a05faa16..7644acedc99361d7287a91832d76bc68cbc6ac0a 100644 --- a/tensorflow/contrib/distribute/python/monitor.py +++ b/tensorflow/contrib/distribute/python/monitor.py @@ -19,6 +19,7 @@ from __future__ import division from __future__ import print_function from tensorflow.python.eager import context +from tensorflow.python.framework import errors from tensorflow.python.ops import variables @@ -55,7 +56,9 @@ class Monitor(object): def run_steps(self, num_steps=None): step = 0 - done = False - while done is not None and (num_steps is None or step < num_steps): - done = self._run_step() - step += 1 + while num_steps is None or step < num_steps: + try: + self._run_step() + step += 1 + except errors.OutOfRangeError: + break diff --git a/tensorflow/contrib/distribute/python/one_device_strategy.py b/tensorflow/contrib/distribute/python/one_device_strategy.py index 39c49442b9c3245cfd0b67a51be68773a6fd3ff4..646d2a5c3b3b0bfcce6f89be0e588baacc6b9237 100644 --- 
a/tensorflow/contrib/distribute/python/one_device_strategy.py +++ b/tensorflow/contrib/distribute/python/one_device_strategy.py @@ -21,8 +21,6 @@ from __future__ import print_function import six from tensorflow.contrib.distribute.python import values -from tensorflow.contrib.eager.python import datasets -from tensorflow.python.eager import context from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops @@ -62,11 +60,8 @@ class OneDeviceStrategy(distribute_lib.DistributionStrategy): with ops.colocate_with(colocate_with): return next_creator(*args, **kwargs) - def distribute_dataset(self, dataset): - if context.executing_eagerly(): - return datasets.Iterator(dataset) - else: - return dataset.make_one_shot_iterator() + def distribute_dataset(self, dataset_fn): + return self._call_dataset_fn(dataset_fn) def _broadcast(self, tensor, destinations): return tensor diff --git a/tensorflow/contrib/distribute/python/optimizer_v2_test.py b/tensorflow/contrib/distribute/python/optimizer_v2_test.py index a0912b625f44342d22acc0ce9bb52a6b632c75a0..abd3a65ac4e19ece6b69b9834f4218fde55b60c2 100644 --- a/tensorflow/contrib/distribute/python/optimizer_v2_test.py +++ b/tensorflow/contrib/distribute/python/optimizer_v2_test.py @@ -39,10 +39,11 @@ class MinimizeLossOptimizerV2Test(test.TestCase, parameterized.TestCase): def testTrainNetwork(self, distribution, optimizer_fn, use_callable_loss=True): with distribution.scope(): - model_fn, dataset, layer = minimize_loss_example( + model_fn, dataset_fn, layer = minimize_loss_example( optimizer_fn, use_bias=True, use_callable_loss=use_callable_loss) - iterator = distribution.distribute_dataset(dataset) + iterator = distribution.distribute_dataset( + dataset_fn).make_one_shot_iterator() def run_step(): return control_flow_ops.group(distribution.unwrap( diff --git a/tensorflow/contrib/distribute/python/prefetching_ops_v2.py 
b/tensorflow/contrib/distribute/python/prefetching_ops_v2.py index e1ddf3cece1c3fa549d6d2999a9bff9671fcdd76..7b3670b45aba801cf8c18e04bfea03e23eb67184 100644 --- a/tensorflow/contrib/distribute/python/prefetching_ops_v2.py +++ b/tensorflow/contrib/distribute/python/prefetching_ops_v2.py @@ -26,6 +26,7 @@ from tensorflow.python.data.ops import dataset_ops from tensorflow.python.data.ops import iterator_ops from tensorflow.python.data.util import nest as data_nest from tensorflow.python.data.util import sparse +from tensorflow.python.eager import context from tensorflow.python.framework import dtypes from tensorflow.python.framework import function from tensorflow.python.framework import ops @@ -34,24 +35,55 @@ from tensorflow.python.util import nest # pylint: disable=protected-access class _PrefetchToDeviceIterator(object): - """A replacement for @{tf.data.Iterator} that prefetches to another device.""" + """A replacement for @{tf.data.Iterator} that prefetches to another device. - def __init__(self, input_dataset, devices, buffer_size): + Args: + input_dataset: The input dataset. + one_shot: If true, we make a one shot iterator that's already initialized. + devices: Devices on which to prefetch. + buffer_size: Size of the prefetching buffer. + shared_name: (Optional.) If non-empty, the returned iterator will be + shared under the given name across multiple sessions that share the + same devices (e.g. when using a remote server). Only used if one_shot + is False. + + Returns: + An Iterator type object. 
+ """ + + def __init__(self, + input_dataset, + one_shot, + devices, + buffer_size, + shared_name=None): self._input_dataset = input_dataset self._get_next_call_count = 0 + self._one_shot = one_shot + if shared_name is None: + shared_name = "" self._devices = devices - input_iterator = input_dataset.make_one_shot_iterator() - input_iterator_handle = input_iterator.string_handle() + + if self._one_shot: + self._input_iterator = input_dataset.make_one_shot_iterator() + else: + self._input_iterator = iterator_ops.Iterator.from_structure( + self._input_dataset.output_types, self._input_dataset.output_shapes, + shared_name, self._input_dataset.output_classes) + input_iterator_handle = self._input_iterator.string_handle() @function.Defun(dtypes.string) def _prefetch_fn(handle): + """Prefetches one element from `input_iterator`.""" remote_iterator = iterator_ops.Iterator.from_string_handle( - handle, input_iterator.output_types, input_iterator.output_shapes, - input_iterator.output_classes) - return remote_iterator.get_next() + handle, self._input_iterator.output_types, + self._input_iterator.output_shapes, + self._input_iterator.output_classes) + ret = remote_iterator.get_next() + return nest.flatten(sparse.serialize_sparse_tensors(ret)) target_device = gen_dataset_ops.iterator_get_device( - input_iterator._iterator_resource) + self._input_iterator._iterator_resource) self._buffering_resources = [] for device in nest.flatten(self._devices): with ops.device(device): @@ -59,9 +91,19 @@ class _PrefetchToDeviceIterator(object): f=_prefetch_fn, target_device=target_device, string_arg=input_iterator_handle, - buffer_size=buffer_size) + buffer_size=buffer_size, + shared_name=shared_name) self._buffering_resources.append(buffer_resource_handle) + if not self._one_shot: + reset_ops = [] + for buffer_resource in self._buffering_resources: + reset_ops.append( + prefetching_ops.function_buffering_resource_reset(buffer_resource)) + with ops.control_dependencies(reset_ops): + 
self._initializer = self._input_iterator.make_initializer( + self._input_dataset) + def get_next(self, name=None): """See @{tf.data.Iterator.get_next}.""" self._get_next_call_count += 1 @@ -90,6 +132,12 @@ class _PrefetchToDeviceIterator(object): return nest.pack_sequence_as(self._devices, flat_result) + @property + def initializer(self): + if self._one_shot: + raise NotImplementedError("Can't initialize a one_shot_iterator") + return self._initializer + @property def output_classes(self): return self._input_dataset.output_classes @@ -113,13 +161,24 @@ class _PrefetchToDeviceDataset(dataset_ops.Dataset): self._buffer_size = buffer_size if buffer_size is not None else 1 def make_one_shot_iterator(self): - return _PrefetchToDeviceIterator(self._input_dataset, self._devices, - self._buffer_size) + return _PrefetchToDeviceIterator( + self._input_dataset, + one_shot=True, + devices=self._devices, + buffer_size=self._buffer_size) def make_initializable_iterator(self, shared_name=None): - raise NotImplementedError("`prefetch_to_devices()` is not currently " - "compatible with initializable iterators. Use " - "`make_one_shot_iterator()` instead.") + if context.executing_eagerly(): + raise RuntimeError( + "make_initializable_iterator is not supported when eager " + "execution is enabled.") + + return _PrefetchToDeviceIterator( + self._input_dataset, + one_shot=False, + devices=self._devices, + buffer_size=self._buffer_size, + shared_name=shared_name) def _as_variant_tensor(self): # TODO(mrry): Raise this error earlier (e.g. 
when one of the Dataset diff --git a/tensorflow/contrib/distribute/python/prefetching_ops_v2_test.py b/tensorflow/contrib/distribute/python/prefetching_ops_v2_test.py index 8ed16f4607881f2864479c04b4c25e95d9fa1850..a68dbce6c7d03f6a1695ebfcd00178e21ac1cda0 100644 --- a/tensorflow/contrib/distribute/python/prefetching_ops_v2_test.py +++ b/tensorflow/contrib/distribute/python/prefetching_ops_v2_test.py @@ -64,5 +64,27 @@ class PrefetchingOpsV2Test(test.TestCase): with self.assertRaises(errors.OutOfRangeError): sess.run(next_element) + def testPrefetchToTwoDevicesWithReinit(self): + if not test_util.is_gpu_available(): + self.skipTest("No GPU available") + + host_dataset = dataset_ops.Dataset.range(10) + device_dataset = host_dataset.apply( + prefetching_ops_v2.prefetch_to_devices(["/cpu:0", "/gpu:0"])) + + iterator = device_dataset.make_initializable_iterator() + next_element = iterator.get_next() + + with self.test_session() as sess: + sess.run(iterator.initializer) + for _ in range(5): + sess.run(next_element) + with self.assertRaises(errors.OutOfRangeError): + sess.run(next_element) + sess.run(iterator.initializer) + for _ in range(5): + sess.run(next_element) + + if __name__ == "__main__": test.main() diff --git a/tensorflow/contrib/distribute/python/single_loss_example.py b/tensorflow/contrib/distribute/python/single_loss_example.py index cef5fd2f8943d348a0721cd72032bf6cb2199ad9..0db0b59fcacee2785eb8191bb84ed5216a79b081 100644 --- a/tensorflow/contrib/distribute/python/single_loss_example.py +++ b/tensorflow/contrib/distribute/python/single_loss_example.py @@ -18,6 +18,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.contrib.data.python.ops import batching from tensorflow.contrib.distribute.python import step_fn from tensorflow.python.data.ops import dataset_ops from tensorflow.python.framework import constant_op @@ -29,7 +30,10 @@ from tensorflow.python.ops import math_ops def 
single_loss_example(optimizer_fn, distribution, use_bias=False): """Build a very simple network to use in tests and examples.""" - dataset = dataset_ops.Dataset.from_tensors([[1.]]).repeat() + + def dataset_fn(): + return dataset_ops.Dataset.from_tensors([[1.]]).repeat() + optimizer = optimizer_fn() layer = core.Dense(1, use_bias=use_bias) @@ -37,8 +41,8 @@ def single_loss_example(optimizer_fn, distribution, use_bias=False): y = array_ops.reshape(layer(x), []) - constant_op.constant(1.) return y * y - single_loss_step = step_fn.StandardSingleLossStep(dataset, loss_fn, optimizer, - distribution) + single_loss_step = step_fn.StandardSingleLossStep(dataset_fn, loss_fn, + optimizer, distribution) # Layer is returned for inspecting the kernels in tests. return single_loss_step, layer @@ -49,7 +53,14 @@ def minimize_loss_example(optimizer_fn, use_callable_loss=True, create_optimizer_inside_model_fn=False): """Example of non-distribution-aware legacy code.""" - dataset = dataset_ops.Dataset.from_tensors([[1.]]).repeat() + + def dataset_fn(): + dataset = dataset_ops.Dataset.from_tensors([[1.]]).repeat() + # TODO(isaprykin): map_and_batch with drop_remainder causes shapes to be + # fully defined for TPU. Remove this when XLA supports dynamic shapes. + return dataset.apply( + batching.map_and_batch(lambda x: x, batch_size=2, drop_remainder=True)) + # An Optimizer instance is created either outside or inside model_fn. outer_optimizer = None if not create_optimizer_inside_model_fn: @@ -57,10 +68,11 @@ def minimize_loss_example(optimizer_fn, layer = core.Dense(1, use_bias=use_bias) - def model_fn(x): + def model_fn(xs): """A very simple model written by the user.""" def loss_fn(): + x = math_ops.reduce_mean(xs, keepdims=True) y = array_ops.reshape(layer(x), []) - constant_op.constant(1.) 
return y * y @@ -71,7 +83,7 @@ def minimize_loss_example(optimizer_fn, else: return optimizer.minimize(loss_fn()) - return model_fn, dataset, layer + return model_fn, dataset_fn, layer def batchnorm_example(optimizer_fn, @@ -79,12 +91,15 @@ def batchnorm_example(optimizer_fn, momentum=0.9, renorm=False): """Example of non-distribution-aware legacy code with batch normalization.""" - # input shape is [16, 8], input values are increasing in both dimensions. - dataset = dataset_ops.Dataset.from_tensor_slices( - [[[float(x * 8 + y + z * 100) - for y in range(8)] - for x in range(16)] - for z in range(batch_per_epoch)]).repeat() + + def dataset_fn(): + # input shape is [16, 8], input values are increasing in both dimensions. + return dataset_ops.Dataset.from_tensor_slices( + [[[float(x * 8 + y + z * 100) + for y in range(8)] + for x in range(16)] + for z in range(batch_per_epoch)]).repeat() + optimizer = optimizer_fn() batchnorm = normalization.BatchNormalization( renorm=renorm, momentum=momentum, fused=False) @@ -99,4 +114,4 @@ def batchnorm_example(optimizer_fn, # Callable loss. return optimizer.minimize(loss_fn) - return model_fn, dataset, batchnorm + return model_fn, dataset_fn, batchnorm diff --git a/tensorflow/contrib/distribute/python/step_fn.py b/tensorflow/contrib/distribute/python/step_fn.py index 82514c64be40b421c4a9887932f2cfb8e1ac4be0..d1910622b38c748fc5a814f9e83c2294850d5d12 100644 --- a/tensorflow/contrib/distribute/python/step_fn.py +++ b/tensorflow/contrib/distribute/python/step_fn.py @@ -49,12 +49,14 @@ class StandardInputStep(Step): """Step with a standard implementation of input handling. Args: - input_dataset: a tf.data Dataset that provides input. + dataset_fn: a function that returns a tf.data Dataset that produces the + input for the model. 
""" - def __init__(self, input_dataset, distribution): + def __init__(self, dataset_fn, distribution): Step.__init__(self, distribution) - self._distributed_input = distribution.distribute_dataset(input_dataset) + self._distributed_input = distribution.distribute_dataset( + dataset_fn).make_one_shot_iterator() def inputs(self): return self._distributed_input.get_next() @@ -76,14 +78,15 @@ class StandardSingleLossStep(StandardInputStep): ``` Args: - input_dataset: a tf.data Dataset that provides input. + dataset_fn: a function that returns a tf.data Dataset that produces the + input for the model. loss_fn: a function that returns loss. optimizer: an optimizer that implements an update rule. distribution: a `DistributionStrategy` object. """ - def __init__(self, input_dataset, loss_fn, optimizer, distribution): - StandardInputStep.__init__(self, input_dataset, distribution) + def __init__(self, dataset_fn, loss_fn, optimizer, distribution): + StandardInputStep.__init__(self, dataset_fn, distribution) self._loss_fn = loss_fn self._optimizer = optimizer self._is_run_concurrently = False diff --git a/tensorflow/contrib/distribute/python/tpu_strategy.py b/tensorflow/contrib/distribute/python/tpu_strategy.py new file mode 100644 index 0000000000000000000000000000000000000000..a7e4fe80f3e65907fa4b48c5fe0fcfd422ba033f --- /dev/null +++ b/tensorflow/contrib/distribute/python/tpu_strategy.py @@ -0,0 +1,119 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""TPU Distribution Strategy. + +This is experimental. It's not ready for general use. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import itertools + +from tensorflow.contrib import tpu +from tensorflow.contrib.distribute.python import one_device_strategy +from tensorflow.contrib.distribute.python import values +from tensorflow.contrib.tpu.python.ops import tpu_ops +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.util import nest + + +# TODO(isaprykin): Consider whether inheriting is really appropriate. +class TPUStrategy(one_device_strategy.OneDeviceStrategy): + """Experimental TPU distribution strategy implementation.""" + + def __init__(self, + num_cores_per_host=2, + iterations_per_step=2): + # TODO(isaprykin): Generalize the defaults. They are currently tailored for + # the unit test. + super(TPUStrategy, self).__init__('/cpu:0') + # TODO(isaprykin): Auto-detect number of cores and hosts. + self._num_cores_per_host = num_cores_per_host + # TODO(isaprykin): This might have to be per-call. 
+ self._iterations_per_step = iterations_per_step + + def distribute_dataset(self, dataset_fn): + return values.PerIterationDataset( + self._call_dataset_fn(dataset_fn), self._iterations_per_step, + self._num_cores_per_host) + + def _call_for_each_tower(self, fn, *args, **kwargs): + kwargs.pop('run_concurrently', None) + + inputs = {'args': args, 'kwargs': kwargs} + flat_inputs = nest.flatten(inputs) + + feed_mask = [isinstance(f, values.PerIteration) for f in flat_inputs] + + feeds = lambda: itertools.compress(flat_inputs, feed_mask) + shapes = [f.get_shape() for f in feeds()] + if any([not s.is_fully_defined() for s in shapes]): + raise ValueError( + 'TPU currently requires fully defined shapes. Either use ' + 'set_shape() on the input tensors or use ' + 'dataset.apply(map_and_batch(..., drop_remainder=True)).') + types = [f.get_dtype() for f in feeds()] + + def infeed_input(i): + """Get input, split it and then enqueue.""" + iteration_inputs = [f.get(i) for f in feeds()] + + infeed_inputs = [[inputs_per_core[core_id] + for inputs_per_core in iteration_inputs] + for core_id in range(self._num_cores_per_host)] + + infeed_ops = [] + for core_id, infeed_input in enumerate(infeed_inputs): + infeed_ops.append( + tpu_ops.infeed_enqueue_tuple( + inputs=infeed_input, shapes=shapes, device_ordinal=core_id)) + + with ops.control_dependencies(infeed_ops): + return i + 1 + + with ops.device('/task:0/device:CPU:0'): + enqueue_ops = control_flow_ops.while_loop( + lambda i: i < self._iterations_per_step, + infeed_input, [constant_op.constant(0)], + parallel_iterations=1) + + def dequeueing_fn(*args, **kwargs): + """Dequeue input arguments and supply them to `fn`.""" + del args, kwargs + dequeued = tpu.infeed_dequeue_tuple(dtypes=types, shapes=shapes) + dequeued = iter(dequeued) + + fn_inputs = [] + for inp, is_feed in zip(flat_inputs, feed_mask): + if is_feed: + fn_inputs.append(next(dequeued)) + else: + fn_inputs.append(inp) + + fn_inputs = nest.pack_sequence_as(inputs, 
fn_inputs) + return fn(*fn_inputs['args'], **fn_inputs['kwargs']) + + def iterate_on_tpu(): + return tpu.repeat(self._iterations_per_step, dequeueing_fn, []) + + with one_device_strategy._OneDeviceTowerContext(self): # pylint: disable=protected-access + tpu_result = tpu.batch_parallel( + iterate_on_tpu, [], num_shards=self._num_cores_per_host) + + return control_flow_ops.group(tpu_result, enqueue_ops) diff --git a/tensorflow/contrib/distribute/python/values.py b/tensorflow/contrib/distribute/python/values.py index 9acb6a9db93593e98341ee0a90abf84df14f3053..8cb5276579f48f9ea5781c5351cbf9bf3db16e6c 100644 --- a/tensorflow/contrib/distribute/python/values.py +++ b/tensorflow/contrib/distribute/python/values.py @@ -28,7 +28,6 @@ import six from tensorflow.contrib.data.python.ops import batching from tensorflow.contrib.distribute.python import prefetching_ops_v2 -from tensorflow.contrib.eager.python import datasets from tensorflow.python.eager import context from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops @@ -73,7 +72,7 @@ class DistributedValues(object): @property def devices(self): - return self._index.keys() + return list(self._index.keys()) def __str__(self): return "%s:%s" % (self.__class__.__name__, self._index) @@ -510,6 +509,10 @@ class PerDeviceDataIterator(object): self._devices = devices self._prefetch_on_device = prefetch_on_device + @property + def initializer(self): + return self._iterator.initializer + def get_next(self, name=None): """Scatter the input across devices.""" if self._prefetch_on_device: @@ -545,7 +548,8 @@ class PerDeviceDataset(object): "Prefetching is only supported in graph mode currently") if self._prefetch_on_device: - self._dataset = dataset + self._dataset = dataset.apply( + prefetching_ops_v2.prefetch_to_devices(self._devices)) else: # TODO(priyag): If dropping remainder is not appropriate, find another # approach to distributing the dataset when not possible to divide evenly. 
@@ -555,19 +559,72 @@ class PerDeviceDataset(object): def make_one_shot_iterator(self): """Get a one time use iterator for the distributed PerDeviceDataset.""" - if self._prefetch_on_device: - on_device_dataset = self._dataset.apply( - prefetching_ops_v2.prefetch_to_devices(self._devices)) - dataset_iterator = on_device_dataset.make_one_shot_iterator() - elif context.executing_eagerly(): - dataset_iterator = datasets.Iterator(self._dataset) - else: - dataset_iterator = self._dataset.make_one_shot_iterator() + dataset_iterator = self._dataset.make_one_shot_iterator() + return PerDeviceDataIterator( + dataset_iterator, self._devices, self._prefetch_on_device) + def make_initializable_iterator(self): + """Get an initializable iterator for the distributed PerDeviceDataset.""" + dataset_iterator = self._dataset.make_initializable_iterator() return PerDeviceDataIterator( dataset_iterator, self._devices, self._prefetch_on_device) +class PerIteration(object): + """Holds input for multiple iterations at once.""" + + def __init__(self, index): + self._index = index + + def get(self, iteration): + return array_ops.gather(self._index, iteration) + + def get_shape(self): + return self._index[-1][-1].get_shape() + + def get_dtype(self): + return self._index[-1][-1].dtype + + +class MultiIterator(object): + """Iterator that returns results of multiple get_next()s.""" + + def __init__(self, dataset_iterator, iterations, batches_per_iteration): + self._dataset_iterator = dataset_iterator + self._iterations = iterations + self._batches_per_iteration = batches_per_iteration + + def get_next(self, name=None): + return PerIteration([[ + self._dataset_iterator.get_next(name=name) + for _ in range(self._batches_per_iteration) + ] + for _ in range(self._iterations)]) + + @property + def initializer(self): + return self._dataset_iterator.initializer + + +class PerIterationDataset(object): + """A dataset that returns MultiIterators.""" + + def __init__(self, dataset, iterations, 
batches_per_iteration): + self._dataset = dataset + self._iterations = iterations + self._batches_per_iteration = batches_per_iteration + + def make_one_shot_iterator(self): + iterator = self._dataset.make_one_shot_iterator() + return MultiIterator(iterator, self._iterations, + self._batches_per_iteration) + + def make_initializable_iterator(self): + iterator = self._dataset.make_initializable_iterator() + return MultiIterator(iterator, self._iterations, + self._batches_per_iteration) + + class MapOutput(object): """Map can result in multiple outputs per device.""" diff --git a/tensorflow/contrib/distribute/python/values_test.py b/tensorflow/contrib/distribute/python/values_test.py index 5c0d4b7d6c78b7cf63c613201d83d4793ecfe76b..e96ce547415fcb2bf3da8b6085ee11f51717db8d 100644 --- a/tensorflow/contrib/distribute/python/values_test.py +++ b/tensorflow/contrib/distribute/python/values_test.py @@ -32,6 +32,7 @@ from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops +from tensorflow.python.ops import random_ops from tensorflow.python.ops import variable_scope from tensorflow.python.training import device_util from tensorflow.python.training import saver as saver_lib @@ -408,6 +409,32 @@ class PerDeviceDatasetTest(test.TestCase): expected_values = [[i, i+1] for i in range(0, 10, 2)] self._test_iterator(devices, dataset, expected_values) + def testInitializableIterator(self): + with context.graph_mode(): + devices = ["/device:CPU:0"] + # Using random input since that is only allowed with initializable + # iterator. 
+ dataset = dataset_ops.Dataset.from_tensor_slices( + random_ops.random_uniform((10,))) + + per_device_dataset = values.PerDeviceDataset( + dataset, devices, prefetch_on_device=False) + iterator = per_device_dataset.make_initializable_iterator() + + self.evaluate(iterator.initializer) + next_element = iterator.get_next() + for _ in range(10): + self.evaluate(next_element) + + # Should fail after the input is finished. + with self.assertRaises(errors.OutOfRangeError): + self.evaluate(next_element) + + # After re-initializing the iterator, should be able to iterate again. + self.evaluate(iterator.initializer) + for _ in range(10): + self.evaluate(next_element) + @test_util.with_c_api class MirroredVariableTest(test.TestCase): diff --git a/tensorflow/contrib/distributions/BUILD b/tensorflow/contrib/distributions/BUILD index de08eb491b8ee40f28086ca490ab79b2da50ec58..2d99e8172d220a697c3efaf07c6606874bd7dd01 100644 --- a/tensorflow/contrib/distributions/BUILD +++ b/tensorflow/contrib/distributions/BUILD @@ -454,6 +454,7 @@ cuda_py_test( "//tensorflow/python:framework_test_lib", "//tensorflow/python:platform_test", ], + tags = ["no_windows"], # TODO: needs investigation on Windows ) cuda_py_test( @@ -489,6 +490,16 @@ cuda_py_test( tags = ["nomsan"], # disable to avoid false positives from scipy. 
) +cuda_py_test( + name = "seed_stream_test", + size = "small", + srcs = ["python/kernel_tests/seed_stream_test.py"], + additional_deps = [ + ":distributions_py", + "//tensorflow/python:client_testlib", + ], +) + cuda_py_test( name = "statistical_testing_test", size = "medium", @@ -501,12 +512,6 @@ cuda_py_test( "//tensorflow/python:client_testlib", ], shard_count = 4, - tags = [ - "manual", - "noasan", - "noguitar", - "optonly", - ], ) cuda_py_test( @@ -872,6 +877,7 @@ cuda_py_test( "//tensorflow/python:framework_test_lib", "//tensorflow/python:platform_test", ], + tags = ["optonly"], ) cuda_py_test( @@ -1128,6 +1134,7 @@ cuda_py_test( "//tensorflow/python:math_ops", "//tensorflow/python:platform_test", ], + tags = ["no_windows"], # TODO: needs investigation on Windows ) cuda_py_test( @@ -1168,6 +1175,25 @@ cuda_py_test( ], ) +cuda_py_test( + name = "softsign_test", + size = "small", + srcs = ["python/kernel_tests/bijectors/softsign_test.py"], + additional_deps = [ + ":bijectors_py", + ":distributions_py", + "//third_party/py/numpy", + "@six_archive//:six", + "//tensorflow/contrib/linalg:linalg_py", + "//tensorflow/python:array_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:math_ops", + "//tensorflow/python:platform_test", + ], +) + cuda_py_test( name = "square_test", size = "small", diff --git a/tensorflow/contrib/distributions/__init__.py b/tensorflow/contrib/distributions/__init__.py index 4d4489468d9dcfbe152c42f5f841f6c25a9f1e6f..ddf59891e626a85e6c917ac74b3cfaabf16eb15d 100644 --- a/tensorflow/contrib/distributions/__init__.py +++ b/tensorflow/contrib/distributions/__init__.py @@ -59,6 +59,7 @@ from tensorflow.contrib.distributions.python.ops.quantized_distribution import * from tensorflow.contrib.distributions.python.ops.relaxed_bernoulli import * from tensorflow.contrib.distributions.python.ops.relaxed_onehot_categorical import * from 
tensorflow.contrib.distributions.python.ops.sample_stats import * +from tensorflow.contrib.distributions.python.ops.seed_stream import * from tensorflow.contrib.distributions.python.ops.sinh_arcsinh import * from tensorflow.contrib.distributions.python.ops.test_util import * from tensorflow.contrib.distributions.python.ops.vector_diffeomixture import * @@ -126,6 +127,7 @@ _allowed_symbols = [ 'NormalWithSoftplusScale', 'Poisson', 'PoissonLogNormalQuadratureCompound', + 'SeedStream', 'SinhArcsinh', 'StudentT', 'StudentTWithAbsDfSoftplusScale', diff --git a/tensorflow/contrib/distributions/python/kernel_tests/batch_reshape_test.py b/tensorflow/contrib/distributions/python/kernel_tests/batch_reshape_test.py index 4d2f40e27f17ca393dbf1d88ed94d387d3749f82..59d549b7b80a3d80d0b8409542eb6583f645bdaa 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/batch_reshape_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/batch_reshape_test.py @@ -22,6 +22,7 @@ import numpy as np from tensorflow.contrib.distributions.python.ops import batch_reshape as batch_reshape_lib from tensorflow.contrib.distributions.python.ops import mvn_diag as mvn_lib +from tensorflow.contrib.distributions.python.ops import poisson as poisson_lib from tensorflow.contrib.distributions.python.ops import wishart as wishart_lib from tensorflow.python.framework import constant_op from tensorflow.python.ops import array_ops @@ -514,6 +515,42 @@ class _BatchReshapeTest(object): batch_shape=new_batch_shape_ph, validate_args=True).sample().eval() + def test_broadcasting_explicitly_unsupported(self): + old_batch_shape = [4] + new_batch_shape = [1, 4, 1] + rate_ = self.dtype([1, 10, 2, 20]) + + rate = array_ops.placeholder_with_default( + rate_, + shape=old_batch_shape if self.is_static_shape else None) + poisson_4 = poisson_lib.Poisson(rate) + new_batch_shape_ph = ( + constant_op.constant(np.int32(new_batch_shape)) if self.is_static_shape + else array_ops.placeholder_with_default( + 
np.int32(new_batch_shape), shape=None)) + poisson_141_reshaped = batch_reshape_lib.BatchReshape( + poisson_4, new_batch_shape_ph, validate_args=True) + + x_4 = self.dtype([2, 12, 3, 23]) + x_114 = self.dtype([2, 12, 3, 23]).reshape(1, 1, 4) + + if self.is_static_shape: + with self.assertRaisesRegexp(NotImplementedError, + "too few batch and event dims"): + poisson_141_reshaped.log_prob(x_4) + with self.assertRaisesRegexp(NotImplementedError, + "unexpected batch and event shape"): + poisson_141_reshaped.log_prob(x_114) + return + + with self.assertRaisesOpError("too few batch and event dims"): + with self.test_session(): + poisson_141_reshaped.log_prob(x_4).eval() + + with self.assertRaisesOpError("unexpected batch and event shape"): + with self.test_session(): + poisson_141_reshaped.log_prob(x_114).eval() + class BatchReshapeStaticTest(_BatchReshapeTest, test.TestCase): diff --git a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/absolute_value_test.py b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/absolute_value_test.py index e0d65c79b2654c2949de161d6317f218d11cab43..042c8ebd51c47facfc5c942cae56bd56be9df7c5 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/absolute_value_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/absolute_value_test.py @@ -18,11 +18,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import numpy as np - # pylint: disable=g-importing-member from tensorflow.contrib.distributions.python.ops.bijectors.absolute_value import AbsoluteValue -from tensorflow.python.framework import dtypes from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops from tensorflow.python.platform import test @@ -35,50 +32,38 @@ class AbsoluteValueTest(test.TestCase): def testBijectorVersusNumpyRewriteOfBasicFunctionsEventNdims0(self): with self.test_session() as sess: - bijector = 
AbsoluteValue(event_ndims=0, validate_args=True) + bijector = AbsoluteValue(validate_args=True) self.assertEqual("absolute_value", bijector.name) x = array_ops.constant([[0., 1., -1], [0., -5., 3.]]) # Shape [2, 3] y = math_ops.abs(x) y_ = y.eval() - zeros = np.zeros((2, 3)) self.assertAllClose(y_, bijector.forward(x).eval()) self.assertAllClose((-y_, y_), sess.run(bijector.inverse(y))) - self.assertAllClose((zeros, zeros), - sess.run(bijector.inverse_log_det_jacobian(y))) + self.assertAllClose((0., 0.), + sess.run(bijector.inverse_log_det_jacobian( + y, event_ndims=0))) # Run things twice to make sure there are no issues in caching the tuples # returned by .inverse* self.assertAllClose(y_, bijector.forward(x).eval()) self.assertAllClose((-y_, y_), sess.run(bijector.inverse(y))) - self.assertAllClose((zeros, zeros), - sess.run(bijector.inverse_log_det_jacobian(y))) - - def testEventNdimsMustBeZeroOrRaiseStatic(self): - with self.test_session(): - with self.assertRaisesRegexp(ValueError, "event_ndims.*was not 0"): - AbsoluteValue(event_ndims=1) - - def testEventNdimsMustBeZeroOrRaiseDynamic(self): - with self.test_session() as sess: - event_ndims = array_ops.placeholder(dtypes.int32) - abs_bijector = AbsoluteValue(event_ndims=event_ndims, validate_args=True) - with self.assertRaisesOpError("event_ndims was not 0"): - sess.run(abs_bijector.inverse_log_det_jacobian([1.]), - feed_dict={event_ndims: 1}) + self.assertAllClose((0., 0.), + sess.run(bijector.inverse_log_det_jacobian( + y, event_ndims=0))) def testNegativeYRaisesForInverseIfValidateArgs(self): with self.test_session() as sess: - bijector = AbsoluteValue(event_ndims=0, validate_args=True) + bijector = AbsoluteValue(validate_args=True) with self.assertRaisesOpError("y was negative"): sess.run(bijector.inverse(-1.)) def testNegativeYRaisesForILDJIfValidateArgs(self): with self.test_session() as sess: - bijector = AbsoluteValue(event_ndims=0, validate_args=True) + bijector = AbsoluteValue(validate_args=True) 
with self.assertRaisesOpError("y was negative"): - sess.run(bijector.inverse_log_det_jacobian(-1.)) + sess.run(bijector.inverse_log_det_jacobian(-1., event_ndims=0)) if __name__ == "__main__": diff --git a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/affine_linear_operator_test.py b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/affine_linear_operator_test.py index 405ddd292cacd8ace87d6caeebf3e8cfc347c22d..1e4ad724d00f751a55370ef9aa6dde0003a2098c 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/affine_linear_operator_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/affine_linear_operator_test.py @@ -38,9 +38,11 @@ class AffineLinearOperatorTest(test.TestCase): self.assertEqual(affine.name, "affine_linear_operator") self.assertAllClose(y, affine.forward(x).eval()) self.assertAllClose(x, affine.inverse(y).eval()) - self.assertAllClose(ildj, affine.inverse_log_det_jacobian(y).eval()) - self.assertAllClose(-affine.inverse_log_det_jacobian(y).eval(), - affine.forward_log_det_jacobian(x).eval()) + self.assertAllClose(ildj, affine.inverse_log_det_jacobian( + y, event_ndims=2).eval()) + self.assertAllClose( + -affine.inverse_log_det_jacobian(y, event_ndims=2).eval(), + affine.forward_log_det_jacobian(x, event_ndims=2).eval()) def testDiag(self): with self.test_session(): @@ -58,14 +60,16 @@ class AffineLinearOperatorTest(test.TestCase): self.assertEqual(affine.name, "affine_linear_operator") self.assertAllClose(y, affine.forward(x).eval()) self.assertAllClose(x, affine.inverse(y).eval()) - self.assertAllClose(ildj, affine.inverse_log_det_jacobian(y).eval()) - self.assertAllClose(-affine.inverse_log_det_jacobian(y).eval(), - affine.forward_log_det_jacobian(x).eval()) + self.assertAllClose( + ildj, affine.inverse_log_det_jacobian(y, event_ndims=1).eval()) + self.assertAllClose( + -affine.inverse_log_det_jacobian(y, event_ndims=1).eval(), + affine.forward_log_det_jacobian(x, 
event_ndims=1).eval()) def testTriL(self): with self.test_session(): shift = np.array([-1, 0, 1], dtype=np.float32) - tril = np.array([[[1, 0, 0], + tril = np.array([[[3, 0, 0], [2, -1, 0], [3, 2, 1]], [[2, 0, 0], @@ -85,15 +89,17 @@ class AffineLinearOperatorTest(test.TestCase): # y = np.matmul(x, tril) + shift. y = np.squeeze(np.matmul(tril, np.expand_dims(x, -1)), -1) + shift ildj = -np.sum(np.log(np.abs(np.diagonal( - tril, axis1=-2, axis2=-1))), - axis=-1) + tril, axis1=-2, axis2=-1)))) self.assertEqual(affine.name, "affine_linear_operator") self.assertAllClose(y, affine.forward(x).eval()) self.assertAllClose(x, affine.inverse(y).eval()) - self.assertAllClose(ildj, affine.inverse_log_det_jacobian(y).eval()) - self.assertAllClose(-affine.inverse_log_det_jacobian(y).eval(), - affine.forward_log_det_jacobian(x).eval()) + self.assertAllClose( + ildj, affine.inverse_log_det_jacobian( + y, event_ndims=2).eval()) + self.assertAllClose( + -affine.inverse_log_det_jacobian(y, event_ndims=2).eval(), + affine.forward_log_det_jacobian(x, event_ndims=2).eval()) if __name__ == "__main__": diff --git a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/affine_scalar_test.py b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/affine_scalar_test.py index 16173a166fd943413345036df12245c2a4ab8343..d2533620bebeb0400b6d4a6346e8315c7e37c5c6 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/affine_scalar_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/affine_scalar_test.py @@ -40,13 +40,13 @@ class AffineScalarBijectorTest(test.TestCase): def testNoBatchScalar(self): with self.test_session() as sess: - def static_run(fun, x): - return fun(x).eval() + def static_run(fun, x, **kwargs): + return fun(x, **kwargs).eval() - def dynamic_run(fun, x_value): + def dynamic_run(fun, x_value, **kwargs): x_value = np.array(x_value) x = array_ops.placeholder(dtypes.float32, name="x") - return sess.run(fun(x), 
feed_dict={x: x_value}) + return sess.run(fun(x, **kwargs), feed_dict={x: x_value}) for run in (static_run, dynamic_run): mu = -1. @@ -55,19 +55,20 @@ class AffineScalarBijectorTest(test.TestCase): x = [1., 2, 3] # Three scalar samples (no batches). self.assertAllClose([1., 3, 5], run(bijector.forward, x)) self.assertAllClose([1., 1.5, 2.], run(bijector.inverse, x)) - self.assertAllClose([-np.log(2.)] * 3, - run(bijector.inverse_log_det_jacobian, x)) + self.assertAllClose( + -np.log(2.), + run(bijector.inverse_log_det_jacobian, x, event_ndims=0)) def testOneBatchScalarViaIdentityIn64BitUserProvidesShiftOnly(self): with self.test_session() as sess: - def static_run(fun, x): - return fun(x).eval() + def static_run(fun, x, **kwargs): + return fun(x, **kwargs).eval() - def dynamic_run(fun, x_value): + def dynamic_run(fun, x_value, **kwargs): x_value = np.array(x_value).astype(np.float64) x = array_ops.placeholder(dtypes.float64, name="x") - return sess.run(fun(x), feed_dict={x: x_value}) + return sess.run(fun(x, **kwargs), feed_dict={x: x_value}) for run in (static_run, dynamic_run): mu = np.float64([1.]) @@ -77,18 +78,20 @@ class AffineScalarBijectorTest(test.TestCase): x = np.float64([1.]) # One sample from one batches. 
self.assertAllClose([2.], run(bijector.forward, x)) self.assertAllClose([0.], run(bijector.inverse, x)) - self.assertAllClose([0.], run(bijector.inverse_log_det_jacobian, x)) + self.assertAllClose( + 0., + run(bijector.inverse_log_det_jacobian, x, event_ndims=0)) def testOneBatchScalarViaIdentityIn64BitUserProvidesScaleOnly(self): with self.test_session() as sess: - def static_run(fun, x): - return fun(x).eval() + def static_run(fun, x, **kwargs): + return fun(x, **kwargs).eval() - def dynamic_run(fun, x_value): + def dynamic_run(fun, x_value, **kwargs): x_value = np.array(x_value).astype(np.float64) x = array_ops.placeholder(dtypes.float64, name="x") - return sess.run(fun(x), feed_dict={x: x_value}) + return sess.run(fun(x, **kwargs), feed_dict={x: x_value}) for run in (static_run, dynamic_run): multiplier = np.float64([2.]) @@ -98,19 +101,20 @@ class AffineScalarBijectorTest(test.TestCase): x = np.float64([1.]) # One sample from one batches. self.assertAllClose([2.], run(bijector.forward, x)) self.assertAllClose([0.5], run(bijector.inverse, x)) - self.assertAllClose([np.log(0.5)], - run(bijector.inverse_log_det_jacobian, x)) + self.assertAllClose( + [np.log(0.5)], + run(bijector.inverse_log_det_jacobian, x, event_ndims=0)) def testTwoBatchScalarIdentityViaIdentity(self): with self.test_session() as sess: - def static_run(fun, x): - return fun(x).eval() + def static_run(fun, x, **kwargs): + return fun(x, **kwargs).eval() - def dynamic_run(fun, x_value): - x_value = np.array(x_value) + def dynamic_run(fun, x_value, **kwargs): + x_value = np.array(x_value).astype(np.float32) x = array_ops.placeholder(dtypes.float32, name="x") - return sess.run(fun(x), feed_dict={x: x_value}) + return sess.run(fun(x, **kwargs), feed_dict={x: x_value}) for run in (static_run, dynamic_run): mu = [1., -1] @@ -120,18 +124,20 @@ class AffineScalarBijectorTest(test.TestCase): x = [1., 1] # One sample from each of two batches. 
self.assertAllClose([2., 0], run(bijector.forward, x)) self.assertAllClose([0., 2], run(bijector.inverse, x)) - self.assertAllClose([0., 0.], run(bijector.inverse_log_det_jacobian, x)) + self.assertAllClose( + 0., + run(bijector.inverse_log_det_jacobian, x, event_ndims=0)) def testTwoBatchScalarIdentityViaScale(self): with self.test_session() as sess: - def static_run(fun, x): - return fun(x).eval() + def static_run(fun, x, **kwargs): + return fun(x, **kwargs).eval() - def dynamic_run(fun, x_value): - x_value = np.array(x_value) + def dynamic_run(fun, x_value, **kwargs): + x_value = np.array(x_value).astype(np.float32) x = array_ops.placeholder(dtypes.float32, name="x") - return sess.run(fun(x), feed_dict={x: x_value}) + return sess.run(fun(x, **kwargs), feed_dict={x: x_value}) for run in (static_run, dynamic_run): mu = [1., -1] @@ -142,7 +148,8 @@ class AffineScalarBijectorTest(test.TestCase): self.assertAllClose([3., 0], run(bijector.forward, x)) self.assertAllClose([0., 2], run(bijector.inverse, x)) self.assertAllClose( - [-np.log(2), 0.], run(bijector.inverse_log_det_jacobian, x)) + [-np.log(2), 0.], + run(bijector.inverse_log_det_jacobian, x, event_ndims=0)) def testScalarCongruency(self): with self.test_session(): diff --git a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/affine_test.py b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/affine_test.py index 077e6176b4e7aecb28369d49edad6d1367cc7259..9e14b9a53e6c63876478d876030c476c5d77dbbb 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/affine_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/affine_test.py @@ -40,14 +40,15 @@ class AffineBijectorTest(test.TestCase): def testNoBatchMultivariateIdentity(self): with self.test_session() as sess: + placeholder = array_ops.placeholder(dtypes.float32, name="x") - def static_run(fun, x): - return fun(x).eval() + def static_run(fun, x, **kwargs): + return fun(x, **kwargs).eval() - 
def dynamic_run(fun, x_value): + def dynamic_run(fun, x_value, **kwargs): x_value = np.array(x_value) - x = array_ops.placeholder(dtypes.float32, name="x") - return sess.run(fun(x), feed_dict={x: x_value}) + return sess.run( + fun(placeholder, **kwargs), feed_dict={placeholder: x_value}) for run in (static_run, dynamic_run): mu = [1., -1] @@ -66,18 +67,20 @@ class AffineBijectorTest(test.TestCase): x = [[1., 1], [-1., -1]] self.assertAllClose([[2., 0], [0., -2]], run(bijector.forward, x)) self.assertAllClose([[0., 2], [-2., 0]], run(bijector.inverse, x)) - self.assertAllClose(0., run(bijector.inverse_log_det_jacobian, x)) + self.assertAllClose( + 0., run(bijector.inverse_log_det_jacobian, x, event_ndims=1)) def testNoBatchMultivariateDiag(self): with self.test_session() as sess: + placeholder = array_ops.placeholder(dtypes.float32, name="x") - def static_run(fun, x): - return fun(x).eval() + def static_run(fun, x, **kwargs): + return fun(x, **kwargs).eval() - def dynamic_run(fun, x_value): + def dynamic_run(fun, x_value, **kwargs): x_value = np.array(x_value) - x = array_ops.placeholder(dtypes.float32, name="x") - return sess.run(fun(x), feed_dict={x: x_value}) + return sess.run( + fun(placeholder, **kwargs), feed_dict={placeholder: x_value}) for run in (static_run, dynamic_run): mu = [1., -1] @@ -89,9 +92,12 @@ class AffineBijectorTest(test.TestCase): # = [-1, -1] + [1, -1] self.assertAllClose([3., 0], run(bijector.forward, x)) self.assertAllClose([0., 2], run(bijector.inverse, x)) - self.assertAllClose(-np.log(2.), - run(bijector.inverse_log_det_jacobian, x)) + self.assertAllClose( + -np.log(2.), + run(bijector.inverse_log_det_jacobian, x, event_ndims=1)) + # Reset bijector. + bijector = Affine(shift=mu, scale_diag=[2., 1]) # x is a 2-batch of 2-vectors. # The first vector is [1, 1], the second is [-1, -1]. # Each undergoes matmul(sigma, x) + shift. 
@@ -103,8 +109,9 @@ class AffineBijectorTest(test.TestCase): self.assertAllClose([[0., 2], [-1., 0]], run(bijector.inverse, x)) - self.assertAllClose(-np.log(2.), - run(bijector.inverse_log_det_jacobian, x)) + self.assertAllClose( + -np.log(2.), + run(bijector.inverse_log_det_jacobian, x, event_ndims=1)) def testNoBatchMultivariateFullDynamic(self): with self.test_session() as sess: @@ -126,18 +133,20 @@ class AffineBijectorTest(test.TestCase): self.assertAllClose([[0., 1]], sess.run(bijector.inverse(x), feed_dict)) self.assertAllClose( -np.log(4), - sess.run(bijector.inverse_log_det_jacobian(x), feed_dict)) + sess.run(bijector.inverse_log_det_jacobian(x, event_ndims=1), + feed_dict)) def testBatchMultivariateIdentity(self): with self.test_session() as sess: + placeholder = array_ops.placeholder(dtypes.float32, name="x") - def static_run(fun, x): - return fun(x).eval() + def static_run(fun, x, **kwargs): + return fun(x, **kwargs).eval() - def dynamic_run(fun, x_value): - x_value = np.array(x_value, dtype=np.float32) - x = array_ops.placeholder(dtypes.float32, name="x") - return sess.run(fun(x), feed_dict={x: x_value}) + def dynamic_run(fun, x_value, **kwargs): + x_value = np.array(x_value) + return sess.run( + fun(placeholder, **kwargs), feed_dict={placeholder: x_value}) for run in (static_run, dynamic_run): mu = [[1., -1]] @@ -147,19 +156,21 @@ class AffineBijectorTest(test.TestCase): x = [[[1., 1]]] self.assertAllClose([[[3., 1]]], run(bijector.forward, x)) self.assertAllClose([[[0., 1]]], run(bijector.inverse, x)) - self.assertAllClose(-np.log(4), - run(bijector.inverse_log_det_jacobian, x)) + self.assertAllClose( + -np.log(4), + run(bijector.inverse_log_det_jacobian, x, event_ndims=1)) def testBatchMultivariateDiag(self): with self.test_session() as sess: + placeholder = array_ops.placeholder(dtypes.float32, name="x") - def static_run(fun, x): - return fun(x).eval() + def static_run(fun, x, **kwargs): + return fun(x, **kwargs).eval() - def dynamic_run(fun, 
x_value): - x_value = np.array(x_value, dtype=np.float32) - x = array_ops.placeholder(dtypes.float32, name="x") - return sess.run(fun(x), feed_dict={x: x_value}) + def dynamic_run(fun, x_value, **kwargs): + x_value = np.array(x_value) + return sess.run( + fun(placeholder, **kwargs), feed_dict={placeholder: x_value}) for run in (static_run, dynamic_run): mu = [[1., -1]] @@ -169,8 +180,9 @@ class AffineBijectorTest(test.TestCase): x = [[[1., 1]]] self.assertAllClose([[[3., 1]]], run(bijector.forward, x)) self.assertAllClose([[[0., 1]]], run(bijector.inverse, x)) - self.assertAllClose([-np.log(4)], - run(bijector.inverse_log_det_jacobian, x)) + self.assertAllClose( + [-np.log(4)], + run(bijector.inverse_log_det_jacobian, x, event_ndims=1)) def testBatchMultivariateFullDynamic(self): with self.test_session() as sess: @@ -191,20 +203,22 @@ class AffineBijectorTest(test.TestCase): bijector = Affine(shift=mu, scale_diag=scale_diag) self.assertAllClose([[[3., 1]]], sess.run(bijector.forward(x), feed_dict)) self.assertAllClose([[[0., 1]]], sess.run(bijector.inverse(x), feed_dict)) - self.assertAllClose([-np.log(4)], - sess.run( - bijector.inverse_log_det_jacobian(x), feed_dict)) + self.assertAllClose( + [-np.log(4)], + sess.run(bijector.inverse_log_det_jacobian( + x, event_ndims=1), feed_dict)) def testIdentityWithDiagUpdate(self): with self.test_session() as sess: + placeholder = array_ops.placeholder(dtypes.float32, name="x") - def static_run(fun, x): - return fun(x).eval() + def static_run(fun, x, **kwargs): + return fun(x, **kwargs).eval() - def dynamic_run(fun, x_value): + def dynamic_run(fun, x_value, **kwargs): x_value = np.array(x_value) - x = array_ops.placeholder(dtypes.float32, name="x") - return sess.run(fun(x), feed_dict={x: x_value}) + return sess.run( + fun(placeholder, **kwargs), feed_dict={placeholder: x_value}) for run in (static_run, dynamic_run): mu = -1. 
@@ -216,19 +230,21 @@ class AffineBijectorTest(test.TestCase): x = [1., 2, 3] # Three scalar samples (no batches). self.assertAllClose([1., 3, 5], run(bijector.forward, x)) self.assertAllClose([1., 1.5, 2.], run(bijector.inverse, x)) - self.assertAllClose(-np.log(2.**3), - run(bijector.inverse_log_det_jacobian, x)) + self.assertAllClose( + -np.log(2.**3), + run(bijector.inverse_log_det_jacobian, x, event_ndims=1)) def testIdentityWithTriL(self): with self.test_session() as sess: + placeholder = array_ops.placeholder(dtypes.float32, name="x") - def static_run(fun, x): - return fun(x).eval() + def static_run(fun, x, **kwargs): + return fun(x, **kwargs).eval() - def dynamic_run(fun, x_value): + def dynamic_run(fun, x_value, **kwargs): x_value = np.array(x_value) - x = array_ops.placeholder(dtypes.float32, name="x") - return sess.run(fun(x), feed_dict={x: x_value}) + return sess.run( + fun(placeholder, **kwargs), feed_dict={placeholder: x_value}) for run in (static_run, dynamic_run): mu = -1. @@ -240,19 +256,21 @@ class AffineBijectorTest(test.TestCase): x = [[1., 2]] # One multivariate sample. self.assertAllClose([[1., 5]], run(bijector.forward, x)) self.assertAllClose([[1., 0.5]], run(bijector.inverse, x)) - self.assertAllClose(-np.log(4.), - run(bijector.inverse_log_det_jacobian, x)) + self.assertAllClose( + -np.log(4.), + run(bijector.inverse_log_det_jacobian, x, event_ndims=1)) def testDiagWithTriL(self): with self.test_session() as sess: + placeholder = array_ops.placeholder(dtypes.float32, name="x") - def static_run(fun, x): - return fun(x).eval() + def static_run(fun, x, **kwargs): + return fun(x, **kwargs).eval() - def dynamic_run(fun, x_value): + def dynamic_run(fun, x_value, **kwargs): x_value = np.array(x_value) - x = array_ops.placeholder(dtypes.float32, name="x") - return sess.run(fun(x), feed_dict={x: x_value}) + return sess.run( + fun(placeholder, **kwargs), feed_dict={placeholder: x_value}) for run in (static_run, dynamic_run): mu = -1. 
@@ -262,19 +280,21 @@ class AffineBijectorTest(test.TestCase): x = [[1., 2]] # One multivariate sample. self.assertAllClose([[1., 7]], run(bijector.forward, x)) self.assertAllClose([[1., 1 / 3.]], run(bijector.inverse, x)) - self.assertAllClose(-np.log(6.), - run(bijector.inverse_log_det_jacobian, x)) + self.assertAllClose( + -np.log(6.), + run(bijector.inverse_log_det_jacobian, x, event_ndims=1)) def testIdentityAndDiagWithTriL(self): with self.test_session() as sess: + placeholder = array_ops.placeholder(dtypes.float32, name="x") - def static_run(fun, x): - return fun(x).eval() + def static_run(fun, x, **kwargs): + return fun(x, **kwargs).eval() - def dynamic_run(fun, x_value): + def dynamic_run(fun, x_value, **kwargs): x_value = np.array(x_value) - x = array_ops.placeholder(dtypes.float32, name="x") - return sess.run(fun(x), feed_dict={x: x_value}) + return sess.run( + fun(placeholder, **kwargs), feed_dict={placeholder: x_value}) for run in (static_run, dynamic_run): mu = -1. @@ -287,19 +307,21 @@ class AffineBijectorTest(test.TestCase): x = [[1., 2]] # One multivariate sample. self.assertAllClose([[2., 9]], run(bijector.forward, x)) self.assertAllClose([[2 / 3., 5 / 12.]], run(bijector.inverse, x)) - self.assertAllClose(-np.log(12.), - run(bijector.inverse_log_det_jacobian, x)) + self.assertAllClose( + -np.log(12.), + run(bijector.inverse_log_det_jacobian, x, event_ndims=1)) def testIdentityWithVDVTUpdate(self): with self.test_session() as sess: + placeholder = array_ops.placeholder(dtypes.float32, name="x") - def static_run(fun, x): - return fun(x).eval() + def static_run(fun, x, **kwargs): + return fun(x, **kwargs).eval() - def dynamic_run(fun, x_value): + def dynamic_run(fun, x_value, **kwargs): x_value = np.array(x_value) - x = array_ops.placeholder(dtypes.float32, name="x") - return sess.run(fun(x), feed_dict={x: x_value}) + return sess.run( + fun(placeholder, **kwargs), feed_dict={placeholder: x_value}) for run in (static_run, dynamic_run): mu = -1. 
@@ -319,22 +341,24 @@ class AffineBijectorTest(test.TestCase): self.assertAllClose([0.2, 1.5, 4 / 3.], run(bijector.inverse, x)) self.assertAllClose( run(bijector_ref.inverse, x), run(bijector.inverse, x)) - self.assertAllClose(-np.log(60.), - run(bijector.inverse_log_det_jacobian, x)) self.assertAllClose( - run(bijector.inverse_log_det_jacobian, x), - run(bijector_ref.inverse_log_det_jacobian, x)) + -np.log(60.), + run(bijector.inverse_log_det_jacobian, x, event_ndims=1)) + self.assertAllClose( + run(bijector.inverse_log_det_jacobian, x, event_ndims=1), + run(bijector_ref.inverse_log_det_jacobian, x, event_ndims=1)) def testDiagWithVDVTUpdate(self): with self.test_session() as sess: + placeholder = array_ops.placeholder(dtypes.float32, name="x") - def static_run(fun, x): - return fun(x).eval() + def static_run(fun, x, **kwargs): + return fun(x, **kwargs).eval() - def dynamic_run(fun, x_value): + def dynamic_run(fun, x_value, **kwargs): x_value = np.array(x_value) - x = array_ops.placeholder(dtypes.float32, name="x") - return sess.run(fun(x), feed_dict={x: x_value}) + return sess.run( + fun(placeholder, **kwargs), feed_dict={placeholder: x_value}) for run in (static_run, dynamic_run): mu = -1. 
@@ -353,22 +377,24 @@ class AffineBijectorTest(test.TestCase): self.assertAllClose([0.2, 1., 0.8], run(bijector.inverse, x)) self.assertAllClose( run(bijector_ref.inverse, x), run(bijector.inverse, x)) - self.assertAllClose(-np.log(150.), - run(bijector.inverse_log_det_jacobian, x)) self.assertAllClose( - run(bijector.inverse_log_det_jacobian, x), - run(bijector_ref.inverse_log_det_jacobian, x)) + -np.log(150.), + run(bijector.inverse_log_det_jacobian, x, event_ndims=1)) + self.assertAllClose( + run(bijector.inverse_log_det_jacobian, x, event_ndims=1), + run(bijector_ref.inverse_log_det_jacobian, x, event_ndims=1)) def testTriLWithVDVTUpdate(self): with self.test_session() as sess: + placeholder = array_ops.placeholder(dtypes.float32, name="x") - def static_run(fun, x): - return fun(x).eval() + def static_run(fun, x, **kwargs): + return fun(x, **kwargs).eval() - def dynamic_run(fun, x_value): + def dynamic_run(fun, x_value, **kwargs): x_value = np.array(x_value) - x = array_ops.placeholder(dtypes.float32, name="x") - return sess.run(fun(x), feed_dict={x: x_value}) + return sess.run( + fun(placeholder, **kwargs), feed_dict={placeholder: x_value}) for run in (static_run, dynamic_run): mu = -1. 
@@ -388,22 +414,24 @@ class AffineBijectorTest(test.TestCase): self.assertAllClose([0.2, 14 / 15., 4 / 25.], run(bijector.inverse, x)) self.assertAllClose( run(bijector_ref.inverse, x), run(bijector.inverse, x)) - self.assertAllClose(-np.log(150.), - run(bijector.inverse_log_det_jacobian, x)) self.assertAllClose( - run(bijector.inverse_log_det_jacobian, x), - run(bijector_ref.inverse_log_det_jacobian, x)) + -np.log(150.), + run(bijector.inverse_log_det_jacobian, x, event_ndims=1)) + self.assertAllClose( + run(bijector.inverse_log_det_jacobian, x, event_ndims=1), + run(bijector_ref.inverse_log_det_jacobian, x, event_ndims=1)) def testTriLWithVDVTUpdateNoDiagonal(self): with self.test_session() as sess: + placeholder = array_ops.placeholder(dtypes.float32, name="x") - def static_run(fun, x): - return fun(x).eval() + def static_run(fun, x, **kwargs): + return fun(x, **kwargs).eval() - def dynamic_run(fun, x_value): + def dynamic_run(fun, x_value, **kwargs): x_value = np.array(x_value) - x = array_ops.placeholder(dtypes.float32, name="x") - return sess.run(fun(x), feed_dict={x: x_value}) + return sess.run( + fun(placeholder, **kwargs), feed_dict={placeholder: x_value}) for run in (static_run, dynamic_run): mu = -1. 
@@ -423,11 +451,12 @@ class AffineBijectorTest(test.TestCase): self.assertAllClose([1 / 3., 8 / 9., 4 / 30.], run(bijector.inverse, x)) self.assertAllClose( run(bijector_ref.inverse, x), run(bijector.inverse, x)) - self.assertAllClose(-np.log(90.), - run(bijector.inverse_log_det_jacobian, x)) self.assertAllClose( - run(bijector.inverse_log_det_jacobian, x), - run(bijector_ref.inverse_log_det_jacobian, x)) + -np.log(90.), + run(bijector.inverse_log_det_jacobian, x, event_ndims=1)) + self.assertAllClose( + run(bijector.inverse_log_det_jacobian, x, event_ndims=1), + run(bijector_ref.inverse_log_det_jacobian, x, event_ndims=1)) def testNoBatchMultivariateRaisesWhenSingular(self): with self.test_session(): @@ -530,6 +559,7 @@ class AffineBijectorTest(test.TestCase): backward = np.squeeze(backward, axis=-1) self.assertAllClose(backward, bijector.inverse(x).eval()) + scale *= np.ones(shape=x.shape[:-1], dtype=scale.dtype) ildj = -np.log(np.abs(np.linalg.det(scale))) # TODO(jvdillon): We need to make it so the scale_identity_multiplier # case does not deviate in expected shape. 
Fixing this will get rid of @@ -540,7 +570,8 @@ class AffineBijectorTest(test.TestCase): ildj = np.squeeze(ildj[0]) elif ildj.ndim < scale.ndim - 2: ildj = np.reshape(ildj, scale.shape[0:-2]) - self.assertAllClose(ildj, bijector.inverse_log_det_jacobian(x).eval()) + self.assertAllClose( + ildj, bijector.inverse_log_det_jacobian(x, event_ndims=1).eval()) def testLegalInputs(self): self._testLegalInputs( diff --git a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/batch_normalization_test.py b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/batch_normalization_test.py index a215a4a2b1ffbea7951bdb9b4352ed567e0b1e41..c832fcaa686c92f83810e4f99ca3b23ae694b723 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/batch_normalization_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/batch_normalization_test.py @@ -83,10 +83,11 @@ class BatchNormTest(test_util.VectorDistributionTestHelpers, moving_mean = array_ops.identity(batch_norm.batchnorm.moving_mean) moving_var = array_ops.identity(batch_norm.batchnorm.moving_variance) denorm_x = batch_norm.forward(array_ops.identity(norm_x)) - fldj = batch_norm.forward_log_det_jacobian(x) + fldj = batch_norm.forward_log_det_jacobian( + x, event_ndims=len(event_dims)) # Use identity to invalidate cache. ildj = batch_norm.inverse_log_det_jacobian( - array_ops.identity(denorm_x)) + array_ops.identity(denorm_x), event_ndims=len(event_dims)) variables.global_variables_initializer().run() # Update variables. 
norm_x_ = sess.run(norm_x) diff --git a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/chain_test.py b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/chain_test.py index a748acd667e58f9b527bab11d8bc4d086996e9f3..ca20442c3940664feab7526110229872a6cdc41f 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/chain_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/chain_test.py @@ -20,21 +20,33 @@ from __future__ import print_function import numpy as np +from tensorflow.contrib.distributions.python.ops.bijectors.affine import Affine from tensorflow.contrib.distributions.python.ops.bijectors.chain import Chain from tensorflow.contrib.distributions.python.ops.bijectors.exp import Exp from tensorflow.contrib.distributions.python.ops.bijectors.softmax_centered import SoftmaxCentered from tensorflow.contrib.distributions.python.ops.bijectors.softplus import Softplus from tensorflow.python.framework import tensor_shape +from tensorflow.python.ops.distributions import bijector from tensorflow.python.ops.distributions.bijector_test_util import assert_scalar_congruency from tensorflow.python.platform import test +class ShapeChanging(bijector.Bijector): + """Only used for op_ndims manipulation.""" + + def __init__(self, forward_min_event_ndims=0, inverse_min_event_ndims=3): + super(ShapeChanging, self).__init__( + forward_min_event_ndims=forward_min_event_ndims, + inverse_min_event_ndims=inverse_min_event_ndims, + validate_args=False, name="shape_changer") + + class ChainBijectorTest(test.TestCase): """Tests the correctness of the Y = Chain(bij1, bij2, bij3) transformation.""" def testBijector(self): with self.test_session(): - chain = Chain((Exp(event_ndims=1), Softplus(event_ndims=1))) + chain = Chain((Exp(), Softplus())) self.assertEqual("chain_of_exp_of_softplus", chain.name) x = np.asarray([[[1., 2.], [2., 3.]]]) @@ -42,9 +54,10 @@ class ChainBijectorTest(test.TestCase): 
self.assertAllClose(np.log(x - 1.), chain.inverse(x).eval()) self.assertAllClose( -np.sum(np.log(x - 1.), axis=2), - chain.inverse_log_det_jacobian(x).eval()) + chain.inverse_log_det_jacobian(x, event_ndims=1).eval()) self.assertAllClose( - np.sum(x, axis=2), chain.forward_log_det_jacobian(x).eval()) + np.sum(x, axis=2), + chain.forward_log_det_jacobian(x, event_ndims=1).eval()) def testBijectorIdentity(self): with self.test_session(): @@ -54,31 +67,126 @@ class ChainBijectorTest(test.TestCase): [2., 3.]]]) self.assertAllClose(x, chain.forward(x).eval()) self.assertAllClose(x, chain.inverse(x).eval()) - self.assertAllClose(0., chain.inverse_log_det_jacobian(x).eval()) - self.assertAllClose(0., chain.forward_log_det_jacobian(x).eval()) + self.assertAllClose( + 0., chain.inverse_log_det_jacobian(x, event_ndims=1).eval()) + self.assertAllClose( + 0., chain.forward_log_det_jacobian(x, event_ndims=1).eval()) def testScalarCongruency(self): with self.test_session(): - bijector = Chain((Exp(), Softplus())) + chain = Chain((Exp(), Softplus())) assert_scalar_congruency( - bijector, lower_x=1e-3, upper_x=1.5, rtol=0.05) + chain, lower_x=1e-3, upper_x=1.5, rtol=0.05) def testShapeGetters(self): with self.test_session(): - bijector = Chain([ + chain = Chain([ SoftmaxCentered(validate_args=True), SoftmaxCentered(validate_args=True), ]) x = tensor_shape.TensorShape([1]) y = tensor_shape.TensorShape([2 + 1]) - self.assertAllEqual(y, bijector.forward_event_shape(x)) + self.assertAllEqual(y, chain.forward_event_shape(x)) self.assertAllEqual( y.as_list(), - bijector.forward_event_shape_tensor(x.as_list()).eval()) - self.assertAllEqual(x, bijector.inverse_event_shape(y)) + chain.forward_event_shape_tensor(x.as_list()).eval()) + self.assertAllEqual(x, chain.inverse_event_shape(y)) self.assertAllEqual( x.as_list(), - bijector.inverse_event_shape_tensor(y.as_list()).eval()) + chain.inverse_event_shape_tensor(y.as_list()).eval()) + + def testMinEventNdimsChain(self): + chain = 
Chain([Exp(), Exp(), Exp()]) + self.assertEqual(0, chain.forward_min_event_ndims) + self.assertEqual(0, chain.inverse_min_event_ndims) + + chain = Chain([Affine(), Affine(), Affine()]) + self.assertEqual(1, chain.forward_min_event_ndims) + self.assertEqual(1, chain.inverse_min_event_ndims) + + chain = Chain([Exp(), Affine()]) + self.assertEqual(1, chain.forward_min_event_ndims) + self.assertEqual(1, chain.inverse_min_event_ndims) + + chain = Chain([Affine(), Exp()]) + self.assertEqual(1, chain.forward_min_event_ndims) + self.assertEqual(1, chain.inverse_min_event_ndims) + + chain = Chain([Affine(), Exp(), Softplus(), Affine()]) + self.assertEqual(1, chain.forward_min_event_ndims) + self.assertEqual(1, chain.inverse_min_event_ndims) + + def testMinEventNdimsShapeChangingAddDims(self): + chain = Chain([ShapeChanging()]) + self.assertEqual(0, chain.forward_min_event_ndims) + self.assertEqual(3, chain.inverse_min_event_ndims) + + chain = Chain([ShapeChanging(), Affine()]) + self.assertEqual(1, chain.forward_min_event_ndims) + self.assertEqual(4, chain.inverse_min_event_ndims) + + chain = Chain([Affine(), ShapeChanging()]) + self.assertEqual(0, chain.forward_min_event_ndims) + self.assertEqual(3, chain.inverse_min_event_ndims) + + chain = Chain([ShapeChanging(), ShapeChanging()]) + self.assertEqual(0, chain.forward_min_event_ndims) + self.assertEqual(6, chain.inverse_min_event_ndims) + + def testMinEventNdimsShapeChangingRemoveDims(self): + chain = Chain([ShapeChanging(3, 0)]) + self.assertEqual(3, chain.forward_min_event_ndims) + self.assertEqual(0, chain.inverse_min_event_ndims) + + chain = Chain([ShapeChanging(3, 0), Affine()]) + self.assertEqual(3, chain.forward_min_event_ndims) + self.assertEqual(0, chain.inverse_min_event_ndims) + + chain = Chain([Affine(), ShapeChanging(3, 0)]) + self.assertEqual(4, chain.forward_min_event_ndims) + self.assertEqual(1, chain.inverse_min_event_ndims) + + chain = Chain([ShapeChanging(3, 0), ShapeChanging(3, 0)]) + 
self.assertEqual(6, chain.forward_min_event_ndims) + self.assertEqual(0, chain.inverse_min_event_ndims) + + def testMinEventNdimsShapeChangingAddRemoveDims(self): + chain = Chain([ + ShapeChanging(2, 1), + ShapeChanging(3, 0), + ShapeChanging(1, 2)]) + self.assertEqual(4, chain.forward_min_event_ndims) + self.assertEqual(1, chain.inverse_min_event_ndims) + + def testChainExpAffine(self): + scale_diag = np.array([1., 2., 3.], dtype=np.float32) + chain = Chain([Exp(), Affine(scale_diag=scale_diag)]) + x = [0., np.log(2., dtype=np.float32), np.log(3., dtype=np.float32)] + y = [1., 4., 27.] + self.assertAllClose(y, self.evaluate(chain.forward(x))) + self.assertAllClose(x, self.evaluate(chain.inverse(y))) + self.assertAllClose( + np.log(6, dtype=np.float32) + np.sum(scale_diag * x), + self.evaluate(chain.forward_log_det_jacobian(x, event_ndims=1))) + + self.assertAllClose( + -np.log(6, dtype=np.float32) - np.sum(scale_diag * x), + self.evaluate(chain.inverse_log_det_jacobian(y, event_ndims=1))) + + def testChainAffineExp(self): + scale_diag = np.array([1., 2., 3.], dtype=np.float32) + chain = Chain([Affine(scale_diag=scale_diag), Exp()]) + x = [0., np.log(2., dtype=np.float32), np.log(3., dtype=np.float32)] + y = [1., 4., 9.] 
+ self.assertAllClose(y, self.evaluate(chain.forward(x))) + self.assertAllClose(x, self.evaluate(chain.inverse(y))) + self.assertAllClose( + np.log(6, dtype=np.float32) + np.sum(x), + self.evaluate(chain.forward_log_det_jacobian(x, event_ndims=1))) + + self.assertAllClose( + -np.log(6, dtype=np.float32) - np.sum(x), + self.evaluate(chain.inverse_log_det_jacobian(y, event_ndims=1))) if __name__ == "__main__": diff --git a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/cholesky_outer_product_test.py b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/cholesky_outer_product_test.py index f392e83d2c3da9dac43c2e87070e952ae2060b34..e281e81bdf0698c1f7b2f60fb27783dd1351773f 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/cholesky_outer_product_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/cholesky_outer_product_test.py @@ -51,10 +51,13 @@ class CholeskyOuterProductBijectorTest(test.TestCase): self.assertAllClose(y, bijector.forward(x).eval()) self.assertAllClose(x, bijector.inverse(y).eval()) self.assertAllClose( - ildj, bijector.inverse_log_det_jacobian(y).eval(), atol=0., rtol=1e-7) + ildj, bijector.inverse_log_det_jacobian( + y, event_ndims=2).eval(), atol=0., rtol=1e-7) self.assertAllClose( - -bijector.inverse_log_det_jacobian(y).eval(), - bijector.forward_log_det_jacobian(x).eval(), + -bijector.inverse_log_det_jacobian( + y, event_ndims=2).eval(), + bijector.forward_log_det_jacobian( + x, event_ndims=2).eval(), atol=0., rtol=1e-7) diff --git a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/conditional_bijector_test.py b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/conditional_bijector_test.py index 26e0d2a539c78540603281ae0f361987a7bf8d90..8b279ebcd908b6f375b35594ac5f3db9228a1e31 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/conditional_bijector_test.py +++ 
b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/conditional_bijector_test.py @@ -27,7 +27,7 @@ class _TestBijector(ConditionalBijector): def __init__(self): super(_TestBijector, self).__init__( - event_ndims=0, + forward_min_event_ndims=0, graph_parents=[], is_constant_jacobian=True, validate_args=False, @@ -51,11 +51,15 @@ class ConditionalBijectorTest(test.TestCase): def testConditionalBijector(self): b = _TestBijector() - for name in ["forward", "inverse", "inverse_log_det_jacobian", - "forward_log_det_jacobian"]: + for name in ["forward", "inverse"]: method = getattr(b, name) with self.assertRaisesRegexp(ValueError, name + ".*b1.*b2"): - method(1.0, arg1="b1", arg2="b2") + method(1., arg1="b1", arg2="b2") + + for name in ["inverse_log_det_jacobian", "forward_log_det_jacobian"]: + method = getattr(b, name) + with self.assertRaisesRegexp(ValueError, name + ".*b1.*b2"): + method(1., event_ndims=0., arg1="b1", arg2="b2") if __name__ == "__main__": diff --git a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/exp_test.py b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/exp_test.py index 9970c0b4d86afda188d9401ebaf3c98d3fffbfdf..7be939cd274e6f0e33c9b01c82494755db2caa73 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/exp_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/exp_test.py @@ -31,17 +31,21 @@ class ExpBijectorTest(test.TestCase): def testBijector(self): with self.test_session(): - bijector = Exp(event_ndims=1) + bijector = Exp() self.assertEqual("exp", bijector.name) x = [[[1.], [2.]]] y = np.exp(x) self.assertAllClose(y, bijector.forward(x).eval()) self.assertAllClose(x, bijector.inverse(y).eval()) self.assertAllClose( - -np.sum(np.log(y), axis=-1), - bijector.inverse_log_det_jacobian(y).eval()) - self.assertAllClose(-bijector.inverse_log_det_jacobian(np.exp(x)).eval(), - bijector.forward_log_det_jacobian(x).eval()) + -np.squeeze(np.log(y), axis=-1), + 
bijector.inverse_log_det_jacobian( + y, event_ndims=1).eval()) + self.assertAllClose( + -bijector.inverse_log_det_jacobian( + np.exp(x), event_ndims=1).eval(), + bijector.forward_log_det_jacobian( + x, event_ndims=1).eval()) def testScalarCongruency(self): with self.test_session(): @@ -51,10 +55,10 @@ class ExpBijectorTest(test.TestCase): def testBijectiveAndFinite(self): with self.test_session(): - bijector = Exp(event_ndims=0) + bijector = Exp() x = np.linspace(-10, 10, num=10).astype(np.float32) y = np.logspace(-10, 10, num=10).astype(np.float32) - assert_bijective_and_finite(bijector, x, y) + assert_bijective_and_finite(bijector, x, y, event_ndims=0) if __name__ == "__main__": diff --git a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/gumbel_test.py b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/gumbel_test.py index 9a905980c7581a86bbcda8c6c726da57c09fe4f8..54e54c3296a89a4fe29a3cce971760502b65e784 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/gumbel_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/gumbel_test.py @@ -34,7 +34,7 @@ class GumbelBijectorTest(test.TestCase): with self.test_session(): loc = 0.3 scale = 5. - bijector = Gumbel(loc=loc, scale=scale, event_ndims=1, validate_args=True) + bijector = Gumbel(loc=loc, scale=scale, validate_args=True) self.assertEqual("gumbel", bijector.name) x = np.array([[[-3.], [0.], [0.5], [4.2], [12.]]], dtype=np.float32) # Gumbel distribution @@ -43,13 +43,11 @@ class GumbelBijectorTest(test.TestCase): self.assertAllClose(y, bijector.forward(x).eval()) self.assertAllClose(x, bijector.inverse(y).eval()) self.assertAllClose( - # We should lose a dimension from calculating the determinant of the - # jacobian. 
- np.squeeze(gumbel_dist.logpdf(x), axis=2), - bijector.forward_log_det_jacobian(x).eval()) + np.squeeze(gumbel_dist.logpdf(x), axis=-1), + bijector.forward_log_det_jacobian(x, event_ndims=1).eval()) self.assertAllClose( - -bijector.inverse_log_det_jacobian(y).eval(), - bijector.forward_log_det_jacobian(x).eval(), + -bijector.inverse_log_det_jacobian(y, event_ndims=1).eval(), + bijector.forward_log_det_jacobian(x, event_ndims=1).eval(), rtol=1e-4, atol=0.) @@ -60,10 +58,10 @@ class GumbelBijectorTest(test.TestCase): def testBijectiveAndFinite(self): with self.test_session(): - bijector = Gumbel(loc=0., scale=3.0, event_ndims=0, validate_args=True) + bijector = Gumbel(loc=0., scale=3.0, validate_args=True) x = np.linspace(-10., 10., num=10).astype(np.float32) y = np.linspace(0.01, 0.99, num=10).astype(np.float32) - assert_bijective_and_finite(bijector, x, y, rtol=1e-3) + assert_bijective_and_finite(bijector, x, y, event_ndims=0, rtol=1e-3) if __name__ == "__main__": diff --git a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/inline_test.py b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/inline_test.py index 739fa6d439a8bce993ab1b4601489d9bbcd69bee..7d3bd758cd2db307f95d2d934923ea2133dc1217 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/inline_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/inline_test.py @@ -33,15 +33,13 @@ class InlineBijectorTest(test.TestCase): def testBijector(self): with self.test_session(): - exp = Exp(event_ndims=1) + exp = Exp() inline = Inline( forward_fn=math_ops.exp, inverse_fn=math_ops.log, - inverse_log_det_jacobian_fn=( - lambda y: -math_ops.reduce_sum( # pylint: disable=g-long-lambda - math_ops.log(y), reduction_indices=-1)), - forward_log_det_jacobian_fn=( - lambda x: math_ops.reduce_sum(x, reduction_indices=-1)), + inverse_log_det_jacobian_fn=lambda y: -math_ops.log(y), + forward_log_det_jacobian_fn=lambda x: x, + forward_min_event_ndims=0, 
name="exp") self.assertEqual(exp.name, inline.name) @@ -51,9 +49,10 @@ class InlineBijectorTest(test.TestCase): self.assertAllClose(x, inline.inverse(y).eval()) self.assertAllClose( -np.sum(np.log(y), axis=-1), - inline.inverse_log_det_jacobian(y).eval()) - self.assertAllClose(-inline.inverse_log_det_jacobian(y).eval(), - inline.forward_log_det_jacobian(x).eval()) + inline.inverse_log_det_jacobian(y, event_ndims=1).eval()) + self.assertAllClose( + -inline.inverse_log_det_jacobian(y, event_ndims=1).eval(), + inline.forward_log_det_jacobian(x, event_ndims=1).eval()) def testShapeGetters(self): with self.test_session(): @@ -62,6 +61,7 @@ class InlineBijectorTest(test.TestCase): forward_event_shape_fn=lambda x: x.as_list() + [1], inverse_event_shape_tensor_fn=lambda x: x[:-1], inverse_event_shape_fn=lambda x: x[:-1], + forward_min_event_ndims=0, name="shape_only") x = tensor_shape.TensorShape([1, 2, 3]) y = tensor_shape.TensorShape([1, 2, 3, 1]) diff --git a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/invert_test.py b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/invert_test.py index 58ba9cedb1437df4e000ce32fe39664afa76c3b5..8b14c8327f08902044f50483f9f8dfe67b58cd70 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/invert_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/invert_test.py @@ -34,9 +34,9 @@ class InvertBijectorTest(test.TestCase): with self.test_session(): for fwd in [ bijectors.Identity(), - bijectors.Exp(event_ndims=1), + bijectors.Exp(), bijectors.Affine(shift=[0., 1.], scale_diag=[2., 3.]), - bijectors.Softplus(event_ndims=1), + bijectors.Softplus(), bijectors.SoftmaxCentered(), ]: rev = bijectors.Invert(fwd) @@ -46,11 +46,11 @@ class InvertBijectorTest(test.TestCase): self.assertAllClose(fwd.inverse(x).eval(), rev.forward(x).eval()) self.assertAllClose(fwd.forward(x).eval(), rev.inverse(x).eval()) self.assertAllClose( - fwd.forward_log_det_jacobian(x).eval(), - 
rev.inverse_log_det_jacobian(x).eval()) + fwd.forward_log_det_jacobian(x, event_ndims=1).eval(), + rev.inverse_log_det_jacobian(x, event_ndims=1).eval()) self.assertAllClose( - fwd.inverse_log_det_jacobian(x).eval(), - rev.forward_log_det_jacobian(x).eval()) + fwd.inverse_log_det_jacobian(x, event_ndims=1).eval(), + rev.forward_log_det_jacobian(x, event_ndims=1).eval()) def testScalarCongruency(self): with self.test_session(): diff --git a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/kumaraswamy_bijector_test.py b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/kumaraswamy_bijector_test.py index 074b5f275d107fa49de42df262476bd4aa48ffae..a8089881f684db9f8876d6dd738e52bf2f1f7606 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/kumaraswamy_bijector_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/kumaraswamy_bijector_test.py @@ -34,8 +34,7 @@ class KumaraswamyBijectorTest(test.TestCase): a = 2. b = 0.3 bijector = Kumaraswamy( - concentration1=a, concentration0=b, - event_ndims=0, validate_args=True) + concentration1=a, concentration0=b, validate_args=True) self.assertEqual("kumaraswamy", bijector.name) x = np.array([[[0.1], [0.2], [0.3], [0.4], [0.5]]], dtype=np.float32) # Kumaraswamy cdf. This is the same as inverse(x). @@ -46,13 +45,11 @@ class KumaraswamyBijectorTest(test.TestCase): (b - 1) * np.log1p(-x ** a)) self.assertAllClose( - # We should lose a dimension from calculating the determinant of the - # jacobian. - kumaraswamy_log_pdf, - bijector.inverse_log_det_jacobian(x).eval()) + np.squeeze(kumaraswamy_log_pdf, axis=-1), + bijector.inverse_log_det_jacobian(x, event_ndims=1).eval()) self.assertAllClose( - -bijector.inverse_log_det_jacobian(x).eval(), - bijector.forward_log_det_jacobian(y).eval(), + -bijector.inverse_log_det_jacobian(x, event_ndims=1).eval(), + bijector.forward_log_det_jacobian(y, event_ndims=1).eval(), rtol=1e-4, atol=0.) 
@@ -73,7 +70,7 @@ class KumaraswamyBijectorTest(test.TestCase): # endpoints. y = np.linspace(.01, 0.99, num=10).astype(np.float32) x = 1 - (1 - y ** concentration1) ** concentration0 - assert_bijective_and_finite(bijector, x, y, rtol=1e-3) + assert_bijective_and_finite(bijector, x, y, event_ndims=0, rtol=1e-3) if __name__ == "__main__": diff --git a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/masked_autoregressive_test.py b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/masked_autoregressive_test.py index dcfb0eb05185d36d96947905c2eb91b2201aece1..5ba5a2083bf11791d7d58146dc2e6283b524d241 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/masked_autoregressive_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/masked_autoregressive_test.py @@ -79,9 +79,10 @@ class MaskedAutoregressiveFlowTest(test_util.VectorDistributionTestHelpers, forward_x = ma.forward(x) # Use identity to invalidate cache. inverse_y = ma.inverse(array_ops.identity(forward_x)) - fldj = ma.forward_log_det_jacobian(x) + fldj = ma.forward_log_det_jacobian(x, event_ndims=1) # Use identity to invalidate cache. 
- ildj = ma.inverse_log_det_jacobian(array_ops.identity(forward_x)) + ildj = ma.inverse_log_det_jacobian( + array_ops.identity(forward_x), event_ndims=1) variables.global_variables_initializer().run() [ forward_x_, diff --git a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/permute_test.py b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/permute_test.py index 54590de373441c32cc3214cb04d45cfc2d1807ed..7eef4ab599951bbb624652f13a0091363b36b93d 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/permute_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/permute_test.py @@ -53,8 +53,8 @@ class PermuteBijectorTest(test.TestCase): bijector.permutation, bijector.inverse(expected_y), bijector.forward(expected_x), - bijector.forward_log_det_jacobian(expected_x), - bijector.inverse_log_det_jacobian(expected_y), + bijector.forward_log_det_jacobian(expected_x, event_ndims=1), + bijector.inverse_log_det_jacobian(expected_y, event_ndims=1), ], feed_dict={permutation_ph: expected_permutation}) self.assertEqual("permute", bijector.name) self.assertAllEqual(expected_permutation, permutation_) @@ -78,10 +78,9 @@ class PermuteBijectorTest(test.TestCase): x = np.random.randn(4, 2, 3) y = x[..., permutation] with self.test_session(): - bijector = Permute( - permutation=permutation, - validate_args=True) - assert_bijective_and_finite(bijector, x, y, rtol=1e-6, atol=0) + bijector = Permute(permutation=permutation, validate_args=True) + assert_bijective_and_finite( + bijector, x, y, event_ndims=1, rtol=1e-6, atol=0) if __name__ == "__main__": test.main() diff --git a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/power_transform_test.py b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/power_transform_test.py index de1659aa9f4d0f7d19ec2e8185715573b78eaf2b..85d22830132816cd6c77cd0b07870f3a22ae9798 100644 --- 
a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/power_transform_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/power_transform_test.py @@ -32,8 +32,7 @@ class PowerTransformBijectorTest(test.TestCase): def testBijector(self): with self.test_session(): c = 0.2 - bijector = PowerTransform( - power=c, event_ndims=1, validate_args=True) + bijector = PowerTransform(power=c, validate_args=True) self.assertEqual("power_transform", bijector.name) x = np.array([[[-1.], [2.], [-5. + 1e-4]]]) y = (1. + x * c)**(1. / c) @@ -41,27 +40,25 @@ class PowerTransformBijectorTest(test.TestCase): self.assertAllClose(x, bijector.inverse(y).eval()) self.assertAllClose( (c - 1.) * np.sum(np.log(y), axis=-1), - bijector.inverse_log_det_jacobian(y).eval()) + bijector.inverse_log_det_jacobian(y, event_ndims=1).eval()) self.assertAllClose( - -bijector.inverse_log_det_jacobian(y).eval(), - bijector.forward_log_det_jacobian(x).eval(), + -bijector.inverse_log_det_jacobian(y, event_ndims=1).eval(), + bijector.forward_log_det_jacobian(x, event_ndims=1).eval(), rtol=1e-4, atol=0.) 
def testScalarCongruency(self): with self.test_session(): - bijector = PowerTransform( - power=0.2, validate_args=True) + bijector = PowerTransform(power=0.2, validate_args=True) assert_scalar_congruency( bijector, lower_x=-2., upper_x=1.5, rtol=0.05) def testBijectiveAndFinite(self): with self.test_session(): - bijector = PowerTransform( - power=0.2, event_ndims=0, validate_args=True) + bijector = PowerTransform(power=0.2, validate_args=True) x = np.linspace(-4.999, 10, num=10).astype(np.float32) y = np.logspace(0.001, 10, num=10).astype(np.float32) - assert_bijective_and_finite(bijector, x, y, rtol=1e-3) + assert_bijective_and_finite(bijector, x, y, event_ndims=0, rtol=1e-3) if __name__ == "__main__": diff --git a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/real_nvp_test.py b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/real_nvp_test.py index 46fe7797419a9906ecdad60dd0dfe1e9d7c743ed..2d52895fbe0967cdd2260d6d298a291286858d09 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/real_nvp_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/real_nvp_test.py @@ -52,24 +52,28 @@ class RealNVPTest(test_util.VectorDistributionTestHelpers, test.TestCase): forward_x = nvp.forward(x) # Use identity to invalidate cache. inverse_y = nvp.inverse(array_ops.identity(forward_x)) - fldj = nvp.forward_log_det_jacobian(x) + forward_inverse_y = nvp.forward(inverse_y) + fldj = nvp.forward_log_det_jacobian(x, event_ndims=1) # Use identity to invalidate cache. - ildj = nvp.inverse_log_det_jacobian(array_ops.identity(forward_x)) + ildj = nvp.inverse_log_det_jacobian( + array_ops.identity(forward_x), event_ndims=1) variables.global_variables_initializer().run() [ forward_x_, inverse_y_, + forward_inverse_y_, ildj_, fldj_, ] = sess.run([ forward_x, inverse_y, + forward_inverse_y, ildj, fldj, ]) self.assertEqual("real_nvp", nvp.name) - self.assertAllClose(forward_x_, forward_x_, rtol=1e-6, atol=0.) 
- self.assertAllClose(x_, inverse_y_, rtol=1e-5, atol=0.) + self.assertAllClose(forward_x_, forward_inverse_y_, rtol=1e-1, atol=0.) + self.assertAllClose(x_, inverse_y_, rtol=1e-1, atol=0.) self.assertAllClose(ildj_, -fldj_, rtol=1e-6, atol=0.) def testMutuallyConsistent(self): diff --git a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/reshape_test.py b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/reshape_test.py index e216d88cb190dc16fc0056186f80817d6f2d7c67..46f2c63f9b0f78b25bb1948e6ea55ab20c5cfa6e 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/reshape_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/reshape_test.py @@ -65,8 +65,8 @@ class _ReshapeBijectorTest(object): ildj_) = sess.run(( bijector.inverse(expected_y), bijector.forward(expected_x), - bijector.forward_log_det_jacobian(expected_x), - bijector.inverse_log_det_jacobian(expected_y), + bijector.forward_log_det_jacobian(expected_x, event_ndims=2), + bijector.inverse_log_det_jacobian(expected_y, event_ndims=2), ), feed_dict=feed_dict) self.assertEqual("reshape", bijector.name) self.assertAllClose(expected_y, y_, rtol=1e-6, atol=0) @@ -301,7 +301,8 @@ class ReshapeBijectorTestStatic(test.TestCase, _ReshapeBijectorTest): event_shape_in=[2, 3], event_shape_out=[1, 2, 3], validate_args=True) - assert_bijective_and_finite(bijector, x, y, rtol=1e-6, atol=0) + assert_bijective_and_finite( + bijector, x, y, event_ndims=2, rtol=1e-6, atol=0) def testInvalidDimensionsOpError(self): if ops._USE_C_API: diff --git a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/sigmoid_test.py b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/sigmoid_test.py index e4f9d72785c301284812a48c0a67614ca439ffae..cea4a62c22af5d98d38ee881b29c773e6a27a4b4 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/sigmoid_test.py +++ 
b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/sigmoid_test.py @@ -36,12 +36,13 @@ class SigmoidBijectorTest(test.TestCase): x = np.linspace(-10., 10., 100).reshape([2, 5, 10]).astype(np.float32) y = special.expit(x) ildj = -np.log(y) - np.log1p(-y) - self.assertAllClose(y, Sigmoid().forward(x).eval(), atol=0., rtol=1e-2) - self.assertAllClose(x, Sigmoid().inverse(y).eval(), atol=0., rtol=1e-4) - self.assertAllClose(ildj, Sigmoid().inverse_log_det_jacobian(y).eval(), - atol=0., rtol=1e-6) - self.assertAllClose(-ildj, Sigmoid().forward_log_det_jacobian(x).eval(), - atol=0., rtol=1e-4) + bijector = Sigmoid() + self.assertAllClose(y, bijector.forward(x).eval(), atol=0., rtol=1e-2) + self.assertAllClose(x, bijector.inverse(y).eval(), atol=0., rtol=1e-4) + self.assertAllClose(ildj, bijector.inverse_log_det_jacobian( + y, event_ndims=0).eval(), atol=0., rtol=1e-6) + self.assertAllClose(-ildj, bijector.forward_log_det_jacobian( + x, event_ndims=0).eval(), atol=0., rtol=1e-4) def testScalarCongruency(self): with self.test_session(): @@ -52,7 +53,8 @@ class SigmoidBijectorTest(test.TestCase): x = np.linspace(-7., 7., 100).astype(np.float32) eps = 1e-3 y = np.linspace(eps, 1. 
- eps, 100).astype(np.float32) - assert_bijective_and_finite(Sigmoid(), x, y, atol=0., rtol=1e-4) + assert_bijective_and_finite( + Sigmoid(), x, y, event_ndims=0, atol=0., rtol=1e-4) if __name__ == "__main__": diff --git a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/sinh_arcsinh_bijector_test.py b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/sinh_arcsinh_bijector_test.py index 172c180a44229089f06f250a872bc47a89991cf0..45760a29ee42835da69ef63803ccec7ce82a5a8f 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/sinh_arcsinh_bijector_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/sinh_arcsinh_bijector_test.py @@ -39,7 +39,6 @@ class SinhArcsinhBijectorTest(test.TestCase): bijector = SinhArcsinh( skewness=skewness, tailweight=tailweight, - event_ndims=1, validate_args=True) self.assertEqual("SinhArcsinh", bijector.name) x = np.array([[[-2.01], [2.], [1e-4]]]).astype(np.float32) @@ -50,10 +49,11 @@ class SinhArcsinhBijectorTest(test.TestCase): np.sum( np.log(np.cosh(np.arcsinh(y) / tailweight - skewness)) - np.log(tailweight) - np.log(np.sqrt(y**2 + 1)), - axis=-1), bijector.inverse_log_det_jacobian(y).eval()) + axis=-1), + bijector.inverse_log_det_jacobian(y, event_ndims=1).eval()) self.assertAllClose( - -bijector.inverse_log_det_jacobian(y).eval(), - bijector.forward_log_det_jacobian(x).eval(), + -bijector.inverse_log_det_jacobian(y, event_ndims=1).eval(), + bijector.forward_log_det_jacobian(x, event_ndims=1).eval(), rtol=1e-4, atol=0.) 
@@ -106,14 +106,15 @@ class SinhArcsinhBijectorTest(test.TestCase): bijector = SinhArcsinh(skewness=-1., tailweight=0.5, validate_args=True) x = np.concatenate((-np.logspace(-2, 10, 1000), [0], np.logspace( -2, 10, 1000))).astype(np.float32) - assert_bijective_and_finite(bijector, x, x, rtol=1e-3) + assert_bijective_and_finite(bijector, x, x, event_ndims=0, rtol=1e-3) def testBijectiveAndFiniteSkewness1Tailweight3(self): with self.test_session(): bijector = SinhArcsinh(skewness=1., tailweight=3., validate_args=True) x = np.concatenate((-np.logspace(-2, 5, 1000), [0], np.logspace( -2, 5, 1000))).astype(np.float32) - assert_bijective_and_finite(bijector, x, x, rtol=1e-3) + assert_bijective_and_finite( + bijector, x, x, event_ndims=0, rtol=1e-3) def testBijectorEndpoints(self): with self.test_session(): @@ -124,7 +125,8 @@ class SinhArcsinhBijectorTest(test.TestCase): [np.finfo(dtype).min, np.finfo(dtype).max], dtype=dtype) # Note that the above bijector is the identity bijector. Hence, the # log_det_jacobian will be 0. Because of this we use atol. - assert_bijective_and_finite(bijector, bounds, bounds, atol=2e-6) + assert_bijective_and_finite( + bijector, bounds, bounds, event_ndims=0, atol=2e-6) def testBijectorOverRange(self): with self.test_session(): @@ -156,12 +158,12 @@ class SinhArcsinhBijectorTest(test.TestCase): np.arcsinh(y_float128) / tailweight - skewness) / np.sqrt( y_float128**2 + 1)) - np.log(tailweight), - bijector.inverse_log_det_jacobian(y).eval(), + bijector.inverse_log_det_jacobian(y, event_ndims=0).eval(), rtol=1e-4, atol=0.) self.assertAllClose( - -bijector.inverse_log_det_jacobian(y).eval(), - bijector.forward_log_det_jacobian(x).eval(), + -bijector.inverse_log_det_jacobian(y, event_ndims=0).eval(), + bijector.forward_log_det_jacobian(x, event_ndims=0).eval(), rtol=1e-4, atol=0.) 
diff --git a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/softmax_centered_test.py b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/softmax_centered_test.py index cad4dd1ac8de0da6405aacb9047714b37eec73e3..0f0a2fa531a0585a709df4c2c3e2631e5c275986 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/softmax_centered_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/softmax_centered_test.py @@ -44,12 +44,12 @@ class SoftmaxCenteredBijectorTest(test.TestCase): self.assertAllClose(x, softmax.inverse(y).eval()) self.assertAllClose( -np.sum(np.log(y), axis=1), - softmax.inverse_log_det_jacobian(y).eval(), + softmax.inverse_log_det_jacobian(y, event_ndims=1).eval(), atol=0., rtol=1e-7) self.assertAllClose( - -softmax.inverse_log_det_jacobian(y).eval(), - softmax.forward_log_det_jacobian(x).eval(), + -softmax.inverse_log_det_jacobian(y, event_ndims=1).eval(), + softmax.forward_log_det_jacobian(x, event_ndims=1).eval(), atol=0., rtol=1e-7) @@ -67,14 +67,14 @@ class SoftmaxCenteredBijectorTest(test.TestCase): feed_dict={y: real_y})) self.assertAllClose( -np.sum(np.log(real_y), axis=1), - softmax.inverse_log_det_jacobian(y).eval( + softmax.inverse_log_det_jacobian(y, event_ndims=1).eval( feed_dict={y: real_y}), atol=0., rtol=1e-7) self.assertAllClose( - -softmax.inverse_log_det_jacobian(y).eval( + -softmax.inverse_log_det_jacobian(y, event_ndims=1).eval( feed_dict={y: real_y}), - softmax.forward_log_det_jacobian(x).eval( + softmax.forward_log_det_jacobian(x, event_ndims=1).eval( feed_dict={x: real_x}), atol=0., rtol=1e-7) @@ -104,7 +104,7 @@ class SoftmaxCenteredBijectorTest(test.TestCase): y = np.array([y_0, y_1, y_2]) y /= y.sum(axis=0) y = y.T # y.shape = [5, 3] - assert_bijective_and_finite(softmax, x, y) + assert_bijective_and_finite(softmax, x, y, event_ndims=1) if __name__ == "__main__": diff --git a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/softplus_test.py 
b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/softplus_test.py index d9af9aec50d3d69bb10f69f2ffd6ca3a24c316f8..3d8a0a32bba3539f732140e8eb7ebeb532d73ff5 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/softplus_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/softplus_test.py @@ -43,13 +43,13 @@ class SoftplusBijectorTest(test.TestCase): def testHingeSoftnessZeroRaises(self): with self.test_session(): - bijector = Softplus(event_ndims=0, hinge_softness=0., validate_args=True) + bijector = Softplus(hinge_softness=0., validate_args=True) with self.assertRaisesOpError("must be non-zero"): bijector.forward([1., 1.]).eval() def testBijectorForwardInverseEventDimsZero(self): with self.test_session(): - bijector = Softplus(event_ndims=0) + bijector = Softplus() self.assertEqual("softplus", bijector.name) x = 2 * rng.randn(2, 10) y = self._softplus(x) @@ -59,7 +59,7 @@ class SoftplusBijectorTest(test.TestCase): def testBijectorForwardInverseWithHingeSoftnessEventDimsZero(self): with self.test_session(): - bijector = Softplus(event_ndims=0, hinge_softness=1.5) + bijector = Softplus(hinge_softness=1.5) x = 2 * rng.randn(2, 10) y = 1.5 * self._softplus(x / 1.5) @@ -68,16 +68,17 @@ class SoftplusBijectorTest(test.TestCase): def testBijectorLogDetJacobianEventDimsZero(self): with self.test_session(): - bijector = Softplus(event_ndims=0) + bijector = Softplus() y = 2 * rng.rand(2, 10) # No reduction needed if event_dims = 0. 
ildj = self._softplus_ildj_before_reduction(y) - self.assertAllClose(ildj, bijector.inverse_log_det_jacobian(y).eval()) + self.assertAllClose(ildj, bijector.inverse_log_det_jacobian( + y, event_ndims=0).eval()) def testBijectorForwardInverseEventDimsOne(self): with self.test_session(): - bijector = Softplus(event_ndims=1) + bijector = Softplus() self.assertEqual("softplus", bijector.name) x = 2 * rng.randn(2, 10) y = self._softplus(x) @@ -87,58 +88,59 @@ class SoftplusBijectorTest(test.TestCase): def testBijectorLogDetJacobianEventDimsOne(self): with self.test_session(): - bijector = Softplus(event_ndims=1) + bijector = Softplus() y = 2 * rng.rand(2, 10) ildj_before = self._softplus_ildj_before_reduction(y) ildj = np.sum(ildj_before, axis=1) - self.assertAllClose(ildj, bijector.inverse_log_det_jacobian(y).eval()) + self.assertAllClose(ildj, bijector.inverse_log_det_jacobian( + y, event_ndims=1).eval()) def testScalarCongruency(self): with self.test_session(): - bijector = Softplus(event_ndims=0) + bijector = Softplus() assert_scalar_congruency( bijector, lower_x=-2., upper_x=2.) def testScalarCongruencyWithPositiveHingeSoftness(self): with self.test_session(): - bijector = Softplus(event_ndims=0, hinge_softness=1.3) + bijector = Softplus(hinge_softness=1.3) assert_scalar_congruency( bijector, lower_x=-2., upper_x=2.) def testScalarCongruencyWithNegativeHingeSoftness(self): with self.test_session(): - bijector = Softplus(event_ndims=0, hinge_softness=-1.3) + bijector = Softplus(hinge_softness=-1.3) assert_scalar_congruency( bijector, lower_x=-2., upper_x=2.) 
def testBijectiveAndFinite32bit(self): with self.test_session(): - bijector = Softplus(event_ndims=0) + bijector = Softplus() x = np.linspace(-20., 20., 100).astype(np.float32) y = np.logspace(-10, 10, 100).astype(np.float32) assert_bijective_and_finite( - bijector, x, y, rtol=1e-2, atol=1e-2) + bijector, x, y, event_ndims=0, rtol=1e-2, atol=1e-2) def testBijectiveAndFiniteWithPositiveHingeSoftness32Bit(self): with self.test_session(): - bijector = Softplus(event_ndims=0, hinge_softness=1.23) + bijector = Softplus(hinge_softness=1.23) x = np.linspace(-20., 20., 100).astype(np.float32) y = np.logspace(-10, 10, 100).astype(np.float32) assert_bijective_and_finite( - bijector, x, y, rtol=1e-2, atol=1e-2) + bijector, x, y, event_ndims=0, rtol=1e-2, atol=1e-2) def testBijectiveAndFiniteWithNegativeHingeSoftness32Bit(self): with self.test_session(): - bijector = Softplus(event_ndims=0, hinge_softness=-0.7) + bijector = Softplus(hinge_softness=-0.7) x = np.linspace(-20., 20., 100).astype(np.float32) y = -np.logspace(-10, 10, 100).astype(np.float32) assert_bijective_and_finite( - bijector, x, y, rtol=1e-2, atol=1e-2) + bijector, x, y, event_ndims=0, rtol=1e-2, atol=1e-2) def testBijectiveAndFinite16bit(self): with self.test_session(): - bijector = Softplus(event_ndims=0) + bijector = Softplus() # softplus(-20) is zero, so we can't use such a large range as in 32bit. x = np.linspace(-10., 20., 100).astype(np.float16) # Note that float16 is only in the open set (0, inf) for a smaller @@ -146,7 +148,7 @@ class SoftplusBijectorTest(test.TestCase): # for the test. 
y = np.logspace(-6, 3, 100).astype(np.float16) assert_bijective_and_finite( - bijector, x, y, rtol=1e-1, atol=1e-3) + bijector, x, y, event_ndims=0, rtol=1e-1, atol=1e-3) if __name__ == "__main__": diff --git a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/softsign_test.py b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/softsign_test.py new file mode 100644 index 0000000000000000000000000000000000000000..2ac06fce55b448a5f3da7ccb7f8766b5b1404ad7 --- /dev/null +++ b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/softsign_test.py @@ -0,0 +1,111 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for Bijector.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.contrib.distributions.python.ops.bijectors.softsign import Softsign +from tensorflow.python.framework import test_util +from tensorflow.python.ops.distributions.bijector_test_util import assert_bijective_and_finite +from tensorflow.python.ops.distributions.bijector_test_util import assert_scalar_congruency +from tensorflow.python.platform import test + + +class SoftsignBijectorTest(test.TestCase): + """Tests the correctness of the Y = g(X) = X / (1 + |X|) transformation.""" + + def _softsign(self, x): + return x / (1. + np.abs(x)) + + def _softsign_ildj_before_reduction(self, y): + """Inverse log det jacobian, before being reduced.""" + return -2. * np.log1p(-np.abs(y)) + + def setUp(self): + self._rng = np.random.RandomState(42) + + @test_util.run_in_graph_and_eager_modes() + def testBijectorBounds(self): + bijector = Softsign(validate_args=True) + with self.test_session(): + with self.assertRaisesOpError("greater than -1"): + bijector.inverse(-3.).eval() + with self.assertRaisesOpError("greater than -1"): + bijector.inverse_log_det_jacobian(-3., event_ndims=0).eval() + + with self.assertRaisesOpError("less than 1"): + bijector.inverse(3.).eval() + with self.assertRaisesOpError("less than 1"): + bijector.inverse_log_det_jacobian(3., event_ndims=0).eval() + + @test_util.run_in_graph_and_eager_modes() + def testBijectorForwardInverse(self): + bijector = Softsign(validate_args=True) + self.assertEqual("softsign", bijector.name) + x = 2. 
* self._rng.randn(2, 10) + y = self._softsign(x) + + self.assertAllClose(y, self.evaluate(bijector.forward(x))) + self.assertAllClose(x, self.evaluate(bijector.inverse(y))) + + @test_util.run_in_graph_and_eager_modes() + def testBijectorLogDetJacobianEventDimsZero(self): + bijector = Softsign(validate_args=True) + y = self._rng.rand(2, 10) + # No reduction needed if event_dims = 0. + ildj = self._softsign_ildj_before_reduction(y) + + self.assertAllClose(ildj, self.evaluate( + bijector.inverse_log_det_jacobian(y, event_ndims=0))) + + @test_util.run_in_graph_and_eager_modes() + def testBijectorForwardInverseEventDimsOne(self): + bijector = Softsign(validate_args=True) + self.assertEqual("softsign", bijector.name) + x = 2. * self._rng.randn(2, 10) + y = self._softsign(x) + self.assertAllClose(y, self.evaluate(bijector.forward(x))) + self.assertAllClose(x, self.evaluate(bijector.inverse(y))) + + @test_util.run_in_graph_and_eager_modes() + def testBijectorLogDetJacobianEventDimsOne(self): + bijector = Softsign(validate_args=True) + y = self._rng.rand(2, 10) + ildj_before = self._softsign_ildj_before_reduction(y) + ildj = np.sum(ildj_before, axis=1) + self.assertAllClose( + ildj, self.evaluate( + bijector.inverse_log_det_jacobian(y, event_ndims=1))) + + def testScalarCongruency(self): + with self.test_session(): + bijector = Softsign(validate_args=True) + assert_scalar_congruency(bijector, lower_x=-20., upper_x=20.) 
+ + def testBijectiveAndFinite(self): + with self.test_session(): + bijector = Softsign(validate_args=True) + x = np.linspace(-20., 20., 100).astype(np.float32) + y = np.linspace(-0.99, 0.99, 100).astype(np.float32) + assert_bijective_and_finite( + bijector, x, y, event_ndims=0, rtol=1e-3, atol=1e-3) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/square_test.py b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/square_test.py index f03d6f1343a11ae4517f9034ceb0c99ca6fe7fa2..30c7a738c320b609ce90685512e6b8344dffc9dc 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/square_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/square_test.py @@ -41,10 +41,11 @@ class SquareBijectorTest(test.TestCase): self.assertAllClose(y, bijector.forward(x).eval()) self.assertAllClose(x, bijector.inverse(y).eval()) self.assertAllClose( - ildj, bijector.inverse_log_det_jacobian(y).eval(), atol=0., rtol=1e-7) + ildj, bijector.inverse_log_det_jacobian( + y, event_ndims=0).eval(), atol=0., rtol=1e-7) self.assertAllClose( - -bijector.inverse_log_det_jacobian(y).eval(), - bijector.forward_log_det_jacobian(x).eval(), + -bijector.inverse_log_det_jacobian(y, event_ndims=0).eval(), + bijector.forward_log_det_jacobian(x, event_ndims=0).eval(), atol=0., rtol=1e-7) diff --git a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/weibull_test.py b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/weibull_test.py index 7a31228d1ade55ce32b511dca073657d3bab53ae..f57adcda898a1fdb18aacbb0804411db1bb4e4c8 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/weibull_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/weibull_test.py @@ -36,7 +36,7 @@ class WeibullBijectorTest(test.TestCase): concentration = 0.3 bijector = Weibull( scale=scale, concentration=concentration, - event_ndims=1, 
validate_args=True) + validate_args=True) self.assertEqual("weibull", bijector.name) x = np.array([[[0.], [1.], [14.], [20.], [100.]]], dtype=np.float32) # Weibull distribution @@ -45,13 +45,11 @@ class WeibullBijectorTest(test.TestCase): self.assertAllClose(y, bijector.forward(x).eval()) self.assertAllClose(x, bijector.inverse(y).eval()) self.assertAllClose( - # We should lose a dimension from calculating the determinant of the - # jacobian. - np.squeeze(weibull_dist.logpdf(x), axis=2), - bijector.forward_log_det_jacobian(x).eval()) + weibull_dist.logpdf(x), + bijector.forward_log_det_jacobian(x, event_ndims=0).eval()) self.assertAllClose( - -bijector.inverse_log_det_jacobian(y).eval(), - bijector.forward_log_det_jacobian(x).eval(), + -bijector.inverse_log_det_jacobian(y, event_ndims=0).eval(), + bijector.forward_log_det_jacobian(x, event_ndims=0).eval(), rtol=1e-4, atol=0.) @@ -64,12 +62,12 @@ class WeibullBijectorTest(test.TestCase): def testBijectiveAndFinite(self): with self.test_session(): bijector = Weibull( - scale=20., concentration=2., event_ndims=0, validate_args=True) + scale=20., concentration=2., validate_args=True) x = np.linspace(1., 8., num=10).astype(np.float32) y = np.linspace( -np.expm1(-1 / 400.), -np.expm1(-16), num=10).astype(np.float32) - assert_bijective_and_finite(bijector, x, y, rtol=1e-3) + assert_bijective_and_finite(bijector, x, y, event_ndims=0, rtol=1e-3) if __name__ == "__main__": diff --git a/tensorflow/contrib/distributions/python/kernel_tests/conditional_transformed_distribution_test.py b/tensorflow/contrib/distributions/python/kernel_tests/conditional_transformed_distribution_test.py index 545471907f1eabc822b3d28ea9c57e183a09ff50..4e8989b6c2f93560b1fccbc99491d7809f494263 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/conditional_transformed_distribution_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/conditional_transformed_distribution_test.py @@ -44,6 +44,7 @@ class 
_ChooseLocation(ConditionalBijector): graph_parents=[self._loc], is_constant_jacobian=True, validate_args=False, + forward_min_event_ndims=0, name=name) def _forward(self, x, z): @@ -52,7 +53,7 @@ class _ChooseLocation(ConditionalBijector): def _inverse(self, x, z): return x - self._gather_loc(z) - def _inverse_log_det_jacobian(self, x, z=None): + def _inverse_log_det_jacobian(self, x, event_ndims, z=None): return 0. def _gather_loc(self, z): diff --git a/tensorflow/contrib/distributions/python/kernel_tests/distribution_test.py b/tensorflow/contrib/distributions/python/kernel_tests/distribution_test.py index 68e0d9cb8277f3953039963fec0da499db7a16d1..f42feae25d851eb9ae0bf48649fc3bbe2a221be0 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/distribution_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/distribution_test.py @@ -190,11 +190,30 @@ class DistributionTest(test.TestCase): y = dist._set_sample_static_shape(x, sample_shape) self.assertTrue(y.get_shape().ndims is None) + def testNameScopeWorksCorrectly(self): + x = tfd.Normal(loc=0., scale=1., name="x") + x_duplicate = tfd.Normal(loc=0., scale=1., name="x") + with ops.name_scope("y") as name: + y = tfd.Bernoulli(logits=0., name=name) + x_sample = x.sample(name="custom_sample") + x_sample_duplicate = x.sample(name="custom_sample") + x_log_prob = x.log_prob(0., name="custom_log_prob") + x_duplicate_sample = x_duplicate.sample(name="custom_sample") + + self.assertEqual(x.name, "x/") + self.assertEqual(x_duplicate.name, "x_1/") + self.assertEqual(y.name, "y/") + self.assertTrue(x_sample.name.startswith("x/custom_sample")) + self.assertTrue(x_sample_duplicate.name.startswith("x/custom_sample_1")) + self.assertTrue(x_log_prob.name.startswith("x/custom_log_prob")) + self.assertTrue(x_duplicate_sample.name.startswith( + "x_1/custom_sample")) + def testStrWorksCorrectlyScalar(self): normal = tfd.Normal(loc=np.float16(0), scale=np.float16(1)) self.assertEqual( 
("tf.distributions.Normal(" - "\"Normal\", " + "\"Normal/\", " "batch_shape=(), " "event_shape=(), " "dtype=float16)"), # Got the dtype right. @@ -203,7 +222,7 @@ class DistributionTest(test.TestCase): chi2 = tfd.Chi2(df=np.float32([1., 2.]), name="silly") self.assertEqual( ("tf.distributions.Chi2(" - "\"silly\", " # What a silly name that is! + "\"silly/\", " # What a silly name that is! "batch_shape=(2,), " "event_shape=(), " "dtype=float32)"), @@ -211,7 +230,7 @@ class DistributionTest(test.TestCase): exp = tfd.Exponential(rate=array_ops.placeholder(dtype=dtypes.float32)) self.assertEqual( - ("tf.distributions.Exponential(\"Exponential\", " + ("tf.distributions.Exponential(\"Exponential/\", " # No batch shape. "event_shape=(), " "dtype=float32)"), @@ -222,7 +241,7 @@ class DistributionTest(test.TestCase): loc=np.zeros([2, 2]), name="MVN") self.assertEqual( ("tf.distributions.MultivariateNormalDiag(" - "\"MVN\", " + "\"MVN/\", " "batch_shape=(2,), " "event_shape=(2,), " "dtype=float64)"), @@ -233,7 +252,7 @@ class DistributionTest(test.TestCase): name="MVN2") self.assertEqual( ("tf.distributions.MultivariateNormalDiag(" - "\"MVN2\", " + "\"MVN2/\", " "batch_shape=(?,), " # Partially known. "event_shape=(3,), " "dtype=float32)"), @@ -243,7 +262,7 @@ class DistributionTest(test.TestCase): normal = tfd.Normal(loc=np.float16(0), scale=np.float16(1)) self.assertEqual( (""), # Got the dtype right. 
@@ -252,7 +271,7 @@ class DistributionTest(test.TestCase): chi2 = tfd.Chi2(df=np.float32([1., 2.]), name="silly") self.assertEqual( (""), @@ -261,7 +280,7 @@ class DistributionTest(test.TestCase): exp = tfd.Exponential(rate=array_ops.placeholder(dtype=dtypes.float32)) self.assertEqual( ("" " event_shape=()" " dtype=float32>"), @@ -272,7 +291,7 @@ class DistributionTest(test.TestCase): loc=np.zeros([2, 2]), name="MVN") self.assertEqual( (""), @@ -283,7 +302,7 @@ class DistributionTest(test.TestCase): name="MVN2") self.assertEqual( (""), diff --git a/tensorflow/contrib/distributions/python/kernel_tests/mvn_diag_test.py b/tensorflow/contrib/distributions/python/kernel_tests/mvn_diag_test.py index 933756aa8e12cca4c42eb98d9193512bbf2ad585..9635134b08db47a47a17c869fe813e0376ae6f1e 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/mvn_diag_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/mvn_diag_test.py @@ -68,7 +68,7 @@ class MultivariateNormalDiagTest(test.TestCase): dist = ds.TransformedDistribution( base_dist, validate_args=True, - bijector=bijectors.Softplus(event_ndims=1)) + bijector=bijectors.Softplus()) samps = dist.sample(5) # Shape [5, 1, 3]. self.assertAllEqual([5, 1], dist.log_prob(samps).get_shape()) diff --git a/tensorflow/contrib/distributions/python/kernel_tests/mvn_full_covariance_test.py b/tensorflow/contrib/distributions/python/kernel_tests/mvn_full_covariance_test.py index 1a02fbefb8e88599f5fedeb38fb06f5a09036439..7435bcbc684c1660a648cef4ab30c888723853f8 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/mvn_full_covariance_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/mvn_full_covariance_test.py @@ -52,7 +52,7 @@ class MultivariateNormalFullCovarianceTest(test.TestCase): mu = [1., 2.] 
sigma = [[1., 0.], [0., 1.]] mvn = ds.MultivariateNormalFullCovariance(mu, sigma, name="Billy") - self.assertEqual(mvn.name, "Billy") + self.assertEqual(mvn.name, "Billy/") def testDoesNotRaiseIfInitializedWithSymmetricMatrix(self): with self.test_session(): diff --git a/tensorflow/contrib/distributions/python/kernel_tests/seed_stream_test.py b/tensorflow/contrib/distributions/python/kernel_tests/seed_stream_test.py new file mode 100644 index 0000000000000000000000000000000000000000..968057331787059240110b90545f70c0ab128aa8 --- /dev/null +++ b/tensorflow/contrib/distributions/python/kernel_tests/seed_stream_test.py @@ -0,0 +1,70 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for the SeedStream class.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.distributions.python.ops import seed_stream +from tensorflow.python.platform import test + + +class SeedStreamTest(test.TestCase): + + def assertAllUnique(self, items): + self.assertEqual(len(items), len(set(items))) + + def testNonRepetition(self): + # The probability of repetitions in a short stream from a correct + # PRNG is negligible; this test catches bugs that prevent state + # updates. 
+ strm = seed_stream.SeedStream(seed=4, salt="salt") + output = [strm() for _ in range(50)] + self.assertEqual(sorted(output), sorted(list(set(output)))) + + def testReproducibility(self): + strm1 = seed_stream.SeedStream(seed=4, salt="salt") + strm2 = seed_stream.SeedStream(seed=4, salt="salt") + strm3 = seed_stream.SeedStream(seed=4, salt="salt") + outputs = [strm1() for _ in range(50)] + self.assertEqual(outputs, [strm2() for _ in range(50)]) + self.assertEqual(outputs, [strm3() for _ in range(50)]) + + def testSeededDistinctness(self): + strm1 = seed_stream.SeedStream(seed=4, salt="salt") + strm2 = seed_stream.SeedStream(seed=5, salt="salt") + self.assertAllUnique( + [strm1() for _ in range(50)] + [strm2() for _ in range(50)]) + + def testSaltedDistinctness(self): + strm1 = seed_stream.SeedStream(seed=4, salt="salt") + strm2 = seed_stream.SeedStream(seed=4, salt="another salt") + self.assertAllUnique( + [strm1() for _ in range(50)] + [strm2() for _ in range(50)]) + + def testNestingRobustness(self): + # SeedStreams started from generated seeds should not collide with + # the master or with each other, even if the salts are the same. 
+ strm1 = seed_stream.SeedStream(seed=4, salt="salt") + strm2 = seed_stream.SeedStream(strm1(), salt="salt") + strm3 = seed_stream.SeedStream(strm1(), salt="salt") + outputs = [strm1() for _ in range(50)] + self.assertAllUnique( + outputs + [strm2() for _ in range(50)] + [strm3() for _ in range(50)]) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/distributions/python/kernel_tests/shape_test.py b/tensorflow/contrib/distributions/python/kernel_tests/shape_test.py index c8d795c3f6afbec5b41755951174439f7703efb9..243b5a034859288b0e2e120f09258cfee77fbdea 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/shape_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/shape_test.py @@ -584,7 +584,6 @@ class DistributionShapeTest(test.TestCase): def testDistributionShapeGetDimsStatic(self): with self.test_session(): - shaper = _DistributionShape(batch_ndims=0, event_ndims=0) shaper = _DistributionShape(batch_ndims=0, event_ndims=0) x = 1 self.assertAllEqual((_empty_shape, _empty_shape, _empty_shape), diff --git a/tensorflow/contrib/distributions/python/kernel_tests/statistical_testing_test.py b/tensorflow/contrib/distributions/python/kernel_tests/statistical_testing_test.py index 0400c80c29cf0c36090168b7a1a6358ad49fde49..ce6cf702d522792f1ad26066a3d9be42003a0e3c 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/statistical_testing_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/statistical_testing_test.py @@ -21,7 +21,7 @@ from __future__ import print_function import numpy as np from tensorflow.contrib.distributions.python.ops import statistical_testing as st -from tensorflow.python.framework import errors +from tensorflow.python.framework import ops from tensorflow.python.platform import test @@ -129,13 +129,13 @@ class StatisticalTestingTest(test.TestCase): # Test that the test assertion confirms that the mean of the # standard uniform distribution is not 0.4. 
- with self.assertRaises(errors.InvalidArgumentError): + with self.assertRaisesOpError("Mean confidence interval too high"): sess.run(st.assert_true_mean_equal_by_dkwm( samples, 0., 1., 0.4, false_fail_rate=1e-6)) # Test that the test assertion confirms that the mean of the # standard uniform distribution is not 0.6. - with self.assertRaises(errors.InvalidArgumentError): + with self.assertRaisesOpError("Mean confidence interval too low"): sess.run(st.assert_true_mean_equal_by_dkwm( samples, 0., 1., 0.6, false_fail_rate=1e-6)) @@ -172,7 +172,7 @@ class StatisticalTestingTest(test.TestCase): # Test that the test assertion confirms that the mean of the # standard uniform distribution is different from the mean of beta(2, 1). beta_high_samples = rng.beta(2, 1, size=num_samples).astype(np.float32) - with self.assertRaises(errors.InvalidArgumentError): + with self.assertRaisesOpError("samples1 has a smaller mean"): sess.run(st.assert_true_mean_equal_by_dkwm_two_sample( samples1, 0., 1., beta_high_samples, 0., 1., @@ -190,7 +190,7 @@ class StatisticalTestingTest(test.TestCase): # Test that the test assertion confirms that the mean of the # standard uniform distribution is different from the mean of beta(1, 2). beta_low_samples = rng.beta(1, 2, size=num_samples).astype(np.float32) - with self.assertRaises(errors.InvalidArgumentError): + with self.assertRaisesOpError("samples2 has a smaller mean"): sess.run(st.assert_true_mean_equal_by_dkwm_two_sample( samples1, 0., 1., beta_low_samples, 0., 1., @@ -198,23 +198,46 @@ class StatisticalTestingTest(test.TestCase): def test_dkwm_argument_validity_checking(self): rng = np.random.RandomState(seed=0) - samples = rng.uniform(size=5000).astype(np.float32) + samples = rng.uniform( + low=[0., 1.], high=[1., 2.], size=(2500, 1, 2)).astype(np.float32) # Test that the test library complains if the given samples fall # outside the purported bounds. 
with self.test_session() as sess: - with self.assertRaises(errors.InvalidArgumentError): + with self.assertRaisesOpError("maximum value exceeds expectations"): sess.run(st.true_mean_confidence_interval_by_dkwm( - samples, 0., 0.5, error_rate=0.5)) - with self.assertRaises(errors.InvalidArgumentError): + samples, [[0., 1.]], [[0.5, 1.5]], error_rate=0.5)) + with self.assertRaisesOpError("minimum value falls below expectations"): sess.run(st.true_mean_confidence_interval_by_dkwm( - samples, 0.5, 1., error_rate=0.5)) + samples, [[0.5, 1.5]], [[1., 2.]], error_rate=0.5)) # But doesn't complain if they don't. op = st.true_mean_confidence_interval_by_dkwm( - samples, 0., 1., error_rate=0.5) + samples, [[0., 1.]], [[1., 2.]], error_rate=0.5) _ = sess.run(op) + def test_do_maximum_mean(self): + n = 117 + envelope = 0.02 # > 2 / n, but < 3 / n + rng = np.random.RandomState(seed=8) + samples = rng.uniform(size=n).astype(np.float32) + + # Compute the answer in TF using the code under test + with self.test_session() as sess: + envelope_t = ops.convert_to_tensor(envelope) + max_mean = st._do_maximum_mean(samples, envelope_t, 1) + max_mean = sess.run(max_mean) + + # Compute the correct answer for this case in numpy. In this + # example, `n` and `envelope` are such that `samples[2]` is the + # element that should be taken partially, regardless of the + # content of the `samples` array (see algorithm description in + # `../ops/statistical_testing.py`). + samples = sorted(samples) + weight = 1. / n - (envelope - 2. / n) + answer = samples[2] * weight + sum(samples[3:]) / n + envelope * 1. 
+ self.assertAllClose(max_mean, answer, rtol=1e-9) + if __name__ == '__main__': test.main() diff --git a/tensorflow/contrib/distributions/python/kernel_tests/transformed_distribution_test.py b/tensorflow/contrib/distributions/python/kernel_tests/transformed_distribution_test.py index f0ba1ec3eb57c67c1a0edb15639e91916a4509b7..5fe1331d2c34612e980c7b376367cd63b627533d 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/transformed_distribution_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/transformed_distribution_test.py @@ -28,6 +28,7 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import array_ops +from tensorflow.python.ops import linalg_ops from tensorflow.python.ops import math_ops from tensorflow.python.platform import test @@ -36,6 +37,35 @@ ds = distributions la = linalg +class DummyMatrixTransform(bs.Bijector): + """Tractable matrix transformation. + + This is a non-sensical bijector that has forward/inverse_min_event_ndims=2. + The main use is to check that transformed distribution calculations are done + appropriately. + """ + + def __init__(self): + super(DummyMatrixTransform, self).__init__( + forward_min_event_ndims=2, + is_constant_jacobian=False, + validate_args=False, + name="dummy") + + def _forward(self, x): + return x + + def _inverse(self, y): + return y + + # Note: These jacobians don't make sense. + def _forward_log_det_jacobian(self, x): + return -linalg_ops.matrix_determinant(x) + + def _inverse_log_det_jacobian(self, x): + return linalg_ops.matrix_determinant(x) + + class TransformedDistributionTest(test.TestCase): def _cls(self): @@ -55,7 +85,7 @@ class TransformedDistributionTest(test.TestCase): # you may or may not need a reduce_sum. 
log_normal = self._cls()( distribution=ds.Normal(loc=mu, scale=sigma), - bijector=bs.Exp(event_ndims=0)) + bijector=bs.Exp()) sp_dist = stats.lognorm(s=sigma, scale=np.exp(mu)) # sample @@ -87,7 +117,7 @@ class TransformedDistributionTest(test.TestCase): sigma = 2.0 abs_normal = self._cls()( distribution=ds.Normal(loc=mu, scale=sigma), - bijector=bs.AbsoluteValue(event_ndims=0)) + bijector=bs.AbsoluteValue()) sp_normal = stats.norm(mu, sigma) # sample @@ -129,7 +159,7 @@ class TransformedDistributionTest(test.TestCase): self.assertAllClose(grid, cdf_, rtol=1e-6, atol=0.) def testCachedSamples(self): - exp_forward_only = bs.Exp(event_ndims=0) + exp_forward_only = bs.Exp() exp_forward_only._inverse = self._make_unimplemented( "inverse") exp_forward_only._inverse_event_shape_tensor = self._make_unimplemented( @@ -153,7 +183,7 @@ class TransformedDistributionTest(test.TestCase): self.assertAllClose(expected_log_pdf, log_pdf_val, rtol=1e-4, atol=0.) def testCachedSamplesInvert(self): - exp_inverse_only = bs.Exp(event_ndims=0) + exp_inverse_only = bs.Exp() exp_inverse_only._forward = self._make_unimplemented( "forward") exp_inverse_only._forward_event_shape_tensor = self._make_unimplemented( @@ -210,8 +240,11 @@ class TransformedDistributionTest(test.TestCase): int_identity = bs.Inline( forward_fn=array_ops.identity, inverse_fn=array_ops.identity, - inverse_log_det_jacobian_fn=lambda x: math_ops.cast(0, dtypes.int32), - forward_log_det_jacobian_fn=lambda x: math_ops.cast(0, dtypes.int32), + inverse_log_det_jacobian_fn=( + lambda y: math_ops.cast(0, dtypes.int32)), + forward_log_det_jacobian_fn=( + lambda x: math_ops.cast(0, dtypes.int32)), + forward_min_event_ndims=0, is_constant_jacobian=True) normal = self._cls()( distribution=ds.Normal(loc=0., scale=1.), @@ -435,6 +468,82 @@ class ScalarToMultiTest(test.TestCase): event_shape=[3], validate_args=True) + def testMatrixEvent(self): + with self.test_session() as sess: + batch_shape = [2] + event_shape = [2, 3, 3] + 
batch_shape_pl = array_ops.placeholder( + dtypes.int32, name="dynamic_batch_shape") + event_shape_pl = array_ops.placeholder( + dtypes.int32, name="dynamic_event_shape") + feed_dict = {batch_shape_pl: np.array(batch_shape, dtype=np.int32), + event_shape_pl: np.array(event_shape, dtype=np.int32)} + + scale = 2. + loc = 0. + fake_mvn_dynamic = self._cls()( + distribution=ds.Normal( + loc=loc, + scale=scale), + bijector=DummyMatrixTransform(), + batch_shape=batch_shape_pl, + event_shape=event_shape_pl, + validate_args=True) + + fake_mvn_static = self._cls()( + distribution=ds.Normal( + loc=loc, + scale=scale), + bijector=DummyMatrixTransform(), + batch_shape=batch_shape, + event_shape=event_shape, + validate_args=True) + + def actual_mvn_log_prob(x): + # This distribution is the normal PDF, reduced over the + # last 3 dimensions + a jacobian term which corresponds + # to the determinant of x. + return (np.sum( + stats.norm(loc, scale).logpdf(x), axis=(-1, -2, -3)) + + np.sum(np.linalg.det(x), axis=-1)) + + self.assertAllEqual([2, 3, 3], fake_mvn_static.event_shape) + self.assertAllEqual([2], fake_mvn_static.batch_shape) + + self.assertAllEqual(tensor_shape.TensorShape(None), + fake_mvn_dynamic.event_shape) + self.assertAllEqual(tensor_shape.TensorShape(None), + fake_mvn_dynamic.batch_shape) + + num_samples = 5e3 + for fake_mvn, feed_dict in ((fake_mvn_static, {}), + (fake_mvn_dynamic, feed_dict)): + # Ensure sample works by checking first, second moments. + y = fake_mvn.sample(int(num_samples), seed=0) + x = y[0:5, ...] + [ + x_, + fake_event_shape_, + fake_batch_shape_, + fake_log_prob_, + fake_prob_, + ] = sess.run([ + x, + fake_mvn.event_shape_tensor(), + fake_mvn.batch_shape_tensor(), + fake_mvn.log_prob(x), + fake_mvn.prob(x), + ], feed_dict=feed_dict) + + # Ensure all other functions work as intended. 
+ self.assertAllEqual([5, 2, 2, 3, 3], x_.shape) + self.assertAllEqual([2, 3, 3], fake_event_shape_) + self.assertAllEqual([2], fake_batch_shape_) + self.assertAllClose(actual_mvn_log_prob(x_), fake_log_prob_, + atol=0., rtol=1e-6) + self.assertAllClose(np.exp(actual_mvn_log_prob(x_)), fake_prob_, + atol=0., rtol=1e-5) + if __name__ == "__main__": test.main() diff --git a/tensorflow/contrib/distributions/python/kernel_tests/vector_laplace_diag_test.py b/tensorflow/contrib/distributions/python/kernel_tests/vector_laplace_diag_test.py index c355adeedbfff1072281a81de726ddb0ece07882..1226c66113ec4b43f57371abf4983aef1a529ec1 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/vector_laplace_diag_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/vector_laplace_diag_test.py @@ -61,7 +61,7 @@ class VectorLaplaceDiagTest(test.TestCase): dist = ds.TransformedDistribution( base_dist, validate_args=True, - bijector=bijectors.Softplus(event_ndims=1)) + bijector=bijectors.Softplus()) samps = dist.sample(5) # Shape [5, 1, 3]. self.assertAllEqual([5, 1], dist.log_prob(samps).get_shape()) diff --git a/tensorflow/contrib/distributions/python/ops/autoregressive.py b/tensorflow/contrib/distributions/python/ops/autoregressive.py index 69f3d57ff000d6c9acc8aa9e3d0ad8d9cbb6bb3c..88ed0127841093cc1a1168d988f14e7bb0277b12 100644 --- a/tensorflow/contrib/distributions/python/ops/autoregressive.py +++ b/tensorflow/contrib/distributions/python/ops/autoregressive.py @@ -145,7 +145,7 @@ class Autoregressive(distribution_lib.Distribution): ValueError: if `num_steps < 1`. 
""" parameters = locals() - with ops.name_scope(name): + with ops.name_scope(name) as name: self._distribution_fn = distribution_fn self._sample0 = sample0 self._distribution0 = (distribution_fn() if sample0 is None diff --git a/tensorflow/contrib/distributions/python/ops/batch_reshape.py b/tensorflow/contrib/distributions/python/ops/batch_reshape.py index c7ee9b211725c0cc2d2db7e695022c27b08dcf54..bf5590cd552a915a3ecfc1912ee530baf79665a6 100644 --- a/tensorflow/contrib/distributions/python/ops/batch_reshape.py +++ b/tensorflow/contrib/distributions/python/ops/batch_reshape.py @@ -115,7 +115,7 @@ class BatchReshape(distribution_lib.Distribution): self._batch_shape_static = tensor_util.constant_value(self._batch_shape_) if self._batch_shape_static is not None: self._batch_shape_static = np.int32(self._batch_shape_static) - self._runtime_assertions = make_runtime_assertions( + self._runtime_assertions = validate_init_args( self._distribution, self._batch_shape_, validate_args, @@ -229,7 +229,8 @@ class BatchReshape(distribution_lib.Distribution): def _call_reshape_input_output(self, fn, x): """Calls `fn`, appropriately reshaping its input `x` and output.""" - with ops.control_dependencies(self._runtime_assertions): + with ops.control_dependencies( + self._runtime_assertions + self._validate_sample_arg(x)): sample_shape, static_sample_shape = self._sample_shape(x) old_shape = array_ops.concat([ sample_shape, @@ -273,61 +274,143 @@ class BatchReshape(distribution_lib.Distribution): result.set_shape(static_shape) return result - -def make_runtime_assertions( + def _validate_sample_arg(self, x): + """Helper which validates sample arg, e.g., input to `log_prob`.""" + with ops.name_scope(name="validate_sample_arg", values=[x]): + x_ndims = (array_ops.rank(x) if x.shape.ndims is None else x.shape.ndims) + event_ndims = (array_ops.size(self.event_shape_tensor()) + if self.event_shape.ndims is None + else self.event_shape.ndims) + batch_ndims = 
(array_ops.size(self.batch_shape_tensor()) + if self.batch_shape.ndims is None + else self.batch_shape.ndims) + expected_batch_event_ndims = batch_ndims + event_ndims + + if (isinstance(x_ndims, int) and + isinstance(expected_batch_event_ndims, int)): + if x_ndims < expected_batch_event_ndims: + raise NotImplementedError( + "Broadcasting is not supported; too few batch and event dims " + "(expected at least {}, saw {}).".format( + expected_batch_event_ndims, x_ndims)) + ndims_assertion = [] + elif self.validate_args: + ndims_assertion = [ + check_ops.assert_greater_equal( + x_ndims, + expected_batch_event_ndims, + message=("Broadcasting is not supported; too few " + "batch and event dims."), + name="assert_batch_and_event_ndims_large_enough"), + ] + + if (self.batch_shape.is_fully_defined() and + self.event_shape.is_fully_defined()): + expected_batch_event_shape = np.int32(self.batch_shape.concatenate( + self.event_shape).as_list()) + else: + expected_batch_event_shape = array_ops.concat([ + self.batch_shape_tensor(), + self.event_shape_tensor(), + ], axis=0) + + sample_ndims = x_ndims - expected_batch_event_ndims + if isinstance(sample_ndims, int): + sample_ndims = max(sample_ndims, 0) + if (isinstance(sample_ndims, int) and + x.shape[sample_ndims:].is_fully_defined()): + actual_batch_event_shape = np.int32(x.shape[sample_ndims:].as_list()) + else: + sample_ndims = math_ops.maximum(sample_ndims, 0) + actual_batch_event_shape = array_ops.shape(x)[sample_ndims:] + + if (isinstance(expected_batch_event_shape, np.ndarray) and + isinstance(actual_batch_event_shape, np.ndarray)): + if any(expected_batch_event_shape != actual_batch_event_shape): + raise NotImplementedError("Broadcasting is not supported; " + "unexpected batch and event shape " + "(expected {}, saw {}).".format( + expected_batch_event_shape, + actual_batch_event_shape)) + # We need to set the final runtime-assertions to `ndims_assertion` since + # its possible this assertion was created. 
We could add a condition to + # only do so if `self.validate_args == True`, however this is redundant + # as `ndims_assertion` already encodes this information. + runtime_assertions = ndims_assertion + elif self.validate_args: + # We need to make the `ndims_assertion` a control dep because otherwise + # TF itself might raise an exception owing to this assertion being + # ill-defined, ie, one cannot even compare different rank Tensors. + with ops.control_dependencies(ndims_assertion): + shape_assertion = check_ops.assert_equal( + expected_batch_event_shape, + actual_batch_event_shape, + message=("Broadcasting is not supported; " + "unexpected batch and event shape."), + name="assert_batch_and_event_shape_same") + runtime_assertions = [shape_assertion] + else: + runtime_assertions = [] + + return runtime_assertions + + +def validate_init_args( distribution, batch_shape, validate_args, batch_shape_static): """Helper to __init__ which makes or raises assertions.""" - runtime_assertions = [] - - if batch_shape.shape.ndims is not None: - if batch_shape.shape.ndims != 1: - raise ValueError("`batch_shape` must be a vector " - "(saw rank: {}).".format( - batch_shape.shape.ndims)) - elif validate_args: - runtime_assertions += [ - check_ops.assert_rank( - batch_shape, - 1, - message="`batch_shape` must be a vector.", - name="assert_batch_shape_is_vector"), - ] - - batch_size_static = np.prod(batch_shape_static) - dist_batch_size_static = ( - None if not distribution.batch_shape.is_fully_defined() - else np.prod(distribution.batch_shape).value) - - if batch_size_static is not None and dist_batch_size_static is not None: - if batch_size_static != dist_batch_size_static: - raise ValueError("`batch_shape` size ({}) must match " - "`distribution.batch_shape` size ({}).".format( - batch_size_static, - dist_batch_size_static)) - elif validate_args: - runtime_assertions += [ - check_ops.assert_equal( - math_ops.reduce_prod(batch_shape), - 
math_ops.reduce_prod(distribution.batch_shape_tensor()), - message=("`batch_shape` size must match " - "`distributions.batch_shape` size."), - name="assert_batch_size"), - ] - - if batch_shape_static is not None: - if np.any(batch_shape_static < 1): - raise ValueError("`batch_shape` elements must be positive " - "(i.e., larger than zero).") - elif validate_args: - runtime_assertions += [ - check_ops.assert_positive( - batch_shape, - message=("`batch_shape` elements must be positive " - "(i.e., larger than zero)."), - name="assert_batch_shape_positive") - ] - - return runtime_assertions + with ops.name_scope(name="validate_init_args", + values=[batch_shape] + distribution._graph_parents): # pylint: disable=protected-access + runtime_assertions = [] + + if batch_shape.shape.ndims is not None: + if batch_shape.shape.ndims != 1: + raise ValueError("`batch_shape` must be a vector " + "(saw rank: {}).".format( + batch_shape.shape.ndims)) + elif validate_args: + runtime_assertions += [ + check_ops.assert_rank( + batch_shape, + 1, + message="`batch_shape` must be a vector.", + name="assert_batch_shape_is_vector"), + ] + + batch_size_static = np.prod(batch_shape_static) + dist_batch_size_static = ( + None if not distribution.batch_shape.is_fully_defined() + else np.prod(distribution.batch_shape).value) + + if batch_size_static is not None and dist_batch_size_static is not None: + if batch_size_static != dist_batch_size_static: + raise ValueError("`batch_shape` size ({}) must match " + "`distribution.batch_shape` size ({}).".format( + batch_size_static, + dist_batch_size_static)) + elif validate_args: + runtime_assertions += [ + check_ops.assert_equal( + math_ops.reduce_prod(batch_shape), + math_ops.reduce_prod(distribution.batch_shape_tensor()), + message=("`batch_shape` size must match " + "`distributions.batch_shape` size."), + name="assert_batch_size"), + ] + + if batch_shape_static is not None: + if np.any(batch_shape_static < 1): + raise ValueError("`batch_shape` 
elements must be positive " + "(i.e., larger than zero).") + elif validate_args: + runtime_assertions += [ + check_ops.assert_positive( + batch_shape, + message=("`batch_shape` elements must be positive " + "(i.e., larger than zero)."), + name="assert_batch_shape_positive") + ] + + return runtime_assertions diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/__init__.py b/tensorflow/contrib/distributions/python/ops/bijectors/__init__.py index bc6b02542ebf3b83d58f888509dafb86351de8a7..babce80396cfc41b53e99f91038d4f077c7efe82 100644 --- a/tensorflow/contrib/distributions/python/ops/bijectors/__init__.py +++ b/tensorflow/contrib/distributions/python/ops/bijectors/__init__.py @@ -38,6 +38,7 @@ @@SinhArcsinh @@SoftmaxCentered @@Softplus +@@Softsign @@Square @@Weibull @@ -74,6 +75,7 @@ from tensorflow.contrib.distributions.python.ops.bijectors.sigmoid import * from tensorflow.contrib.distributions.python.ops.bijectors.sinh_arcsinh import * from tensorflow.contrib.distributions.python.ops.bijectors.softmax_centered import * from tensorflow.contrib.distributions.python.ops.bijectors.softplus import * +from tensorflow.contrib.distributions.python.ops.bijectors.softsign import * from tensorflow.contrib.distributions.python.ops.bijectors.square import * from tensorflow.python.ops.distributions.bijector import * from tensorflow.python.ops.distributions.identity_bijector import Identity diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/absolute_value.py b/tensorflow/contrib/distributions/python/ops/bijectors/absolute_value.py index 0fe9f6aa78fbe845b99d0668f075b0162ec2a9f7..c9e31d7712f09f6c4b4cc6ae51a34c42a19c291d 100644 --- a/tensorflow/contrib/distributions/python/ops/bijectors/absolute_value.py +++ b/tensorflow/contrib/distributions/python/ops/bijectors/absolute_value.py @@ -18,9 +18,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.python.framework import ops 
-from tensorflow.python.framework import tensor_util -from tensorflow.python.ops import array_ops +from tensorflow.python.framework import constant_op from tensorflow.python.ops import check_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops @@ -72,38 +70,22 @@ class AbsoluteValue(bijector.Bijector): """ - def __init__(self, event_ndims=0, validate_args=False, name="absolute_value"): + def __init__(self, validate_args=False, name="absolute_value"): """Instantiates the `AbsoluteValue` bijector. Args: - event_ndims: Python scalar indicating the number of dimensions associated - with a particular draw from the distribution. Currently only zero is - supported. validate_args: Python `bool` indicating whether arguments should be checked for correctness, in particular whether inputs to `inverse` and `inverse_log_det_jacobian` are non-negative. name: Python `str` name given to ops managed by this object. - - Raises: - ValueError: If `event_ndims` is not zero. """ self._graph_parents = [] self._name = name - event_ndims = ops.convert_to_tensor(event_ndims, name="event_ndims") - event_ndims_const = tensor_util.constant_value(event_ndims) - if event_ndims_const is not None and event_ndims_const not in (0,): - raise ValueError("event_ndims(%s) was not 0" % event_ndims_const) - else: - if validate_args: - event_ndims = control_flow_ops.with_dependencies( - [check_ops.assert_equal( - event_ndims, 0, message="event_ndims was not 0")], - event_ndims) - with self._name_scope("init"): super(AbsoluteValue, self).__init__( - event_ndims=event_ndims, + forward_min_event_ndims=0, + is_constant_jacobian=True, validate_args=validate_args, name=name) @@ -121,8 +103,7 @@ class AbsoluteValue(bijector.Bijector): # If event_ndims = 2, # F^{-1}(y) = (-y, y), so DF^{-1}(y) = (-1, 1), # so Log|DF^{-1}(y)| = Log[1, 1] = [0, 0]. 
- batch_shape = array_ops.shape(y)[:array_ops.rank(y) - self.event_ndims] - zeros = array_ops.zeros(batch_shape, dtype=y.dtype) + zeros = constant_op.constant(0., dtype=y.dtype) if self.validate_args: zeros = control_flow_ops.with_dependencies( [check_ops.assert_non_negative(y, message="Argument y was negative")], diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/affine.py b/tensorflow/contrib/distributions/python/ops/bijectors/affine.py index bef7bbb49b715497695f7513e19ecab4fa56c47e..b4c2939eb914d50475ba6b1c1e979a804090f641 100644 --- a/tensorflow/contrib/distributions/python/ops/bijectors/affine.py +++ b/tensorflow/contrib/distributions/python/ops/bijectors/affine.py @@ -184,6 +184,7 @@ class Affine(bijector.Bijector): with self._name_scope("init", values=[ shift, scale_identity_multiplier, scale_diag, scale_tril, scale_perturb_diag, scale_perturb_factor]): + # In the absence of `loc` and `scale`, we'll assume `dtype` is `float32`. dtype = dtypes.float32 @@ -234,7 +235,7 @@ class Affine(bijector.Bijector): event_ndims=1, validate_args=validate_args) super(Affine, self).__init__( - event_ndims=1, + forward_min_event_ndims=1, graph_parents=( [self._scale] if tensor_util.is_tensor(self._scale) else self._scale.graph_parents + @@ -360,16 +361,17 @@ class Affine(bijector.Bijector): x, sample_shape, expand_batch_dim=False) return x - def _inverse_log_det_jacobian(self, y): - return -self._forward_log_det_jacobian(y) - def _forward_log_det_jacobian(self, x): + # is_constant_jacobian = True for this bijector, hence the + # `log_det_jacobian` need only be specified for a single input, as this will + # be tiled to match `event_ndims`. if self._is_only_identity_multiplier: # We don't pad in this case and instead let the fldj be applied # via broadcast. 
event_size = array_ops.shape(x)[-1] event_size = math_ops.cast(event_size, dtype=self._scale.dtype) return math_ops.log(math_ops.abs(self._scale)) * event_size + return self.scale.log_abs_determinant() def _maybe_check_scale(self): diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/affine_linear_operator.py b/tensorflow/contrib/distributions/python/ops/bijectors/affine_linear_operator.py index 89043b1410370074f11f2cfa59b6b6663fa62521..59f9742d576a7804f401d3a47ba31ae61d6c6e54 100644 --- a/tensorflow/contrib/distributions/python/ops/bijectors/affine_linear_operator.py +++ b/tensorflow/contrib/distributions/python/ops/bijectors/affine_linear_operator.py @@ -22,9 +22,6 @@ from tensorflow.contrib.distributions.python.ops.shape import _DistributionShape from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops -from tensorflow.python.framework import tensor_util -from tensorflow.python.ops import check_ops -from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops.distributions import bijector from tensorflow.python.ops.linalg import linear_operator @@ -94,7 +91,6 @@ class AffineLinearOperator(bijector.Bijector): def __init__(self, shift=None, scale=None, - event_ndims=1, validate_args=False, name="affine_linear_operator"): """Instantiates the `AffineLinearOperator` bijector. @@ -103,14 +99,11 @@ class AffineLinearOperator(bijector.Bijector): shift: Floating-point `Tensor`. scale: Subclass of `LinearOperator`. Represents the (batch) positive definite matrix `M` in `R^{k x k}`. - event_ndims: Scalar `integer` `Tensor` indicating the number of dimensions - associated with a particular draw from the distribution. Must be 0 or 1. validate_args: Python `bool` indicating whether arguments should be checked for correctness. name: Python `str` name given to ops managed by this object. Raises: - ValueError: if `event_ndims` is not 0 or 1. 
TypeError: if `scale` is not a `LinearOperator`. TypeError: if `shift.dtype` does not match `scale.dtype`. ValueError: if not `scale.is_non_singular`. @@ -120,20 +113,6 @@ class AffineLinearOperator(bijector.Bijector): self._validate_args = validate_args graph_parents = [] with self._name_scope("init", values=[shift]): - event_ndims = ops.convert_to_tensor(event_ndims, name="event_ndims") - if tensor_util.constant_value(event_ndims) is not None: - event_ndims = tensor_util.constant_value(event_ndims) - if event_ndims not in (0, 1): - raise ValueError("event_ndims({}) was not 0 or 1".format(event_ndims)) - else: - if validate_args: - # Shape tool will catch if event_ndims is negative. - event_ndims = control_flow_ops.with_dependencies( - [check_ops.assert_less( - event_ndims, 2, message="event_ndims must be 0 or 1")], - event_ndims) - graph_parents += [event_ndims] - # In the absence of `loc` and `scale`, we'll assume `dtype` is `float32`. dtype = dtypes.float32 @@ -166,10 +145,10 @@ class AffineLinearOperator(bijector.Bijector): self._scale = scale self._shaper = _DistributionShape( batch_ndims=batch_ndims, - event_ndims=event_ndims, + event_ndims=1, validate_args=validate_args) super(AffineLinearOperator, self).__init__( - event_ndims=event_ndims, + forward_min_event_ndims=1, graph_parents=graph_parents, is_constant_jacobian=True, dtype=dtype, @@ -213,12 +192,13 @@ class AffineLinearOperator(bijector.Bijector): x, sample_shape, expand_batch_dim=False) return x - def _inverse_log_det_jacobian(self, y): - return -self._forward_log_det_jacobian(y) - - def _forward_log_det_jacobian(self, x): # pylint: disable=unused-argument + def _forward_log_det_jacobian(self, x): + # is_constant_jacobian = True for this bijector, hence the + # `log_det_jacobian` need only be specified for a single input, as this will + # be tiled to match `event_ndims`. 
if self.scale is None: - return constant_op.constant(0, dtype=x.dtype.base_dtype) + return constant_op.constant(0., dtype=x.dtype.base_dtype) + with ops.control_dependencies(self._maybe_collect_assertions() if self.validate_args else []): return self.scale.log_abs_determinant() diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/affine_scalar.py b/tensorflow/contrib/distributions/python/ops/bijectors/affine_scalar.py index 8adaa54c843d1b243a02967402a37b7c63fabbdf..cd792e2c8cf48602daf9fb5eb56b8c34bac050c7 100644 --- a/tensorflow/contrib/distributions/python/ops/bijectors/affine_scalar.py +++ b/tensorflow/contrib/distributions/python/ops/bijectors/affine_scalar.py @@ -18,6 +18,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.python.framework import constant_op from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import check_ops @@ -99,7 +100,7 @@ class AffineScalar(bijector.Bijector): self._scale) super(AffineScalar, self).__init__( - event_ndims=0, + forward_min_event_ndims=0, is_constant_jacobian=True, validate_args=validate_args, name=name) @@ -131,8 +132,10 @@ class AffineScalar(bijector.Bijector): return x def _forward_log_det_jacobian(self, x): - log_det_jacobian = array_ops.zeros_like(x) + # is_constant_jacobian = True for this bijector, hence the + # `log_det_jacobian` need only be specified for a single input, as this will + # be tiled to match `event_ndims`. 
if self.scale is None: - return log_det_jacobian - log_det_jacobian += math_ops.log(math_ops.abs(self.scale)) - return log_det_jacobian + return constant_op.constant(0., dtype=x.dtype.base_dtype) + + return math_ops.log(math_ops.abs(self.scale)) diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/batch_normalization.py b/tensorflow/contrib/distributions/python/ops/bijectors/batch_normalization.py index 33fdd32d7a0a01685690e598c69adca2c95972e9..224cec8a63dba53a528490117efac890312fe8d5 100644 --- a/tensorflow/contrib/distributions/python/ops/bijectors/batch_normalization.py +++ b/tensorflow/contrib/distributions/python/ops/bijectors/batch_normalization.py @@ -157,7 +157,12 @@ class BatchNormalization(bijector.Bijector): gamma_constraint=g_constraint) self._validate_bn_layer(self.batchnorm) self._training = training + if isinstance(self.batchnorm.axis, int): + forward_min_event_ndims = 1 + else: + forward_min_event_ndims = len(self.batchnorm.axis) super(BatchNormalization, self).__init__( + forward_min_event_ndims=forward_min_event_ndims, validate_args=validate_args, name=name) def _validate_bn_layer(self, layer): @@ -186,7 +191,6 @@ class BatchNormalization(bijector.Bijector): input_shape = np.int32(x.shape.as_list()) ndims = len(input_shape) - # event_dims = self._compute_event_dims(x) reduction_axes = [i for i in range(ndims) if i not in self.batchnorm.axis] # Broadcasting only necessary for single-axis batch norm where the axis is # not the last dimension diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/chain.py b/tensorflow/contrib/distributions/python/ops/bijectors/chain.py index 3ce7c26213034c7345a20faa803c94a1bfa8d579..85ad23e4133ef09051cdc8b45e489caeea90fbb3 100644 --- a/tensorflow/contrib/distributions/python/ops/bijectors/chain.py +++ b/tensorflow/contrib/distributions/python/ops/bijectors/chain.py @@ -21,6 +21,9 @@ from __future__ import print_function import itertools from tensorflow.python.framework import constant_op 
+from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_util +from tensorflow.python.ops import array_ops from tensorflow.python.ops.distributions import bijector @@ -29,6 +32,91 @@ __all__ = [ ] +def _use_static_shape(input_tensor, ndims): + return input_tensor.shape.is_fully_defined() and isinstance(ndims, int) + + +def _maybe_get_event_ndims_statically(event_ndims): + static_event_ndims = (event_ndims if isinstance(event_ndims, int) + else tensor_util.constant_value(event_ndims)) + if static_event_ndims is not None: + return static_event_ndims + + return event_ndims + + +def _compute_min_event_ndims(bijector_list, compute_forward=True): + """Computes the min_event_ndims associated with the give list of bijectors. + + Given a list `bijector_list` of bijectors, compute the min_event_ndims that is + associated with the composition of bijectors in that list. + + min_event_ndims is the # of right most dimensions for which the bijector has + done necessary computation on (i.e. the non-broadcastable part of the + computation). + + We can derive the min_event_ndims for a chain of bijectors as follows: + + In the case where there are no rank changing bijectors, this will simply be + `max(b.forward_min_event_ndims for b in bijector_list)`. This is because the + bijector with the most forward_min_event_ndims requires the most dimensions, + and hence the chain also requires operating on those dimensions. + + However in the case of rank changing, more care is needed in determining the + exact amount of dimensions. Padding dimensions causes subsequent bijectors to + operate on the padded dimensions, and Removing dimensions causes bijectors to + operate more left. + + Args: + bijector_list: List of bijectors to be composed by chain. + compute_forward: Boolean. If True, computes the min_event_ndims associated + with a forward call to Chain, and otherwise computes the min_event_ndims + associated with an inverse call to Chain. 
The latter is the same as the + min_event_ndims associated with a forward call to Invert(Chain(....)). + + Returns: + min_event_ndims + """ + min_event_ndims = 0 + # This is a mouthful, but what this encapsulates is that if not for rank + # changing bijectors, we'd only need to compute the largest of the min + # required ndims. Hence "max_min". Due to rank changing bijectors, we need to + # account for synthetic rank growth / synthetic rank decrease from a rank + # changing bijector. + rank_changed_adjusted_max_min_event_ndims = 0 + + if compute_forward: + bijector_list = reversed(bijector_list) + + for b in bijector_list: + if compute_forward: + current_min_event_ndims = b.forward_min_event_ndims + current_inverse_min_event_ndims = b.inverse_min_event_ndims + else: + current_min_event_ndims = b.inverse_min_event_ndims + current_inverse_min_event_ndims = b.forward_min_event_ndims + + # New dimensions were touched. + if rank_changed_adjusted_max_min_event_ndims < current_min_event_ndims: + min_event_ndims += ( + current_min_event_ndims - rank_changed_adjusted_max_min_event_ndims) + rank_changed_adjusted_max_min_event_ndims = max( + current_min_event_ndims, rank_changed_adjusted_max_min_event_ndims) + + # If the number of dimensions has increased via forward, then + # inverse_min_event_ndims > forward_min_event_ndims, and hence the + # dimensions we computed on, have moved left (so we have operated + # on additional dimensions). + # Conversely, if the number of dimensions has decreased via forward, + # then we have inverse_min_event_ndims < forward_min_event_ndims, + # and so we will have operated on fewer right most dimensions. + + number_of_changed_dimensions = ( + current_min_event_ndims - current_inverse_min_event_ndims) + rank_changed_adjusted_max_min_event_ndims -= number_of_changed_dimensions + return min_event_ndims + + class Chain(bijector.Bijector): """Bijector which applies a sequence of bijectors. 
@@ -93,21 +181,24 @@ class Chain(bijector.Bijector): raise ValueError("incompatible dtypes: %s" % dtype) elif len(dtype) == 2: dtype = dtype[1] if dtype[0] is None else dtype[0] - event_ndims = bijectors[0].event_ndims elif len(dtype) == 1: dtype = dtype[0] - event_ndims = bijectors[0].event_ndims else: dtype = None - event_ndims = None + + inverse_min_event_ndims = _compute_min_event_ndims( + bijectors, compute_forward=False) + forward_min_event_ndims = _compute_min_event_ndims( + bijectors, compute_forward=True) super(Chain, self).__init__( graph_parents=list(itertools.chain.from_iterable( b.graph_parents for b in bijectors)), + forward_min_event_ndims=forward_min_event_ndims, + inverse_min_event_ndims=inverse_min_event_ndims, is_constant_jacobian=all(b.is_constant_jacobian for b in bijectors), validate_args=validate_args, dtype=dtype, - event_ndims=event_ndims, name=name or ("identity" if not bijectors else "_of_".join(["chain"] + [b.name for b in bijectors]))) @@ -147,10 +238,31 @@ class Chain(bijector.Bijector): return y def _inverse_log_det_jacobian(self, y, **kwargs): - ildj = constant_op.constant(0., dtype=y.dtype, - name="inverse_log_det_jacobian") + ildj = constant_op.constant( + 0., dtype=y.dtype.base_dtype, name="inverse_log_det_jacobian") + + if not self.bijectors: + return ildj + + event_ndims = _maybe_get_event_ndims_statically( + self.inverse_min_event_ndims) + + if _use_static_shape(y, event_ndims): + event_shape = y.shape[y.shape.ndims - event_ndims:] + else: + event_shape = array_ops.shape(y)[array_ops.rank(y) - event_ndims:] + for b in self.bijectors: - ildj += b.inverse_log_det_jacobian(y, **kwargs.get(b.name, {})) + ildj += b.inverse_log_det_jacobian( + y, event_ndims=event_ndims, **kwargs.get(b.name, {})) + + if _use_static_shape(y, event_ndims): + event_shape = b.inverse_event_shape(event_shape) + event_ndims = _maybe_get_event_ndims_statically(event_shape.ndims) + else: + event_shape = b.inverse_event_shape_tensor(event_shape) + event_ndims 
= _maybe_get_event_ndims_statically( + array_ops.rank(event_shape)) y = b.inverse(y, **kwargs.get(b.name, {})) return ildj @@ -160,9 +272,34 @@ class Chain(bijector.Bijector): return x def _forward_log_det_jacobian(self, x, **kwargs): - fldj = constant_op.constant(0., dtype=x.dtype, - name="forward_log_det_jacobian") + x = ops.convert_to_tensor(x, name="x") + + fldj = constant_op.constant( + 0., dtype=x.dtype, name="inverse_log_det_jacobian") + + if not self.bijectors: + return fldj + + event_ndims = _maybe_get_event_ndims_statically( + self.forward_min_event_ndims) + + if _use_static_shape(x, event_ndims): + event_shape = x.shape[x.shape.ndims - event_ndims:] + else: + event_shape = array_ops.shape(x)[array_ops.rank(x) - event_ndims:] + for b in reversed(self.bijectors): - fldj += b.forward_log_det_jacobian(x, **kwargs.get(b.name, {})) + fldj += b.forward_log_det_jacobian( + x, event_ndims=event_ndims, **kwargs.get(b.name, {})) + if _use_static_shape(x, event_ndims): + event_shape = b.forward_event_shape(event_shape) + event_ndims = _maybe_get_event_ndims_statically(event_shape.ndims) + else: + event_shape = b.forward_event_shape_tensor(event_shape) + event_ndims = _maybe_get_event_ndims_statically( + array_ops.rank(event_shape)) + x = b.forward(x, **kwargs.get(b.name, {})) + return fldj + diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/cholesky_outer_product.py b/tensorflow/contrib/distributions/python/ops/bijectors/cholesky_outer_product.py index 8f09e16058b766c788ab3acced6940fd0026b521..ecdb8967f43e5960b2285de05125d0c3dbafe63c 100644 --- a/tensorflow/contrib/distributions/python/ops/bijectors/cholesky_outer_product.py +++ b/tensorflow/contrib/distributions/python/ops/bijectors/cholesky_outer_product.py @@ -80,7 +80,7 @@ class CholeskyOuterProduct(bijector.Bijector): self._graph_parents = [] self._name = name super(CholeskyOuterProduct, self).__init__( - event_ndims=2, + forward_min_event_ndims=2, validate_args=validate_args, name=name) @@ 
-170,7 +170,7 @@ class CholeskyOuterProduct(bijector.Bijector): sum_weighted_log_diag = array_ops.squeeze( math_ops.matmul(math_ops.log(diag), exponents[..., array_ops.newaxis]), - squeeze_dims=-1) + axis=-1) fldj = p_float * np.log(2.) + sum_weighted_log_diag return fldj diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/conditional_bijector.py b/tensorflow/contrib/distributions/python/ops/bijectors/conditional_bijector.py index ccb1f029277bc07011df7be047a075274f2b3a27..e9e994f839ab2fe0a0f52f5f404fb2a0c8f9cd94 100644 --- a/tensorflow/contrib/distributions/python/ops/bijectors/conditional_bijector.py +++ b/tensorflow/contrib/distributions/python/ops/bijectors/conditional_bijector.py @@ -44,12 +44,16 @@ class ConditionalBijector(bijector.Bijector): "**condition_kwargs": "Named arguments forwarded to subclass implementation."}) def inverse_log_det_jacobian( - self, y, name="inverse_log_det_jacobian", **condition_kwargs): - return self._call_inverse_log_det_jacobian(y, name, **condition_kwargs) + self, y, event_ndims, name="inverse_log_det_jacobian", + **condition_kwargs): + return self._call_inverse_log_det_jacobian( + y, event_ndims, name, **condition_kwargs) @distribution_util.AppendDocstring(kwargs_dict={ "**condition_kwargs": "Named arguments forwarded to subclass implementation."}) def forward_log_det_jacobian( - self, x, name="forward_log_det_jacobian", **condition_kwargs): - return self._call_forward_log_det_jacobian(x, name, **condition_kwargs) + self, x, event_ndims, name="forward_log_det_jacobian", + **condition_kwargs): + return self._call_forward_log_det_jacobian( + x, event_ndims, name, **condition_kwargs) diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/exp.py b/tensorflow/contrib/distributions/python/ops/bijectors/exp.py index b1ff840d62a73c941a4d67dec73b5c9f4d5353f9..9fc1bbf052b419d07a9db149b990c2b80190d72b 100644 --- a/tensorflow/contrib/distributions/python/ops/bijectors/exp.py +++ 
b/tensorflow/contrib/distributions/python/ops/bijectors/exp.py @@ -33,8 +33,8 @@ class Exp(power_transform.PowerTransform): ```python # Create the Y=g(X)=exp(X) transform which works only on Tensors with 1 - # batch ndim and 2 event ndims (i.e., vector of matrices). - exp = Exp(event_ndims=2) + # batch ndim 2. + exp = Exp() x = [[[1., 2], [3, 4]], [[5, 6], @@ -48,19 +48,17 @@ class Exp(power_transform.PowerTransform): """ def __init__(self, - event_ndims=0, validate_args=False, name="exp"): """Instantiates the `Exp` bijector. Args: - event_ndims: Scalar `int32` `Tensor` indicating the number of dimensions - associated with a particular draw from the distribution. validate_args: Python `bool` indicating whether arguments should be checked for correctness. name: Python `str` name given to ops managed by this object. """ + # forward_min_event_ndims = 0. + # No forward_min_event_ndims specified as this is done in PowerTransform. super(Exp, self).__init__( - event_ndims=event_ndims, validate_args=validate_args, name=name) diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/gumbel.py b/tensorflow/contrib/distributions/python/ops/bijectors/gumbel.py index 67f39785563255be0fe154aca3cbcf01c6a01e73..e656a258e56e71898ecb719dd2af876f158cf799 100644 --- a/tensorflow/contrib/distributions/python/ops/bijectors/gumbel.py +++ b/tensorflow/contrib/distributions/python/ops/bijectors/gumbel.py @@ -48,7 +48,6 @@ class Gumbel(bijector.Bijector): def __init__(self, loc=0., scale=1., - event_ndims=0, validate_args=False, name="gumbel"): """Instantiates the `Gumbel` bijector. @@ -60,8 +59,6 @@ class Gumbel(bijector.Bijector): scale: Positive Float-like `Tensor` that is the same dtype and is broadcastable with `loc`. This is `scale` in `Y = g(X) = exp(-exp(-(X - loc) / scale))`. - event_ndims: Python scalar indicating the number of dimensions associated - with a particular draw from the distribution. 
validate_args: Python `bool` indicating whether arguments should be checked for correctness. name: Python `str` name given to ops managed by this object. @@ -80,7 +77,9 @@ class Gumbel(bijector.Bijector): ], self._scale) super(Gumbel, self).__init__( - event_ndims=event_ndims, validate_args=validate_args, name=name) + validate_args=validate_args, + forward_min_event_ndims=0, + name=name) @property def loc(self): @@ -102,15 +101,11 @@ class Gumbel(bijector.Bijector): def _inverse_log_det_jacobian(self, y): y = self._maybe_assert_valid_y(y) - event_dims = self._event_dims_tensor(y) - return math_ops.reduce_sum( - math_ops.log(self.scale / (-math_ops.log(y) * y)), axis=event_dims) + return math_ops.log(self.scale / (-math_ops.log(y) * y)) def _forward_log_det_jacobian(self, x): - event_dims = self._event_dims_tensor(x) z = (x - self.loc) / self.scale - return math_ops.reduce_sum( - -z - math_ops.exp(-z) - math_ops.log(self.scale), axis=event_dims) + return -z - math_ops.exp(-z) - math_ops.log(self.scale) def _maybe_assert_valid_y(self, y): if not self.validate_args: diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/inline.py b/tensorflow/contrib/distributions/python/ops/bijectors/inline.py index fab1b22fbf92e7b92a5ec86ec62d66bec71a8c94..2bde956d1345129285acae4684256c5ac828b9a1 100644 --- a/tensorflow/contrib/distributions/python/ops/bijectors/inline.py +++ b/tensorflow/contrib/distributions/python/ops/bijectors/inline.py @@ -40,7 +40,7 @@ class Inline(bijector.Bijector): name="exp") ``` - The above example is equivalent to the `Bijector` `Exp(event_ndims=1)`. + The above example is equivalent to the `Bijector` `Exp()`. """ def __init__(self, @@ -54,6 +54,8 @@ class Inline(bijector.Bijector): inverse_event_shape_tensor_fn=None, is_constant_jacobian=False, validate_args=False, + forward_min_event_ndims=None, + inverse_min_event_ndims=None, name="inline"): """Creates a `Bijector` from callables. 
@@ -76,10 +78,15 @@ class Inline(bijector.Bijector): constant for all input arguments. validate_args: Python `bool` indicating whether arguments should be checked for correctness. + forward_min_event_ndims: Python `int` indicating the minimal + dimensionality this bijector acts on. + inverse_min_event_ndims: Python `int` indicating the minimal + dimensionality this bijector acts on. name: Python `str`, name given to ops managed by this object. """ super(Inline, self).__init__( - event_ndims=0, + forward_min_event_ndims=forward_min_event_ndims, + inverse_min_event_ndims=inverse_min_event_ndims, is_constant_jacobian=is_constant_jacobian, validate_args=validate_args, name=name) @@ -134,8 +141,8 @@ class Inline(bijector.Bijector): "inverse_log_det_jacobian_fn is not a callable function.") return self._inverse_log_det_jacobian_fn(y, **kwargs) - def _forward_log_det_jacobian(self, y, **kwargs): + def _forward_log_det_jacobian(self, x, **kwargs): if not callable(self._forward_log_det_jacobian_fn): raise NotImplementedError( "forward_log_det_jacobian_fn is not a callable function.") - return self._forward_log_det_jacobian_fn(y, **kwargs) + return self._forward_log_det_jacobian_fn(x, **kwargs) diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/invert.py b/tensorflow/contrib/distributions/python/ops/bijectors/invert.py index 2c603fe61f36dd27f4984fe6c13c11f2fb534321..1904239a0e7009c35cc4f3c8876fd749463a2b83 100644 --- a/tensorflow/contrib/distributions/python/ops/bijectors/invert.py +++ b/tensorflow/contrib/distributions/python/ops/bijectors/invert.py @@ -66,8 +66,9 @@ class Invert(bijector_lib.Bijector): self._bijector = bijector super(Invert, self).__init__( - event_ndims=bijector.event_ndims, graph_parents=bijector.graph_parents, + forward_min_event_ndims=bijector.inverse_min_event_ndims, + inverse_min_event_ndims=bijector.forward_min_event_ndims, is_constant_jacobian=bijector.is_constant_jacobian, validate_args=validate_args, dtype=bijector.dtype, diff 
--git a/tensorflow/contrib/distributions/python/ops/bijectors/kumaraswamy.py b/tensorflow/contrib/distributions/python/ops/bijectors/kumaraswamy.py index f5de052c9ed18b1ebf4c174aeea3a951b1ddcd9d..97000c17262d3efdef10274711364c2bc2083bd4 100644 --- a/tensorflow/contrib/distributions/python/ops/bijectors/kumaraswamy.py +++ b/tensorflow/contrib/distributions/python/ops/bijectors/kumaraswamy.py @@ -19,7 +19,6 @@ from __future__ import division from __future__ import print_function from tensorflow.python.framework import ops -from tensorflow.python.framework import tensor_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import check_ops from tensorflow.python.ops import control_flow_ops @@ -48,7 +47,6 @@ class Kumaraswamy(bijector.Bijector): def __init__(self, concentration1=None, concentration0=None, - event_ndims=0, validate_args=False, name="kumaraswamy"): """Instantiates the `Kumaraswamy` bijector. @@ -60,31 +58,14 @@ class Kumaraswamy(bijector.Bijector): concentration0: Python `float` scalar indicating the transform power, i.e., `Y = g(X) = (1 - (1 - X)**(1 / b))**(1 / a)` where `b` is `concentration0`. - event_ndims: Python scalar indicating the number of dimensions associated - with a particular draw from the distribution. Currently only zero is - supported. validate_args: Python `bool` indicating whether arguments should be checked for correctness. name: Python `str` name given to ops managed by this object. - - Raises: - ValueError: If `event_ndims` is not zero. 
""" self._graph_parents = [] self._name = name self._validate_args = validate_args - event_ndims = ops.convert_to_tensor(event_ndims, name="event_ndims") - event_ndims_const = tensor_util.constant_value(event_ndims) - if event_ndims_const is not None and event_ndims_const not in (0,): - raise ValueError("event_ndims(%s) was not 0" % event_ndims_const) - else: - if validate_args: - event_ndims = control_flow_ops.with_dependencies( - [check_ops.assert_equal( - event_ndims, 0, message="event_ndims was not 0")], - event_ndims) - with self._name_scope("init", values=[concentration1, concentration0]): concentration1 = self._maybe_assert_valid_concentration( ops.convert_to_tensor(concentration1, name="concentration1"), @@ -96,7 +77,7 @@ class Kumaraswamy(bijector.Bijector): self._concentration1 = concentration1 self._concentration0 = concentration0 super(Kumaraswamy, self).__init__( - event_ndims=0, + forward_min_event_ndims=0, validate_args=validate_args, name=name) @@ -123,12 +104,10 @@ class Kumaraswamy(bijector.Bijector): def _inverse_log_det_jacobian(self, y): y = self._maybe_assert_valid(y) - event_dims = self._event_dims_tensor(y) - return math_ops.reduce_sum( + return ( math_ops.log(self.concentration1) + math_ops.log(self.concentration0) + (self.concentration1 - 1) * math_ops.log(y) + - (self.concentration0 - 1) * math_ops.log1p(-y**self.concentration1), - axis=event_dims) + (self.concentration0 - 1) * math_ops.log1p(-y**self.concentration1)) def _maybe_assert_valid_concentration(self, concentration, validate_args): """Checks the validity of a concentration parameter.""" diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/masked_autoregressive.py b/tensorflow/contrib/distributions/python/ops/bijectors/masked_autoregressive.py index 84b2340c75514c3d2c12bf4d775ba74450a0dc26..ef56cf6ddda4dca2b1575e844b2584689e531b81 100644 --- a/tensorflow/contrib/distributions/python/ops/bijectors/masked_autoregressive.py +++ 
b/tensorflow/contrib/distributions/python/ops/bijectors/masked_autoregressive.py @@ -61,7 +61,7 @@ class MaskedAutoregressiveFlow(bijector_lib.Bijector): this property by zeroing out weights in its `masked_dense` layers. In the `tf.distributions` framework, a "normalizing flow" is implemented as a - `tf.distributions.bijectors.Bijector`. The `forward` "autoregression" + `tf.contrib.distributions.bijectors.Bijector`. The `forward` "autoregression" is implemented using a `tf.while_loop` and a deep neural network (DNN) with masked weights such that the autoregressive property is automatically met in the `inverse`. @@ -220,6 +220,7 @@ class MaskedAutoregressiveFlow(bijector_lib.Bijector): self._shift_and_log_scale_fn = shift_and_log_scale_fn self._unroll_loop = unroll_loop super(MaskedAutoregressiveFlow, self).__init__( + forward_min_event_ndims=1, is_constant_jacobian=is_constant_jacobian, validate_args=validate_args, name=name) diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/permute.py b/tensorflow/contrib/distributions/python/ops/bijectors/permute.py index 8654cc39d0c41ec4f1b85cd5fc4366ceaf4b224d..4978167803fc38b112c95922519c8c296cee2561 100644 --- a/tensorflow/contrib/distributions/python/ops/bijectors/permute.py +++ b/tensorflow/contrib/distributions/python/ops/bijectors/permute.py @@ -114,6 +114,7 @@ class Permute(bijector_lib.Bijector): ], permutation) self._permutation = permutation super(Permute, self).__init__( + forward_min_event_ndims=1, is_constant_jacobian=True, validate_args=validate_args, name=name or "permute") @@ -132,7 +133,10 @@ class Permute(bijector_lib.Bijector): axis=-1) def _inverse_log_det_jacobian(self, y): - return constant_op.constant(0., dtype=y.dtype) + # is_constant_jacobian = True for this bijector, hence the + # `log_det_jacobian` need only be specified for a single input, as this will + # be tiled to match `event_ndims`. 
+ return constant_op.constant(0., dtype=y.dtype.base_dtype) def _forward_log_det_jacobian(self, x): - return constant_op.constant(0., dtype=x.dtype) + return constant_op.constant(0., dtype=x.dtype.base_dtype) diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/power_transform.py b/tensorflow/contrib/distributions/python/ops/bijectors/power_transform.py index c37db61720d10949f294ff7b2e9778ba6efa57f0..71f123f2a998458edaa9c8da07ea2932f62625ca 100644 --- a/tensorflow/contrib/distributions/python/ops/bijectors/power_transform.py +++ b/tensorflow/contrib/distributions/python/ops/bijectors/power_transform.py @@ -43,7 +43,6 @@ class PowerTransform(bijector.Bijector): def __init__(self, power=0., - event_ndims=0, validate_args=False, name="power_transform"): """Instantiates the `PowerTransform` bijector. @@ -51,8 +50,6 @@ class PowerTransform(bijector.Bijector): Args: power: Python `float` scalar indicating the transform power, i.e., `Y = g(X) = (1 + X * c)**(1 / c)` where `c` is the `power`. - event_ndims: Python scalar indicating the number of dimensions associated - with a particular draw from the distribution. validate_args: Python `bool` indicating whether arguments should be checked for correctness. name: Python `str` name given to ops managed by this object. @@ -70,7 +67,7 @@ class PowerTransform(bijector.Bijector): raise ValueError("`power` must be a non-negative TF constant.") self._power = power super(PowerTransform, self).__init__( - event_ndims=event_ndims, + forward_min_event_ndims=0, validate_args=validate_args, name=name) @@ -97,18 +94,13 @@ class PowerTransform(bijector.Bijector): def _inverse_log_det_jacobian(self, y): y = self._maybe_assert_valid_y(y) - event_dims = self._event_dims_tensor(y) - return (self.power - 1.) * math_ops.reduce_sum( - math_ops.log(y), axis=event_dims) + return (self.power - 1.) 
* math_ops.log(y) def _forward_log_det_jacobian(self, x): x = self._maybe_assert_valid_x(x) - event_dims = self._event_dims_tensor(x) if self.power == 0.: - return math_ops.reduce_sum(x, axis=event_dims) - return (1. / self.power - 1.) * math_ops.reduce_sum( - math_ops.log1p(x * self.power), - axis=event_dims) + return x + return (1. / self.power - 1.) * math_ops.log1p(x * self.power) def _maybe_assert_valid_x(self, x): if not self.validate_args or self.power == 0.: diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/real_nvp.py b/tensorflow/contrib/distributions/python/ops/bijectors/real_nvp.py index 71ab369d01aafc33854a2c2437f96bbb493cc6fb..f09ab21bce100e9dafb77eff1f3999ce4b71c681 100644 --- a/tensorflow/contrib/distributions/python/ops/bijectors/real_nvp.py +++ b/tensorflow/contrib/distributions/python/ops/bijectors/real_nvp.py @@ -166,7 +166,7 @@ class RealNVP(bijector_lib.Bijector): self._input_depth = None self._shift_and_log_scale_fn = shift_and_log_scale_fn super(RealNVP, self).__init__( - event_ndims=1, + forward_min_event_ndims=1, is_constant_jacobian=is_constant_jacobian, validate_args=validate_args, name=name) @@ -224,7 +224,7 @@ class RealNVP(bijector_lib.Bijector): _, log_scale = self._shift_and_log_scale_fn( x0, self._input_depth - self._num_masked) if log_scale is None: - return constant_op.constant(0., dtype=x.dtype, name="ildj") + return constant_op.constant(0., dtype=x.dtype, name="fldj") return math_ops.reduce_sum(log_scale, axis=-1) diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/reshape.py b/tensorflow/contrib/distributions/python/ops/bijectors/reshape.py index 55eca063126797d577653f0d6bcdfddf8192bdb5..f21b982ba664b312c716827c7925767a0b5a037a 100644 --- a/tensorflow/contrib/distributions/python/ops/bijectors/reshape.py +++ b/tensorflow/contrib/distributions/python/ops/bijectors/reshape.py @@ -128,15 +128,17 @@ class Reshape(bijector_lib.Bijector): self._event_shape_in = event_shape_in self._event_shape_out = 
event_shape_out - super(Reshape, self).__init__(is_constant_jacobian=True, - validate_args=validate_args, - name=name or "reshape") + super(Reshape, self).__init__( + forward_min_event_ndims=0, + is_constant_jacobian=True, + validate_args=validate_args, + name=name or "reshape") def _maybe_check_valid_shape(self, shape, validate_args): """Check that a shape Tensor is int-type and otherwise sane.""" if not shape.dtype.is_integer: raise TypeError("{} dtype ({}) should be `int`-like.".format( - shape.op.name, shape.dtype.name)) + shape, shape.dtype.name)) assertions = [] @@ -144,10 +146,10 @@ class Reshape(bijector_lib.Bijector): ndims_ = tensor_util.constant_value(ndims) if ndims_ is not None and ndims_ > 1: raise ValueError("`{}` rank ({}) should be <= 1.".format( - shape.op.name, ndims_)) + shape, ndims_)) elif validate_args: assertions.append(check_ops.assert_less_equal( - ndims, 1, message="`{}` rank should be <= 1.".format(shape.op.name))) + ndims, 1, message="`{}` rank should be <= 1.".format(shape))) shape_ = tensor_util.constant_value_as_shape(shape) if shape_.is_fully_defined(): @@ -155,12 +157,12 @@ class Reshape(bijector_lib.Bijector): if sum(es == -1) > 1: raise ValueError( "`{}` must have at most one `-1` (given {})" - .format(shape.op.name, es)) + .format(shape, es)) if np.any(es < -1): raise ValueError( "`{}` elements must be either positive integers or `-1`" "(given {})." - .format(shape.op.name, es)) + .format(shape, es)) elif validate_args: assertions.extend([ check_ops.assert_less_equal( @@ -168,11 +170,11 @@ class Reshape(bijector_lib.Bijector): math_ops.cast(math_ops.equal(shape, -1), dtypes.int32)), 1, message="`{}` elements must have at most one `-1`." - .format(shape.op.name)), + .format(shape)), check_ops.assert_greater_equal( shape, -1, message="`{}` elements must be either positive integers or `-1`." 
- .format(shape.op.name)), + .format(shape)), ]) return assertions diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/sigmoid.py b/tensorflow/contrib/distributions/python/ops/bijectors/sigmoid.py index a640dfe7dfbcce96261589c7fc49107deaefdd54..5df8c886315ff75cdc884e3b9b4665fb64bb109d 100644 --- a/tensorflow/contrib/distributions/python/ops/bijectors/sigmoid.py +++ b/tensorflow/contrib/distributions/python/ops/bijectors/sigmoid.py @@ -33,7 +33,9 @@ class Sigmoid(bijector.Bijector): def __init__(self, validate_args=False, name="sigmoid"): super(Sigmoid, self).__init__( - event_ndims=0, validate_args=validate_args, name=name) + forward_min_event_ndims=0, + validate_args=validate_args, + name=name) def _forward(self, x): return math_ops.sigmoid(x) diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/sinh_arcsinh.py b/tensorflow/contrib/distributions/python/ops/bijectors/sinh_arcsinh.py index 3a75e4ae9495793901b0da91a5aa3982aab35852..2a32e8abcde940b0056b0faf2955ec1b3bd71803 100644 --- a/tensorflow/contrib/distributions/python/ops/bijectors/sinh_arcsinh.py +++ b/tensorflow/contrib/distributions/python/ops/bijectors/sinh_arcsinh.py @@ -91,7 +91,6 @@ class SinhArcsinh(bijector.Bijector): def __init__(self, skewness=None, tailweight=None, - event_ndims=0, validate_args=False, name="SinhArcsinh"): """Instantiates the `SinhArcsinh` bijector. @@ -101,8 +100,6 @@ class SinhArcsinh(bijector.Bijector): of type `float32`. tailweight: Tailweight parameter. Positive `Tensor` of same `dtype` as `skewness` and broadcastable `shape`. Default is `1` of type `float32`. - event_ndims: Python scalar indicating the number of dimensions associated - with a particular draw from the distribution. validate_args: Python `bool` indicating whether arguments should be checked for correctness. name: Python `str` name given to ops managed by this object. 
@@ -125,7 +122,9 @@ class SinhArcsinh(bijector.Bijector): message="Argument tailweight was not positive") ], self._tailweight) super(SinhArcsinh, self).__init__( - event_ndims=event_ndims, validate_args=validate_args, name=name) + forward_min_event_ndims=0, + validate_args=validate_args, + name=name) @property def skewness(self): @@ -149,31 +148,29 @@ class SinhArcsinh(bijector.Bijector): # dx/dy # = cosh(arcsinh(y) / tailweight - skewness) # / (tailweight * sqrt(y**2 + 1)) - event_dims = self._event_dims_tensor(y) - return math_ops.reduce_sum( - # This is computed inside the log to avoid catastrophic cancellations - # from cosh((arcsinh(y) / tailweight) - skewness) and sqrt(x**2 + 1). + + # This is computed inside the log to avoid catastrophic cancellations + # from cosh((arcsinh(y) / tailweight) - skewness) and sqrt(x**2 + 1). + return ( math_ops.log(math_ops.cosh( math_ops.asinh(y) / self.tailweight - self.skewness) # TODO(srvasude): Consider using cosh(arcsinh(x)) in cases # where (arcsinh(x) / tailweight) - skewness ~= arcsinh(x). / _sqrtx2p1(y)) - - math_ops.log(self.tailweight), - axis=event_dims) + - math_ops.log(self.tailweight)) def _forward_log_det_jacobian(self, x): # y = sinh((arcsinh(x) + skewness) * tailweight) # Using sinh' = cosh, arcsinh'(x) = 1 / sqrt(x**2 + 1), # dy/dx # = cosh((arcsinh(x) + skewness) * tailweight) * tailweight / sqrt(x**2 + 1) - event_dims = self._event_dims_tensor(x) - return math_ops.reduce_sum( - # This is computed inside the log to avoid catastrophic cancellations - # from cosh((arcsinh(x) + skewness) * tailweight) and sqrt(x**2 + 1). + + # This is computed inside the log to avoid catastrophic cancellations + # from cosh((arcsinh(x) + skewness) * tailweight) and sqrt(x**2 + 1). + return ( math_ops.log(math_ops.cosh( (math_ops.asinh(x) + self.skewness) * self.tailweight) # TODO(srvasude): Consider using cosh(arcsinh(x)) in cases # where (arcsinh(x) + skewness) * tailweight ~= arcsinh(x). 
/ _sqrtx2p1(x)) - + math_ops.log(self.tailweight), - axis=event_dims) + + math_ops.log(self.tailweight)) diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/softmax_centered.py b/tensorflow/contrib/distributions/python/ops/bijectors/softmax_centered.py index dc94fd0a38de29f5a7ee6ca826aab0ecf8712966..f52b91550edff7390d8094a4508d862674e85d59 100644 --- a/tensorflow/contrib/distributions/python/ops/bijectors/softmax_centered.py +++ b/tensorflow/contrib/distributions/python/ops/bijectors/softmax_centered.py @@ -66,7 +66,7 @@ class SoftmaxCentered(bijector.Bijector): self._graph_parents = [] self._name = name super(SoftmaxCentered, self).__init__( - event_ndims=1, + forward_min_event_ndims=1, validate_args=validate_args, name=name) @@ -105,8 +105,6 @@ class SoftmaxCentered(bijector.Bijector): y.shape.assert_is_compatible_with(shape) y.set_shape(shape) - # Since we only support event_ndims in [0, 1] and we do padding, we always - # reduce over the last dimension, i.e., dim=-1 (which is the default). 
return nn_ops.softmax(y) def _inverse(self, y): @@ -162,8 +160,6 @@ class SoftmaxCentered(bijector.Bijector): # -log_normalization + reduce_sum(logits - log_normalization) log_normalization = nn_ops.softplus( math_ops.reduce_logsumexp(x, axis=-1, keep_dims=True)) - fldj = (-log_normalization + - math_ops.reduce_sum(x - log_normalization, - axis=-1, - keep_dims=True)) - return array_ops.squeeze(fldj, squeeze_dims=-1) + return array_ops.squeeze( + (-log_normalization + math_ops.reduce_sum( + x - log_normalization, axis=-1, keepdims=True)), axis=-1) diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/softplus.py b/tensorflow/contrib/distributions/python/ops/bijectors/softplus.py index 81957fcf78922fa15fd20a25d144071f431161ae..96a938c803418ff818f9c531754b47ba1eb8667a 100644 --- a/tensorflow/contrib/distributions/python/ops/bijectors/softplus.py +++ b/tensorflow/contrib/distributions/python/ops/bijectors/softplus.py @@ -62,7 +62,7 @@ class Softplus(bijector.Bijector): ```python # Create the Y=g(X)=softplus(X) transform which works only on Tensors with 1 # batch ndim and 2 event ndims (i.e., vector of matrices). - softplus = Softplus(event_ndims=2) + softplus = Softplus() x = [[[1., 2], [3, 4]], [[5, 6], @@ -81,7 +81,6 @@ class Softplus(bijector.Bijector): "Nonzero floating point `Tensor`. Controls the softness of what " "would otherwise be a kink at the origin. Default is 1.0")}) def __init__(self, - event_ndims=0, hinge_softness=None, validate_args=False, name="softplus"): @@ -101,7 +100,7 @@ class Softplus(bijector.Bijector): [nonzero_check], self.hinge_softness) super(Softplus, self).__init__( - event_ndims=event_ndims, + forward_min_event_ndims=0, validate_args=validate_args, name=name) @@ -130,14 +129,12 @@ class Softplus(bijector.Bijector): # 1 - exp{-Y} approx Y. 
if self.hinge_softness is not None: y /= math_ops.cast(self.hinge_softness, y.dtype) - return -math_ops.reduce_sum(math_ops.log(-math_ops.expm1(-y)), - axis=self._event_dims_tensor(y)) + return -math_ops.log(-math_ops.expm1(-y)) def _forward_log_det_jacobian(self, x): if self.hinge_softness is not None: x /= math_ops.cast(self.hinge_softness, x.dtype) - return -math_ops.reduce_sum(nn_ops.softplus(-x), - axis=self._event_dims_tensor(x)) + return -nn_ops.softplus(-x) @property def hinge_softness(self): diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/softsign.py b/tensorflow/contrib/distributions/python/ops/bijectors/softsign.py new file mode 100644 index 0000000000000000000000000000000000000000..b4a658c171b8313358754228aabbfa4bf93fd84d --- /dev/null +++ b/tensorflow/contrib/distributions/python/ops/bijectors/softsign.py @@ -0,0 +1,86 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Softsign bijector.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.ops import check_ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops.distributions import bijector + + +__all__ = [ + "Softsign", +] + + +class Softsign(bijector.Bijector): + """Bijector which computes `Y = g(X) = X / (1 + |X|)`. + + The softsign `Bijector` has the following two useful properties: + + * The domain is all real numbers + * `softsign(x) approx sgn(x)`, for large `|x|`. + + #### Examples + + ```python + # Create the Y = softsign(X) transform. + softsign = Softsign() + x = [[[1., 2], + [3, 4]], + [[5, 6], + [7, 8]]] + x / (1 + abs(x)) == softsign.forward(x) + x / (1 - abs(x)) == softsign.inverse(x) + ``` + """ + + def __init__(self, validate_args=False, name="softsign"): + super(Softsign, self).__init__( + forward_min_event_ndims=0, + validate_args=validate_args, + name=name) + + def _forward(self, x): + return x / (1. + math_ops.abs(x)) + + def _inverse(self, y): + y = self._maybe_assert_valid_y(y) + return y / (1. - math_ops.abs(y)) + + def _forward_log_det_jacobian(self, x): + return -2. * math_ops.log1p(math_ops.abs(x)) + + def _inverse_log_det_jacobian(self, y): + y = self._maybe_assert_valid_y(y) + return -2. 
* math_ops.log1p(-math_ops.abs(y)) + + def _maybe_assert_valid_y(self, y): + if not self.validate_args: + return y + is_valid = [ + check_ops.assert_greater( + y, math_ops.cast(-1., dtype=y.dtype.base_dtype), + message="Inverse transformation input must be greater than -1."), + check_ops.assert_less( + y, math_ops.cast(1., dtype=y.dtype.base_dtype), + message="Inverse transformation input must be less than 1.") + ] + + return control_flow_ops.with_dependencies(is_valid, y) diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/square.py b/tensorflow/contrib/distributions/python/ops/bijectors/square.py index 1e9dbf35091fe51f2478dc085c394a77295ca4ee..2ccfdc95970e387e708603e2614ad29fb6a18db3 100644 --- a/tensorflow/contrib/distributions/python/ops/bijectors/square.py +++ b/tensorflow/contrib/distributions/python/ops/bijectors/square.py @@ -59,7 +59,7 @@ class Square(bijector.Bijector): """ self._name = name super(Square, self).__init__( - event_ndims=0, + forward_min_event_ndims=0, validate_args=validate_args, name=name) diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/weibull.py b/tensorflow/contrib/distributions/python/ops/bijectors/weibull.py index 00520bcda85e9527767e6342bf75f10667c264a8..39129cd22cdbf9ca1b4edd7cb5c3571a33837a29 100644 --- a/tensorflow/contrib/distributions/python/ops/bijectors/weibull.py +++ b/tensorflow/contrib/distributions/python/ops/bijectors/weibull.py @@ -50,7 +50,6 @@ class Weibull(bijector.Bijector): def __init__(self, scale=1., concentration=1., - event_ndims=0, validate_args=False, name="weibull"): """Instantiates the `Weibull` bijector. @@ -62,8 +61,6 @@ class Weibull(bijector.Bijector): concentration: Positive Float-type `Tensor` that is the same dtype and is broadcastable with `scale`. This is `k` in `Y = g(X) = 1 - exp((-x / l) ** k)`. - event_ndims: Python scalar indicating the number of dimensions associated - with a particular draw from the distribution. 
validate_args: Python `bool` indicating whether arguments should be checked for correctness. name: Python `str` name given to ops managed by this object. @@ -89,7 +86,7 @@ class Weibull(bijector.Bijector): ], self._concentration) super(Weibull, self).__init__( - event_ndims=event_ndims, + forward_min_event_ndims=0, validate_args=validate_args, name=name) @@ -113,22 +110,18 @@ class Weibull(bijector.Bijector): def _inverse_log_det_jacobian(self, y): y = self._maybe_assert_valid_y(y) - event_dims = self._event_dims_tensor(y) - return math_ops.reduce_sum( + return ( -math_ops.log1p(-y) + (1 / self.concentration - 1) * math_ops.log(-math_ops.log1p(-y)) + - math_ops.log(self.scale / self.concentration), - axis=event_dims) + math_ops.log(self.scale / self.concentration)) def _forward_log_det_jacobian(self, x): x = self._maybe_assert_valid_x(x) - event_dims = self._event_dims_tensor(x) - return math_ops.reduce_sum( + return ( -(x / self.scale) ** self.concentration + (self.concentration - 1) * math_ops.log(x) + math_ops.log(self.concentration) + - -self.concentration * math_ops.log(self.scale), - axis=event_dims) + -self.concentration * math_ops.log(self.scale)) def _maybe_assert_valid_x(self, x): if not self.validate_args: diff --git a/tensorflow/contrib/distributions/python/ops/binomial.py b/tensorflow/contrib/distributions/python/ops/binomial.py index 6a1bb39ab28218a411bdf4329965186bcf32bf30..12d16031783b78dc3ea6273af77c1eaeb77ca94e 100644 --- a/tensorflow/contrib/distributions/python/ops/binomial.py +++ b/tensorflow/contrib/distributions/python/ops/binomial.py @@ -164,7 +164,7 @@ class Binomial(distribution.Distribution): name: Python `str` name prefixed to Ops created by this class. 
""" parameters = locals() - with ops.name_scope(name, values=[total_count, logits, probs]): + with ops.name_scope(name, values=[total_count, logits, probs]) as name: self._total_count = self._maybe_assert_valid_total_count( ops.convert_to_tensor(total_count, name="total_count"), validate_args) diff --git a/tensorflow/contrib/distributions/python/ops/cauchy.py b/tensorflow/contrib/distributions/python/ops/cauchy.py index 6f5d724a2a945ed8f9c159d8314327c6f994d1db..daacfe657fe154dce8d0db98894fe8b73546c476 100644 --- a/tensorflow/contrib/distributions/python/ops/cauchy.py +++ b/tensorflow/contrib/distributions/python/ops/cauchy.py @@ -121,7 +121,7 @@ class Cauchy(distribution.Distribution): TypeError: if `loc` and `scale` have different `dtype`. """ parameters = locals() - with ops.name_scope(name, values=[loc, scale]): + with ops.name_scope(name, values=[loc, scale]) as name: with ops.control_dependencies([check_ops.assert_positive(scale)] if validate_args else []): self._loc = array_ops.identity(loc, name="loc") diff --git a/tensorflow/contrib/distributions/python/ops/chi2.py b/tensorflow/contrib/distributions/python/ops/chi2.py index e610f469e5d5f446b75c734cc39811de30a8cb9a..c77c5fd20895a6220604d76a95a152a22cd3d914 100644 --- a/tensorflow/contrib/distributions/python/ops/chi2.py +++ b/tensorflow/contrib/distributions/python/ops/chi2.py @@ -88,7 +88,7 @@ class Chi2(gamma.Gamma): # not true in the parent class "gamma." therefore, passing # allow_nan_stats=True # through to the parent class results in unnecessary asserts. 
- with ops.name_scope(name, values=[df]): + with ops.name_scope(name, values=[df]) as name: with ops.control_dependencies([ check_ops.assert_positive(df), ] if validate_args else []): @@ -120,7 +120,7 @@ class Chi2WithAbsDf(Chi2): allow_nan_stats=True, name="Chi2WithAbsDf"): parameters = locals() - with ops.name_scope(name, values=[df]): + with ops.name_scope(name, values=[df]) as name: super(Chi2WithAbsDf, self).__init__( df=math_ops.floor( math_ops.abs(df, name="abs_df"), diff --git a/tensorflow/contrib/distributions/python/ops/conditional_transformed_distribution.py b/tensorflow/contrib/distributions/python/ops/conditional_transformed_distribution.py index 1d4c5660d8d73b7b6a7e758fc834ccfddeb5c8ea..10b45361358b40a3c8fd725f27ad84ef9b8a37f5 100644 --- a/tensorflow/contrib/distributions/python/ops/conditional_transformed_distribution.py +++ b/tensorflow/contrib/distributions/python/ops/conditional_transformed_distribution.py @@ -20,6 +20,7 @@ from __future__ import print_function from tensorflow.contrib.distributions.python.ops import conditional_distribution from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops.distributions import transformed_distribution @@ -105,7 +106,9 @@ class ConditionalTransformedDistribution( bijector_kwargs = bijector_kwargs or {} distribution_kwargs = distribution_kwargs or {} x = self.bijector.inverse(y, **bijector_kwargs) - ildj = self.bijector.inverse_log_det_jacobian(y, **bijector_kwargs) + event_ndims = self._maybe_get_event_ndims_statically() + ildj = self.bijector.inverse_log_det_jacobian( + y, event_ndims=event_ndims, **bijector_kwargs) if self.bijector._is_injective: # pylint: disable=protected-access return self._finish_log_prob_for_one_fiber(y, x, ildj, distribution_kwargs) @@ -128,7 +131,9 @@ class 
ConditionalTransformedDistribution( bijector_kwargs = bijector_kwargs or {} distribution_kwargs = distribution_kwargs or {} x = self.bijector.inverse(y, **bijector_kwargs) - ildj = self.bijector.inverse_log_det_jacobian(y, **bijector_kwargs) + event_ndims = self._maybe_get_event_ndims_statically() + ildj = self.bijector.inverse_log_det_jacobian( + y, event_ndims=event_ndims, **bijector_kwargs) if self.bijector._is_injective: # pylint: disable=protected-access return self._finish_prob_for_one_fiber(y, x, ildj, distribution_kwargs) @@ -214,3 +219,15 @@ class ConditionalTransformedDistribution( # implies the qth quantile of Y is g(x_q). inv_cdf = self.distribution.quantile(value, **distribution_kwargs) return self.bijector.forward(inv_cdf, **bijector_kwargs) + + def _maybe_get_event_ndims_statically(self): + if self.event_shape.ndims is not None: + return self.event_shape.ndims + + event_ndims = array_ops.size(self.event_shape_tensor()) + static_event_ndims = tensor_util.constant_value(event_ndims) + + if static_event_ndims is not None: + return static_event_ndims + + return event_ndims diff --git a/tensorflow/contrib/distributions/python/ops/deterministic.py b/tensorflow/contrib/distributions/python/ops/deterministic.py index 8049522e9f5dc26b244b7e710a9ae8b981efd6b6..a42350430e98515e521ce357bf5a87ff2daefedc 100644 --- a/tensorflow/contrib/distributions/python/ops/deterministic.py +++ b/tensorflow/contrib/distributions/python/ops/deterministic.py @@ -87,7 +87,7 @@ class _BaseDeterministic(distribution.Distribution): ValueError: If `loc` is a scalar. """ parameters = locals() - with ops.name_scope(name, values=[loc, atol, rtol]): + with ops.name_scope(name, values=[loc, atol, rtol]) as name: loc = ops.convert_to_tensor(loc, name="loc") if is_vector and validate_args: msg = "Argument loc must be at least rank 1." 
diff --git a/tensorflow/contrib/distributions/python/ops/geometric.py b/tensorflow/contrib/distributions/python/ops/geometric.py index 8f190e48a7148d84082d73771cba4660a1a0d221..53dd42f4c83fcea0ec5b1374c8e3109ebe1dd127 100644 --- a/tensorflow/contrib/distributions/python/ops/geometric.py +++ b/tensorflow/contrib/distributions/python/ops/geometric.py @@ -86,7 +86,7 @@ class Geometric(distribution.Distribution): """ parameters = locals() - with ops.name_scope(name, values=[logits, probs]): + with ops.name_scope(name, values=[logits, probs]) as name: self._logits, self._probs = distribution_util.get_logits_and_probs( logits, probs, validate_args=validate_args, name=name) diff --git a/tensorflow/contrib/distributions/python/ops/gumbel.py b/tensorflow/contrib/distributions/python/ops/gumbel.py index 8d05ad6b8032fb8bada99389959091fb1c28beda..2c261073ee16462599740cb241108bfe08c773ec 100644 --- a/tensorflow/contrib/distributions/python/ops/gumbel.py +++ b/tensorflow/contrib/distributions/python/ops/gumbel.py @@ -125,7 +125,7 @@ class _Gumbel(distribution.Distribution): TypeError: if loc and scale are different dtypes. """ parameters = locals() - with ops.name_scope(name, values=[loc, scale]): + with ops.name_scope(name, values=[loc, scale]) as name: with ops.control_dependencies([check_ops.assert_positive(scale)] if validate_args else []): self._loc = array_ops.identity(loc, name="loc") diff --git a/tensorflow/contrib/distributions/python/ops/half_normal.py b/tensorflow/contrib/distributions/python/ops/half_normal.py index fc0751a6e0b78cb3d79bd3478e740bb05cd26428..d0df2befd6e46ca93e5a0b5d1cb5407d6719c7f2 100644 --- a/tensorflow/contrib/distributions/python/ops/half_normal.py +++ b/tensorflow/contrib/distributions/python/ops/half_normal.py @@ -106,7 +106,7 @@ class HalfNormal(distribution.Distribution): name: Python `str` name prefixed to Ops created by this class. 
""" parameters = locals() - with ops.name_scope(name, values=[scale]): + with ops.name_scope(name, values=[scale]) as name: with ops.control_dependencies([check_ops.assert_positive(scale)] if validate_args else []): self._scale = array_ops.identity(scale, name="scale") diff --git a/tensorflow/contrib/distributions/python/ops/independent.py b/tensorflow/contrib/distributions/python/ops/independent.py index b1bacb91b03093fa93a7e5f7eb855dc944dafb44..fbde55ef310de1d926b8ddd503499fbed4809373 100644 --- a/tensorflow/contrib/distributions/python/ops/independent.py +++ b/tensorflow/contrib/distributions/python/ops/independent.py @@ -119,7 +119,7 @@ class Independent(distribution_lib.Distribution): parameters = locals() name = name or "Independent" + distribution.name self._distribution = distribution - with ops.name_scope(name): + with ops.name_scope(name) as name: if reinterpreted_batch_ndims is None: reinterpreted_batch_ndims = self._get_default_reinterpreted_batch_ndims( distribution) diff --git a/tensorflow/contrib/distributions/python/ops/inverse_gamma.py b/tensorflow/contrib/distributions/python/ops/inverse_gamma.py index 51ac61dcf640ca89f22c47127bda71316a179ca4..502bd4f493337bab180129cd0ddfaf5a76a0ca4e 100644 --- a/tensorflow/contrib/distributions/python/ops/inverse_gamma.py +++ b/tensorflow/contrib/distributions/python/ops/inverse_gamma.py @@ -126,7 +126,7 @@ class InverseGamma(distribution.Distribution): TypeError: if `concentration` and `rate` are different dtypes. 
""" parameters = locals() - with ops.name_scope(name, values=[concentration, rate]): + with ops.name_scope(name, values=[concentration, rate]) as name: with ops.control_dependencies([ check_ops.assert_positive(concentration), check_ops.assert_positive(rate), @@ -281,7 +281,7 @@ class InverseGammaWithSoftplusConcentrationRate(InverseGamma): allow_nan_stats=True, name="InverseGammaWithSoftplusConcentrationRate"): parameters = locals() - with ops.name_scope(name, values=[concentration, rate]): + with ops.name_scope(name, values=[concentration, rate]) as name: super(InverseGammaWithSoftplusConcentrationRate, self).__init__( concentration=nn.softplus(concentration, name="softplus_concentration"), diff --git a/tensorflow/contrib/distributions/python/ops/kumaraswamy.py b/tensorflow/contrib/distributions/python/ops/kumaraswamy.py index 192dede6ff1d4de8d4be9965c414e7453d7b5d4b..66682b2ff5493f8565410138e770b45ffc6b5d77 100644 --- a/tensorflow/contrib/distributions/python/ops/kumaraswamy.py +++ b/tensorflow/contrib/distributions/python/ops/kumaraswamy.py @@ -151,10 +151,11 @@ class Kumaraswamy(transformed_distribution.TransformedDistribution): more of the statistic's batch members are undefined. name: Python `str` name prefixed to Ops created by this class. 
""" - concentration1 = ops.convert_to_tensor( - concentration1, name="concentration1") - concentration0 = ops.convert_to_tensor( - concentration0, name="concentration0") + with ops.name_scope(name, values=[concentration1, concentration0]) as name: + concentration1 = ops.convert_to_tensor( + concentration1, name="concentration1") + concentration0 = ops.convert_to_tensor( + concentration0, name="concentration0") super(Kumaraswamy, self).__init__( distribution=uniform.Uniform( low=array_ops.zeros([], dtype=concentration1.dtype), diff --git a/tensorflow/contrib/distributions/python/ops/logistic.py b/tensorflow/contrib/distributions/python/ops/logistic.py index 68e6bca5a554b29a450911073eb5c4fe55f313c6..c83b5bc2e3a8c56f5c52d063a7d0d399be1c1870 100644 --- a/tensorflow/contrib/distributions/python/ops/logistic.py +++ b/tensorflow/contrib/distributions/python/ops/logistic.py @@ -120,7 +120,7 @@ class Logistic(distribution.Distribution): TypeError: if loc and scale are different dtypes. """ parameters = locals() - with ops.name_scope(name, values=[loc, scale]): + with ops.name_scope(name, values=[loc, scale]) as name: with ops.control_dependencies([check_ops.assert_positive(scale)] if validate_args else []): self._loc = array_ops.identity(loc, name="loc") diff --git a/tensorflow/contrib/distributions/python/ops/mixture.py b/tensorflow/contrib/distributions/python/ops/mixture.py index cef6a143fc615901315a3780bf4ed53b8c7cd177..2ef294af2e8bc9beff735ec2e0fd6b619ce96176 100644 --- a/tensorflow/contrib/distributions/python/ops/mixture.py +++ b/tensorflow/contrib/distributions/python/ops/mixture.py @@ -145,7 +145,7 @@ class Mixture(distribution.Distribution): "none of the components provide a static number of ndims") # Ensure that all batch and event ndims are consistent. 
- with ops.name_scope(name, values=[cat.logits]): + with ops.name_scope(name, values=[cat.logits]) as name: num_components = cat.event_size static_num_components = tensor_util.constant_value(num_components) if static_num_components is None: diff --git a/tensorflow/contrib/distributions/python/ops/mixture_same_family.py b/tensorflow/contrib/distributions/python/ops/mixture_same_family.py index b93bdc5ab4010663baddda1410b302644853648b..0b1301e551728f74bb0048d2dcf3c356ae110c75 100644 --- a/tensorflow/contrib/distributions/python/ops/mixture_same_family.py +++ b/tensorflow/contrib/distributions/python/ops/mixture_same_family.py @@ -131,7 +131,7 @@ class MixtureSameFamily(distribution.Distribution): `components_distribution` rightmost batch shape. """ parameters = locals() - with ops.name_scope(name): + with ops.name_scope(name) as name: self._mixture_distribution = mixture_distribution self._components_distribution = components_distribution self._runtime_assertions = [] diff --git a/tensorflow/contrib/distributions/python/ops/mvn_diag.py b/tensorflow/contrib/distributions/python/ops/mvn_diag.py index e862552880f4073c8fa8e90134d0633e7484b0bf..e3236c2db93695a5e007bba9a1414773f3935f2e 100644 --- a/tensorflow/contrib/distributions/python/ops/mvn_diag.py +++ b/tensorflow/contrib/distributions/python/ops/mvn_diag.py @@ -194,7 +194,7 @@ class MultivariateNormalDiag( ValueError: if at most `scale_identity_multiplier` is specified. """ parameters = locals() - with ops.name_scope(name): + with ops.name_scope(name) as name: with ops.name_scope("init", values=[ loc, scale_diag, scale_identity_multiplier]): # No need to validate_args while making diag_scale. 
The returned @@ -225,7 +225,7 @@ class MultivariateNormalDiagWithSoftplusScale(MultivariateNormalDiag): allow_nan_stats=True, name="MultivariateNormalDiagWithSoftplusScale"): parameters = locals() - with ops.name_scope(name, values=[scale_diag]): + with ops.name_scope(name, values=[scale_diag]) as name: super(MultivariateNormalDiagWithSoftplusScale, self).__init__( loc=loc, scale_diag=nn.softplus(scale_diag), diff --git a/tensorflow/contrib/distributions/python/ops/mvn_diag_plus_low_rank.py b/tensorflow/contrib/distributions/python/ops/mvn_diag_plus_low_rank.py index 413e88f03ae0286c294f3404549a73e1a47dcff7..2f6a6f198cbcfbdcbd0993d3074ddde1c389585f 100644 --- a/tensorflow/contrib/distributions/python/ops/mvn_diag_plus_low_rank.py +++ b/tensorflow/contrib/distributions/python/ops/mvn_diag_plus_low_rank.py @@ -218,7 +218,7 @@ class MultivariateNormalDiagPlusLowRank( parameters = locals() def _convert_to_tensor(x, name): return None if x is None else ops.convert_to_tensor(x, name=name) - with ops.name_scope(name): + with ops.name_scope(name) as name: with ops.name_scope("init", values=[ loc, scale_diag, scale_identity_multiplier, scale_perturb_factor, scale_perturb_diag]): diff --git a/tensorflow/contrib/distributions/python/ops/mvn_full_covariance.py b/tensorflow/contrib/distributions/python/ops/mvn_full_covariance.py index 4bea99fbb75349f97fde473cb5716fe6c426ce90..86fcd4db54ad851af670839a5a93a68dce047675 100644 --- a/tensorflow/contrib/distributions/python/ops/mvn_full_covariance.py +++ b/tensorflow/contrib/distributions/python/ops/mvn_full_covariance.py @@ -159,7 +159,7 @@ class MultivariateNormalFullCovariance(mvn_tril.MultivariateNormalTriL): parameters = locals() # Convert the covariance_matrix up to a scale_tril and call MVNTriL. 
- with ops.name_scope(name): + with ops.name_scope(name) as name: with ops.name_scope("init", values=[loc, covariance_matrix]): if covariance_matrix is None: scale_tril = None diff --git a/tensorflow/contrib/distributions/python/ops/mvn_linear_operator.py b/tensorflow/contrib/distributions/python/ops/mvn_linear_operator.py index a7399792892f4c179c05168184d76ec95c168b51..44c92312c7dc758500051f89923ec9fafe850c0e 100644 --- a/tensorflow/contrib/distributions/python/ops/mvn_linear_operator.py +++ b/tensorflow/contrib/distributions/python/ops/mvn_linear_operator.py @@ -176,7 +176,7 @@ class MultivariateNormalLinearOperator( if not scale.dtype.is_floating: raise TypeError("`scale` parameter must have floating-point dtype.") - with ops.name_scope(name, values=[loc] + scale.graph_parents): + with ops.name_scope(name, values=[loc] + scale.graph_parents) as name: # Since expand_dims doesn't preserve constant-ness, we obtain the # non-dynamic value if possible. loc = ops.convert_to_tensor(loc, name="loc") if loc is not None else loc diff --git a/tensorflow/contrib/distributions/python/ops/mvn_tril.py b/tensorflow/contrib/distributions/python/ops/mvn_tril.py index 6c7dc4ca7aaf5b3a20b072e9360d15528ad10556..d6f8b731cbeed5fed3b43365e7c668d0434a267e 100644 --- a/tensorflow/contrib/distributions/python/ops/mvn_tril.py +++ b/tensorflow/contrib/distributions/python/ops/mvn_tril.py @@ -184,7 +184,7 @@ class MultivariateNormalTriL( return None if x is None else ops.convert_to_tensor(x, name=name) if loc is None and scale_tril is None: raise ValueError("Must specify one or both of `loc`, `scale_tril`.") - with ops.name_scope(name): + with ops.name_scope(name) as name: with ops.name_scope("init", values=[loc, scale_tril]): loc = _convert_to_tensor(loc, name="loc") scale_tril = _convert_to_tensor(scale_tril, name="scale_tril") diff --git a/tensorflow/contrib/distributions/python/ops/negative_binomial.py b/tensorflow/contrib/distributions/python/ops/negative_binomial.py index 
3a58df80da6c02b056f5e5a63bf41de5fc6d44a4..eeaf9c0a5ebc1323e137ff73f82588f6907031c7 100644 --- a/tensorflow/contrib/distributions/python/ops/negative_binomial.py +++ b/tensorflow/contrib/distributions/python/ops/negative_binomial.py @@ -91,7 +91,7 @@ class NegativeBinomial(distribution.Distribution): """ parameters = locals() - with ops.name_scope(name, values=[total_count, logits, probs]): + with ops.name_scope(name, values=[total_count, logits, probs]) as name: self._logits, self._probs = distribution_util.get_logits_and_probs( logits, probs, validate_args=validate_args, name=name) with ops.control_dependencies( diff --git a/tensorflow/contrib/distributions/python/ops/onehot_categorical.py b/tensorflow/contrib/distributions/python/ops/onehot_categorical.py index e3e40b2e9ca232b9970768f21fb95887fdf0df2d..305b138fdc2318523ee078195213caf865d96b4d 100644 --- a/tensorflow/contrib/distributions/python/ops/onehot_categorical.py +++ b/tensorflow/contrib/distributions/python/ops/onehot_categorical.py @@ -116,7 +116,7 @@ class OneHotCategorical(distribution.Distribution): name: Python `str` name prefixed to Ops created by this class. """ parameters = locals() - with ops.name_scope(name, values=[logits, probs]): + with ops.name_scope(name, values=[logits, probs]) as name: self._logits, self._probs = distribution_util.get_logits_and_probs( name=name, logits=logits, probs=probs, validate_args=validate_args, multidimensional=True) diff --git a/tensorflow/contrib/distributions/python/ops/poisson.py b/tensorflow/contrib/distributions/python/ops/poisson.py index 02e97c0a2fd004c4fa9382d5367af9f5b034a869..a84aad6fc9372395ac021fa3aa006ddf9272e6a9 100644 --- a/tensorflow/contrib/distributions/python/ops/poisson.py +++ b/tensorflow/contrib/distributions/python/ops/poisson.py @@ -94,7 +94,7 @@ class Poisson(distribution.Distribution): TypeError: if `log_rate` is not a float-type. 
""" parameters = locals() - with ops.name_scope(name, values=[rate]): + with ops.name_scope(name, values=[rate]) as name: if (rate is None) == (log_rate is None): raise ValueError("Must specify exactly one of `rate` and `log_rate`.") elif log_rate is None: diff --git a/tensorflow/contrib/distributions/python/ops/poisson_lognormal.py b/tensorflow/contrib/distributions/python/ops/poisson_lognormal.py index 92f2bba1828696248c9d9460566a08ba372c3358..19c99dcee92978e938a73af9be445cd098e5fe90 100644 --- a/tensorflow/contrib/distributions/python/ops/poisson_lognormal.py +++ b/tensorflow/contrib/distributions/python/ops/poisson_lognormal.py @@ -114,7 +114,7 @@ def quadrature_scheme_lognormal_quantiles( # Create a LogNormal distribution. dist = transformed_lib.TransformedDistribution( distribution=normal_lib.Normal(loc=loc, scale=scale), - bijector=Exp(event_ndims=0), + bijector=Exp(), validate_args=validate_args) batch_ndims = dist.batch_shape.ndims if batch_ndims is None: @@ -256,7 +256,7 @@ class PoissonLogNormalQuadratureCompound(distribution_lib.Distribution): `dtype`. 
""" parameters = locals() - with ops.name_scope(name, values=[loc, scale]): + with ops.name_scope(name, values=[loc, scale]) as name: if loc is not None: loc = ops.convert_to_tensor(loc, name="loc") if scale is not None: diff --git a/tensorflow/contrib/distributions/python/ops/quantized_distribution.py b/tensorflow/contrib/distributions/python/ops/quantized_distribution.py index 8aebb79b9138cce1373e6472d17cf9072d2bc285..1ef7651d03a3388e72618b1d9bb8b819bde17e92 100644 --- a/tensorflow/contrib/distributions/python/ops/quantized_distribution.py +++ b/tensorflow/contrib/distributions/python/ops/quantized_distribution.py @@ -217,7 +217,7 @@ class QuantizedDistribution(distributions.Distribution): values = ( list(distribution.parameters.values()) + [low, high]) - with ops.name_scope(name, values=values): + with ops.name_scope(name, values=values) as name: self._dist = distribution if low is not None: diff --git a/tensorflow/contrib/distributions/python/ops/relaxed_bernoulli.py b/tensorflow/contrib/distributions/python/ops/relaxed_bernoulli.py index e454a53c6275e0c60edd8c87b1c3be670f2b22de..84c8d29072c2f1f3888329638c4695bccf70eab7 100644 --- a/tensorflow/contrib/distributions/python/ops/relaxed_bernoulli.py +++ b/tensorflow/contrib/distributions/python/ops/relaxed_bernoulli.py @@ -166,7 +166,7 @@ class RelaxedBernoulli(transformed_distribution.TransformedDistribution): ValueError: If both `probs` and `logits` are passed, or if neither. 
""" parameters = locals() - with ops.name_scope(name, values=[logits, probs, temperature]): + with ops.name_scope(name, values=[logits, probs, temperature]) as name: with ops.control_dependencies([check_ops.assert_positive(temperature)] if validate_args else []): self._temperature = array_ops.identity(temperature, name="temperature") diff --git a/tensorflow/contrib/distributions/python/ops/relaxed_onehot_categorical.py b/tensorflow/contrib/distributions/python/ops/relaxed_onehot_categorical.py index f56ba0781604cb5a4fb3070b79aa86e09ceb6766..325f41e37c928ba8e81e45e63a7f7f8126bc80f8 100644 --- a/tensorflow/contrib/distributions/python/ops/relaxed_onehot_categorical.py +++ b/tensorflow/contrib/distributions/python/ops/relaxed_onehot_categorical.py @@ -163,7 +163,7 @@ class ExpRelaxedOneHotCategorical(distribution.Distribution): name: Python `str` name prefixed to Ops created by this class. """ parameters = locals() - with ops.name_scope(name, values=[logits, probs, temperature]): + with ops.name_scope(name, values=[logits, probs, temperature]) as name: self._logits, self._probs = distribution_util.get_logits_and_probs( name=name, logits=logits, probs=probs, validate_args=validate_args, @@ -409,5 +409,5 @@ class RelaxedOneHotCategorical( validate_args=validate_args, allow_nan_stats=allow_nan_stats) super(RelaxedOneHotCategorical, self).__init__(dist, - bijectors.Exp(event_ndims=1), + bijectors.Exp(), name=name) diff --git a/tensorflow/contrib/distributions/python/ops/seed_stream.py b/tensorflow/contrib/distributions/python/ops/seed_stream.py new file mode 100644 index 0000000000000000000000000000000000000000..056d349688511e19a4fa3d58a5b3c1c8355671a3 --- /dev/null +++ b/tensorflow/contrib/distributions/python/ops/seed_stream.py @@ -0,0 +1,228 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Local PRNG for amplifying seed entropy into seeds for base operations.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import hashlib + + +class SeedStream(object): + """Local PRNG for amplifying seed entropy into seeds for base operations. + + Writing sampling code which correctly sets the pseudo-random number + generator (PRNG) seed is surprisingly difficult. This class serves as + a helper for the TensorFlow Probability coding pattern designed to + avoid common mistakes. + + # Motivating Example + + A common first-cut implementation of a sampler for the beta + distribution is to compute the ratio of a gamma with itself plus + another gamma. This code snippet tries to do that, but contains a + surprisingly common error: + + ```python + def broken_beta(shape, alpha, beta, seed): + x = tf.random_gamma(shape, alpha, seed=seed) + y = tf.random_gamma(shape, beta, seed=seed) + return x / (x + y) + ``` + + The mistake is that the two gamma draws are seeded with the same + seed. This causes them to always produce the same results, which, + in turn, leads this code snippet to always return `0.5`. Because it + can happen across abstraction boundaries, this kind of error is + surprisingly easy to make when handling immutable seeds. + + # Goals + + TensorFlow Probability adopts a code style designed to eliminate the + above class of error, without exacerbating others. 
The goals of + this code style are: + + - Support reproducibility of results (by encouraging seeding of all + pseudo-random operations). + + - Avoid shared-write global state (by not relying on a global PRNG). + + - Prevent accidental seed reuse by TF Probability implementers. This + goal is served with the local pseudo-random seed generator provided + in this module. + + - Mitigate potential accidental seed reuse by TF Probability clients + (with a salting scheme). + + - Prevent accidental resonances with downstream PRNGs (by hashing the + output). + + ## Non-goals + + - Implementing a high-performance PRNG for generating large amounts of + entropy. That's the job of the underlying TensorFlow PRNG we are + seeding. + + - Avoiding random seed collisions, aka "birthday attacks". + + # Code pattern + + ```python + def random_beta(shape, alpha, beta, seed): # (a) + seed = SeedStream(seed, salt="random_beta") # (b) + x = tf.random_gamma(shape, alpha, seed=seed()) # (c) + y = tf.random_gamma(shape, beta, seed=seed()) # (c) + return x / (x + y) + ``` + + The elements of this pattern are: + + - Accept an explicit seed (line a) as an argument in all public + functions, and write the function to be deterministic (up to any + numerical issues) for fixed seed. + + - Rationale: This provides the client with the ability to reproduce + results. Accepting an immutable seed rather than a mutable PRNG + object reduces code coupling, permitting different sections to be + reproducible independently. + + - Use that seed only to initialize a local `SeedStream` instance (line b). + + - Rationale: Avoids accidental seed reuse. + + - Supply the name of the function being implemented as a salt to the + `SeedStream` instance (line b). This serves to keep the salts + unique; unique salts ensure that clients of TF Probability will see + different functions always produce independent results even if + called with the same seeds. 
+ + - Seed each callee operation with the output of a unique call to the + `SeedStream` instance (lines c). This ensures reproducibility of + results while preventing seed reuse across callee invocations. + + # Why salt? + + Salting the `SeedStream` instances (with unique salts) is defensive + programming against a client accidentally committing a mistake + similar to our motivating example. Consider the following situation + that might arise without salting: + + ```python + def tfp_foo(seed): + seed = SeedStream(seed, salt="") + foo_stuff = tf.random_normal(seed=seed()) + ... + + def tfp_bar(seed): + seed = SeedStream(seed, salt="") + bar_stuff = tf.random_normal(seed=seed()) + ... + + def client_baz(seed): + foo = tfp_foo(seed=seed) + bar = tfp_bar(seed=seed) + ... + ``` + + The client should have used different seeds as inputs to `foo` and + `bar`. However, because they didn't, *and because `foo` and `bar` + both sample a Gaussian internally as their first action*, the + internal `foo_stuff` and `bar_stuff` will be the same, and the + returned `foo` and `bar` will not be independent, leading to subtly + incorrect answers from the client's simulation. This kind of bug is + particularly insidious for the client, because it depends on a + Distributions implementation detail, namely the order in which `foo` + and `bar` invoke the samplers they depend on. In particular, a + Bayesflow team member can introduce such a bug in previously + (accidentally) correct client code by performing an internal + refactoring that causes this operation order alignment. + + A salting discipline eliminates this problem by making sure that the + seeds seen by `foo`'s callees will differ from those seen by `bar`'s + callees, even if `foo` and `bar` are invoked with the same input + seed. + """ + + def __init__(self, seed, salt): + """Initializes a `SeedStream`. + + Args: + seed: Any Python object convertible to string, supplying the + initial entropy. 
If `None`, operations seeded with seeds + drawn from this `SeedStream` will follow TensorFlow semantics + for not being seeded. + salt: Any Python object convertible to string, supplying + auxiliary entropy. Must be unique across the Distributions + and TensorFlow Probability code base. See class docstring for + rationale. + """ + self._seed = seed + self._salt = salt + self._counter = 0 + + def __call__(self): + """Returns a fresh integer usable as a seed in downstream operations. + + If this `SeedStream` was initialized with `seed=None`, returns + `None`. This has the effect that downstream operations (both + `SeedStream`s and primitive TensorFlow ops) will behave as though + they were unseeded. + + The returned integer is non-negative, and uniformly distributed in + the half-open interval `[0, 2**512)`. This is consistent with + TensorFlow, as TensorFlow operations internally use the residue of + the given seed modulo `2**31 - 1` (see + `tensorflow/python/framework/random_seed.py`). + + Returns: + seed: A fresh integer usable as a seed in downstream operations, + or `None`. + """ + self._counter += 1 + if self._seed is None: + return None + composite = str((self._seed, self._counter, self._salt)).encode("utf-8") + return int(hashlib.sha512(composite).hexdigest(), 16) + + @property + def original_seed(self): + return self._seed + + @property + def salt(self): + return self._salt + +# Design rationales for the SeedStream class +# +# - Salts are accepted for the reason given above to supply them. +# +# - A `None` seed propagates to downstream seeds, so they exhibit +# their "unseeded" behavior. +# +# - The return value is a Python int so it can be passed directly to +# TensorFlow operations as a seed. It is large to avoid losing seed +# space needlessly (TF will internally reduce it modulo `2**31 - 1`).
+# +# - The output is hashed with a crypto-grade hash function as a form +# of defensive programming: this reliably prevents all possible +# accidental resonances with all possible downstream PRNGs. The +# specific function used is not important; SHA512 was ready to hand. +# +# - The internal state update is a simple counter because (a) given +# that the output is hashed anyway, this is enough, and (b) letting +# it be this predictable permits a future "generate many seeds in +# parallel" operation whose results would agree with running +# sequentially. diff --git a/tensorflow/contrib/distributions/python/ops/shape.py b/tensorflow/contrib/distributions/python/ops/shape.py index bac0b79d5908712f4e64259768fb6f3b4558f620..6a7f28713acefd2285b07a212e2e47a6db1ae5e1 100644 --- a/tensorflow/contrib/distributions/python/ops/shape.py +++ b/tensorflow/contrib/distributions/python/ops/shape.py @@ -439,7 +439,7 @@ class _DistributionShape(object): if self._batch_ndims_is_0 and expand_batch_dim: squeeze_dims += [1] if squeeze_dims: - x = array_ops.squeeze(x, squeeze_dims=squeeze_dims) + x = array_ops.squeeze(x, axis=squeeze_dims) # x.shape: [prod(S)]+B+E _, batch_shape, event_shape = self.get_shape(x) else: diff --git a/tensorflow/contrib/distributions/python/ops/sinh_arcsinh.py b/tensorflow/contrib/distributions/python/ops/sinh_arcsinh.py index 0d8a1926913766da374cb65767dccfa28bf75579..03828fa61277eeaf7ce90de8023b4ed91f6cc4dc 100644 --- a/tensorflow/contrib/distributions/python/ops/sinh_arcsinh.py +++ b/tensorflow/contrib/distributions/python/ops/sinh_arcsinh.py @@ -134,7 +134,8 @@ class SinhArcsinh(transformed_distribution.TransformedDistribution): """ parameters = locals() - with ops.name_scope(name, values=[loc, scale, skewness, tailweight]): + with ops.name_scope(name, + values=[loc, scale, skewness, tailweight]) as name: loc = ops.convert_to_tensor(loc, name="loc") dtype = loc.dtype scale = ops.convert_to_tensor(scale, name="scale", dtype=dtype) @@ -166,13 +167,13 @@ class 
SinhArcsinh(transformed_distribution.TransformedDistribution): # Make the SAS bijector, 'F'. f = bijectors.SinhArcsinh( - skewness=skewness, tailweight=tailweight, event_ndims=0) + skewness=skewness, tailweight=tailweight) if has_default_skewness: f_noskew = f else: f_noskew = bijectors.SinhArcsinh( skewness=skewness.dtype.as_numpy_dtype(0.), - tailweight=tailweight, event_ndims=0) + tailweight=tailweight) # Make the AffineScalar bijector, Z --> loc + scale * Z (2 / F_0(2)) c = 2 * scale / f_noskew.forward(ops.convert_to_tensor(2, dtype=dtype)) diff --git a/tensorflow/contrib/distributions/python/ops/statistical_testing.py b/tensorflow/contrib/distributions/python/ops/statistical_testing.py index d66c34cc1a45cc09da5138a5f72ae3817690db49..9c69435fac109914ff29b307dfad105f62849339 100644 --- a/tensorflow/contrib/distributions/python/ops/statistical_testing.py +++ b/tensorflow/contrib/distributions/python/ops/statistical_testing.py @@ -12,7 +12,114 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Statistical test assertions calibrated for their error rates.""" +"""Statistical test assertions calibrated for their error rates. + +Statistical tests have an inescapable probability of error: a correct +sampler can still fail a test by chance, and an incorrect sampler can +still pass a test by chance. This library is about bounding both of +those error rates. This requires admitting a task-specific notion of +"discrepancy": Correct code will fail rarely, code that misbehaves by +more than the discrepancy will pass rarely, and nothing reliable can +be said about code that misbehaves, but misbehaves by less than the +discrepancy. + +# Example + +Consider testing that the mean of a scalar probability distribution P +is some expected constant. Suppose the support of P is the interval +`[0, 1]`. 
Then you might do this: + +```python +tfd = tf.contrib.distributions + +expected_mean = ... +num_samples = 5000 +samples = ... draw 5000 samples from P + +# Check that the mean looks right +check1 = tfd.assert_true_mean_equal_by_dkwm( + samples, low=0., high=1., expected=expected_mean, + false_fail_rate=1e-6) + +# Check that the difference in means detectable with 5000 samples is +# small enough +check2 = tf.assert_less( + tfd.min_discrepancy_of_true_means_detectable_by_dkwm( + num_samples, low=0., high=1.0, + false_fail_rate=1e-6, false_pass_rate=1e-6), + 0.01) + +# Be sure to execute both assertion ops +sess.run([check1, check2]) +``` + +The second assertion is an instance of experiment design. It's a +deterministic computation (independent of the code under test) that +checks that `5000` samples is enough to reliably resolve mean +differences of `0.01` or more. Here "reliably" means that if the code +under test is correct, the probability of drawing an unlucky sample +that causes this test to fail is at most 1e-6; and if the code under +test is incorrect enough that its true mean is 0.01 more or less than +expected, then the probability of drawing a "lucky" sample that causes +the test to false-pass is also at most 1e-6. + +# Overview + +Every function in this library can be characterized in terms of: + +- The property being tested, such as the full density of the + distribution under test, or just its true mean, or a single + Bernoulli probability, etc. + +- The relation being asserted, e.g., whether the mean is less, more, + or equal to the given expected value. + +- The stochastic bound being relied upon, such as the + [Dvoretzky-Kiefer-Wolfowitz-Massart inequality] + (https://en.wikipedia.org/wiki/CDF-based_nonparametric_confidence_interval) + or the CDF of the binomial distribution (for assertions about + Bernoulli probabilities). + +- The number of sample sets in the statistical test. 
For example, + testing equality of means has a one-sample variant, where the + expected mean is given exactly, and a two-sample variant, where the + expected mean is itself given by a set of samples (e.g., from an + alternative algorithm). + +- What operation(s) of the test are to be performed. Each test has + three of these: + + 1. `assert` executes the test. Specifically, it creates a TF op that + produces an error if it has enough evidence to prove that the + property under test is violated. These functions depend on the + desired false failure rate, because that determines the sizes of + appropriate confidence intervals, etc. + + 2. `min_discrepancy` computes the smallest difference reliably + detectable by that test, given the sample count and error rates. + What it's a difference of is test-specific. For example, a test + for equality of means would make detection guarantees about the + difference between the true means. + + 3. `min_num_samples` computes the minimum number of samples needed + to reliably detect a given discrepancy with given error rates. + + The latter two are for experimental design, and are meant to be + usable either interactively or inline in the overall test method. + +This library follows a naming convention, to make room for every +combination of the above. A name mentions the operation first, then +the property, then the relation, then the bound, then, if the test +takes more than one set of samples, a token indicating this. For +example, `assert_true_mean_equal_by_dkwm` (which is implicitly +one-sample). Each name is a grammatically sound noun phrase (or verb +phrase, for the asserts). + +# Asymptotic properties + +The number of samples needed tends to scale as `O(1/discrepancy**2)` and +as `O(log(1/error_rate))`.
+""" from __future__ import absolute_import from __future__ import division @@ -23,7 +130,7 @@ import itertools from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import check_ops -from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import clip_ops from tensorflow.python.ops import gen_math_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn_ops @@ -40,7 +147,7 @@ __all__ = [ def _batch_sort_vector(x, ascending=True, name=None): - with ops.name_scope(name, "sort_each_row", [x]): + with ops.name_scope(name, "_batch_sort_vector", [x]): x = ops.convert_to_tensor(x, name="x") n = array_ops.shape(x)[-1] if ascending: @@ -62,31 +169,27 @@ def _do_maximum_mean(samples, envelope, high, name=None): samples = array_ops.transpose(samples, perm) samples = _batch_sort_vector(samples) - batch_shape = array_ops.shape(samples)[:-1] - n = array_ops.shape(samples)[-1] - step = 1. / math_ops.cast(n, dtype=samples.dtype.base_dtype) - - def _loop_body(iter_, total, to_skip): - total = array_ops.where( - step <= to_skip, - total, - array_ops.where( - to_skip > 0., - total + (step - to_skip) * samples[..., iter_], - total + step * samples[..., iter_])) - to_skip = array_ops.where(step <= to_skip, to_skip - step, 0.) - return [iter_ + 1, total, to_skip] - - _, total, _ = control_flow_ops.while_loop( - cond=lambda iter_, *args: iter_ < n, - body=_loop_body, - loop_vars=[ - 0, - array_ops.zeros(batch_shape, dtype=samples.dtype.base_dtype), - envelope, # to_skip - ]) - - return total + envelope * high + + # The maximum mean is given by taking `envelope`-worth of + # probability from the smallest samples and moving it to the + # maximum value. This amounts to: + # - ignoring the smallest k samples, where `k/n < envelope` + # - taking a `1/n - (envelope - k/n)` part of the index k sample + # - taking all the other samples + # - and adding `envelope * high` at the end. 
+ # The following is a vectorized and batched way of computing this. + # `max_mean_contrib` is a mask implementing the previous. + batch_size = array_ops.shape(samples)[-1] + batch_size = math_ops.cast(batch_size, dtype=samples.dtype.base_dtype) + step = 1. / batch_size + cum_steps = step * math_ops.range( + 1, batch_size + 1, dtype=samples.dtype.base_dtype) + max_mean_contrib = clip_ops.clip_by_value( + cum_steps - envelope[..., array_ops.newaxis], + clip_value_min=0., + clip_value_max=step) + return math_ops.reduce_sum( + samples * max_mean_contrib, axis=-1) + envelope * high def _maximum_mean(samples, envelope, high, name=None): @@ -127,7 +230,7 @@ def _maximum_mean(samples, envelope, high, name=None): envelope = ops.convert_to_tensor(envelope, name="envelope") high = ops.convert_to_tensor(high, name="high") - xmax = math_ops.reduce_max(samples, axis=[-1]) + xmax = math_ops.reduce_max(samples, axis=[0]) msg = "Given sample maximum value exceeds expectations" check_op = check_ops.assert_less_equal(xmax, high, message=msg) with ops.control_dependencies([check_op]): @@ -172,7 +275,7 @@ def _minimum_mean(samples, envelope, low, name=None): envelope = ops.convert_to_tensor(envelope, name="envelope") low = ops.convert_to_tensor(low, name="low") - xmin = math_ops.reduce_min(samples, axis=[-1]) + xmin = math_ops.reduce_min(samples, axis=[0]) msg = "Given sample minimum value falls below expectations" check_op = check_ops.assert_greater_equal(xmin, low, message=msg) with ops.control_dependencies([check_op]): @@ -212,8 +315,8 @@ def _dkwm_cdf_envelope(n, error_rate, name=None): return math_ops.sqrt(-gen_math_ops.log(error_rate / 2.) / (2. * n)) -def _check_shape_dominates(tensor, tensors): - """Check that broadcasting `tensor` against `tensors` does not expand it. +def _check_shape_dominates(samples, parameters): + """Check that broadcasting `samples` against `parameters` does not expand it. Why? 
Because I want to be very sure that the samples tensor is not accidentally enlarged by broadcasting against tensors that are @@ -221,24 +324,27 @@ def _check_shape_dominates(tensor, tensors): sample counts end up inflated. Args: - tensor: A Tensor whose shape is to be protected against broadcasting. - tensors: A list of Tensors to check + samples: A Tensor whose shape is to be protected against broadcasting. + parameters: A list of Tensors who are parameters for the statistical test. Returns: - tensor: `tf.identity(tensor)` with control dependencies attached; - be sure to use that downstream. + samples: Return original `samples` with control dependencies attached + to ensure no broadcasting. """ def check(t): - target = array_ops.shape(tensor)[1:] - result = array_ops.broadcast_dynamic_shape(target, array_ops.shape(t)) + samples_batch_shape = array_ops.shape(samples)[1:] + broadcasted_batch_shape = array_ops.broadcast_dynamic_shape( + samples_batch_shape, array_ops.shape(t)) # This rank check ensures that I don't get a wrong answer from the # _shapes_ broadcasting against each other. - gt = check_ops.assert_greater(array_ops.rank(target), array_ops.rank(t)) - eq = check_ops.assert_equal(target, result) - return gt, eq - checks = list(itertools.chain(*[check(t) for t in tensors])) + samples_batch_ndims = array_ops.size(samples_batch_shape) + ge = check_ops.assert_greater_equal( + samples_batch_ndims, array_ops.rank(t)) + eq = check_ops.assert_equal(samples_batch_shape, broadcasted_batch_shape) + return ge, eq + checks = list(itertools.chain(*[check(t) for t in parameters])) with ops.control_dependencies(checks): - return array_ops.identity(array_ops.identity(tensor)) + return array_ops.identity(samples) def true_mean_confidence_interval_by_dkwm( @@ -577,9 +683,13 @@ def assert_true_mean_equal_by_dkwm_two_sample( # I want to assert # not (max_mean_1 < min_mean_2 or min_mean_1 > max_mean_2), # but I think I only have and-combination of asserts, so use DeMorgan. 
- clause1_op = check_ops.assert_greater_equal(max_mean_1, min_mean_2) - with ops.control_dependencies([clause1_op]): - return check_ops.assert_less_equal(min_mean_1, max_mean_2) + check_confidence_intervals_can_intersect = check_ops.assert_greater_equal( + max_mean_1, min_mean_2, message="Confidence intervals do not " + "intersect: samples1 has a smaller mean than samples2") + with ops.control_dependencies([check_confidence_intervals_can_intersect]): + return check_ops.assert_less_equal( + min_mean_1, max_mean_2, message="Confidence intervals do not " + "intersect: samples2 has a smaller mean than samples1") def min_discrepancy_of_true_means_detectable_by_dkwm_two_sample( diff --git a/tensorflow/contrib/distributions/python/ops/vector_diffeomixture.py b/tensorflow/contrib/distributions/python/ops/vector_diffeomixture.py index 971d65c4a69140161461fdac93bb588014dd3e88..af6ff8162b173015dca2d568e13d63127af7853a 100644 --- a/tensorflow/contrib/distributions/python/ops/vector_diffeomixture.py +++ b/tensorflow/contrib/distributions/python/ops/vector_diffeomixture.py @@ -396,7 +396,7 @@ class VectorDiffeomixture(distribution_lib.Distribution): ValueError: if `not distribution.is_scalar_event`. 
""" parameters = locals() - with ops.name_scope(name, values=[mix_loc, temperature]): + with ops.name_scope(name, values=[mix_loc, temperature]) as name: if not scale or len(scale) < 2: raise ValueError("Must specify list (or list-like object) of scale " "LinearOperators, one for each component with " @@ -427,7 +427,6 @@ class VectorDiffeomixture(distribution_lib.Distribution): self._endpoint_affine = [ AffineLinearOperator(shift=loc_, scale=scale_, - event_ndims=1, validate_args=validate_args, name="endpoint_affine_{}".format(k)) for k, (loc_, scale_) in enumerate(zip(loc, scale))] @@ -467,7 +466,6 @@ class VectorDiffeomixture(distribution_lib.Distribution): self._interpolated_affine = [ AffineLinearOperator(shift=loc_, scale=scale_, - event_ndims=1, validate_args=validate_args, name="interpolated_affine_{}".format(k)) for k, (loc_, scale_) in enumerate(zip( @@ -621,9 +619,11 @@ class VectorDiffeomixture(distribution_lib.Distribution): log_prob = math_ops.reduce_sum(self.distribution.log_prob(y), axis=-2) # Because the affine transformation has a constant Jacobian, it is the case # that `affine.fldj(x) = -affine.ildj(x)`. This is not true in general. 
- fldj = array_ops.stack( - [aff.forward_log_det_jacobian(x) for aff in self.interpolated_affine], - axis=-1) + fldj = array_ops.stack([ + aff.forward_log_det_jacobian( + x, + event_ndims=array_ops.rank(self.event_shape_tensor()) + ) for aff in self.interpolated_affine], axis=-1) return math_ops.reduce_logsumexp( self.mixture_distribution.logits - fldj + log_prob, axis=-1) diff --git a/tensorflow/contrib/distributions/python/ops/vector_exponential_diag.py b/tensorflow/contrib/distributions/python/ops/vector_exponential_diag.py index 526fe2d39aef9aed833b889de80e849c469435e7..e265b5d0f7c10b2782a1a8924babdca9b986f622 100644 --- a/tensorflow/contrib/distributions/python/ops/vector_exponential_diag.py +++ b/tensorflow/contrib/distributions/python/ops/vector_exponential_diag.py @@ -176,7 +176,7 @@ class VectorExponentialDiag( ValueError: if at most `scale_identity_multiplier` is specified. """ parameters = locals() - with ops.name_scope(name): + with ops.name_scope(name) as name: with ops.name_scope("init", values=[ loc, scale_diag, scale_identity_multiplier]): # No need to validate_args while making diag_scale. The returned diff --git a/tensorflow/contrib/distributions/python/ops/vector_exponential_linear_operator.py b/tensorflow/contrib/distributions/python/ops/vector_exponential_linear_operator.py index 9d5fd9ac4178a1ae29b1ce32f304b22fd3d234dc..89136d6760bb663b5ff86a77c5945ce900f072b9 100644 --- a/tensorflow/contrib/distributions/python/ops/vector_exponential_linear_operator.py +++ b/tensorflow/contrib/distributions/python/ops/vector_exponential_linear_operator.py @@ -181,7 +181,7 @@ class VectorExponentialLinearOperator( if not scale.dtype.is_floating: raise TypeError("`scale` parameter must have floating-point dtype.") - with ops.name_scope(name, values=[loc] + scale.graph_parents): + with ops.name_scope(name, values=[loc] + scale.graph_parents) as name: # Since expand_dims doesn't preserve constant-ness, we obtain the # non-dynamic value if possible. 
loc = ops.convert_to_tensor(loc, name="loc") if loc is not None else loc diff --git a/tensorflow/contrib/distributions/python/ops/vector_sinh_arcsinh_diag.py b/tensorflow/contrib/distributions/python/ops/vector_sinh_arcsinh_diag.py index 003c66b9413fdcad20fbcc8b4bf47259692932e7..1438ede26500bca4541fa9b2020ff22d4c071098 100644 --- a/tensorflow/contrib/distributions/python/ops/vector_sinh_arcsinh_diag.py +++ b/tensorflow/contrib/distributions/python/ops/vector_sinh_arcsinh_diag.py @@ -169,7 +169,7 @@ class VectorSinhArcsinhDiag(transformed_distribution.TransformedDistribution): name, values=[ loc, scale_diag, scale_identity_multiplier, skewness, tailweight - ]): + ]) as name: loc = ops.convert_to_tensor(loc, name="loc") if loc is not None else loc tailweight = 1. if tailweight is None else tailweight has_default_skewness = skewness is None @@ -215,13 +215,13 @@ class VectorSinhArcsinhDiag(transformed_distribution.TransformedDistribution): tailweight = ops.convert_to_tensor( tailweight, dtype=dtype, name="tailweight") f = bijectors.SinhArcsinh( - skewness=skewness, tailweight=tailweight, event_ndims=1) + skewness=skewness, tailweight=tailweight) if has_default_skewness: f_noskew = f else: f_noskew = bijectors.SinhArcsinh( skewness=skewness.dtype.as_numpy_dtype(0.), - tailweight=tailweight, event_ndims=0) + tailweight=tailweight) # Make the Affine bijector, Z --> loc + C * Z. 
c = 2 * scale_diag_part / f_noskew.forward( diff --git a/tensorflow/contrib/distributions/python/ops/vector_student_t.py b/tensorflow/contrib/distributions/python/ops/vector_student_t.py index 887981d64ef077e2636f8031581c390f177edac8..7e78ded9df07564126b46b6beeeccf95bf1eef94 100644 --- a/tensorflow/contrib/distributions/python/ops/vector_student_t.py +++ b/tensorflow/contrib/distributions/python/ops/vector_student_t.py @@ -178,7 +178,7 @@ class _VectorStudentT(transformed_distribution.TransformedDistribution): parameters = locals() graph_parents = [df, loc, scale_identity_multiplier, scale_diag, scale_tril, scale_perturb_factor, scale_perturb_diag] - with ops.name_scope(name): + with ops.name_scope(name) as name: with ops.name_scope("init", values=graph_parents): # The shape of the _VectorStudentT distribution is governed by the # relationship between df.batch_shape and affine.batch_shape. In diff --git a/tensorflow/contrib/distributions/python/ops/wishart.py b/tensorflow/contrib/distributions/python/ops/wishart.py index 5a8c94dabf4c3c430bee544a48ee7acfe7dd7ed0..91453fed5d279178a0e062b71dad3b0f957b11b4 100644 --- a/tensorflow/contrib/distributions/python/ops/wishart.py +++ b/tensorflow/contrib/distributions/python/ops/wishart.py @@ -109,7 +109,7 @@ class _WishartLinearOperator(distribution.Distribution): """ parameters = locals() self._cholesky_input_output_matrices = cholesky_input_output_matrices - with ops.name_scope(name) as ns: + with ops.name_scope(name) as name: with ops.name_scope("init", values=[df, scale_operator]): if not scale_operator.dtype.is_floating: raise TypeError( @@ -163,7 +163,7 @@ class _WishartLinearOperator(distribution.Distribution): parameters=parameters, graph_parents=([self._df, self._dimension] + self._scale_operator.graph_parents), - name=ns) + name=name) @property def df(self): @@ -531,7 +531,7 @@ class WishartCholesky(_WishartLinearOperator): name: Python `str` name prefixed to Ops created by this class. 
""" parameters = locals() - with ops.name_scope(name, values=[scale]): + with ops.name_scope(name, values=[scale]) as name: with ops.name_scope("init", values=[scale]): scale = ops.convert_to_tensor(scale) if validate_args: @@ -647,7 +647,7 @@ class WishartFull(_WishartLinearOperator): name: Python `str` name prefixed to Ops created by this class. """ parameters = locals() - with ops.name_scope(name) as ns: + with ops.name_scope(name) as name: with ops.name_scope("init", values=[scale]): scale = ops.convert_to_tensor(scale) if validate_args: @@ -666,5 +666,5 @@ class WishartFull(_WishartLinearOperator): cholesky_input_output_matrices=cholesky_input_output_matrices, validate_args=validate_args, allow_nan_stats=allow_nan_stats, - name=ns) + name=name) self._parameters = parameters diff --git a/tensorflow/contrib/eager/proto/BUILD b/tensorflow/contrib/eager/proto/BUILD deleted file mode 100644 index b016d2dcb504044372c895e1eedf3511751bc13e..0000000000000000000000000000000000000000 --- a/tensorflow/contrib/eager/proto/BUILD +++ /dev/null @@ -1,13 +0,0 @@ -licenses(["notice"]) # Apache 2.0 - -exports_files(["LICENSE"]) - -load("//tensorflow/core:platform/default/build_config.bzl", "tf_proto_library") - -tf_proto_library( - name = "checkpointable_object_graph_proto", - srcs = [ - "checkpointable_object_graph.proto", - ], - visibility = ["//tensorflow/contrib/eager/python:__subpackages__"], -) diff --git a/tensorflow/contrib/eager/python/BUILD b/tensorflow/contrib/eager/python/BUILD index edb9130266e4ea93d2ec6ee373a90df504da18cf..99abbae03fc14f241dae27f317902f7335819037 100644 --- a/tensorflow/contrib/eager/python/BUILD +++ b/tensorflow/contrib/eager/python/BUILD @@ -11,7 +11,6 @@ py_library( srcs_version = "PY2AND3", visibility = ["//visibility:public"], deps = [ - ":checkpointable_utils", ":datasets", ":metrics", ":network", @@ -19,15 +18,14 @@ py_library( "//tensorflow/python:framework_ops", "//tensorflow/python:framework_test_lib", "//tensorflow/python:gradients", - 
"//tensorflow/python:numerics", "//tensorflow/python:resource_variable_ops", "//tensorflow/python:script_ops", "//tensorflow/python:template", + "//tensorflow/python:training", "//tensorflow/python:util", "//tensorflow/python:variable_scope", "//tensorflow/python/eager:backprop", "//tensorflow/python/eager:context", - "//tensorflow/python/eager:core", "//tensorflow/python/eager:execution_callbacks", "//tensorflow/python/eager:function", ], @@ -70,13 +68,15 @@ cuda_py_test( srcs = ["datasets_test.py"], additional_deps = [ ":datasets", - ":checkpointable_utils", - "//tensorflow/contrib/data/python/ops:transformation_ops", + "//tensorflow/contrib/data/python/ops:prefetching_ops", + "//tensorflow/contrib/data/python/ops:threadpool", + "//tensorflow/contrib/data/python/ops:unique", "//tensorflow/contrib/lookup:lookup_py", "//tensorflow/python:dtypes", "//tensorflow/python:framework_ops", "//tensorflow/python:math_ops", "//tensorflow/python:script_ops", + "//tensorflow/python:training", "//tensorflow/python/data", "//tensorflow/python/eager:test", ], @@ -119,14 +119,14 @@ py_library( srcs_version = "PY2AND3", visibility = ["//tensorflow:internal"], deps = [ - "//tensorflow/contrib/eager/python:checkpointable_utils", - "//tensorflow/contrib/summary:summary_ops", "//tensorflow/python:array_ops", + "//tensorflow/python:checkpointable", "//tensorflow/python:control_flow_ops", "//tensorflow/python:dtypes", "//tensorflow/python:framework_ops", "//tensorflow/python:init_ops", "//tensorflow/python:math_ops", + "//tensorflow/python:summary_ops_v2", "//tensorflow/python:util", "//tensorflow/python:variable_scope", "//tensorflow/python/eager:context", @@ -140,11 +140,11 @@ py_test( srcs_version = "PY2AND3", deps = [ ":metrics", - "//tensorflow/contrib/summary:summary_ops", "//tensorflow/contrib/summary:summary_test_util", "//tensorflow/python:array_ops", "//tensorflow/python:dtypes", "//tensorflow/python:framework_ops", + "//tensorflow/python:summary_ops_v2", 
"//tensorflow/python:training", "//tensorflow/python/eager:context", "//tensorflow/python/eager:test", @@ -161,10 +161,10 @@ py_library( deps = [ ":datasets", ":metrics", - "//tensorflow/contrib/summary:summary_ops", "//tensorflow/python:control_flow_ops", "//tensorflow/python:errors", "//tensorflow/python:framework_ops", + "//tensorflow/python:summary_ops_v2", "//tensorflow/python/eager:context", "//tensorflow/python/eager:function", "@six_archive//:six", @@ -223,57 +223,3 @@ py_test( "//tensorflow/python/eager:test", ], ) - -py_library( - name = "checkpointable_utils", - srcs = ["checkpointable_utils.py"], - srcs_version = "PY2AND3", - visibility = ["//tensorflow:internal"], - deps = [ - "//tensorflow/contrib/eager/proto:checkpointable_object_graph_proto_py", - "//tensorflow/python:constant_op", - "//tensorflow/python:control_flow_ops", - "//tensorflow/python:dtypes", - "//tensorflow/python:errors", - "//tensorflow/python:framework_ops", - "//tensorflow/python:init_ops", - "//tensorflow/python:pywrap_tensorflow", - "//tensorflow/python:resource_variable_ops", - "//tensorflow/python:session", - "//tensorflow/python:tensor_shape", - "//tensorflow/python:training", - "//tensorflow/python:util", - "//tensorflow/python:variable_scope", - "//tensorflow/python/eager:context", - ], -) - -cuda_py_test( - name = "checkpointable_utils_test", - srcs = ["checkpointable_utils_test.py"], - additional_deps = [ - ":checkpointable_utils", - ":network", - "@six_archive//:six", - "//tensorflow/python:constant_op", - "//tensorflow/python:dtypes", - "//tensorflow/python:framework_ops", - "//tensorflow/python:framework_test_lib", - "//tensorflow/python:init_ops", - "//tensorflow/python:layers", - "//tensorflow/python:layers_base", - "//tensorflow/python:resource_variable_ops", - "//tensorflow/python:state_ops", - "//tensorflow/python:training", - "//tensorflow/python:variable_scope", - "//tensorflow/python:variables", - "//tensorflow/python/eager:context", - 
"//tensorflow/python/eager:test", - "//tensorflow/python/keras", - ], - tags = [ - "no_oss", # b/74395663 - "no_windows", # TODO: needs investigation on Windows - "notsan", - ], -) diff --git a/tensorflow/contrib/eager/python/checkpointable_utils.py b/tensorflow/contrib/eager/python/checkpointable_utils.py deleted file mode 100644 index 91a7aded11db6b4c8bcb061da6d6c69253603c85..0000000000000000000000000000000000000000 --- a/tensorflow/contrib/eager/python/checkpointable_utils.py +++ /dev/null @@ -1,869 +0,0 @@ -"""Utilities for working with Checkpointable objects.""" -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import abc -import collections -import weakref - -from tensorflow.contrib.eager.proto import checkpointable_object_graph_pb2 -from tensorflow.python import pywrap_tensorflow -from tensorflow.python.client import session as session_lib -from tensorflow.python.eager import context -from tensorflow.python.framework import constant_op -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import errors_impl -from tensorflow.python.framework import ops -from tensorflow.python.framework import tensor_shape -from tensorflow.python.ops import control_flow_ops -from tensorflow.python.ops import init_ops -from tensorflow.python.ops import resource_variable_ops -from tensorflow.python.ops import variable_scope -from tensorflow.python.training import checkpointable as core_checkpointable -from tensorflow.python.training import checkpointable_utils as core_checkpointable_utils -from tensorflow.python.training import optimizer as optimizer_lib -from tensorflow.python.training import saver as saver_lib -from tensorflow.python.util import deprecation - - -_ESCAPE_CHAR = "." # For avoiding conflicts with user-specified names. - -# Keyword for identifying that the next bit of a checkpoint variable name is a -# slot name. Checkpoint names for slot variables look like: -# -# /<_OPTIMIZER_SLOTS_NAME>// -# -# Where is a full path from the checkpoint root to the -# variable being slotted for. -_OPTIMIZER_SLOTS_NAME = _ESCAPE_CHAR + "OPTIMIZER_SLOT" -# Keyword for separating the path to an object from the name of an -# attribute in checkpoint names. 
Used like: -# /<_OBJECT_ATTRIBUTES_NAME>/ -_OBJECT_ATTRIBUTES_NAME = _ESCAPE_CHAR + "ATTRIBUTES" -# Key where the object graph proto is saved in a TensorBundle -_OBJECT_GRAPH_PROTO_KEY = "_CHECKPOINTABLE_OBJECT_GRAPH" - - -# TODO(allenl): If this ends up in a public API, consider adding LINT.IfChange -# or consolidating the implementation with get_variable. -def _default_getter(name, shape, dtype, initializer=None, - partition_info=None, **kwargs): - """A pared-down version of get_variable which does not reuse variables.""" - dtype = dtypes.as_dtype(dtype) - shape_object = tensor_shape.as_shape(shape) - with ops.init_scope(): - if initializer is None: - initializer, initializing_from_value = ( - variable_scope._get_default_variable_store()._get_default_initializer( # pylint: disable=protected-access - name=name, shape=shape_object, dtype=dtype)) - else: - initializing_from_value = not callable(initializer) - # Same logic as get_variable - variable_dtype = dtype.base_dtype - if initializing_from_value: - if shape is not None: - raise ValueError("If initializer is a constant, do not specify shape.") - initial_value = initializer - else: - # Instantiate initializer if provided initializer is a type object. 
- if isinstance(initializer, type(init_ops.Initializer)): - initializer = initializer(dtype=dtype) - def initial_value(): - return initializer( - shape_object.as_list(), dtype=dtype, partition_info=partition_info) - return resource_variable_ops.ResourceVariable( - initial_value=initial_value, - name=name, - dtype=variable_dtype, - **kwargs - ) - - -def add_variable(checkpointable, name, shape=None, dtype=dtypes.float32, - initializer=None): - """Add a variable to a Checkpointable with no scope influence.""" - return checkpointable._add_variable_with_custom_getter( # pylint: disable=protected-access - name=name, shape=shape, dtype=dtype, - initializer=initializer, getter=_default_getter) - - -def _breadth_first_checkpointable_traversal(root_checkpointable): - """Find shortest paths to all variables owned by dependencies of root.""" - bfs_sorted = [] - to_visit = collections.deque([root_checkpointable]) - path_to_root = {root_checkpointable: ()} - while to_visit: - current_checkpointable = to_visit.popleft() - current_checkpointable._maybe_initialize_checkpointable() # pylint: disable=protected-access - bfs_sorted.append(current_checkpointable) - for child_checkpointable in ( - current_checkpointable._checkpoint_dependencies): # pylint: disable=protected-access - if child_checkpointable.ref not in path_to_root: - path_to_root[child_checkpointable.ref] = ( - path_to_root[current_checkpointable] + (child_checkpointable,)) - to_visit.append(child_checkpointable.ref) - return bfs_sorted, path_to_root - - -def _escape_local_name(name): - # We need to support slashes in local names for compatibility, since this - # naming scheme is being patched in to things like Layer.add_variable where - # slashes were previously accepted. We also want to use slashes to indicate - # edges traversed to reach the variable, so we escape forward slashes in - # names. 
- return (name.replace(_ESCAPE_CHAR, _ESCAPE_CHAR + _ESCAPE_CHAR) - .replace(r"/", _ESCAPE_CHAR + "S")) - - -def _object_prefix_from_path(path_to_root): - return "/".join( - (_escape_local_name(checkpointable.name) - for checkpointable in path_to_root)) - - -def _slot_variable_naming_for_optimizer(optimizer_path): - """Make a function for naming slot variables in an optimizer.""" - # Name slot variables: - # - # /<_OPTIMIZER_SLOTS_NAME>// - # - # where is exactly the checkpoint name used for the original - # variable, including the path from the checkpoint root and the local name in - # the object which owns it. Note that we only save slot variables if the - # variable it's slotting for is also being saved. - - optimizer_identifier = "/%s/%s/" % (_OPTIMIZER_SLOTS_NAME, optimizer_path) - - def _name_slot_variable(variable_path, slot_name): - """With an optimizer specified, name a slot variable.""" - return (variable_path - + optimizer_identifier - + _escape_local_name(slot_name)) - - return _name_slot_variable - - -def _serialize_slot_variables(checkpointable_objects, node_ids, object_names): - """Gather and name slot variables.""" - non_slot_objects = list(checkpointable_objects) - slot_variables = {} - for checkpointable in non_slot_objects: - if isinstance(checkpointable, optimizer_lib.Optimizer): - naming_scheme = _slot_variable_naming_for_optimizer( - optimizer_path=object_names[checkpointable]) - slot_names = checkpointable.get_slot_names() - for slot_name in slot_names: - for original_variable_node_id, original_variable in enumerate( - non_slot_objects): - try: - slot_variable = checkpointable.get_slot( - original_variable, slot_name) - except AttributeError: - slot_variable = None - if slot_variable is None: - continue - slot_variable._maybe_initialize_checkpointable() # pylint: disable=protected-access - if slot_variable._checkpoint_dependencies: # pylint: disable=protected-access - # TODO(allenl): Gather dependencies of slot variables. 
- raise NotImplementedError( - "Currently only variables with no dependencies can be saved as " - "slot variables. File a feature request if this limitation " - "bothers you.") - if slot_variable in node_ids: - raise NotImplementedError( - "A slot variable was re-used as a dependency of a " - "Checkpointable object. This is not currently allowed. File a " - "feature request if this limitation bothers you.") - checkpoint_name = naming_scheme( - variable_path=object_names[original_variable], - slot_name=slot_name) - object_names[slot_variable] = checkpoint_name - slot_variable_node_id = len(checkpointable_objects) - node_ids[slot_variable] = slot_variable_node_id - checkpointable_objects.append(slot_variable) - slot_variable_proto = ( - checkpointable_object_graph_pb2.CheckpointableObjectGraph - .Object.SlotVariableReference( - slot_name=slot_name, - original_variable_node_id=original_variable_node_id, - slot_variable_node_id=slot_variable_node_id)) - slot_variables.setdefault(checkpointable, []).append( - slot_variable_proto) - return slot_variables - - -def _serialize_checkpointables( - checkpointable_objects, node_ids, object_names, slot_variables): - """Name non-slot `Checkpointable`s and add them to `object_graph_proto`.""" - object_graph_proto = ( - checkpointable_object_graph_pb2.CheckpointableObjectGraph()) - named_saveables = {} - - for checkpoint_id, checkpointable in enumerate(checkpointable_objects): - assert node_ids[checkpointable] == checkpoint_id - object_proto = object_graph_proto.nodes.add() - object_proto.slot_variables.extend(slot_variables.get(checkpointable, ())) - object_name = object_names[checkpointable] - for name, saveable_factory in ( - checkpointable._gather_saveables_for_checkpoint().items()): # pylint: disable=protected-access - attribute = object_proto.attributes.add() - attribute.name = name - attribute.checkpoint_key = "%s/%s/%s" % ( - object_name, _OBJECT_ATTRIBUTES_NAME, _escape_local_name(name)) - if callable(saveable_factory): - 
saveable = saveable_factory(name=attribute.checkpoint_key) - else: - saveable = saveable_factory - # Figure out the name-based Saver's name for this variable. - saver_dict = saver_lib.BaseSaverBuilder.OpListToDict( - [saveable], convert_variable_to_tensor=False) - attribute.full_name, = saver_dict.keys() - named_saveables[attribute.checkpoint_key] = saveable - - for child in checkpointable._checkpoint_dependencies: # pylint: disable=protected-access - child_proto = object_proto.children.add() - child_proto.node_id = node_ids[child.ref] - child_proto.local_name = child.name - - return named_saveables, object_graph_proto - - -def _serialize_object_graph(root_checkpointable): - """Determine checkpoint keys for variables and build a serialized graph. - - Non-slot variables are keyed based on a shortest path from the root saveable - to the object which owns the variable (i.e. the one which called - `Checkpointable._add_variable` to create it). - - Slot variables are keyed based on a shortest path to the variable being - slotted for, a shortest path to their optimizer, and the slot name. - - Args: - root_checkpointable: A `Checkpointable` object whose variables (including - the variables of dependencies, recursively) should be saved. - - Returns: - A tuple of (named_variables, object_graph_proto): - named_variables: A dictionary mapping names to variable objects. - object_graph_proto: A CheckpointableObjectGraph protocol buffer containing - the serialized object graph and variable references. - - Raises: - ValueError: If there are invalid characters in an optimizer's slot names. 
- """ - checkpointable_objects, path_to_root = ( - _breadth_first_checkpointable_traversal(root_checkpointable)) - object_names = { - obj: _object_prefix_from_path(path) - for obj, path in path_to_root.items()} - node_ids = {node: node_id for node_id, node - in enumerate(checkpointable_objects)} - slot_variables = _serialize_slot_variables( - checkpointable_objects=checkpointable_objects, - node_ids=node_ids, - object_names=object_names) - return _serialize_checkpointables( - checkpointable_objects=checkpointable_objects, - node_ids=node_ids, - object_names=object_names, - slot_variables=slot_variables) - - -def gather_initializers(root_checkpointable): - """Traverse the object graph and find initialization ops. - - Looks for `Checkpointable` objects which are dependencies of - `root_checkpointable` and which have an `initializer` property. Includes - initializers for slot variables only if the variable they are slotting for and - the optimizer are dependencies of `root_checkpointable` (i.e. if they would be - saved with a checkpoint). - - Args: - root_checkpointable: A `Checkpointable` object to gather initializers for. - Returns: - A list of initialization ops. - """ - # TODO(allenl): Extract out gathering logic so the naming logic doesn't have - # to run. 
- checkpointable_objects, path_to_root = ( - _breadth_first_checkpointable_traversal(root_checkpointable)) - object_names = { - obj: _object_prefix_from_path(path) - for obj, path in path_to_root.items()} - node_ids = {node: node_id for node_id, node - in enumerate(checkpointable_objects)} - _serialize_slot_variables( - checkpointable_objects=checkpointable_objects, - node_ids=node_ids, - object_names=object_names) - return [c.initializer for c in checkpointable_objects - if hasattr(c, "initializer") and c.initializer is not None] - - -class _NoRestoreSaveable(saver_lib.BaseSaverBuilder.SaveableObject): - - def __init__(self, tensor, name): - spec = saver_lib.BaseSaverBuilder.SaveSpec(tensor, "", name) - super(_NoRestoreSaveable, self).__init__(tensor, [spec], name) - - def restore(self, restored_tensors, restored_shapes): - return control_flow_ops.no_op() - - -class _LoadStatus(object): - """Abstract base for load status callbacks.""" - - @abc.abstractmethod - def assert_consumed(self): - """Raises an exception unless a non-trivial restoration has completed.""" - pass - - @abc.abstractmethod - def run_restore_ops(self, session=None): - """Runs restore ops from the checkpoint. Requires a valid checkpoint.""" - pass - - @abc.abstractmethod - def initialize_or_restore(self, session=None): - """Runs restore ops from the checkpoint, or initializes variables.""" - pass - - -class CheckpointLoadStatus(_LoadStatus): - """Checks the status of checkpoint loading and manages restore ops. - - Returned from `Saver.restore`. Since `restore` may defer the loading of values - in the checkpoint which don't yet have corresponding Python objects, - `CheckpointLoadStatus` provides a callback to verify that checkpoint loading - is complete (`assert_consumed`). - - When graph building, `restore` does not run restore ops itself since their - creation may be deferred. 
The `run_restore_ops` method must be called once all - Python objects with values to restore have been created and added to the - dependency graph (this does not necessarily have to be the whole checkpoint; - calling `run_restore_ops` while `assert_consumed` fails is supported and will - partially restore the checkpoint). - - See `Saver.restore` for usage examples. - """ - - def __init__(self, checkpoint, feed_dict): - self._checkpoint = checkpoint - self._feed_dict = feed_dict - - def assert_consumed(self): - """Asserts that all objects in the checkpoint have been created/matched. - - Returns: - `self` for chaining. - Raises: - AssertionError: If there are any Python objects in the dependency graph - which have not been restored from this checkpoint or a later `restore`, - or if there are any checkpointed values which have not been matched to - Python objects. - """ - for node_id, node in enumerate(self._checkpoint.object_graph_proto.nodes): - checkpointable = self._checkpoint.object_by_proto_id.get(node_id, None) - if checkpointable is None: - raise AssertionError("Unresolved object in checkpoint: %s" % (node,)) - if checkpointable._update_uid < self._checkpoint.restore_uid: # pylint: disable=protected-access - raise AssertionError( - "Object not assigned a value from checkpoint: %s" % (node,)) - if self._checkpoint.slot_restorations: - # Sanity check; this collection should be clear if everything has been - # restored. 
- raise AssertionError("Unresolved slot restorations: %s" % ( - self._checkpoint.slot_restorations,)) - if self._checkpoint.unused_attributes: - raise AssertionError( - ("Unused attributes in these objects (the attributes exist in the " - "checkpoint but not in the objects): %s") % ( - self._checkpoint.unused_attributes.items(),)) - return self - - def run_restore_ops(self, session=None): - """Run operations to restore objects in the dependency graph.""" - if context.executing_eagerly(): - return # Run eagerly - if session is None: - session = ops.get_default_session() - session.run(self._checkpoint.restore_ops, feed_dict=self._feed_dict) - - def initialize_or_restore(self, session=None): - """Alias for `run_restore_ops`. - - This method has a sibling in `InitializationOnlyStatus` which instead - initializes variables. That type is returned if no checkpoint is specified - in `Saver.restore`. - - Args: - session: The session to run restore ops in. If `None`, uses the default - session. - """ - self.run_restore_ops(session=session) - - -class InitializationOnlyStatus(_LoadStatus): - """Returned from `Saver.restore` when no checkpoint has been specified. - - Objects of this type have the same `assert_consumed` method as - `CheckpointLoadStatus`, but it always fails. However, - `initialize_or_restore` works on objects of both types, and will - initialize variables in `InitializationOnlyStatus` objects or restore them - otherwise. - """ - - def __init__(self, root_checkpointable): - self._root_checkpointable = root_checkpointable - - def assert_consumed(self): - """Assertion for consistency with `CheckpointLoadStatus`. Always fails.""" - raise AssertionError( - "No checkpoint specified (save_path=None); nothing is being restored.") - - def run_restore_ops(self, session=None): - """For consistency with `CheckpointLoadStatus`. - - Use `initialize_or_restore` for initializing if no checkpoint was passed - to `Saver.restore` and restoring otherwise. 
- - Args: - session: Not used. - """ - raise AssertionError( - "No checkpoint specified, so no restore ops are available " - "(save_path=None to Saver.restore).") - - def initialize_or_restore(self, session=None): - """Runs initialization ops for variables. - - Only objects which would be saved by `Saver.save` will be initialized. See - `gather_initializers` for details. - - This method does nothing when executing eagerly (initializers get run - eagerly). - - Args: - session: The session to run initialization ops in. If `None`, uses the - default session. - """ - if context.executing_eagerly(): - return # run eagerly - if session is None: - session = ops.get_default_session() - session.run(gather_initializers(self._root_checkpointable)) - - -_DEPRECATED_RESTORE_INSTRUCTIONS = ( - "Restoring a name-based tf.train.Saver checkpoint using the object-based " - "restore API. This mode uses global names to match variables, and so is " - "somewhat fragile. It also adds new restore ops to the graph each time it " - "is called. Prefer re-encoding training checkpoints in the object-based " - "format: run save() on the object-based saver (the same one this message " - "is coming from) and use that checkpoint in the future.") - - -class NameBasedSaverStatus(_LoadStatus): - """Status for loading a name-based training checkpoint.""" - - def __init__(self, object_saver, save_path): - self._object_saver = object_saver - self._save_path = save_path - - def assert_consumed(self): - """Assertion for consistency with `CheckpointLoadStatus`. Always fails.""" - raise AssertionError( - "Restoring a name-based checkpoint. 
No load status is available.") - - @deprecation.deprecated( - date=None, instructions=_DEPRECATED_RESTORE_INSTRUCTIONS) - def run_restore_ops(self, session=None): - """Load the name-based training checkpoint using a new `tf.train.Saver`.""" - if session is None and not context.executing_eagerly(): - session = ops.get_default_session() - with ops.device("/cpu:0"): - saver_lib.Saver(self._object_saver._global_variable_names()).restore( # pylint: disable=protected-access - sess=session, save_path=self._save_path) - - def initialize_or_restore(self, session=None): - """Alias for `run_restore_ops`.""" - self.run_restore_ops(session=session) - - -class _SessionWithFeedDictAdditions(session_lib.SessionInterface): - """Pretends to be a session, inserts extra feeds on run().""" - - def __init__(self, session, feed_additions): - self._wrapped_session = session - self._feed_additions = feed_additions - - def run(self, fetches, feed_dict=None, **kwargs): - if feed_dict is None: - feed_dict = {} - else: - feed_dict = feed_dict.copy() - feed_dict.update(self._feed_additions) - return self._wrapped_session.run( - fetches=fetches, feed_dict=feed_dict, **kwargs) - - -def _copy_saver_with_new_var_list(old_saver, new_var_list): - """Copy a `tf.train.Saver`'s state to a new Saver with different variables.""" - new_saver = saver_lib.Saver(var_list=new_var_list) - # TODO(allenl): Move to copying functionality to Saver? - # pylint: disable=protected-access - new_saver._last_checkpoints = old_saver._last_checkpoints - new_saver._checkpoints_to_be_deleted = old_saver._checkpoints_to_be_deleted - new_saver._next_checkpoint_time = old_saver._next_checkpoint_time - # pylint: enable=protected-access - return new_saver - - -class CheckpointableSaver(object): - """Saves and restores a `Checkpointable` object and its dependencies. - - See `Checkpointable` for details of dependency management. 
`Saver` wraps - `tf.train.Saver` for saving, including extra information about the graph of - dependencies between Python objects. When restoring, it uses this information - about the save-time dependency graph to more robustly match objects with their - checkpointed values. When executing eagerly, it supports restoring variables - on object creation (see `Saver.restore`). - - Values in a checkpoint are mapped to `Checkpointable` Python objects - (`Variable`s, `Optimizer`s, `Layer`s) based on the names provided when the - checkpoint was written. To avoid breaking existing checkpoints when modifying - a class, dependency names (the names of attributes to which `Checkpointable` - objects are assigned) may not change. These names are local to objects, in - contrast to the `Variable.name`-based save/restore from `tf.train.Saver`, and - so allow additional program transformations. - """ - - def __init__(self, root_checkpointable): - """Configure saving. - - Args: - root_checkpointable: The root of the object graph to save/restore. This - object and all of its dependencies are saved in the checkpoint. When - restoring, objects are matched and restored starting from this root. - """ - # Allow passing in a weak reference to avoid reference cycles when - # `Checkpointable` objects save themselves. 
- self._root_checkpointable_ref = root_checkpointable - if not context.executing_eagerly(): - with ops.device("/cpu:0"): - self._file_prefix_placeholder = constant_op.constant("model") - else: - self._file_prefix_placeholder = None - - # Op caching for save - self._object_graph_feed_tensor = None - self._last_save_object_graph = None - self._last_save_saver = None - - # Op caching for restore - self._last_restore_object_graph = None - self._last_restore_checkpoint = None - - @property - def _root_checkpointable(self): - if isinstance(self._root_checkpointable_ref, weakref.ref): - derefed = self._root_checkpointable_ref() - assert derefed is not None - return derefed - else: - return self._root_checkpointable_ref - - def save(self, file_prefix, checkpoint_number=None, session=None): - """Save a training checkpoint. - - The saved checkpoint includes variables created by this object and any - Checkpointable objects it depends on at the time `Saver.save()` is called. - - Args: - file_prefix: A prefix to use for the checkpoint filenames - (/path/to/directory/and_a_prefix). Names are generated based on this - prefix and `checkpoint_number`, if provided. - checkpoint_number: An integer variable or Tensor, used to number - checkpoints. Typically this value is saved along with other variables in - training checkpoints, which will happen automatically if it was created - by `root_checkpointable` or one of its dependencies (via - `Checkpointable._add_variable`). - session: The session to evaluate variables in. Ignored when executing - eagerly. If not provided when graph building, the default session is - used. - - Returns: - The full path to the checkpoint. 
- """ - named_variables, graph_proto = _serialize_object_graph( - self._root_checkpointable) - if not context.executing_eagerly(): - if session is None: - session = ops.get_default_session() - if self._object_graph_feed_tensor is None: - with ops.device("/cpu:0"): - self._object_graph_feed_tensor = constant_op.constant( - "", dtype=dtypes.string) - object_graph_tensor = self._object_graph_feed_tensor - feed_additions = {object_graph_tensor: graph_proto.SerializeToString()} - else: - session = None - with ops.device("/cpu:0"): - object_graph_tensor = constant_op.constant( - graph_proto.SerializeToString(), dtype=dtypes.string) - feed_additions = None - assert _OBJECT_GRAPH_PROTO_KEY not in named_variables - named_variables[_OBJECT_GRAPH_PROTO_KEY] = _NoRestoreSaveable( - tensor=object_graph_tensor, - name=_OBJECT_GRAPH_PROTO_KEY) - if (self._last_save_object_graph != graph_proto - # When executing eagerly, we need to re-create SaveableObjects each time - # save() is called so they pick up new Tensors passed to their - # constructors. That means the Saver needs to be copied with a new - # var_list. 
- or context.executing_eagerly()): - if self._last_save_object_graph is not None: - self._last_save_saver = _copy_saver_with_new_var_list( - old_saver=self._last_save_saver, new_var_list=named_variables) - else: - self._last_save_saver = saver_lib.Saver(var_list=named_variables) - self._last_save_object_graph = graph_proto - with ops.device("/cpu:0"): - save_path = self._last_save_saver.save( - sess=_SessionWithFeedDictAdditions( - session=session, feed_additions=feed_additions), - save_path=file_prefix, - write_meta_graph=False, - global_step=checkpoint_number) - return save_path - - def _global_variable_names(self): - """Generate a `tf.train.Saver`-style `var_list` using `variable.name`s.""" - named_saveables, graph_proto = _serialize_object_graph( - self._root_checkpointable) - saver_names = {} - for object_proto in graph_proto.nodes: - for attribute_proto in object_proto.attributes: - saver_names[attribute_proto.full_name] = named_saveables[ - attribute_proto.checkpoint_key] - return saver_names - - def restore(self, save_path): - """Restore a training checkpoint. - - Restores `root_checkpointable` and any objects that it tracks - (transitive). Either assigns values immediately if variables to restore have - been created already, or defers restoration until the variables are - created. Dependencies added to the `root_checkpointable` passed to the - constructor after this call will be matched if they have a corresponding - object in the checkpoint. - - When building a graph, restorations are added to the graph but not run. - - To disallow deferred loading, assert immediately that all checkpointed - variables have been matched to variable objects: - - ```python - saver = Saver(root) - saver.restore(path).assert_consumed() - ``` - - An exception will be raised unless every object was matched and its - variables already exist. 
- - When graph building, `assert_consumed()` indicates that all of the restore - ops which will be created for this checkpoint have been created. They can be - run via the `run_restore_ops()` function of the status object: - - ```python - saver.restore(path).assert_consumed().run_restore_ops() - ``` - - If the checkpoint has not been consumed completely, then the list of restore - ops will grow as more objects are added to the dependency graph. - - Name-based `tf.train.Saver` checkpoints can be loaded using this - method. There is no deferred loading, and names are used to match - variables. No restore ops are created/run until `run_restore_ops()` or - `initialize_or_restore()` are called on the returned status object, even - when executing eagerly. Re-encode name-based checkpoints using this - object-based `Saver.save` as soon as possible. - - Args: - save_path: The path to the checkpoint, as returned by `save` or - `tf.train.latest_checkpoint`. If None (as when there is no latest - checkpoint for `tf.train.latest_checkpoint` to return), returns an - object which may run initializers for objects in the dependency - graph. If the checkpoint was written by the name-based `tf.train.Saver`, - names are used to match variables. - - Returns: - A load status object, which can be used to make assertions about the - status of checkpoint restoration and run initialization/restore ops - (of type `CheckpointLoadStatus`, or `InitializationOnlyStatus` if - `save_path` is `None`). - - If `save_path` points to a name-based checkpoint, a `NameBasedSaverStatus` - object is returned which runs restore ops from a name-based saver. 
- """ - if save_path is None: - return InitializationOnlyStatus(self._root_checkpointable) - in_graph_mode = not context.executing_eagerly() - if in_graph_mode: - file_prefix_tensor = self._file_prefix_placeholder - file_prefix_feed_dict = {self._file_prefix_placeholder: save_path} - else: - with ops.device("/cpu:0"): - file_prefix_tensor = constant_op.constant(save_path) - file_prefix_feed_dict = None - reader = pywrap_tensorflow.NewCheckpointReader(save_path) - try: - object_graph_string = reader.get_tensor(_OBJECT_GRAPH_PROTO_KEY) - except errors_impl.NotFoundError: - # The object graph proto does not exist in this checkpoint. Try again with - # name-based saving. - return NameBasedSaverStatus(self, save_path) - - object_graph_proto = ( - checkpointable_object_graph_pb2.CheckpointableObjectGraph()) - object_graph_proto.ParseFromString(object_graph_string) - if in_graph_mode and object_graph_proto == self._last_restore_object_graph: - checkpoint = self._last_restore_checkpoint - else: - if in_graph_mode: - dtype_map = None - else: - dtype_map = reader.get_variable_to_dtype_map() - checkpoint = core_checkpointable_utils._Checkpoint( # pylint: disable=protected-access - object_graph_proto=object_graph_proto, - save_path=file_prefix_tensor, - dtype_map=dtype_map) - if in_graph_mode: - if self._last_restore_object_graph is not None: - raise NotImplementedError( - "Using a single Saver to restore different object graphs is not " - "currently supported when graph building. 
Use a different Saver " - "for each object graph (restore ops will be duplicated), or " - "file a feature request if this limitation bothers you.") - self._last_restore_checkpoint = checkpoint - self._last_restore_object_graph = object_graph_proto - core_checkpointable._CheckpointPosition( # pylint: disable=protected-access - checkpoint=checkpoint, proto_id=0).restore(self._root_checkpointable) - load_status = CheckpointLoadStatus( - checkpoint, feed_dict=file_prefix_feed_dict) - return load_status - - -class Checkpoint(core_checkpointable.Checkpointable): - """A utility class which groups `Checkpointable` objects. - - Accepts arbitrary keyword arguments to its constructor and saves those values - with a checkpoint. Maintains a `save_counter` for numbering checkpoints. - - Example usage: - - ```python - import tensorflow as tf - import tensorflow.contrib.eager as tfe - import os - - checkpoint_directory = "/tmp/training_checkpoints" - checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") - - root = tfe.Checkpoint(optimizer=optimizer, model=model) - root.restore(tf.train.latest_checkpoint(checkpoint_directory)) - for _ in range(num_training_steps): - optimizer.minimize( ... ) - root.save(file_prefix=checkpoint_prefix) - ``` - - For more manual control over saving, use `tfe.CheckpointableSaver` directly. - - Attributes: - save_counter: Incremented when `save()` is called. Used to number - checkpoints. - """ - - def __init__(self, **kwargs): - """Group objects into a training checkpoint. - - Args: - **kwargs: Keyword arguments are set as attributes of this object, and are - saved with the checkpoint. Attribute values must derive from - `CheckpointableBase`. - Raises: - ValueError: If objects in `kwargs` are not Checkpointable. 
- """ - super(Checkpoint, self).__init__() - for k, v in sorted(kwargs.items(), key=lambda item: item[0]): - if not isinstance(v, core_checkpointable.CheckpointableBase): - raise ValueError( - ("`Checkpoint` was expecting an object derived from " - "`CheckpointableBase`, got %s.") % (v,)) - setattr(self, k, v) - self._save_counter = None # Created lazily for restore-on-create. - self._saver = CheckpointableSaver(weakref.ref(self)) - - def _maybe_create_save_counter(self): - """Create a save counter if it does not yet exist.""" - if self._save_counter is None: - # Initialized to 0 and incremented before saving. - with ops.device("/cpu:0"): - self._save_counter = add_variable( - self, name="save_counter", initializer=0, dtype=dtypes.int64) - - @property - def save_counter(self): - """An integer variable which starts at zero and is incremented on save. - - Used to number checkpoints. - - Returns: - The save counter variable. - """ - self._maybe_create_save_counter() - return self._save_counter - - def save(self, file_prefix, session=None): - """Save a checkpoint. Wraps `tfe.CheckpointableSaver.save`.""" - in_graph_mode = not context.executing_eagerly() - if in_graph_mode: - if session is None: - session = ops.get_default_session() - if self._save_counter is None: - # When graph building, if this is a new save counter variable then it - # needs to be initialized before assign_add. This is only an issue if - # restore() has not been called first. - session.run(self.save_counter.initializer) - with ops.colocate_with(self.save_counter): - assign_op = self.save_counter.assign_add(1) - if in_graph_mode: - session.run(assign_op) - return self._saver.save( - file_prefix=file_prefix, - checkpoint_number=self.save_counter, - session=session) - - def restore(self, save_path): - """Restore a checkpoint. 
Wraps `tfe.CheckpointableSaver.restore`.""" - status = self._saver.restore(save_path=save_path) - # Create the save counter now so it gets initialized with other variables - # when graph building. Creating it earlier would lead to double - # initialization when executing eagerly. - self._maybe_create_save_counter() - return status diff --git a/tensorflow/contrib/eager/python/datasets.py b/tensorflow/contrib/eager/python/datasets.py index 60453006f4f36de452ba1484771733b56a6cc7e9..0783d1b5d70e502e6edd80b59f37fdd93b413e12 100644 --- a/tensorflow/contrib/eager/python/datasets.py +++ b/tensorflow/contrib/eager/python/datasets.py @@ -71,8 +71,15 @@ class Iterator(iterator_ops.EagerIterator, checkpointable.CheckpointableBase): dataset: A `tf.data.Dataset` object. Raises: + TypeError: If `dataset` is an unsupported type. RuntimeError: When invoked without eager execution enabled. """ + if isinstance(dataset, prefetching_ops._PrefetchToDeviceDataset): # pylint: disable=protected-access + raise TypeError( + "`tf.contrib.data.prefetch_to_device()` is not compatible with " + "`tf.contrib.eager.Iterator`. Use `for ... in dataset:` to iterate " + "over the dataset instead.") + super(Iterator, self).__init__(dataset) if not context.context().device_spec.device_type: is_remote_device = False @@ -107,16 +114,20 @@ class Iterator(iterator_ops.EagerIterator, checkpointable.CheckpointableBase): def _next_internal(self): """Returns a nested structure of `tf.Tensor`s containing the next element. 
""" - if self._buffer_resource_handle is not None: - with ops.device(self._device): - ret = prefetching_ops.function_buffering_resource_get_next( - function_buffer_resource=self._buffer_resource_handle, - output_types=self._flat_output_types) - return sparse.deserialize_sparse_tensors( - nest.pack_sequence_as(self._output_types, ret), self._output_types, - self._output_shapes, self._output_classes) - else: - return super(Iterator, self)._next_internal() + # This runs in sync mode as iterators use an error status to communicate + # that there is no more data to iterate over. + # TODO(b/77291417): Fix + with context.execution_mode(context.SYNC): + if self._buffer_resource_handle is not None: + with ops.device(self._device): + ret = prefetching_ops.function_buffering_resource_get_next( + function_buffer_resource=self._buffer_resource_handle, + output_types=self._flat_output_types) + return sparse.deserialize_sparse_tensors( + nest.pack_sequence_as(self._output_types, ret), self._output_types, + self._output_shapes, self._output_classes) + else: + return super(Iterator, self)._next_internal() # TODO(shivaniagrawal): Expose checkpointable stateful objects from dataset # attributes(potential). 
diff --git a/tensorflow/contrib/eager/python/datasets_test.py b/tensorflow/contrib/eager/python/datasets_test.py index c658505de41bb6a0007440f4850fef720c3e97f1..7b123707cc3a26073088cf2c57c6211e831c19fd 100644 --- a/tensorflow/contrib/eager/python/datasets_test.py +++ b/tensorflow/contrib/eager/python/datasets_test.py @@ -24,9 +24,9 @@ import time import numpy as np from tensorflow.contrib import lookup +from tensorflow.contrib.data.python.ops import prefetching_ops from tensorflow.contrib.data.python.ops import threadpool from tensorflow.contrib.data.python.ops import unique -from tensorflow.contrib.eager.python import checkpointable_utils from tensorflow.contrib.eager.python import datasets from tensorflow.python.data import Dataset from tensorflow.python.eager import test @@ -37,6 +37,7 @@ from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor from tensorflow.python.ops import math_ops from tensorflow.python.ops import script_ops +from tensorflow.python.training import checkpointable_utils class IteratorTest(test.TestCase): @@ -192,6 +193,18 @@ class IteratorTest(test.TestCase): x = math_ops.add(x, x) self.assertAllEqual([0., 2.], x.numpy()) + def testTensorsExplicitPrefetchToDevice(self): + ds = Dataset.from_tensor_slices([0., 1.]) + ds = ds.apply(prefetching_ops.prefetch_to_device(test.gpu_device_name())) + + with self.assertRaisesRegexp(TypeError, 'prefetch_to_device'): + datasets.Iterator(ds) + + for i, x in enumerate(ds): + with ops.device(test.gpu_device_name()): + x = math_ops.add(x, x) + self.assertEqual(float(i) + float(i), x.numpy()) + def testOverrideThreadPool(self): def get_thread_id(_): diff --git a/tensorflow/contrib/eager/python/evaluator.py b/tensorflow/contrib/eager/python/evaluator.py index 37c8f0d47adbde6932bf409cdcae9a1845d700b5..7949a3f6da293abdd85512209242bae76ab4d816 100644 --- a/tensorflow/contrib/eager/python/evaluator.py +++ b/tensorflow/contrib/eager/python/evaluator.py @@ -22,12 +22,12 @@ 
import six from tensorflow.contrib.eager.python import datasets from tensorflow.contrib.eager.python import metrics -from tensorflow.contrib.summary import summary_ops from tensorflow.python.eager import context from tensorflow.python.eager import function from tensorflow.python.framework import errors_impl from tensorflow.python.framework import ops from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import summary_ops_v2 as summary_ops class Evaluator(object): diff --git a/tensorflow/contrib/eager/python/examples/linear_regression/BUILD b/tensorflow/contrib/eager/python/examples/linear_regression/BUILD index f86331af6f7928f0f86c888e22706c6e0a5978b2..2f6cfdf31e852d5d69a7a87980c9a441da504cf2 100644 --- a/tensorflow/contrib/eager/python/examples/linear_regression/BUILD +++ b/tensorflow/contrib/eager/python/examples/linear_regression/BUILD @@ -22,6 +22,7 @@ cuda_py_test( ":linear_regression", "//tensorflow:tensorflow_py", ], + tags = ["no_windows"], # TODO: needs investigation on Windows ) cuda_py_test( diff --git a/tensorflow/contrib/eager/python/examples/rnn_ptb/rnn_ptb.py b/tensorflow/contrib/eager/python/examples/rnn_ptb/rnn_ptb.py index a90048d813bf345e8be32e9674a452175471b268..be5d60449d7e08c99cc28e76befce56f468c77fd 100644 --- a/tensorflow/contrib/eager/python/examples/rnn_ptb/rnn_ptb.py +++ b/tensorflow/contrib/eager/python/examples/rnn_ptb/rnn_ptb.py @@ -315,32 +315,37 @@ def main(_): have_gpu = tfe.num_gpus() > 0 use_cudnn_rnn = not FLAGS.no_use_cudnn_rnn and have_gpu - with tfe.restore_variables_on_create( - tf.train.latest_checkpoint(FLAGS.logdir)): - with tf.device("/device:GPU:0" if have_gpu else None): - # Make learning_rate a Variable so it can be included in the checkpoint - # and we can resume training with the last saved learning_rate. 
- learning_rate = tfe.Variable(20.0, name="learning_rate") - sys.stderr.write("learning_rate=%f\n" % learning_rate.numpy()) - model = PTBModel(corpus.vocab_size(), FLAGS.embedding_dim, - FLAGS.hidden_dim, FLAGS.num_layers, FLAGS.dropout, - use_cudnn_rnn) - optimizer = tf.train.GradientDescentOptimizer(learning_rate) - - best_loss = None - for _ in range(FLAGS.epoch): - train(model, optimizer, train_data, FLAGS.seq_len, FLAGS.clip) - eval_loss = evaluate(model, eval_data) - if not best_loss or eval_loss < best_loss: - if FLAGS.logdir: - tfe.Saver(model.trainable_weights + [learning_rate]).save( - os.path.join(FLAGS.logdir, "ckpt")) - best_loss = eval_loss - else: - learning_rate.assign(learning_rate / 4.0) - sys.stderr.write("eval_loss did not reduce in this epoch, " - "changing learning rate to %f for the next epoch\n" % - learning_rate.numpy()) + with tf.device("/device:GPU:0" if have_gpu else None): + # Make learning_rate a Variable so it can be included in the checkpoint + # and we can resume training with the last saved learning_rate. + learning_rate = tfe.Variable(20.0, name="learning_rate") + model = PTBModel(corpus.vocab_size(), FLAGS.embedding_dim, + FLAGS.hidden_dim, FLAGS.num_layers, FLAGS.dropout, + use_cudnn_rnn) + optimizer = tf.train.GradientDescentOptimizer(learning_rate) + checkpoint = tfe.Checkpoint( + learning_rate=learning_rate, model=model, + # GradientDescentOptimizer has no state to checkpoint, but noting it + # here lets us swap in an optimizer that does. + optimizer=optimizer) + # Restore existing variables now (learning_rate), and restore new variables + # on creation if a checkpoint exists. 
+ checkpoint.restore(tf.train.latest_checkpoint(FLAGS.logdir)) + sys.stderr.write("learning_rate=%f\n" % learning_rate.numpy()) + + best_loss = None + for _ in range(FLAGS.epoch): + train(model, optimizer, train_data, FLAGS.seq_len, FLAGS.clip) + eval_loss = evaluate(model, eval_data) + if not best_loss or eval_loss < best_loss: + if FLAGS.logdir: + checkpoint.save(os.path.join(FLAGS.logdir, "ckpt")) + best_loss = eval_loss + else: + learning_rate.assign(learning_rate / 4.0) + sys.stderr.write("eval_loss did not reduce in this epoch, " + "changing learning rate to %f for the next epoch\n" % + learning_rate.numpy()) if __name__ == "__main__": diff --git a/tensorflow/contrib/eager/python/examples/spinn/spinn_test.py b/tensorflow/contrib/eager/python/examples/spinn/spinn_test.py index 9adf47d505fc2933d9c009e5863351bd123c3797..f825a2a7363fbe144162eca96398920ead0c4e50 100644 --- a/tensorflow/contrib/eager/python/examples/spinn/spinn_test.py +++ b/tensorflow/contrib/eager/python/examples/spinn/spinn_test.py @@ -33,8 +33,8 @@ import tensorflow as tf import tensorflow.contrib.eager as tfe from tensorflow.contrib.eager.python.examples.spinn import data from third_party.examples.eager.spinn import spinn -from tensorflow.contrib.eager.proto import checkpointable_object_graph_pb2 from tensorflow.contrib.summary import summary_test_util +from tensorflow.core.protobuf import checkpointable_object_graph_pb2 from tensorflow.python.eager import test from tensorflow.python.framework import test_util from tensorflow.python.training import checkpoint_utils diff --git a/tensorflow/contrib/eager/python/metrics_impl.py b/tensorflow/contrib/eager/python/metrics_impl.py index 2f2347736a073c7d9b3fb6685f52f8d58cc40570..907f9204c2d31a652ca2a0539a23db4722b4e154 100644 --- a/tensorflow/contrib/eager/python/metrics_impl.py +++ b/tensorflow/contrib/eager/python/metrics_impl.py @@ -20,7 +20,6 @@ from __future__ import print_function import re -from tensorflow.contrib.summary import summary_ops 
from tensorflow.python.eager import context from tensorflow.python.eager import function from tensorflow.python.framework import dtypes @@ -29,6 +28,7 @@ from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import init_ops from tensorflow.python.ops import math_ops +from tensorflow.python.ops import summary_ops_v2 as summary_ops from tensorflow.python.ops import variable_scope from tensorflow.python.training import checkpointable diff --git a/tensorflow/contrib/eager/python/metrics_test.py b/tensorflow/contrib/eager/python/metrics_test.py index 15ac889191e0fe51269bc5740d5e0ab1bc0e2b72..f0fe4ce8c53bb80c03a3f0de37078bcdb975a0b4 100644 --- a/tensorflow/contrib/eager/python/metrics_test.py +++ b/tensorflow/contrib/eager/python/metrics_test.py @@ -21,9 +21,7 @@ from __future__ import print_function import os import tempfile -from tensorflow.contrib.eager.python import checkpointable_utils from tensorflow.contrib.eager.python import metrics -from tensorflow.contrib.summary import summary_ops from tensorflow.contrib.summary import summary_test_util from tensorflow.python.eager import context from tensorflow.python.eager import test @@ -31,6 +29,8 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops +from tensorflow.python.ops import summary_ops_v2 as summary_ops +from tensorflow.python.training import checkpointable_utils from tensorflow.python.training import training_util diff --git a/tensorflow/contrib/eager/python/network.py b/tensorflow/contrib/eager/python/network.py index e55a9276ab53f44f76dc5e537b3bdde7c975f463..2f8721324f5fc12565d047a64af22b8df215a92b 100644 --- a/tensorflow/contrib/eager/python/network.py +++ b/tensorflow/contrib/eager/python/network.py @@ -25,6 +25,7 @@ import weakref from tensorflow.python.eager import context from tensorflow.python.estimator 
import util as estimator_util from tensorflow.python.framework import ops +from tensorflow.python.keras._impl.keras.engine import base_layer as keras_base_layer from tensorflow.python.layers import base from tensorflow.python.ops import variable_scope from tensorflow.python.training import checkpoint_utils @@ -176,7 +177,7 @@ class Network(base.Layer): avoid_names = parent_network._owned_layers name_uid_map = parent_network._sub_layer_name_uids else: - name_uid_map = base._get_default_graph_uid_map() + name_uid_map = keras_base_layer.get_default_graph_uid_map() # Figure out which names we have to avoid based on which variable scope # we're nested in. strip_name = self._default_parent_variable_scope.name @@ -326,6 +327,8 @@ class Network(base.Layer): raise TypeError( "Network.track_layer() passed type %s, not a tf.layers.Layer" % (type(layer),)) + # Always use `ResourceVariable` with legacy layers. + layer._use_resource_variables = True if isinstance(layer, Network): layer._finalize_name(parent_network=self) else: diff --git a/tensorflow/contrib/eager/python/network_test.py b/tensorflow/contrib/eager/python/network_test.py index 3329fc6c513265deff41a368f5688dd605209c14..f43376d5d777a7f17d975e07b746f7b1c731e8ea 100644 --- a/tensorflow/contrib/eager/python/network_test.py +++ b/tensorflow/contrib/eager/python/network_test.py @@ -20,12 +20,10 @@ import gc from tensorflow.contrib.eager.python import network from tensorflow.contrib.layers.python.layers import regularizers -from tensorflow.python.eager import context from tensorflow.python.eager import function from tensorflow.python.eager import test from tensorflow.python.framework import constant_op from tensorflow.python.framework import errors_impl -from tensorflow.python.framework import ops from tensorflow.python.framework import test_util from tensorflow.python.layers import core from tensorflow.python.ops import math_ops @@ -469,36 +467,6 @@ class NetworkTest(test.TestCase): 
self.assertIsInstance(net.trainable_weights[0], resource_variable_ops.ResourceVariable) - def testGraphOpNames(self): - """Network operation names should match variable naming.""" - - def _check_op_prefixes(expected_prefix, checked_ops): - for operation in ops.get_default_graph().get_operations(): - if operation.name == "ignore": - continue - if operation.name in checked_ops: - continue - checked_ops.add(operation.name) - self.assertStartsWith(expected_start=expected_prefix, - actual=operation.name) - self.assertNotIn("my_network", operation.name[len(expected_prefix):]) - self.assertNotIn("dense", operation.name[len(expected_prefix):]) - - with context.graph_mode(): - net = MyNetwork() - zero = constant_op.constant([[0.]], name="ignore") - net(zero) - checked_ops = set() - _check_op_prefixes(expected_prefix="my_network/dense/", - checked_ops=checked_ops) - net.net2 = net.track_layer(MyNetwork()) - net.net2(zero) - _check_op_prefixes(expected_prefix="my_network/my_network/dense/", - checked_ops=checked_ops) - MyNetwork()(zero) - _check_op_prefixes(expected_prefix="my_network_1/dense/", - checked_ops=checked_ops) - @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True) def testVariableRegularizers(self): net = RegularizedNetwork() diff --git a/tensorflow/contrib/eager/python/saver_test.py b/tensorflow/contrib/eager/python/saver_test.py index 1a7f7b85e688e80e3cf482f2754462888187d311..4032e755f6e7dea9dcb42587f14e8386e5db2338 100644 --- a/tensorflow/contrib/eager/python/saver_test.py +++ b/tensorflow/contrib/eager/python/saver_test.py @@ -102,7 +102,6 @@ class SaverTest(test.TestCase): # Can still restore it. saver.restore(ckpt_prefix) self.assertEqual(v1.read_value().numpy(), 1.0) - self.assertEqual(v1.read_value().numpy(), 1.0) # However, cannot restore it with default name. 
with self.assertRaisesOpError('not found in checkpoint'): saver = _saver.Saver([v1, v2]).restore(ckpt_prefix) diff --git a/tensorflow/contrib/eager/python/tfe.py b/tensorflow/contrib/eager/python/tfe.py index c6f3f20e781147140f2c4b339ed465ab7e919d37..79dd117854e5fe9f066f671d8ce62e08579e0ed9 100644 --- a/tensorflow/contrib/eager/python/tfe.py +++ b/tensorflow/contrib/eager/python/tfe.py @@ -84,8 +84,6 @@ from __future__ import print_function # pylint:disable=g-bad-import-order,g-import-not-at-top,unused-import # from tensorflow.contrib.eager.python import metrics -from tensorflow.contrib.eager.python.checkpointable_utils import CheckpointableSaver -from tensorflow.contrib.eager.python.checkpointable_utils import Checkpoint from tensorflow.contrib.eager.python.datasets import Iterator from tensorflow.contrib.eager.python.network import Network from tensorflow.contrib.eager.python.network import Sequential @@ -123,6 +121,8 @@ from tensorflow.python.ops.variable_scope import EagerVariableStore from tensorflow.python.ops import script_ops from tensorflow.python.ops import template from tensorflow.python.training.checkpointable import Checkpointable +from tensorflow.python.training.checkpointable_utils import CheckpointableSaver +from tensorflow.python.training.checkpointable_utils import Checkpoint from tensorflow.python.util.all_util import remove_undocumented py_func = script_ops.eager_py_func diff --git a/tensorflow/contrib/estimator/BUILD b/tensorflow/contrib/estimator/BUILD index 2be62c94381c7ddb9183f20d391b62b652b3c8dc..62ddb3d290e6c6455d79af50b2cfd894ff9f449a 100644 --- a/tensorflow/contrib/estimator/BUILD +++ b/tensorflow/contrib/estimator/BUILD @@ -23,6 +23,7 @@ py_library( ":logit_fns", ":multi_head", ":replicate_model_fn", + ":rnn", "//tensorflow/python:util", ], ) @@ -89,6 +90,7 @@ py_test( "//tensorflow/python/estimator:numpy_io", "//tensorflow/python/estimator:prediction_keys", "//tensorflow/python/feature_column", + "//tensorflow/python/ops/losses", 
"//third_party/py/numpy", "@six_archive//:six", ], @@ -129,6 +131,7 @@ py_test( "//tensorflow/python/estimator:numpy_io", "//tensorflow/python/estimator:prediction_keys", "//tensorflow/python/feature_column", + "//tensorflow/python/ops/losses", "//third_party/py/numpy", "@six_archive//:six", ], @@ -207,7 +210,7 @@ py_library( py_test( name = "head_test", - size = "small", + size = "medium", srcs = ["python/estimator/head_test.py"], srcs_version = "PY2AND3", deps = [ @@ -247,7 +250,7 @@ py_library( py_test( name = "linear_test", - size = "small", + size = "medium", srcs = ["python/estimator/linear_test.py"], srcs_version = "PY2AND3", tags = [ @@ -266,6 +269,7 @@ py_test( "//tensorflow/python/estimator:numpy_io", "//tensorflow/python/estimator:prediction_keys", "//tensorflow/python/feature_column", + "//tensorflow/python/ops/losses", "//third_party/py/numpy", "@six_archive//:six", ], @@ -409,3 +413,58 @@ cuda_py_test( "notap", ], ) + +py_library( + name = "rnn", + srcs = ["python/estimator/rnn.py"], + srcs_version = "PY2AND3", + deps = [ + ":extenders", + "//tensorflow/contrib/feature_column:feature_column_py", + "//tensorflow/python:array_ops", + "//tensorflow/python:check_ops", + "//tensorflow/python:framework_ops", + "//tensorflow/python:init_ops", + "//tensorflow/python:layers", + "//tensorflow/python:partitioned_variables", + "//tensorflow/python:rnn", + "//tensorflow/python:rnn_cell", + "//tensorflow/python:summary", + "//tensorflow/python:training", + "//tensorflow/python:variable_scope", + "//tensorflow/python/estimator", + "//tensorflow/python/estimator:head", + "//tensorflow/python/estimator:optimizers", + "//tensorflow/python/feature_column", + "@six_archive//:six", + ], +) + +py_test( + name = "rnn_test", + size = "medium", + srcs = ["python/estimator/rnn_test.py"], + srcs_version = "PY2AND3", + tags = [ + "no_pip", + "noasan", # times out + "notsan", + ], + deps = [ + ":rnn", + "//tensorflow/core:protos_all_py", + "//tensorflow/python:check_ops", + 
"//tensorflow/python:client_testlib", + "//tensorflow/python:dtypes", + "//tensorflow/python:framework_ops", + "//tensorflow/python:math_ops", + "//tensorflow/python:state_ops", + "//tensorflow/python:summary", + "//tensorflow/python:training", + "//tensorflow/python:variables", + "//tensorflow/python/estimator:numpy_io", + "//tensorflow/python/feature_column", + "//third_party/py/numpy", + "@six_archive//:six", + ], +) diff --git a/tensorflow/contrib/estimator/__init__.py b/tensorflow/contrib/estimator/__init__.py index d2fc2c4bfa448227819c8d706387c1c75062b80b..be20d1b7770d3f3df21ac9c0f811d924bf4152ee 100644 --- a/tensorflow/contrib/estimator/__init__.py +++ b/tensorflow/contrib/estimator/__init__.py @@ -28,6 +28,7 @@ from tensorflow.contrib.estimator.python.estimator.linear import * from tensorflow.contrib.estimator.python.estimator.logit_fns import * from tensorflow.contrib.estimator.python.estimator.multi_head import * from tensorflow.contrib.estimator.python.estimator.replicate_model_fn import * +from tensorflow.contrib.estimator.python.estimator.rnn import * from tensorflow.python.util.all_util import remove_undocumented # pylint: enable=unused-import,line-too-long,wildcard-import @@ -52,6 +53,7 @@ _allowed_symbols = [ 'linear_logit_fn_builder', 'replicate_model_fn', 'TowerOptimizer', + 'RNNClassifier', ] remove_undocumented(__name__, allowed_exception_list=_allowed_symbols) diff --git a/tensorflow/contrib/estimator/python/estimator/boosted_trees.py b/tensorflow/contrib/estimator/python/estimator/boosted_trees.py index 314c54ed00372eca62ffc6930e6d492dd7d57163..bd641014e9eec6623d66574bccd08ff03ebc28ac 100644 --- a/tensorflow/contrib/estimator/python/estimator/boosted_trees.py +++ b/tensorflow/contrib/estimator/python/estimator/boosted_trees.py @@ -17,10 +17,22 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.python.data.ops import dataset_ops from tensorflow.python.estimator import 
estimator from tensorflow.python.estimator.canned import boosted_trees as canned_boosted_trees +def _validate_input_fn_and_repeat_dataset(train_input_fn): + """Validates whether the input_fn is valid, and repeat() if tf.Dataset.""" + def _input_fn(): + result_input_fn = train_input_fn() + if isinstance(result_input_fn, dataset_ops.Dataset): + return result_input_fn.repeat() + return result_input_fn + + return _input_fn + + class _BoostedTreesEstimator(estimator.Estimator): """An Estimator for Tensorflow Boosted Trees models.""" @@ -36,6 +48,7 @@ class _BoostedTreesEstimator(estimator.Estimator): l1_regularization=0., l2_regularization=0., tree_complexity=0., + min_node_weight=0., config=None): """Initializes a `BoostedTreesEstimator` instance. @@ -65,13 +78,16 @@ class _BoostedTreesEstimator(estimator.Estimator): l2_regularization: regularization multiplier applied to the square weights of the tree leafs. tree_complexity: regularization factor to penalize trees with more leaves. + min_node_weight: minimum hessian a node must have for a split to be + considered. The value will be compared with sum(leaf_hessian)/ + (batch_size * n_batches_per_layer). config: `RunConfig` object to configure the runtime settings. """ # pylint:disable=protected-access # HParams for the model. tree_hparams = canned_boosted_trees._TreeHParams( n_trees, max_depth, learning_rate, l1_regularization, l2_regularization, - tree_complexity) + tree_complexity, min_node_weight) def _model_fn(features, labels, mode, config): return canned_boosted_trees._bt_model_fn( @@ -96,6 +112,7 @@ def boosted_trees_classifier_train_in_memory( l1_regularization=0., l2_regularization=0., tree_complexity=0., + min_node_weight=0., config=None, train_hooks=None): """Trains a boosted tree classifier with in memory dataset. 
@@ -108,10 +125,13 @@ def boosted_trees_classifier_train_in_memory( bucketized_feature_2 = bucketized_column( numeric_column('feature_2'), BUCKET_BOUNDARIES_2) - def input_fn_train(): + def train_input_fn(): dataset = create-dataset-from-training-data - # Don't use repeat or cache, since it is assumed to be one epoch - # This is either tf.data.Dataset, or a tuple of feature dict and label. + # This is tf.data.Dataset of a tuple of feature dict and label. + # e.g. Dataset.zip((Dataset.from_tensors({'f1': f1_array, ...}), + # Dataset.from_tensors(label_array))) + # The returned Dataset shouldn't be batched. + # If Dataset repeats, only the first repetition would be used for training. return dataset classifier = boosted_trees_classifier_train_in_memory( @@ -162,6 +182,9 @@ def boosted_trees_classifier_train_in_memory( l2_regularization: regularization multiplier applied to the square weights of the tree leafs. tree_complexity: regularization factor to penalize trees with more leaves. + min_node_weight: minimum hessian a node must have for a split to be + considered. The value will be compared with sum(leaf_hessian)/ + (batch_size * n_batches_per_layer). config: `RunConfig` object to configure the runtime settings. train_hooks: a list of Hook instances to be passed to estimator.train(). @@ -184,7 +207,7 @@ def boosted_trees_classifier_train_in_memory( # HParams for the model. 
tree_hparams = canned_boosted_trees._TreeHParams( n_trees, max_depth, learning_rate, l1_regularization, l2_regularization, - tree_complexity) + tree_complexity, min_node_weight) def _model_fn(features, labels, mode, config): return canned_boosted_trees._bt_model_fn( @@ -202,7 +225,9 @@ def boosted_trees_classifier_train_in_memory( in_memory_classifier = estimator.Estimator( model_fn=_model_fn, model_dir=model_dir, config=config) - in_memory_classifier.train(input_fn=train_input_fn, hooks=train_hooks) + in_memory_classifier.train( + input_fn=_validate_input_fn_and_repeat_dataset(train_input_fn), + hooks=train_hooks) return in_memory_classifier # pylint: enable=protected-access @@ -220,6 +245,7 @@ def boosted_trees_regressor_train_in_memory( l1_regularization=0., l2_regularization=0., tree_complexity=0., + min_node_weight=0., config=None, train_hooks=None): """Trains a boosted tree regressor with in memory dataset. @@ -232,10 +258,13 @@ def boosted_trees_regressor_train_in_memory( bucketized_feature_2 = bucketized_column( numeric_column('feature_2'), BUCKET_BOUNDARIES_2) - def input_fn_train(): + def train_input_fn(): dataset = create-dataset-from-training-data - # Don't use repeat or cache, since it is assumed to be one epoch - # This is either tf.data.Dataset, or a tuple of feature dict and label. + # This is tf.data.Dataset of a tuple of feature dict and label. + # e.g. Dataset.zip((Dataset.from_tensors({'f1': f1_array, ...}), + # Dataset.from_tensors(label_array))) + # The returned Dataset shouldn't be batched. + # If Dataset repeats, only the first repetition would be used for training. return dataset regressor = boosted_trees_regressor_train_in_memory( @@ -279,6 +308,9 @@ def boosted_trees_regressor_train_in_memory( l2_regularization: regularization multiplier applied to the square weights of the tree leafs. tree_complexity: regularization factor to penalize trees with more leaves. 
+ min_node_weight: minimum hessian a node must have for a split to be + considered. The value will be compared with sum(leaf_hessian)/ + (batch_size * n_batches_per_layer). config: `RunConfig` object to configure the runtime settings. train_hooks: a list of Hook instances to be passed to estimator.train(). @@ -300,7 +332,7 @@ def boosted_trees_regressor_train_in_memory( # HParams for the model. tree_hparams = canned_boosted_trees._TreeHParams( n_trees, max_depth, learning_rate, l1_regularization, l2_regularization, - tree_complexity) + tree_complexity, min_node_weight) def _model_fn(features, labels, mode, config): return canned_boosted_trees._bt_model_fn( @@ -317,7 +349,9 @@ def boosted_trees_regressor_train_in_memory( in_memory_regressor = estimator.Estimator( model_fn=_model_fn, model_dir=model_dir, config=config) - in_memory_regressor.train(input_fn=train_input_fn, hooks=train_hooks) + in_memory_regressor.train( + input_fn=_validate_input_fn_and_repeat_dataset(train_input_fn), + hooks=train_hooks) return in_memory_regressor # pylint: enable=protected-access diff --git a/tensorflow/contrib/estimator/python/estimator/boosted_trees_test.py b/tensorflow/contrib/estimator/python/estimator/boosted_trees_test.py index e99a87f3b3c0e7c5840fa250506e600645bf6a29..76cbefe5e94502188388df6fc2816d130ac896d5 100644 --- a/tensorflow/contrib/estimator/python/estimator/boosted_trees_test.py +++ b/tensorflow/contrib/estimator/python/estimator/boosted_trees_test.py @@ -20,6 +20,8 @@ from __future__ import print_function import numpy as np from tensorflow.contrib.estimator.python.estimator import boosted_trees +from tensorflow.core.kernels.boosted_trees import boosted_trees_pb2 +from tensorflow.python.data.ops import dataset_ops from tensorflow.python.estimator.canned import boosted_trees as canned_boosted_trees from tensorflow.python.estimator.inputs import numpy_io from tensorflow.python.feature_column import feature_column @@ -48,12 +50,24 @@ def 
_make_train_input_fn(is_classification): """Makes train input_fn for classification/regression.""" def _input_fn(): - features = dict(FEATURES_DICT) - if is_classification: - labels = CLASSIFICATION_LABELS - else: - labels = REGRESSION_LABELS - return features, labels + features_dict = dict(FEATURES_DICT) + labels = CLASSIFICATION_LABELS if is_classification else REGRESSION_LABELS + return features_dict, labels + + return _input_fn + + +def _make_train_input_fn_dataset(is_classification): + """Makes input_fn using Dataset.""" + + def _input_fn(): + features_dict = dict(FEATURES_DICT) + labels = CLASSIFICATION_LABELS if is_classification else REGRESSION_LABELS + ds = dataset_ops.Dataset.zip( + (dataset_ops.Dataset.from_tensors(features_dict), + dataset_ops.Dataset.from_tensors(labels) + )) + return ds return _input_fn @@ -69,10 +83,18 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): for i in range(NUM_FEATURES) } - def _assert_checkpoint(self, model_dir, expected_global_step): - self.assertEqual(expected_global_step, - checkpoint_utils.load_variable(model_dir, - ops.GraphKeys.GLOBAL_STEP)) + def _assert_checkpoint(self, model_dir, global_step, finalized_trees, + attempted_layers): + reader = checkpoint_utils.load_checkpoint(model_dir) + self.assertEqual(global_step, reader.get_tensor(ops.GraphKeys.GLOBAL_STEP)) + serialized = reader.get_tensor('boosted_trees:0_serialized') + ensemble_proto = boosted_trees_pb2.TreeEnsemble() + ensemble_proto.ParseFromString(serialized) + self.assertEqual( + finalized_trees, + sum([1 for t in ensemble_proto.tree_metadata if t.is_finalized])) + self.assertEqual(attempted_layers, + ensemble_proto.growing_metadata.num_layers_attempted) def testTrainAndEvaluateEstimator(self): input_fn = _make_train_input_fn(is_classification=False) @@ -88,9 +110,10 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): num_steps = 100 # Train for a few steps, and validate final checkpoint. 
est.train(input_fn, steps=num_steps) - self._assert_checkpoint(est.model_dir, 11) + self._assert_checkpoint( + est.model_dir, global_step=10, finalized_trees=2, attempted_layers=10) eval_res = est.evaluate(input_fn=input_fn, steps=1) - self.assertAllClose(eval_res['average_loss'], 0.913176) + self.assertAllClose(eval_res['average_loss'], 1.008551) def testInferEstimator(self): train_input_fn = _make_train_input_fn(is_classification=False) @@ -108,31 +131,13 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): num_steps = 100 # Train for a few steps, and validate final checkpoint. est.train(train_input_fn, steps=num_steps) - self._assert_checkpoint(est.model_dir, 6) - + self._assert_checkpoint( + est.model_dir, global_step=5, finalized_trees=1, attempted_layers=5) + # Validate predictions. predictions = list(est.predict(input_fn=predict_input_fn)) - self.assertEquals(5, len(predictions)) - self.assertAllClose([0.703549], predictions[0]['predictions']) - self.assertAllClose([0.266539], predictions[1]['predictions']) - self.assertAllClose([0.256479], predictions[2]['predictions']) - self.assertAllClose([1.088732], predictions[3]['predictions']) - self.assertAllClose([1.901732], predictions[4]['predictions']) - - -class BoostedTreesClassifierTrainInMemoryTest(test_util.TensorFlowTestCase): - - def setUp(self): - self._feature_columns = { - feature_column.bucketized_column( - feature_column.numeric_column('f_%d' % i, dtype=dtypes.float32), - BUCKET_BOUNDARIES) - for i in range(NUM_FEATURES) - } - - def _assert_checkpoint(self, model_dir, expected_global_step): - self.assertEqual(expected_global_step, - checkpoint_utils.load_variable(model_dir, - ops.GraphKeys.GLOBAL_STEP)) + self.assertAllClose( + [[0.571619], [0.262821], [0.124549], [0.956801], [1.769801]], + [pred['predictions'] for pred in predictions]) def testBinaryClassifierTrainInMemoryAndEvalAndInfer(self): train_input_fn = _make_train_input_fn(is_classification=True) @@ -140,41 +145,38 @@ class 
BoostedTreesClassifierTrainInMemoryTest(test_util.TensorFlowTestCase): x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False) est = boosted_trees.boosted_trees_classifier_train_in_memory( - train_input_fn=train_input_fn, - feature_columns=self._feature_columns, - n_trees=1, - max_depth=5) + train_input_fn=train_input_fn, feature_columns=self._feature_columns, + n_trees=1, max_depth=5) # It will stop after 5 steps because of the max depth and num trees. - self._assert_checkpoint(est.model_dir, 6) + self._assert_checkpoint( + est.model_dir, global_step=5, finalized_trees=1, attempted_layers=5) - # Check eval. + # Check evaluate and predict. eval_res = est.evaluate(input_fn=train_input_fn, steps=1) self.assertAllClose(eval_res['accuracy'], 1.0) - - # Check predict that all labels are correct. + # Validate predictions. predictions = list(est.predict(input_fn=predict_input_fn)) - self.assertEquals(5, len(predictions)) - self.assertAllClose([0], predictions[0]['class_ids']) - self.assertAllClose([1], predictions[1]['class_ids']) - self.assertAllClose([1], predictions[2]['class_ids']) - self.assertAllClose([0], predictions[3]['class_ids']) - self.assertAllClose([0], predictions[4]['class_ids']) - + self.assertAllClose([[0], [1], [1], [0], [0]], + [pred['class_ids'] for pred in predictions]) -class BoostedTreesRegressorTrainInMemoryTest(test_util.TensorFlowTestCase): + def testBinaryClassifierTrainInMemoryWithDataset(self): + train_input_fn = _make_train_input_fn_dataset(is_classification=True) + predict_input_fn = numpy_io.numpy_input_fn( + x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False) - def setUp(self): - self._feature_columns = { - feature_column.bucketized_column( - feature_column.numeric_column('f_%d' % i, dtype=dtypes.float32), - BUCKET_BOUNDARIES) - for i in range(NUM_FEATURES) - } + est = boosted_trees.boosted_trees_classifier_train_in_memory( + train_input_fn=train_input_fn, feature_columns=self._feature_columns, + n_trees=1, 
max_depth=5) + # It will stop after 5 steps because of the max depth and num trees. + self._assert_checkpoint( + est.model_dir, global_step=5, finalized_trees=1, attempted_layers=5) - def _assert_checkpoint(self, model_dir, expected_global_step): - self.assertEqual(expected_global_step, - checkpoint_utils.load_variable(model_dir, - ops.GraphKeys.GLOBAL_STEP)) + # Check evaluate and predict. + eval_res = est.evaluate(input_fn=train_input_fn, steps=1) + self.assertAllClose(eval_res['accuracy'], 1.0) + predictions = list(est.predict(input_fn=predict_input_fn)) + self.assertAllClose([[0], [1], [1], [0], [0]], + [pred['class_ids'] for pred in predictions]) def testRegressorTrainInMemoryAndEvalAndInfer(self): train_input_fn = _make_train_input_fn(is_classification=False) @@ -182,25 +184,38 @@ class BoostedTreesRegressorTrainInMemoryTest(test_util.TensorFlowTestCase): x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False) est = boosted_trees.boosted_trees_regressor_train_in_memory( - train_input_fn=train_input_fn, - feature_columns=self._feature_columns, - n_trees=1, - max_depth=5) + train_input_fn=train_input_fn, feature_columns=self._feature_columns, + n_trees=1, max_depth=5) # It will stop after 5 steps because of the max depth and num trees. - self._assert_checkpoint(est.model_dir, 6) + self._assert_checkpoint( + est.model_dir, global_step=5, finalized_trees=1, attempted_layers=5) - # Check eval. + # Check evaluate and predict. eval_res = est.evaluate(input_fn=train_input_fn, steps=1) - self.assertAllClose(eval_res['average_loss'], 2.2136638) + self.assertAllClose(eval_res['average_loss'], 2.478283) + predictions = list(est.predict(input_fn=predict_input_fn)) + self.assertAllClose( + [[0.571619], [0.262821], [0.124549], [0.956801], [1.769801]], + [pred['predictions'] for pred in predictions]) - # Validate predictions. 
+ def testRegressorTrainInMemoryWithDataset(self): + train_input_fn = _make_train_input_fn_dataset(is_classification=False) + predict_input_fn = numpy_io.numpy_input_fn( + x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False) + + est = boosted_trees.boosted_trees_regressor_train_in_memory( + train_input_fn=train_input_fn, feature_columns=self._feature_columns, + n_trees=1, max_depth=5) + # It will stop after 5 steps because of the max depth and num trees. + self._assert_checkpoint( + est.model_dir, global_step=5, finalized_trees=1, attempted_layers=5) + # Check evaluate and predict. + eval_res = est.evaluate(input_fn=train_input_fn, steps=1) + self.assertAllClose(eval_res['average_loss'], 2.478283) predictions = list(est.predict(input_fn=predict_input_fn)) - self.assertEquals(5, len(predictions)) - self.assertAllClose([0.703549], predictions[0]['predictions']) - self.assertAllClose([0.266539], predictions[1]['predictions']) - self.assertAllClose([0.256479], predictions[2]['predictions']) - self.assertAllClose([1.088732], predictions[3]['predictions']) - self.assertAllClose([1.901732], predictions[4]['predictions']) + self.assertAllClose( + [[0.571619], [0.262821], [0.124549], [0.956801], [1.769801]], + [pred['predictions'] for pred in predictions]) if __name__ == '__main__': diff --git a/tensorflow/contrib/estimator/python/estimator/dnn_linear_combined_test.py b/tensorflow/contrib/estimator/python/estimator/dnn_linear_combined_test.py index b5e4d34dc70ccaa4806ae8b8ed5001bd971ee7b4..dd009a6753f3231638f93e50fc8f19eae8820139 100644 --- a/tensorflow/contrib/estimator/python/estimator/dnn_linear_combined_test.py +++ b/tensorflow/contrib/estimator/python/estimator/dnn_linear_combined_test.py @@ -34,6 +34,7 @@ from tensorflow.python.estimator.inputs import numpy_io from tensorflow.python.feature_column import feature_column from tensorflow.python.framework import ops from tensorflow.python.ops import nn +from tensorflow.python.ops.losses import losses from 
tensorflow.python.platform import gfile from tensorflow.python.platform import test from tensorflow.python.summary.writer import writer_cache @@ -52,7 +53,9 @@ def _dnn_only_estimator_fn( config=None): return dnn_linear_combined.DNNLinearCombinedEstimator( head=head_lib.regression_head( - weight_column=weight_column, label_dimension=label_dimension), + weight_column=weight_column, label_dimension=label_dimension, + # Tests in core (from which this test inherits) test the sum loss. + loss_reduction=losses.Reduction.SUM), model_dir=model_dir, dnn_feature_columns=feature_columns, dnn_optimizer=optimizer, @@ -100,7 +103,9 @@ def _linear_only_estimator_fn( partitioner=None): return dnn_linear_combined.DNNLinearCombinedEstimator( head=head_lib.regression_head( - weight_column=weight_column, label_dimension=label_dimension), + weight_column=weight_column, label_dimension=label_dimension, + # Tests in core (from which this test inherits) test the sum loss. + loss_reduction=losses.Reduction.SUM), model_dir=model_dir, linear_feature_columns=feature_columns, linear_optimizer=optimizer, diff --git a/tensorflow/contrib/estimator/python/estimator/dnn_test.py b/tensorflow/contrib/estimator/python/estimator/dnn_test.py index 71f810acec856d42d389260e7b9fea32123348b4..75e3107670d658e55ce23d983e47311f1c180104 100644 --- a/tensorflow/contrib/estimator/python/estimator/dnn_test.py +++ b/tensorflow/contrib/estimator/python/estimator/dnn_test.py @@ -32,6 +32,7 @@ from tensorflow.python.estimator.export import export from tensorflow.python.estimator.inputs import numpy_io from tensorflow.python.feature_column import feature_column from tensorflow.python.framework import ops +from tensorflow.python.ops.losses import losses from tensorflow.python.platform import gfile from tensorflow.python.platform import test from tensorflow.python.summary.writer import writer_cache @@ -41,7 +42,9 @@ def _dnn_estimator_fn(weight_column=None, label_dimension=1, *args, **kwargs): """Returns a DNNEstimator 
that uses regression_head.""" return dnn.DNNEstimator( head=head_lib.regression_head( - weight_column=weight_column, label_dimension=label_dimension), + weight_column=weight_column, label_dimension=label_dimension, + # Tests in core (from which this test inherits) test the sum loss. + loss_reduction=losses.Reduction.SUM), *args, **kwargs) diff --git a/tensorflow/contrib/estimator/python/estimator/extenders.py b/tensorflow/contrib/estimator/python/estimator/extenders.py index 266ae933052b11b9ab3edb662e95c90aae207dae..201699ed775f701bc9f215fff11a688175d51645 100644 --- a/tensorflow/contrib/estimator/python/estimator/extenders.py +++ b/tensorflow/contrib/estimator/python/estimator/extenders.py @@ -97,7 +97,10 @@ def add_metrics(estimator, metric_fn): return estimator_lib.Estimator( model_fn=new_model_fn, model_dir=estimator.model_dir, - config=estimator.config) + config=estimator.config, + # pylint: disable=protected-access + warm_start_from=estimator._warm_start_settings) + # pylint: enable=protected-access def clip_gradients_by_norm(optimizer, clip_norm): diff --git a/tensorflow/contrib/estimator/python/estimator/head.py b/tensorflow/contrib/estimator/python/estimator/head.py index 74da2cbb3f4557b4ddbbeb6debaae085407a0023..3dcf0374c8a12b5907fbaf20d1ad72211a45ab5c 100644 --- a/tensorflow/contrib/estimator/python/estimator/head.py +++ b/tensorflow/contrib/estimator/python/estimator/head.py @@ -41,11 +41,10 @@ from tensorflow.python.training import training_util _DEFAULT_SERVING_KEY = signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY -# TODO(b/65403806): Switch loss_reduction default to SUM_OVER_BATCH_SIZE. def multi_class_head(n_classes, weight_column=None, label_vocabulary=None, - loss_reduction=losses.Reduction.SUM, + loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE, loss_fn=None, name=None): """Creates a `_Head` for multi class classification. @@ -86,7 +85,8 @@ def multi_class_head(n_classes, have any value in `label_vocabulary`. 
Note that errors will be raised if `label_vocabulary` is not provided but labels are strings. loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how to - reduce training loss over batch. Defaults to `SUM`. + reduce training loss over batch. Defaults to `SUM_OVER_BATCH_SIZE`, namely + weighted sum of losses divided by batch size. See `tf.losses.Reduction`. loss_fn: Optional loss function. name: name of the head. If provided, summary and metrics keys will be suffixed by `"/" + name`. Also used as `name_scope` when creating ops. @@ -111,7 +111,7 @@ def binary_classification_head( weight_column=None, thresholds=None, label_vocabulary=None, - loss_reduction=losses.Reduction.SUM, + loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE, loss_fn=None, name=None): """Creates a `_Head` for single label binary classification. @@ -155,7 +155,8 @@ def binary_classification_head( `label_vocabulary`. Note that errors will be raised if `label_vocabulary` is not provided but labels are strings. loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how to - reduce training loss over batch. Defaults to `SUM`. + reduce training loss over batch. Defaults to `SUM_OVER_BATCH_SIZE`, namely + weighted sum of losses divided by batch size. See `tf.losses.Reduction`. loss_fn: Optional loss function. name: name of the head. If provided, summary and metrics keys will be suffixed by `"/" + name`. Also used as `name_scope` when creating ops. @@ -178,7 +179,7 @@ def binary_classification_head( def regression_head(weight_column=None, label_dimension=1, - loss_reduction=losses.Reduction.SUM, + loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE, loss_fn=None, inverse_link_fn=None, name=None): @@ -218,7 +219,9 @@ def regression_head(weight_column=None, of the last dimension of the labels `Tensor` (typically, this has shape `[batch_size, label_dimension]`). loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how to - reduce training loss over batch. 
Defaults to `SUM`. + reduce training loss over batch and label dimension. Defaults to + `SUM_OVER_BATCH_SIZE`, namely weighted sum of losses divided by + `batch size * label_dimension`. See `tf.losses.Reduction`. loss_fn: Optional loss function. Defaults to `mean_squared_error`. inverse_link_fn: Optional inverse link function, also known as 'mean function'. Defaults to identity. @@ -243,7 +246,7 @@ def regression_head(weight_column=None, def poisson_regression_head( weight_column=None, label_dimension=1, - loss_reduction=losses.Reduction.SUM, + loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE, compute_full_loss=True, name=None): """Creates a `_Head` for poisson regression using `tf.nn.log_poisson_loss`. @@ -275,7 +278,9 @@ def poisson_regression_head( of the last dimension of the labels `Tensor` (typically, this has shape `[batch_size, label_dimension]`). loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how to - reduce training loss over batch. Defaults to `SUM`. + reduce training loss over batch and label dimension. Defaults to + `SUM_OVER_BATCH_SIZE`, namely weighted sum of losses divided by + `batch size * label_dimension`. See `tf.losses.Reduction`. compute_full_loss: Whether to include the constant `log(z!)` term in computing the poisson loss. See `tf.nn.log_poisson_loss` for the full documentation. @@ -480,7 +485,7 @@ class _MultiLabelHead(head_lib._Head): # pylint:disable=protected-access reduction=losses.Reduction.NONE) # Averages loss over classes. 
unweighted_loss = math_ops.reduce_mean( - unweighted_loss, axis=-1, keep_dims=True) + unweighted_loss, axis=-1, keepdims=True) weights = head_lib._get_weights_and_check_match_logits( # pylint:disable=protected-access, features=features, weight_column=self._weight_column, logits=logits) training_loss = losses.compute_weighted_loss( diff --git a/tensorflow/contrib/estimator/python/estimator/head_test.py b/tensorflow/contrib/estimator/python/estimator/head_test.py index 8837dfdc6c2d83495157f0d30b80ac8f6f245c60..98962ca4277a3e8fbbdb3fb2d26df9acc45168b5 100644 --- a/tensorflow/contrib/estimator/python/estimator/head_test.py +++ b/tensorflow/contrib/estimator/python/estimator/head_test.py @@ -1162,8 +1162,8 @@ class PoissonRegressionHead(test.TestCase): # exp(-1) - 2 * (-1) + 2*ln(2) - 2 + 0.5*ln(2*pi*2), # exp(1) - 3 * 1 + 3*ln(3) - 3 + 0.5*ln(2*pi*3)] # = [1.0, 3.020, 1.482] - # sum_loss = 5.502 - expected_loss = 5.502 + # training_loss = (1.0 + 3.020 + 1.482) / 3 + expected_loss = 1.834 atol = 0.001 expected_train_result = b'my_train_op' def _train_op_fn(loss): diff --git a/tensorflow/contrib/estimator/python/estimator/linear_test.py b/tensorflow/contrib/estimator/python/estimator/linear_test.py index c63514eb688af48577f0a3b7ce9e7478309f2c30..c41996b9c6871d294f157411662f2eb9d4c09e5c 100644 --- a/tensorflow/contrib/estimator/python/estimator/linear_test.py +++ b/tensorflow/contrib/estimator/python/estimator/linear_test.py @@ -32,6 +32,7 @@ from tensorflow.python.estimator.export import export from tensorflow.python.estimator.inputs import numpy_io from tensorflow.python.feature_column import feature_column from tensorflow.python.framework import ops +from tensorflow.python.ops.losses import losses from tensorflow.python.platform import gfile from tensorflow.python.platform import test from tensorflow.python.summary.writer import writer_cache @@ -42,7 +43,9 @@ def _linear_estimator_fn( """Returns a LinearEstimator that uses regression_head.""" return 
linear.LinearEstimator( head=head_lib.regression_head( - weight_column=weight_column, label_dimension=label_dimension), + weight_column=weight_column, label_dimension=label_dimension, + # Tests in core (from which this test inherits) test the sum loss. + loss_reduction=losses.Reduction.SUM), *args, **kwargs) diff --git a/tensorflow/contrib/estimator/python/estimator/multi_head.py b/tensorflow/contrib/estimator/python/estimator/multi_head.py index bbbc19cc4dfb4b23f9b707023fbfdd124f1f48de..ce758992140d43529037b14cbbf958d5aa763fb4 100644 --- a/tensorflow/contrib/estimator/python/estimator/multi_head.py +++ b/tensorflow/contrib/estimator/python/estimator/multi_head.py @@ -345,7 +345,7 @@ class _MultiHead(head_lib._Head): # pylint:disable=protected-access if k == _DEFAULT_SERVING_KEY: key = head_name else: - key = '%s/%s' % (k, head_name) + key = '%s/%s' % (head_name, k) export_outputs[key] = v if (k == head_lib._PREDICT_SERVING_KEY and # pylint:disable=protected-access isinstance(v, export_output_lib.PredictOutput)): diff --git a/tensorflow/contrib/estimator/python/estimator/multi_head_test.py b/tensorflow/contrib/estimator/python/estimator/multi_head_test.py index 74d3d6d728554587290301b6ddd5b9aaeb8cebac..3d6fccb1180c435f64552667306be004437f62ba 100644 --- a/tensorflow/contrib/estimator/python/estimator/multi_head_test.py +++ b/tensorflow/contrib/estimator/python/estimator/multi_head_test.py @@ -127,8 +127,8 @@ class MultiHeadTest(test.TestCase): logits=logits) self.assertItemsEqual( - (_DEFAULT_SERVING_KEY, 'predict', 'head1', 'classification/head1', - 'predict/head1', 'head2', 'classification/head2', 'predict/head2'), + (_DEFAULT_SERVING_KEY, 'predict', 'head1', 'head1/classification', + 'head1/predict', 'head2', 'head2/classification', 'head2/predict'), spec.export_outputs.keys()) # Assert predictions and export_outputs. 
@@ -169,11 +169,11 @@ class MultiHeadTest(test.TestCase): self.assertAllClose( expected_probabilities['head1'], sess.run( - spec.export_outputs['predict/head1'].outputs['probabilities'])) + spec.export_outputs['head1/predict'].outputs['probabilities'])) self.assertAllClose( expected_probabilities['head2'], sess.run( - spec.export_outputs['predict/head2'].outputs['probabilities'])) + spec.export_outputs['head2/predict'].outputs['probabilities'])) def test_predict_two_heads_logits_tensor(self): """Tests predict with logits as Tensor.""" @@ -197,8 +197,8 @@ class MultiHeadTest(test.TestCase): logits=logits) self.assertItemsEqual( - (_DEFAULT_SERVING_KEY, 'predict', 'head1', 'classification/head1', - 'predict/head1', 'head2', 'classification/head2', 'predict/head2'), + (_DEFAULT_SERVING_KEY, 'predict', 'head1', 'head1/classification', + 'head1/predict', 'head2', 'head2/classification', 'head2/predict'), spec.export_outputs.keys()) # Assert predictions and export_outputs. @@ -254,8 +254,8 @@ class MultiHeadTest(test.TestCase): logits=logits) self.assertItemsEqual( - (_DEFAULT_SERVING_KEY, 'predict', 'head1', 'regression/head1', - 'predict/head1', 'head2', 'regression/head2', 'predict/head2'), + (_DEFAULT_SERVING_KEY, 'predict', 'head1', 'head1/regression', + 'head1/predict', 'head2', 'head2/regression', 'head2/predict'), spec.export_outputs.keys()) # Assert predictions and export_outputs. @@ -483,14 +483,14 @@ class MultiHeadTest(test.TestCase): [[2., 2., 0.], [2., 2., 0.]]], dtype=np.float32), } # Loss for the first head: - # loss1 = (1+1)^2 + (0-1)^2 + (1+1)^2 + (0-1)^2 + - # (1.5+1.5)^2 + (1.5-1.5)^2 + (1.5+1.5)^2 + (1.5-1.5)^2 - # = 28 + # loss1 = ((1+1)^2 + (0-1)^2 + (1+1)^2 + (0-1)^2 + + # (1.5+1.5)^2 + (1.5-1.5)^2 + (1.5+1.5)^2 + (1.5-1.5)^2) / 8 + # = 3.5 # Loss for the second head: - # loss2 = (0-2)^2 + (1+2)^2 + (0-2)^2 + (0-2)^2 + (1+2)^2 + (0-2)^2 + - # (2+2)^2 + (2-2)^2 + (0+2)^2 + (2+2)^2 + (2-2)^2 + (0+2)^2 - # = 74 - expected_training_loss = 28. + 74. 
+ # loss2 = ((0-2)^2 + (1+2)^2 + (0-2)^2 + (0-2)^2 + (1+2)^2 + (0-2)^2 + + # (2+2)^2 + (2-2)^2 + (0+2)^2 + (2+2)^2 + (2-2)^2 + (0+2)^2) / 12 + # = 6.167 + expected_training_loss = 3.5 + 6.167 training_loss = multi_head.create_loss( features={}, diff --git a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py index fa2697800ec1a44f215f3d5fc9be2197a9e58219..a8774d6dab9205439e6e312827f9cd1306e3f1ea 100644 --- a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py +++ b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py @@ -456,7 +456,7 @@ def _get_local_devices(device_type): def _split_batch(features, labels, number_of_shards, device): - """Split input features and labes into batches.""" + """Split input features and labels into batches.""" def ensure_divisible_by_shards(sequence): batch_size = ops_lib.convert_to_tensor(sequence).get_shape()[0] @@ -602,7 +602,7 @@ def _local_device_setter(worker_device, ps_devices, ps_strategy): def _scale_tower_loss(tower_spec, loss_reduction, number_of_towers): - """Produce an EstimatorSpec with approproriately scaled loss.""" + """Produce an EstimatorSpec with appropriately scaled loss.""" if tower_spec.loss is None: return tower_spec diff --git a/tensorflow/contrib/estimator/python/estimator/rnn.py b/tensorflow/contrib/estimator/python/estimator/rnn.py new file mode 100644 index 0000000000000000000000000000000000000000..b475c12f5af3aedc766a0880a98c5c1e29bddbb7 --- /dev/null +++ b/tensorflow/contrib/estimator/python/estimator/rnn.py @@ -0,0 +1,481 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Recurrent Neural Network estimators.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import six + +from tensorflow.contrib.estimator.python.estimator import extenders +from tensorflow.contrib.feature_column.python.feature_column import sequence_feature_column as seq_fc +from tensorflow.python.estimator import estimator +from tensorflow.python.estimator.canned import head as head_lib +from tensorflow.python.estimator.canned import optimizers +from tensorflow.python.feature_column import feature_column as feature_column_lib +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.layers import core as core_layers +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import check_ops +from tensorflow.python.ops import init_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import partitioned_variables +from tensorflow.python.ops import rnn +from tensorflow.python.ops import rnn_cell +from tensorflow.python.ops import variable_scope +from tensorflow.python.summary import summary +from tensorflow.python.training import optimizer as optimizer_lib +from tensorflow.python.training import training_util + + +# The defaults are historical artifacts of the initial implementation, but seem +# reasonable choices. 
+_DEFAULT_LEARNING_RATE = 0.05 +_DEFAULT_CLIP_NORM = 5.0 + +_CELL_TYPES = {'basic_rnn': rnn_cell.BasicRNNCell, + 'lstm': rnn_cell.BasicLSTMCell, + 'gru': rnn_cell.GRUCell} + +# Indicates no value was provided by the user to a kwarg. +USE_DEFAULT = object() + + +def _single_rnn_cell(num_units, cell_type): + cell_type = _CELL_TYPES.get(cell_type, cell_type) + if not cell_type or not issubclass(cell_type, rnn_cell.RNNCell): + raise ValueError('Supported cell types are {}; got {}'.format( + list(_CELL_TYPES.keys()), cell_type)) + return cell_type(num_units=num_units) + + +def _make_rnn_cell_fn(num_units, cell_type='basic_rnn'): + """Convenience function to create `rnn_cell_fn` for canned RNN Estimators. + + Args: + num_units: Iterable of integer number of hidden units per RNN layer. + cell_type: A subclass of `tf.nn.rnn_cell.RNNCell` or a string specifying + the cell type. Supported strings are: `'basic_rnn'`, `'lstm'`, and + `'gru'`. + + Returns: + A function that takes a single argument, an instance of + `tf.estimator.ModeKeys`, and returns an instance derived from + `tf.nn.rnn_cell.RNNCell`. + + Raises: + ValueError: If cell_type is not supported. + """ + def rnn_cell_fn(mode): + # Unused. Part of the rnn_cell_fn interface since user specified functions + # may need different behavior across modes (e.g. dropout). + del mode + cells = [_single_rnn_cell(n, cell_type) for n in num_units] + if len(cells) == 1: + return cells[0] + return rnn_cell.MultiRNNCell(cells) + return rnn_cell_fn + + +def _concatenate_context_input(sequence_input, context_input): + """Replicates `context_input` across all timesteps of `sequence_input`. + + Expands dimension 1 of `context_input` then tiles it `sequence_length` times. + This value is appended to `sequence_input` on dimension 2 and the result is + returned. + + Args: + sequence_input: A `Tensor` of dtype `float32` and shape `[batch_size, + padded_length, d0]`. 
+ context_input: A `Tensor` of dtype `float32` and shape `[batch_size, d1]`. + + Returns: + A `Tensor` of dtype `float32` and shape `[batch_size, padded_length, + d0 + d1]`. + + Raises: + ValueError: If `sequence_input` does not have rank 3 or `context_input` does + not have rank 2. + """ + seq_rank_check = check_ops.assert_rank( + sequence_input, + 3, + message='sequence_input must have rank 3', + data=[array_ops.shape(sequence_input)]) + seq_type_check = check_ops.assert_type( + sequence_input, + dtypes.float32, + message='sequence_input must have dtype float32; got {}.'.format( + sequence_input.dtype)) + ctx_rank_check = check_ops.assert_rank( + context_input, + 2, + message='context_input must have rank 2', + data=[array_ops.shape(context_input)]) + ctx_type_check = check_ops.assert_type( + context_input, + dtypes.float32, + message='context_input must have dtype float32; got {}.'.format( + context_input.dtype)) + with ops.control_dependencies( + [seq_rank_check, seq_type_check, ctx_rank_check, ctx_type_check]): + padded_length = array_ops.shape(sequence_input)[1] + tiled_context_input = array_ops.tile( + array_ops.expand_dims(context_input, 1), + array_ops.concat([[1], [padded_length], [1]], 0)) + return array_ops.concat([sequence_input, tiled_context_input], 2) + + +def _select_last_activations(activations, sequence_lengths): + """Selects the nth set of activations for each n in `sequence_lengths`. + + Returns a `Tensor` of shape `[batch_size, k]`. If `sequence_lengths` is not + `None`, then `output[i, :] = activations[i, sequence_lengths[i] - 1, :]`. If + `sequence_lengths` is `None`, then `output[i, :] = activations[i, -1, :]`. + + Args: + activations: A `Tensor` with shape `[batch_size, padded_length, k]`. + sequence_lengths: A `Tensor` with shape `[batch_size]` or `None`. + Returns: + A `Tensor` of shape `[batch_size, k]`.
+ """ + with ops.name_scope( + 'select_last_activations', values=[activations, sequence_lengths]): + activations_shape = array_ops.shape(activations) + batch_size = activations_shape[0] + padded_length = activations_shape[1] + output_units = activations_shape[2] + if sequence_lengths is None: + sequence_lengths = padded_length + start_indices = math_ops.to_int64( + math_ops.range(batch_size) * padded_length) + last_indices = start_indices + sequence_lengths - 1 + reshaped_activations = array_ops.reshape( + activations, [batch_size * padded_length, output_units]) + + last_activations = array_ops.gather(reshaped_activations, last_indices) + last_activations.set_shape([activations.shape[0], activations.shape[2]]) + return last_activations + + +def _rnn_logit_fn_builder(output_units, rnn_cell_fn, sequence_feature_columns, + context_feature_columns, input_layer_partitioner): + """Function builder for a rnn logit_fn. + + Args: + output_units: An int indicating the dimension of the logit layer. + rnn_cell_fn: A function with one argument, a `tf.estimator.ModeKeys`, and + returns an object of type `tf.nn.rnn_cell.RNNCell`. + sequence_feature_columns: An iterable containing the `FeatureColumn`s + that represent sequential input. + context_feature_columns: An iterable containing the `FeatureColumn`s + that represent contextual input. + input_layer_partitioner: Partitioner for input layer. + + Returns: + A logit_fn (see below). + + Raises: + ValueError: If output_units is not an int. + """ + if not isinstance(output_units, int): + raise ValueError('output_units must be an int. Given type: {}'.format( + type(output_units))) + + def rnn_logit_fn(features, mode): + """Recurrent Neural Network logit_fn. + + Args: + features: This is the first item returned from the `input_fn` + passed to `train`, `evaluate`, and `predict`. This should be a + single `Tensor` or `dict` of same. + mode: Optional. Specifies if this is training, evaluation or prediction. + See `ModeKeys`.
+ + Returns: + A `Tensor` representing the logits. + """ + with variable_scope.variable_scope( + 'sequence_input_layer', + values=tuple(six.itervalues(features)), + partitioner=input_layer_partitioner): + sequence_input, sequence_length = seq_fc.sequence_input_layer( + features=features, feature_columns=sequence_feature_columns) + summary.histogram('sequence_length', sequence_length) + + if context_feature_columns: + context_input = feature_column_lib.input_layer( + features=features, + feature_columns=context_feature_columns) + sequence_input = _concatenate_context_input(sequence_input, + context_input) + + cell = rnn_cell_fn(mode) + # Ignore output state. + rnn_outputs, _ = rnn.dynamic_rnn( + cell=cell, + inputs=sequence_input, + dtype=dtypes.float32, + time_major=False) + last_activations = _select_last_activations(rnn_outputs, sequence_length) + + with variable_scope.variable_scope('logits', values=(rnn_outputs,)): + logits = core_layers.dense( + last_activations, + units=output_units, + activation=None, + kernel_initializer=init_ops.glorot_uniform_initializer()) + return logits + + return rnn_logit_fn + + +def _rnn_model_fn(features, + labels, + mode, + head, + rnn_cell_fn, + sequence_feature_columns, + context_feature_columns, + optimizer='Adagrad', + input_layer_partitioner=None, + config=None): + """Recurrent Neural Net model_fn. + + Args: + features: dict of `Tensor` and `SparseTensor` objects returned from + `input_fn`. + labels: `Tensor` of shape [batch_size, 1] or [batch_size] with labels. + mode: Defines whether this is training, evaluation or prediction. + See `ModeKeys`. + head: A `head_lib._Head` instance. + rnn_cell_fn: A function with one argument, a `tf.estimator.ModeKeys`, and + returns an object of type `tf.nn.rnn_cell.RNNCell`. + sequence_feature_columns: Iterable containing `FeatureColumn`s that + represent sequential model inputs. 
+ context_feature_columns: Iterable containing `FeatureColumn`s that + represent model inputs not associated with a specific timestep. + optimizer: String, `tf.Optimizer` object, or callable that creates the + optimizer to use for training. If not specified, will use the Adagrad + optimizer with a default learning rate of 0.05 and gradient clip norm of + 5.0. + input_layer_partitioner: Partitioner for input layer. Defaults + to `min_max_variable_partitioner` with `min_slice_size` 64 << 20. + config: `RunConfig` object to configure the runtime settings. + + Returns: + An `EstimatorSpec` instance. + + Raises: + ValueError: If mode or optimizer is invalid, or features has the wrong type. + """ + if not isinstance(features, dict): + raise ValueError('features should be a dictionary of `Tensor`s. ' + 'Given type: {}'.format(type(features))) + + # If user does not provide an optimizer instance, use the optimizer specified + # by the string with default learning rate and gradient clipping. + if not isinstance(optimizer, optimizer_lib.Optimizer): + optimizer = optimizers.get_optimizer_instance( + optimizer, learning_rate=_DEFAULT_LEARNING_RATE) + optimizer = extenders.clip_gradients_by_norm(optimizer, _DEFAULT_CLIP_NORM) + + num_ps_replicas = config.num_ps_replicas if config else 0 + partitioner = partitioned_variables.min_max_variable_partitioner( + max_partitions=num_ps_replicas) + with variable_scope.variable_scope( + 'rnn', + values=tuple(six.itervalues(features)), + partitioner=partitioner): + input_layer_partitioner = input_layer_partitioner or ( + partitioned_variables.min_max_variable_partitioner( + max_partitions=num_ps_replicas, + min_slice_size=64 << 20)) + + logit_fn = _rnn_logit_fn_builder( + output_units=head.logits_dimension, + rnn_cell_fn=rnn_cell_fn, + sequence_feature_columns=sequence_feature_columns, + context_feature_columns=context_feature_columns, + input_layer_partitioner=input_layer_partitioner) + logits = logit_fn(features=features, mode=mode) + + 
def _train_op_fn(loss): + """Returns the op to optimize the loss.""" + return optimizer.minimize( + loss, + global_step=training_util.get_global_step()) + + return head.create_estimator_spec( + features=features, + mode=mode, + labels=labels, + train_op_fn=_train_op_fn, + logits=logits) + + +class RNNClassifier(estimator.Estimator): + """A classifier for TensorFlow RNN models. + + Trains a recurrent neural network model to classify instances into one of + multiple classes. + + Example: + + ```python + token_sequence = sequence_categorical_column_with_hash_bucket(...) + token_emb = embedding_column(categorical_column=token_sequence, ...) + + estimator = RNNClassifier( + num_units=[32, 16], cell_type='lstm', + sequence_feature_columns=[token_emb]) + + # Input builders + def input_fn_train: # returns x, y + pass + estimator.train(input_fn=input_fn_train, steps=100) + + def input_fn_eval: # returns x, y + pass + metrics = estimator.evaluate(input_fn=input_fn_eval, steps=10) + def input_fn_predict: # returns x, None + pass + predictions = estimator.predict(input_fn=input_fn_predict) + ``` + + Input of `train` and `evaluate` should have following features, + otherwise there will be a `KeyError`: + + * if `weight_column` is not `None`, a feature with + `key=weight_column` whose value is a `Tensor`. + * for each `column` in `sequence_feature_columns`: + - a feature with `key=column.name` whose `value` is a `SparseTensor`. + * for each `column` in `context_feature_columns`: + - if `column` is a `_CategoricalColumn`, a feature with `key=column.name` + whose `value` is a `SparseTensor`. + - if `column` is a `_WeightedCategoricalColumn`, two features: the first + with `key` the id column name, the second with `key` the weight column + name. Both features' `value` must be a `SparseTensor`. + - if `column` is a `_DenseColumn`, a feature with `key=column.name` + whose `value` is a `Tensor`. + + Loss is calculated by using softmax cross entropy. 
+ + @compatibility(eager) + Estimators are not compatible with eager execution. + @end_compatibility + """ + + def __init__(self, + sequence_feature_columns, + context_feature_columns=None, + num_units=None, + cell_type=USE_DEFAULT, + rnn_cell_fn=None, + model_dir=None, + n_classes=2, + weight_column=None, + label_vocabulary=None, + optimizer='Adagrad', + input_layer_partitioner=None, + config=None): + """Initializes a `RNNClassifier` instance. + + Args: + sequence_feature_columns: An iterable containing the `FeatureColumn`s + that represent sequential input. All items in the set should either be + sequence columns (e.g. `sequence_numeric_column`) or constructed from + one (e.g. `embedding_column` with `sequence_categorical_column_*` as + input). + context_feature_columns: An iterable containing the `FeatureColumn`s + for contextual input. The data represented by these columns will be + replicated and given to the RNN at each timestep. These columns must be + instances of classes derived from `_DenseColumn` such as + `numeric_column`, not the sequential variants. + num_units: Iterable of integer number of hidden units per RNN layer. If + set, `cell_type` must also be specified and `rnn_cell_fn` must be + `None`. + cell_type: A subclass of `tf.nn.rnn_cell.RNNCell` or a string specifying + the cell type. Supported strings are: `'basic_rnn'`, `'lstm'`, and + `'gru'`. If set, `num_units` must also be specified and `rnn_cell_fn` + must be `None`. + rnn_cell_fn: A function with one argument, a `tf.estimator.ModeKeys`, and + returns an object of type `tf.nn.rnn_cell.RNNCell` that will be used to + construct the RNN. If set, `num_units` and `cell_type` cannot be set. + This is for advanced users who need additional customization beyond + `num_units` and `cell_type`. Note that `tf.nn.rnn_cell.MultiRNNCell` is + needed for stacked RNNs. + model_dir: Directory to save model parameters, graph and etc. 
This can + also be used to load checkpoints from the directory into an estimator to + continue training a previously saved model. + n_classes: Number of label classes. Defaults to 2, namely binary + classification. Must be > 1. + weight_column: A string or a `_NumericColumn` created by + `tf.feature_column.numeric_column` defining feature column representing + weights. It is used to down weight or boost examples during training. It + will be multiplied by the loss of the example. If it is a string, it is + used as a key to fetch weight tensor from the `features`. If it is a + `_NumericColumn`, raw tensor is fetched by key `weight_column.key`, + then weight_column.normalizer_fn is applied on it to get weight tensor. + label_vocabulary: A list of strings representing possible label values. + If given, labels must be string type and have any value in + `label_vocabulary`. If it is not given, that means labels are + already encoded as integer or float within [0, 1] for `n_classes=2` and + encoded as integer values in {0, 1,..., n_classes-1} for `n_classes`>2. + Also there will be errors if vocabulary is not provided and labels are + string. + optimizer: An instance of `tf.Optimizer` used to train the model. Defaults + to Adagrad optimizer. + input_layer_partitioner: Optional. Partitioner for input layer. Defaults + to `min_max_variable_partitioner` with `min_slice_size` 64 << 20. + config: `RunConfig` object to configure the runtime settings. + + Raises: + ValueError: If `num_units`, `cell_type`, and `rnn_cell_fn` are not + compatible.
+ """ + if rnn_cell_fn and (num_units or cell_type != USE_DEFAULT): + raise ValueError( + 'num_units and cell_type must not be specified when using rnn_cell_fn' + ) + if not rnn_cell_fn: + if cell_type == USE_DEFAULT: + cell_type = 'basic_rnn' + rnn_cell_fn = _make_rnn_cell_fn(num_units, cell_type) + + if n_classes == 2: + head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss( # pylint: disable=protected-access + weight_column=weight_column, + label_vocabulary=label_vocabulary) + else: + head = head_lib._multi_class_head_with_softmax_cross_entropy_loss( # pylint: disable=protected-access + n_classes, weight_column=weight_column, + label_vocabulary=label_vocabulary) + def _model_fn(features, labels, mode, config): + return _rnn_model_fn( + features=features, + labels=labels, + mode=mode, + head=head, + rnn_cell_fn=rnn_cell_fn, + sequence_feature_columns=tuple(sequence_feature_columns or []), + context_feature_columns=tuple(context_feature_columns or []), + optimizer=optimizer, + input_layer_partitioner=input_layer_partitioner, + config=config) + super(RNNClassifier, self).__init__( + model_fn=_model_fn, model_dir=model_dir, config=config) diff --git a/tensorflow/contrib/estimator/python/estimator/rnn_test.py b/tensorflow/contrib/estimator/python/estimator/rnn_test.py new file mode 100644 index 0000000000000000000000000000000000000000..393f94f5c7de02c56d93993bbeb8aaec4ea8234c --- /dev/null +++ b/tensorflow/contrib/estimator/python/estimator/rnn_test.py @@ -0,0 +1,1131 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for rnn.py.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import random +import shutil +import tempfile + +import numpy as np +import six + +from tensorflow.contrib.estimator.python.estimator import rnn +from tensorflow.contrib.feature_column.python.feature_column import sequence_feature_column as seq_fc +from tensorflow.core.example import example_pb2 +from tensorflow.core.example import feature_pb2 +from tensorflow.python.estimator import model_fn +from tensorflow.python.estimator.canned import metric_keys +from tensorflow.python.estimator.canned import prediction_keys +from tensorflow.python.estimator.export import export +from tensorflow.python.estimator.inputs import numpy_io +from tensorflow.python.feature_column import feature_column as fc +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.framework import sparse_tensor +from tensorflow.python.ops import check_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import parsing_ops +from tensorflow.python.ops import partitioned_variables +from tensorflow.python.ops import rnn_cell +from tensorflow.python.ops import state_ops +from tensorflow.python.ops import variable_scope +from tensorflow.python.ops import variables as variables_lib +from tensorflow.python.platform import gfile +from tensorflow.python.platform import test +from 
tensorflow.python.summary.writer import writer_cache +from tensorflow.python.training import checkpoint_utils +from tensorflow.python.training import input as input_lib +from tensorflow.python.training import monitored_session +from tensorflow.python.training import optimizer +from tensorflow.python.training import training_util + + +# Names of variables created by BasicRNNCell model. +TOKEN_EMBEDDING_NAME = 'rnn/sequence_input_layer/input_layer/tokens_sequential_embedding/embedding_weights' +CELL_WEIGHTS_NAME = 'rnn/rnn/basic_rnn_cell/kernel' +CELL_BIAS_NAME = 'rnn/rnn/basic_rnn_cell/bias' +MULTI_CELL_WEIGHTS_NAME_PATTERN = 'rnn/rnn/multi_rnn_cell/cell_%d/basic_rnn_cell/kernel' +MULTI_CELL_BIAS_NAME_PATTERN = 'rnn/rnn/multi_rnn_cell/cell_%d/basic_rnn_cell/bias' +LOGITS_WEIGHTS_NAME = 'rnn/logits/dense/kernel' +LOGITS_BIAS_NAME = 'rnn/logits/dense/bias' + + +def _assert_close(expected, actual, rtol=1e-04, name='assert_close'): + with ops.name_scope(name, 'assert_close', (expected, actual, rtol)) as scope: + expected = ops.convert_to_tensor(expected, name='expected') + actual = ops.convert_to_tensor(actual, name='actual') + rdiff = math_ops.abs(expected - actual, 'diff') / math_ops.abs(expected) + rtol = ops.convert_to_tensor(rtol, name='rtol') + return check_ops.assert_less( + rdiff, + rtol, + data=('Condition expected =~ actual did not hold element-wise:' + 'expected = ', expected, 'actual = ', actual, 'rdiff = ', rdiff, + 'rtol = ', rtol,), + name=scope) + + +def create_checkpoint(rnn_weights, rnn_biases, logits_weights, logits_biases, + global_step, model_dir): + """Create checkpoint file with provided model weights. + + Args: + rnn_weights: Iterable of values of weights for the RNN cell. + rnn_biases: Iterable of values of biases for the RNN cell. + logits_weights: Iterable of values for matrix connecting RNN output to + logits. + logits_biases: Iterable of values for logits bias term. + global_step: Initial global step to save in checkpoint. 
+ model_dir: Directory into which checkpoint is saved. + """ + model_weights = {} + model_weights[CELL_WEIGHTS_NAME] = rnn_weights + model_weights[CELL_BIAS_NAME] = rnn_biases + model_weights[LOGITS_WEIGHTS_NAME] = logits_weights + model_weights[LOGITS_BIAS_NAME] = logits_biases + + with ops.Graph().as_default(): + # Create model variables. + for k, v in six.iteritems(model_weights): + variables_lib.Variable(v, name=k, dtype=dtypes.float32) + + # Create non-model variables. + global_step_var = training_util.create_global_step() + assign_op = global_step_var.assign(global_step) + + # Initialize vars and save checkpoint. + with monitored_session.MonitoredTrainingSession( + checkpoint_dir=model_dir) as sess: + sess.run(assign_op) + + +class RNNLogitFnTest(test.TestCase): + """Tests correctness of logits calculated from _rnn_logit_fn_builder.""" + + def setUp(self): + self._model_dir = tempfile.mkdtemp() + + def tearDown(self): + if self._model_dir: + writer_cache.FileWriterCache.clear() + shutil.rmtree(self._model_dir) + + def _test_logits(self, mode, rnn_units, logits_dimension, features_fn, + sequence_feature_columns, context_feature_columns, + expected_logits): + """Tests that the expected logits are calculated.""" + with ops.Graph().as_default(): + # Global step needed for MonitoredSession, which is in turn used to + # explicitly set variable weights through a checkpoint. + training_util.create_global_step() + # Use a variable scope here with 'rnn', emulating the rnn model_fn, so + # the checkpoint naming is shared. 
+ with variable_scope.variable_scope('rnn'): + input_layer_partitioner = ( + partitioned_variables.min_max_variable_partitioner( + max_partitions=0, min_slice_size=64 << 20)) + logit_fn = rnn._rnn_logit_fn_builder( + output_units=logits_dimension, + rnn_cell_fn=rnn._make_rnn_cell_fn(rnn_units), + sequence_feature_columns=sequence_feature_columns, + context_feature_columns=context_feature_columns, + input_layer_partitioner=input_layer_partitioner) + # Features are constructed within this function, otherwise the Tensors + # containing the features would be defined outside this graph. + logits = logit_fn(features=features_fn(), mode=mode) + with monitored_session.MonitoredTrainingSession( + checkpoint_dir=self._model_dir) as sess: + self.assertAllClose(expected_logits, sess.run(logits), atol=1e-4) + + def testOneDimLogits(self): + """Tests one-dimensional logits. + + Intermediate values are rounded for ease in reading. + input_layer = [[[10]], [[5]]] + initial_state = [0, 0] + rnn_output_timestep_1 = [[tanh(.1*10 + .2*0 + .3*0 +.2), + tanh(-.2*10 - .3*0 - .4*0 +.5)]] + = [[0.83, -0.91]] + rnn_output_timestep_2 = [[tanh(.1*5 + .2*.83 - .3*.91 +.2), + tanh(-.2*5 - .3*.83 + .4*.91 +.5)]] + = [[0.53, -0.37]] + logits = [[-1*0.53 - 1*0.37 + 0.3]] = [[-0.6033]] + """ + base_global_step = 100 + create_checkpoint( + rnn_weights=[[.1, -.2], [.2, -.3], [.3, -.4]], + rnn_biases=[.2, .5], + logits_weights=[[-1.], [1.]], + logits_biases=[0.3], + global_step=base_global_step, + model_dir=self._model_dir) + + def features_fn(): + return { + 'price': + sparse_tensor.SparseTensor( + values=[10., 5.], + indices=[[0, 0], [0, 1]], + dense_shape=[1, 2]), + } + + sequence_feature_columns = [ + seq_fc.sequence_numeric_column('price', shape=(1,))] + context_feature_columns = [] + for mode in [ + model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL, + model_fn.ModeKeys.PREDICT + ]: + self._test_logits( + mode, + rnn_units=[2], + logits_dimension=1, + features_fn=features_fn, + 
sequence_feature_columns=sequence_feature_columns, + context_feature_columns=context_feature_columns, + expected_logits=[[-0.6033]]) + + def testMultiDimLogits(self): + """Tests multi-dimensional logits. + + Intermediate values are rounded for ease in reading. + input_layer = [[[10]], [[5]]] + initial_state = [0, 0] + rnn_output_timestep_1 = [[tanh(.1*10 + .2*0 + .3*0 +.2), + tanh(-.2*10 - .3*0 - .4*0 +.5)]] + = [[0.83, -0.91]] + rnn_output_timestep_2 = [[tanh(.1*5 + .2*.83 - .3*.91 +.2), + tanh(-.2*5 - .3*.83 + .4*.91 +.5)]] + = [[0.53, -0.37]] + logits = [[-1*0.53 - 1*0.37 + 0.3], + [0.5*0.53 + 0.3*0.37 + 0.4], + [0.2*0.53 - 0.1*0.37 + 0.5] + = [[-0.6033, 0.7777, 0.5698]] + """ + base_global_step = 100 + create_checkpoint( + rnn_weights=[[.1, -.2], [.2, -.3], [.3, -.4]], + rnn_biases=[.2, .5], + logits_weights=[[-1., 0.5, 0.2], [1., -0.3, 0.1]], + logits_biases=[0.3, 0.4, 0.5], + global_step=base_global_step, + model_dir=self._model_dir) + + def features_fn(): + return { + 'price': + sparse_tensor.SparseTensor( + values=[10., 5.], + indices=[[0, 0], [0, 1]], + dense_shape=[1, 2]), + } + + sequence_feature_columns = [ + seq_fc.sequence_numeric_column('price', shape=(1,))] + context_feature_columns = [] + + for mode in [ + model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL, + model_fn.ModeKeys.PREDICT + ]: + self._test_logits( + mode, + rnn_units=[2], + logits_dimension=3, + features_fn=features_fn, + sequence_feature_columns=sequence_feature_columns, + context_feature_columns=context_feature_columns, + expected_logits=[[-0.6033, 0.7777, 0.5698]]) + + def testMultiExampleMultiDim(self): + """Tests multiple examples and multi-dimensional logits. + + Intermediate values are rounded for ease in reading. 
+ input_layer = [[[10], [5]], [[2], [7]]] + initial_state = [[0, 0], [0, 0]] + rnn_output_timestep_1 = [[tanh(.1*10 + .2*0 + .3*0 +.2), + tanh(-.2*10 - .3*0 - .4*0 +.5)], + [tanh(.1*2 + .2*0 + .3*0 +.2), + tanh(-.2*2 - .3*0 - .4*0 +.5)]] + = [[0.83, -0.91], [0.38, 0.10]] + rnn_output_timestep_2 = [[tanh(.1*5 + .2*.83 - .3*.91 +.2), + tanh(-.2*5 - .3*.83 + .4*.91 +.5)], + [tanh(.1*7 + .2*.38 + .3*.10 +.2), + tanh(-.2*7 - .3*.38 - .4*.10 +.5)]] + = [[0.53, -0.37], [0.76, -0.78] + logits = [[-1*0.53 - 1*0.37 + 0.3, + 0.5*0.53 + 0.3*0.37 + 0.4, + 0.2*0.53 - 0.1*0.37 + 0.5], + [-1*0.76 - 1*0.78 + 0.3, + 0.5*0.76 +0.3*0.78 + 0.4, + 0.2*0.76 -0.1*0.78 + 0.5]] + = [[-0.6033, 0.7777, 0.5698], [-1.2473, 1.0170, 0.5745]] + """ + base_global_step = 100 + create_checkpoint( + rnn_weights=[[.1, -.2], [.2, -.3], [.3, -.4]], + rnn_biases=[.2, .5], + logits_weights=[[-1., 0.5, 0.2], [1., -0.3, 0.1]], + logits_biases=[0.3, 0.4, 0.5], + global_step=base_global_step, + model_dir=self._model_dir) + + def features_fn(): + return { + 'price': + sparse_tensor.SparseTensor( + values=[10., 5., 2., 7.], + indices=[[0, 0], [0, 1], [1, 0], [1, 1]], + dense_shape=[2, 2]), + } + + sequence_feature_columns = [ + seq_fc.sequence_numeric_column('price', shape=(1,)) + ] + context_feature_columns = [] + + for mode in [ + model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL, + model_fn.ModeKeys.PREDICT + ]: + self._test_logits( + mode, + rnn_units=[2], + logits_dimension=3, + features_fn=features_fn, + sequence_feature_columns=sequence_feature_columns, + context_feature_columns=context_feature_columns, + expected_logits=[[-0.6033, 0.7777, 0.5698], + [-1.2473, 1.0170, 0.5745]]) + + def testMultiExamplesDifferentLength(self): + """Tests multiple examples with different lengths. + + Intermediate values are rounded for ease in reading. 
+ input_layer = [[[10], [5]], [[2], [0]]] + initial_state = [[0, 0], [0, 0]] + rnn_output_timestep_1 = [[tanh(.1*10 + .2*0 + .3*0 +.2), + tanh(-.2*10 - .3*0 - .4*0 +.5)], + [tanh(.1*2 + .2*0 + .3*0 +.2), + tanh(-.2*2 - .3*0 - .4*0 +.5)]] + = [[0.83, -0.91], [0.38, 0.10]] + rnn_output_timestep_2 = [[tanh(.1*5 + .2*.83 - .3*.91 +.2), + tanh(-.2*5 - .3*.83 + .4*.91 +.5)], + []] + = [[0.53, -0.37], []] + logits = [[-1*0.53 - 1*0.37 + 0.3], + [-1*0.38 + 1*0.10 + 0.3]] + = [[-0.6033], [0.0197]] + """ + base_global_step = 100 + create_checkpoint( + rnn_weights=[[.1, -.2], [.2, -.3], [.3, -.4]], + rnn_biases=[.2, .5], + logits_weights=[[-1.], [1.]], + logits_biases=[0.3], + global_step=base_global_step, + model_dir=self._model_dir) + + def features_fn(): + return { + 'price': + sparse_tensor.SparseTensor( + values=[10., 5., 2.], + indices=[[0, 0], [0, 1], [1, 0]], + dense_shape=[2, 2]), + } + + sequence_feature_columns = [ + seq_fc.sequence_numeric_column('price', shape=(1,))] + context_feature_columns = [] + + for mode in [ + model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL, + model_fn.ModeKeys.PREDICT + ]: + self._test_logits( + mode, + rnn_units=[2], + logits_dimension=1, + features_fn=features_fn, + sequence_feature_columns=sequence_feature_columns, + context_feature_columns=context_feature_columns, + expected_logits=[[-0.6033], [0.0197]]) + + def testMultiExamplesWithContext(self): + """Tests multiple examples with context features. + + Intermediate values are rounded for ease in reading. 
+ input_layer = [[[10, -0.5], [5, -0.5]], [[2, 0.8], [0, 0]]] + initial_state = [[0, 0], [0, 0]] + rnn_output_timestep_1 = [[tanh(.1*10 - 1*.5 + .2*0 + .3*0 +.2), + tanh(-.2*10 - 0.9*.5 - .3*0 - .4*0 +.5)], + [tanh(.1*2 + 1*.8 + .2*0 + .3*0 +.2), + tanh(-.2*2 + .9*.8 - .3*0 - .4*0 +.5)]] + = [[0.60, -0.96], [0.83, 0.68]] + rnn_output_timestep_2 = [[tanh(.1*5 - 1*.5 + .2*.60 - .3*.96 +.2), + tanh(-.2*5 - .9*.5 - .3*.60 + .4*.96 +.5)], + []] + = [[0.03, -0.63], []] + logits = [[-1*0.03 - 1*0.63 + 0.3], + [-1*0.83 + 1*0.68 + 0.3]] + = [[-0.3662], [0.1414]] + """ + base_global_step = 100 + create_checkpoint( + # Context features weights are inserted between input and state weights. + rnn_weights=[[.1, -.2], [1., 0.9], [.2, -.3], [.3, -.4]], + rnn_biases=[.2, .5], + logits_weights=[[-1.], [1.]], + logits_biases=[0.3], + global_step=base_global_step, + model_dir=self._model_dir) + + def features_fn(): + return { + 'price': + sparse_tensor.SparseTensor( + values=[10., 5., 2.], + indices=[[0, 0], [0, 1], [1, 0]], + dense_shape=[2, 2]), + 'context': [[-0.5], [0.8]], + } + + sequence_feature_columns = [ + seq_fc.sequence_numeric_column('price', shape=(1,))] + context_feature_columns = [fc.numeric_column('context', shape=(1,))] + + for mode in [ + model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL, + model_fn.ModeKeys.PREDICT + ]: + self._test_logits( + mode, + rnn_units=[2], + logits_dimension=1, + features_fn=features_fn, + sequence_feature_columns=sequence_feature_columns, + context_feature_columns=context_feature_columns, + expected_logits=[[-0.3662], [0.1414]]) + + def testMultiExamplesMultiFeatures(self): + """Tests examples with multiple sequential feature columns. + + Intermediate values are rounded for ease in reading. 
+ input_layer = [[[1, 0, 10], [0, 1, 5]], [[1, 0, 2], [0, 0, 0]]] + initial_state = [[0, 0], [0, 0]] + rnn_output_timestep_1 = [[tanh(.5*1 + 1*0 + .1*10 + .2*0 + .3*0 +.2), + tanh(-.5*1 - 1*0 - .2*10 - .3*0 - .4*0 +.5)], + [tanh(.5*1 + 1*0 + .1*2 + .2*0 + .3*0 +.2), + tanh(-.5*1 - 1*0 - .2*2 - .3*0 - .4*0 +.5)]] + = [[0.94, -0.96], [0.72, -0.38]] + rnn_output_timestep_2 = [[tanh(.5*0 + 1*1 + .1*5 + .2*.94 - .3*.96 +.2), + tanh(-.5*0 - 1*1 - .2*5 - .3*.94 + .4*.96 +.5)], + []] + = [[0.92, -0.88], []] + logits = [[-1*0.92 - 1*0.88 + 0.3], + [-1*0.72 - 1*0.38 + 0.3]] + = [[-1.5056], [-0.7962]] + """ + base_global_step = 100 + create_checkpoint( + # FeatureColumns are sorted alphabetically, so on_sale weights are + # inserted before price. + rnn_weights=[[.5, -.5], [1., -1.], [.1, -.2], [.2, -.3], [.3, -.4]], + rnn_biases=[.2, .5], + logits_weights=[[-1.], [1.]], + logits_biases=[0.3], + global_step=base_global_step, + model_dir=self._model_dir) + + def features_fn(): + return { + 'price': + sparse_tensor.SparseTensor( + values=[10., 5., 2.], + indices=[[0, 0], [0, 1], [1, 0]], + dense_shape=[2, 2]), + 'on_sale': + sparse_tensor.SparseTensor( + values=[0, 1, 0], + indices=[[0, 0], [0, 1], [1, 0]], + dense_shape=[2, 2]), + } + + price_column = seq_fc.sequence_numeric_column('price', shape=(1,)) + on_sale_column = fc.indicator_column( + seq_fc.sequence_categorical_column_with_identity( + 'on_sale', num_buckets=2)) + sequence_feature_columns = [price_column, on_sale_column] + context_feature_columns = [] + + for mode in [ + model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL, + model_fn.ModeKeys.PREDICT + ]: + self._test_logits( + mode, + rnn_units=[2], + logits_dimension=1, + features_fn=features_fn, + sequence_feature_columns=sequence_feature_columns, + context_feature_columns=context_feature_columns, + expected_logits=[[-1.5056], [-0.7962]]) + + +class RNNClassifierTrainingTest(test.TestCase): + + def setUp(self): + self._model_dir = tempfile.mkdtemp() + + def 
tearDown(self): + if self._model_dir: + writer_cache.FileWriterCache.clear() + shutil.rmtree(self._model_dir) + + def _assert_checkpoint( + self, n_classes, input_units, cell_units, expected_global_step): + + shapes = { + name: shape for (name, shape) in + checkpoint_utils.list_variables(self._model_dir) + } + + self.assertEqual([], shapes[ops.GraphKeys.GLOBAL_STEP]) + self.assertEqual( + expected_global_step, + checkpoint_utils.load_variable( + self._model_dir, ops.GraphKeys.GLOBAL_STEP)) + + # RNN Cell variables. + if len(cell_units) > 1: + for i, cell_unit in enumerate(cell_units): + self.assertEqual([input_units + cell_unit, cell_unit], + shapes[MULTI_CELL_WEIGHTS_NAME_PATTERN % i]) + self.assertEqual([cell_unit], + shapes[MULTI_CELL_BIAS_NAME_PATTERN % i]) + input_units = cell_unit + elif len(cell_units) == 1: + self.assertEqual([input_units + cell_unit, cell_unit], + shapes[CELL_WEIGHTS_NAME]) + self.assertEqual([cell_unit], shapes[CELL_BIAS_NAME]) + + # Logits variables. + logits_dimension = n_classes if n_classes > 2 else 1 + self.assertEqual([cell_units[-1], logits_dimension], + shapes[LOGITS_WEIGHTS_NAME]) + self.assertEqual([logits_dimension], shapes[LOGITS_BIAS_NAME]) + + def _mock_optimizer(self, expected_loss=None): + expected_var_names = [ + '%s/part_0:0' % CELL_BIAS_NAME, + '%s/part_0:0' % CELL_WEIGHTS_NAME, + '%s/part_0:0' % LOGITS_BIAS_NAME, + '%s/part_0:0' % LOGITS_WEIGHTS_NAME, + ] + + def _minimize(loss, global_step): + trainable_vars = ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES) + self.assertItemsEqual( + expected_var_names, + [var.name for var in trainable_vars]) + + # Verify loss. We can't check the value directly, so we add an assert op. 
+ self.assertEquals(0, loss.shape.ndims) + if expected_loss is None: + return state_ops.assign_add(global_step, 1).op + assert_loss = _assert_close( + math_ops.to_float(expected_loss, name='expected'), + loss, + name='assert_loss') + with ops.control_dependencies((assert_loss,)): + return state_ops.assign_add(global_step, 1).op + + mock_optimizer = test.mock.NonCallableMock( + spec=optimizer.Optimizer, + wraps=optimizer.Optimizer(use_locking=False, name='my_optimizer')) + mock_optimizer.minimize = test.mock.MagicMock(wraps=_minimize) + + # NOTE: Estimator.params performs a deepcopy, which wreaks havoc with mocks. + # So, return mock_optimizer itself for deepcopy. + mock_optimizer.__deepcopy__ = lambda _: mock_optimizer + return mock_optimizer + + def testConflictingRNNCellFn(self): + col = seq_fc.sequence_categorical_column_with_hash_bucket( + 'tokens', hash_bucket_size=10) + embed = fc.embedding_column(col, dimension=2) + cell_units = [4, 2] + + with self.assertRaisesRegexp( + ValueError, + 'num_units and cell_type must not be specified when using rnn_cell_fn'): + rnn.RNNClassifier( + sequence_feature_columns=[embed], + rnn_cell_fn=lambda x: x, + num_units=cell_units) + + with self.assertRaisesRegexp( + ValueError, + 'num_units and cell_type must not be specified when using rnn_cell_fn'): + rnn.RNNClassifier( + sequence_feature_columns=[embed], + rnn_cell_fn=lambda x: x, + cell_type='lstm') + + def _testFromScratchWithDefaultOptimizer(self, n_classes): + def train_input_fn(): + return { + 'tokens': + sparse_tensor.SparseTensor( + values=['the', 'cat', 'sat'], + indices=[[0, 0], [0, 1], [0, 2]], + dense_shape=[1, 3]), + }, [[1]] + + col = seq_fc.sequence_categorical_column_with_hash_bucket( + 'tokens', hash_bucket_size=10) + embed = fc.embedding_column(col, dimension=2) + input_units = 2 + + cell_units = [4, 2] + est = rnn.RNNClassifier( + sequence_feature_columns=[embed], + num_units=cell_units, + n_classes=n_classes, + model_dir=self._model_dir) + + # Train for a 
few steps, and validate final checkpoint. + num_steps = 10 + est.train(input_fn=train_input_fn, steps=num_steps) + self._assert_checkpoint(n_classes, input_units, cell_units, num_steps) + + def testBinaryClassFromScratchWithDefaultOptimizer(self): + self._testFromScratchWithDefaultOptimizer(n_classes=2) + + def testMultiClassFromScratchWithDefaultOptimizer(self): + self._testFromScratchWithDefaultOptimizer(n_classes=4) + + def testFromScratchWithCustomRNNCellFn(self): + def train_input_fn(): + return { + 'tokens': + sparse_tensor.SparseTensor( + values=['the', 'cat', 'sat'], + indices=[[0, 0], [0, 1], [0, 2]], + dense_shape=[1, 3]), + }, [[1]] + + col = seq_fc.sequence_categorical_column_with_hash_bucket( + 'tokens', hash_bucket_size=10) + embed = fc.embedding_column(col, dimension=2) + input_units = 2 + cell_units = [4, 2] + n_classes = 2 + + def rnn_cell_fn(mode): + del mode # unused + cells = [rnn_cell.BasicRNNCell(num_units=n) for n in cell_units] + return rnn_cell.MultiRNNCell(cells) + + est = rnn.RNNClassifier( + sequence_feature_columns=[embed], + rnn_cell_fn=rnn_cell_fn, + n_classes=n_classes, + model_dir=self._model_dir) + + # Train for a few steps, and validate final checkpoint. 
+ num_steps = 10 + est.train(input_fn=train_input_fn, steps=num_steps) + self._assert_checkpoint(n_classes, input_units, cell_units, num_steps) + + def _testExampleWeight(self, n_classes): + def train_input_fn(): + return { + 'tokens': + sparse_tensor.SparseTensor( + values=['the', 'cat', 'sat', 'dog', 'barked'], + indices=[[0, 0], [0, 1], [0, 2], [1, 0], [1, 1]], + dense_shape=[2, 3]), + 'w': [[1], [2]], + }, [[1], [0]] + + col = seq_fc.sequence_categorical_column_with_hash_bucket( + 'tokens', hash_bucket_size=10) + embed = fc.embedding_column(col, dimension=2) + input_units = 2 + + cell_units = [4, 2] + est = rnn.RNNClassifier( + num_units=cell_units, + sequence_feature_columns=[embed], + n_classes=n_classes, + weight_column='w', + model_dir=self._model_dir) + + # Train for a few steps, and validate final checkpoint. + num_steps = 10 + est.train(input_fn=train_input_fn, steps=num_steps) + self._assert_checkpoint(n_classes, input_units, cell_units, num_steps) + + def testBinaryClassWithExampleWeight(self): + self._testExampleWeight(n_classes=2) + + def testMultiClassWithExampleWeight(self): + self._testExampleWeight(n_classes=4) + + def testBinaryClassFromCheckpoint(self): + initial_global_step = 100 + create_checkpoint( + rnn_weights=[[.1, -.2], [.2, -.3], [.3, -.4]], + rnn_biases=[.2, .5], + logits_weights=[[-1.], [1.]], + logits_biases=[0.3], + global_step=initial_global_step, + model_dir=self._model_dir) + + def train_input_fn(): + return { + 'price': + sparse_tensor.SparseTensor( + values=[10., 5., 2.], + indices=[[0, 0], [0, 1], [1, 0]], + dense_shape=[2, 2]), + }, [[0], [1]] + + # Uses same checkpoint and examples as testBinaryClassEvaluationMetrics. + # See that test for loss calculation. 
+ mock_optimizer = self._mock_optimizer(expected_loss=1.119661) + + sequence_feature_columns = [ + seq_fc.sequence_numeric_column('price', shape=(1,))] + est = rnn.RNNClassifier( + num_units=[2], + sequence_feature_columns=sequence_feature_columns, + n_classes=2, + optimizer=mock_optimizer, + model_dir=self._model_dir) + self.assertEqual(0, mock_optimizer.minimize.call_count) + est.train(input_fn=train_input_fn, steps=10) + self.assertEqual(1, mock_optimizer.minimize.call_count) + + def testMultiClassFromCheckpoint(self): + initial_global_step = 100 + create_checkpoint( + rnn_weights=[[.1, -.2], [.2, -.3], [.3, -.4]], + rnn_biases=[.2, .5], + logits_weights=[[-1., 0.5, 0.2], [1., -0.3, 0.1]], + logits_biases=[0.3, 0.4, 0.5], + global_step=initial_global_step, + model_dir=self._model_dir) + + def train_input_fn(): + return { + 'price': + sparse_tensor.SparseTensor( + values=[10., 5., 2., 7.], + indices=[[0, 0], [0, 1], [1, 0], [1, 1]], + dense_shape=[2, 2]), + }, [[0], [1]] + + # Uses same checkpoint and examples as testMultiClassEvaluationMetrics. + # See that test for loss calculation. 
+ mock_optimizer = self._mock_optimizer(expected_loss=2.662932) + + sequence_feature_columns = [ + seq_fc.sequence_numeric_column('price', shape=(1,))] + est = rnn.RNNClassifier( + num_units=[2], + sequence_feature_columns=sequence_feature_columns, + n_classes=3, + optimizer=mock_optimizer, + model_dir=self._model_dir) + self.assertEqual(0, mock_optimizer.minimize.call_count) + est.train(input_fn=train_input_fn, steps=10) + self.assertEqual(1, mock_optimizer.minimize.call_count) + + +def sorted_key_dict(unsorted_dict): + return {k: unsorted_dict[k] for k in sorted(unsorted_dict)} + + +class RNNClassifierEvaluationTest(test.TestCase): + + def setUp(self): + self._model_dir = tempfile.mkdtemp() + + def tearDown(self): + if self._model_dir: + writer_cache.FileWriterCache.clear() + shutil.rmtree(self._model_dir) + + def testBinaryClassEvaluationMetrics(self): + global_step = 100 + create_checkpoint( + rnn_weights=[[.1, -.2], [.2, -.3], [.3, -.4]], + rnn_biases=[.2, .5], + logits_weights=[[-1.], [1.]], + logits_biases=[0.3], + global_step=global_step, + model_dir=self._model_dir) + + def eval_input_fn(): + return { + 'price': + sparse_tensor.SparseTensor( + values=[10., 5., 2.], + indices=[[0, 0], [0, 1], [1, 0]], + dense_shape=[2, 2]), + }, [[0], [1]] + + sequence_feature_columns = [ + seq_fc.sequence_numeric_column('price', shape=(1,))] + + est = rnn.RNNClassifier( + num_units=[2], + sequence_feature_columns=sequence_feature_columns, + n_classes=2, + model_dir=self._model_dir) + eval_metrics = est.evaluate(eval_input_fn, steps=1) + + # Uses identical numbers to testMultiExamplesWithDifferentLength. + # See that test for logits calculation. 
+ # logits = [[-0.603282], [0.019719]] + # probability = exp(logits) / (1 + exp(logits)) = [[0.353593], [0.504930]] + # loss = -label * ln(p) - (1 - label) * ln(1 - p) + # = [[0.436326], [0.683335]] + expected_metrics = { + ops.GraphKeys.GLOBAL_STEP: global_step, + metric_keys.MetricKeys.LOSS: 1.119661, + metric_keys.MetricKeys.LOSS_MEAN: 0.559831, + metric_keys.MetricKeys.ACCURACY: 1.0, + metric_keys.MetricKeys.PREDICTION_MEAN: 0.429262, + metric_keys.MetricKeys.LABEL_MEAN: 0.5, + metric_keys.MetricKeys.ACCURACY_BASELINE: 0.5, + # With default threshold of 0.5, the model is a perfect classifier. + metric_keys.MetricKeys.RECALL: 1.0, + metric_keys.MetricKeys.PRECISION: 1.0, + # Positive example is scored above negative, so AUC = 1.0. + metric_keys.MetricKeys.AUC: 1.0, + metric_keys.MetricKeys.AUC_PR: 1.0, + } + self.assertAllClose( + sorted_key_dict(expected_metrics), sorted_key_dict(eval_metrics)) + + def testMultiClassEvaluationMetrics(self): + global_step = 100 + create_checkpoint( + rnn_weights=[[.1, -.2], [.2, -.3], [.3, -.4]], + rnn_biases=[.2, .5], + logits_weights=[[-1., 0.5, 0.2], [1., -0.3, 0.1]], + logits_biases=[0.3, 0.4, 0.5], + global_step=global_step, + model_dir=self._model_dir) + + def eval_input_fn(): + return { + 'price': + sparse_tensor.SparseTensor( + values=[10., 5., 2., 7.], + indices=[[0, 0], [0, 1], [1, 0], [1, 1]], + dense_shape=[2, 2]), + }, [[0], [1]] + + sequence_feature_columns = [ + seq_fc.sequence_numeric_column('price', shape=(1,))] + + est = rnn.RNNClassifier( + num_units=[2], + sequence_feature_columns=sequence_feature_columns, + n_classes=3, + model_dir=self._model_dir) + eval_metrics = est.evaluate(eval_input_fn, steps=1) + + # Uses identical numbers to testMultiExampleMultiDim. + # See that test for logits calculation. 
+ # logits = [[-0.603282, 0.777708, 0.569756], + # [-1.247356, 1.017018, 0.574481]] + # logits_exp = exp(logits) / (1 + exp(logits)) + # = [[0.547013, 2.176468, 1.767836], + # [0.287263, 2.764937, 1.776208]] + # softmax_probabilities = logits_exp / logits_exp.sum() + # = [[0.121793, 0.484596, 0.393611], + # [0.059494, 0.572639, 0.367866]] + # loss = -1. * log(softmax[label]) + # = [[2.105432], [0.557500]] + expected_metrics = { + ops.GraphKeys.GLOBAL_STEP: global_step, + metric_keys.MetricKeys.LOSS: 2.662932, + metric_keys.MetricKeys.LOSS_MEAN: 1.331466, + metric_keys.MetricKeys.ACCURACY: 0.5, + } + + self.assertAllClose( + sorted_key_dict(expected_metrics), sorted_key_dict(eval_metrics)) + + +class RNNClassifierPredictionTest(test.TestCase): + + def setUp(self): + self._model_dir = tempfile.mkdtemp() + + def tearDown(self): + if self._model_dir: + writer_cache.FileWriterCache.clear() + shutil.rmtree(self._model_dir) + + def testBinaryClassPredictions(self): + create_checkpoint( + rnn_weights=[[.1, -.2], [.2, -.3], [.3, -.4]], + rnn_biases=[.2, .5], + logits_weights=[[-1.], [1.]], + logits_biases=[0.3], + global_step=0, + model_dir=self._model_dir) + + def predict_input_fn(): + return { + 'price': + sparse_tensor.SparseTensor( + values=[10., 5.], + indices=[[0, 0], [0, 1]], + dense_shape=[1, 2]), + } + + sequence_feature_columns = [ + seq_fc.sequence_numeric_column('price', shape=(1,))] + label_vocabulary = ['class_0', 'class_1'] + + est = rnn.RNNClassifier( + num_units=[2], + sequence_feature_columns=sequence_feature_columns, + n_classes=2, + label_vocabulary=label_vocabulary, + model_dir=self._model_dir) + # Uses identical numbers to testOneDimLogits. + # See that test for logits calculation. 
+ # logits = [-0.603282] + # logistic = exp(-0.6033) / (1 + exp(-0.6033)) = [0.353593] + # probabilities = [0.646407, 0.353593] + # class_ids = argmax(probabilities) = [0] + predictions = next(est.predict(predict_input_fn)) + self.assertAllClose([-0.603282], + predictions[prediction_keys.PredictionKeys.LOGITS]) + self.assertAllClose([0.353593], + predictions[prediction_keys.PredictionKeys.LOGISTIC]) + self.assertAllClose( + [0.646407, 0.353593], + predictions[prediction_keys.PredictionKeys.PROBABILITIES]) + self.assertAllClose([0], + predictions[prediction_keys.PredictionKeys.CLASS_IDS]) + self.assertEqual([b'class_0'], + predictions[prediction_keys.PredictionKeys.CLASSES]) + + def testMultiClassPredictions(self): + create_checkpoint( + rnn_weights=[[.1, -.2], [.2, -.3], [.3, -.4]], + rnn_biases=[.2, .5], + logits_weights=[[-1., 0.5, 0.2], [1., -0.3, 0.1]], + logits_biases=[0.3, 0.4, 0.5], + global_step=0, + model_dir=self._model_dir) + + def predict_input_fn(): + return { + 'price': + sparse_tensor.SparseTensor( + values=[10., 5.], + indices=[[0, 0], [0, 1]], + dense_shape=[1, 2]), + } + + sequence_feature_columns = [ + seq_fc.sequence_numeric_column('price', shape=(1,))] + label_vocabulary = ['class_0', 'class_1', 'class_2'] + + est = rnn.RNNClassifier( + num_units=[2], + sequence_feature_columns=sequence_feature_columns, + n_classes=3, + label_vocabulary=label_vocabulary, + model_dir=self._model_dir) + # Uses identical numbers to testMultiDimLogits. + # See that test for logits calculation. 
+ # logits = [-0.603282, 0.777708, 0.569756] + # logits_exp = exp(logits) = [0.547013, 2.176468, 1.767836] + # softmax_probabilities = logits_exp / logits_exp.sum() + # = [0.121793, 0.484596, 0.393611] + # class_ids = argmax(probabilities) = [1] + predictions = next(est.predict(predict_input_fn)) + self.assertAllClose([-0.603282, 0.777708, 0.569756], + predictions[prediction_keys.PredictionKeys.LOGITS]) + self.assertAllClose( + [0.121793, 0.484596, 0.393611], + predictions[prediction_keys.PredictionKeys.PROBABILITIES]) + self.assertAllClose([1], + predictions[prediction_keys.PredictionKeys.CLASS_IDS]) + self.assertEqual([b'class_1'], + predictions[prediction_keys.PredictionKeys.CLASSES]) + + +class RNNClassifierIntegrationTest(test.TestCase): + + def setUp(self): + self._model_dir = tempfile.mkdtemp() + + def tearDown(self): + if self._model_dir: + writer_cache.FileWriterCache.clear() + shutil.rmtree(self._model_dir) + + def _test_complete_flow( + self, train_input_fn, eval_input_fn, predict_input_fn, n_classes, + batch_size): + col = seq_fc.sequence_categorical_column_with_hash_bucket( + 'tokens', hash_bucket_size=10) + embed = fc.embedding_column(col, dimension=2) + feature_columns = [embed] + + cell_units = [4, 2] + est = rnn.RNNClassifier( + num_units=cell_units, + sequence_feature_columns=feature_columns, + n_classes=n_classes, + model_dir=self._model_dir) + + # TRAIN + num_steps = 10 + est.train(train_input_fn, steps=num_steps) + + # EVALUATE + scores = est.evaluate(eval_input_fn) + self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP]) + self.assertIn('loss', six.iterkeys(scores)) + + # PREDICT + predicted_proba = np.array([ + x[prediction_keys.PredictionKeys.PROBABILITIES] + for x in est.predict(predict_input_fn) + ]) + self.assertAllEqual((batch_size, n_classes), predicted_proba.shape) + + # EXPORT + feature_spec = { + 'tokens': parsing_ops.VarLenFeature(dtypes.string), + 'label': parsing_ops.FixedLenFeature([1], dtypes.int64), + } + 
serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn( + feature_spec) + export_dir = est.export_savedmodel(tempfile.mkdtemp(), + serving_input_receiver_fn) + self.assertTrue(gfile.Exists(export_dir)) + + def testNumpyInputFn(self): + """Tests complete flow with numpy_input_fn.""" + n_classes = 3 + batch_size = 10 + words = ['dog', 'cat', 'bird', 'the', 'a', 'sat', 'flew', 'slept'] + # Numpy only supports dense input, so all examples will have same length. + # TODO(b/73160931): Update test when support for prepadded data exists. + sequence_length = 3 + + features = [] + for _ in range(batch_size): + sentence = random.sample(words, sequence_length) + features.append(sentence) + + x_data = np.array(features) + y_data = np.random.randint(n_classes, size=batch_size) + + train_input_fn = numpy_io.numpy_input_fn( + x={'tokens': x_data}, + y=y_data, + batch_size=batch_size, + num_epochs=None, + shuffle=True) + eval_input_fn = numpy_io.numpy_input_fn( + x={'tokens': x_data}, + y=y_data, + batch_size=batch_size, + shuffle=False) + predict_input_fn = numpy_io.numpy_input_fn( + x={'tokens': x_data}, + batch_size=batch_size, + shuffle=False) + + self._test_complete_flow( + train_input_fn=train_input_fn, + eval_input_fn=eval_input_fn, + predict_input_fn=predict_input_fn, + n_classes=n_classes, + batch_size=batch_size) + + def testParseExampleInputFn(self): + """Tests complete flow with input_fn constructed from parse_example.""" + n_classes = 3 + batch_size = 10 + words = [b'dog', b'cat', b'bird', b'the', b'a', b'sat', b'flew', b'slept'] + + serialized_examples = [] + for _ in range(batch_size): + sequence_length = random.randint(1, len(words)) + sentence = random.sample(words, sequence_length) + label = random.randint(0, n_classes - 1) + example = example_pb2.Example(features=feature_pb2.Features( + feature={ + 'tokens': + feature_pb2.Feature(bytes_list=feature_pb2.BytesList( + value=sentence)), + 'label': + 
feature_pb2.Feature(int64_list=feature_pb2.Int64List( + value=[label])), + })) + serialized_examples.append(example.SerializeToString()) + + feature_spec = { + 'tokens': parsing_ops.VarLenFeature(dtypes.string), + 'label': parsing_ops.FixedLenFeature([1], dtypes.int64), + } + def _train_input_fn(): + features = parsing_ops.parse_example(serialized_examples, feature_spec) + labels = features.pop('label') + return features, labels + def _eval_input_fn(): + features = parsing_ops.parse_example( + input_lib.limit_epochs(serialized_examples, num_epochs=1), + feature_spec) + labels = features.pop('label') + return features, labels + def _predict_input_fn(): + features = parsing_ops.parse_example( + input_lib.limit_epochs(serialized_examples, num_epochs=1), + feature_spec) + features.pop('label') + return features, None + + self._test_complete_flow( + train_input_fn=_train_input_fn, + eval_input_fn=_eval_input_fn, + predict_input_fn=_predict_input_fn, + n_classes=n_classes, + batch_size=batch_size) + + +if __name__ == '__main__': + test.main() diff --git a/tensorflow/contrib/factorization/python/ops/gmm_ops.py b/tensorflow/contrib/factorization/python/ops/gmm_ops.py index 5d77bc77e124378e13667673e4e841c0a1135b31..e076631bc16fd379a2ad31af9055a7388d98c7ca 100644 --- a/tensorflow/contrib/factorization/python/ops/gmm_ops.py +++ b/tensorflow/contrib/factorization/python/ops/gmm_ops.py @@ -54,10 +54,10 @@ def _covariance(x, diag): diagonal matrix just the diagonal is returned. 
""" num_points = math_ops.to_float(array_ops.shape(x)[0]) - x -= math_ops.reduce_mean(x, 0, keep_dims=True) + x -= math_ops.reduce_mean(x, 0, keepdims=True) if diag: cov = math_ops.reduce_sum( - math_ops.square(x), 0, keep_dims=True) / (num_points - 1) + math_ops.square(x), 0, keepdims=True) / (num_points - 1) else: cov = math_ops.matmul(x, x, transpose_a=True) / (num_points - 1) return cov @@ -313,7 +313,7 @@ class GmmAlgorithm(object): # TODO(xavigonzalvo): look into alternatives to log for # reparametrization of variance parameters. det_expanded = math_ops.reduce_sum( - math_ops.log(self._covs + 1e-3), 1, keep_dims=True) + math_ops.log(self._covs + 1e-3), 1, keepdims=True) diff = shard - self._means x2 = math_ops.square(diff) cov_expanded = array_ops.expand_dims(1.0 / (self._covs + 1e-3), 2) @@ -351,7 +351,7 @@ class GmmAlgorithm(object): shard_id: id of current shard_id. """ self._prior_probs[shard_id] = math_ops.reduce_logsumexp( - self._probs[shard_id], axis=1, keep_dims=True) + self._probs[shard_id], axis=1, keepdims=True) def _define_expectation_operation(self, shard_id): # Shape broadcasting. @@ -375,7 +375,7 @@ class GmmAlgorithm(object): """ # Soft assignment of each data point to each of the two clusters. self._points_in_k[shard_id] = math_ops.reduce_sum( - self._w[shard_id], 0, keep_dims=True) + self._w[shard_id], 0, keepdims=True) # Partial means. w_mul_x = array_ops.expand_dims( math_ops.matmul( @@ -397,7 +397,7 @@ class GmmAlgorithm(object): # Compute the effective number of data points assigned to component k. with ops.control_dependencies(self._w): points_in_k = array_ops.squeeze( - math_ops.add_n(self._points_in_k), squeeze_dims=[0]) + math_ops.add_n(self._points_in_k), axis=[0]) # Update alpha. 
if 'w' in self._params: final_points_in_k = points_in_k / num_batches @@ -454,7 +454,7 @@ class GmmAlgorithm(object): for shard_id, prior_probs in enumerate(self._prior_probs): op.append(prior_probs + math_ops.log(self._w[shard_id])) self._scores = array_ops.squeeze( - math_ops.reduce_logsumexp(op, axis=2, keep_dims=True), axis=0) + math_ops.reduce_logsumexp(op, axis=2, keepdims=True), axis=0) def gmm(inp, diff --git a/tensorflow/contrib/factorization/python/ops/kmeans.py b/tensorflow/contrib/factorization/python/ops/kmeans.py index bfe338c9f9a7b761cfcd627b92f1682af97630c9..9ffdd3ba5e8ac496533d0207f2b6846dbc92bc89 100644 --- a/tensorflow/contrib/factorization/python/ops/kmeans.py +++ b/tensorflow/contrib/factorization/python/ops/kmeans.py @@ -374,11 +374,11 @@ class KMeansClustering(estimator.Estimator): than `num_clusters`, a TensorFlow runtime error occurs. distance_metric: The distance metric used for clustering. One of: * `KMeansClustering.SQUARED_EUCLIDEAN_DISTANCE`: Euclidean distance - between vectors `u` and `v` is defined as `\\(||u - v||_2\\)` + between vectors `u` and `v` is defined as \\(||u - v||_2\\) which is the square root of the sum of the absolute squares of the elements' difference. * `KMeansClustering.COSINE_DISTANCE`: Cosine distance between vectors - `u` and `v` is defined as `\\(1 - (u . v) / (||u||_2 ||v||_2)\\)`. + `u` and `v` is defined as \\(1 - (u . v) / (||u||_2 ||v||_2)\\). random_seed: Python integer. Seed for PRNG used to initialize centers. use_mini_batch: A boolean specifying whether to use the mini-batch k-means algorithm. See explanation above. diff --git a/tensorflow/contrib/framework/__init__.py b/tensorflow/contrib/framework/__init__.py index cbb68bd3eb257f9472515e5c29ce4f02057be321..11397e86bd811e376ff4b60f86109d9ee5dd1a62 100644 --- a/tensorflow/contrib/framework/__init__.py +++ b/tensorflow/contrib/framework/__init__.py @@ -72,6 +72,9 @@ See the @{$python/contrib.framework} guide. 
@@variable @@VariableDeviceChooser @@convolutional_delta_orthogonal +@@convolutional_orthogonal_1d +@@convolutional_orthogonal_2d +@@convolutional_orthogonal_3d @@zero_initializer @@load_checkpoint @@ -115,9 +118,13 @@ from tensorflow.python.framework.smart_cond import smart_cond from tensorflow.python.framework.smart_cond import smart_constant_value from tensorflow.python.framework.tensor_spec import BoundedTensorSpec from tensorflow.python.framework.tensor_spec import TensorSpec +from tensorflow.python.ops.array_ops import broadcast_to from tensorflow.python.ops.init_ops import convolutional_delta_orthogonal +from tensorflow.python.ops.init_ops import convolutional_orthogonal_1d +from tensorflow.python.ops.init_ops import convolutional_orthogonal_2d +from tensorflow.python.ops.init_ops import convolutional_orthogonal_3d from tensorflow.python.util.all_util import remove_undocumented -_allowed_symbols = ['nest'] +_allowed_symbols = ['nest', 'broadcast_to'] remove_undocumented(__name__, allowed_exception_list=_allowed_symbols) diff --git a/tensorflow/contrib/framework/python/framework/tensor_util_test.py b/tensorflow/contrib/framework/python/framework/tensor_util_test.py index a2834b648933772cab53002462c3edbe9a553e94..8fc4f60492b0bfb22ea78cb7b5906e452bb6da58 100644 --- a/tensorflow/contrib/framework/python/framework/tensor_util_test.py +++ b/tensorflow/contrib/framework/python/framework/tensor_util_test.py @@ -48,7 +48,7 @@ class LocalVariabletest(test.TestCase): variables = variables_lib.local_variables() self.assertEquals(2, len(variables)) self.assertRaises(errors_impl.OpError, sess.run, variables) - variables_lib.initialize_variables(variables).run() + variables_lib.variables_initializer(variables).run() self.assertAllEqual(set([value0, value1]), set(sess.run(variables))) diff --git a/tensorflow/contrib/framework/python/ops/arg_scope_test.py b/tensorflow/contrib/framework/python/ops/arg_scope_test.py index 
7ba9d4ffa90f6860629b15a2ea91e0c573bf6368..4c3879d4fc08b53ea8be5f1256a830a64fb39af6 100644 --- a/tensorflow/contrib/framework/python/ops/arg_scope_test.py +++ b/tensorflow/contrib/framework/python/ops/arg_scope_test.py @@ -170,6 +170,30 @@ class ArgScopeTest(test.TestCase): self.assertTupleEqual(args, func1_args) self.assertDictEqual(kwargs, func1_kwargs) + def testNestedArgScopeObjectCreatedOutsideScopeOverridesArgScope(self): + + def get_scope_object(): + with arg_scope([func1], a=1, b=None, c=[1]) as sc: + return sc + + scope_object = get_scope_object() + with arg_scope([func1], b=2, d=10): + with arg_scope(scope_object): + args, kwargs = func1(0) + self.assertTupleEqual(args, (0,)) + self.assertDictEqual(kwargs, {'a': 1, 'b': None, 'c': [1]}) + + def testArgScopeObjectCreatedWithinScopeInheritsArgScope(self): + def get_scope_object(): + with arg_scope([func1], a=1, b=None, c=[1]) as sc: + return sc + + with arg_scope([func1], b=2, d=10): + with arg_scope(get_scope_object()): + args, kwargs = func1(0) + self.assertTupleEqual(args, (0,)) + self.assertDictEqual(kwargs, {'a': 1, 'b': None, 'c': [1], 'd': 10}) + def testSharedArgScope(self): func1_args = (0,) func1_kwargs = {'a': 1, 'b': None, 'c': [1]} diff --git a/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc b/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc index 0e06575d96f9b9538f0245b12d48cfd7c0e8d981..1e8f011b5d8ee6f9dff8fd0c7a650a2085ffce56 100644 --- a/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc +++ b/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc @@ -543,7 +543,8 @@ void LaunchFusedConv2DBiasActivationOp:: fused_conv_parameters, &algorithm_config)) { std::vector algorithms; CHECK(stream->parent()->GetConvolveAlgorithms( - fused_conv_parameters.ShouldIncludeWinogradNonfusedAlgo(), + fused_conv_parameters.ShouldIncludeWinogradNonfusedAlgo( + stream->parent()), &algorithms)); dnn::ProfileResult 
best_result; dnn::ProfileResult best_result_no_scratch; diff --git a/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op.py b/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op.py index a97adf622e6e576f8b5ce2babe004cb3a46d80a5..983b6dc8e5a1512ba81ecbc8d5ca5adaea09afe4 100644 --- a/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op.py +++ b/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op.py @@ -65,7 +65,7 @@ def fused_conv2d_bias_activation(conv_input, side_input_scale: A scalar `float32` that will be multiplied by side_input. This is optional and defaults to 0. side_input: A `Tensor` of the format specified by `data_format`. - This is useful for imlementing ResNet blocks. + This is useful for implementing ResNet blocks. activation_mode: (optional) currently must be the default "Relu". Note that in qint8 mode, it also clips to 127, so acts like ReluX. data_format: Specifies the data format. diff --git a/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test.py b/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test.py index bb155aa2496cbafd9f0630d3dffb2ba69395186c..3d0ed899322c26bf4ae428930899d7a5885e9f21 100644 --- a/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test.py +++ b/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test.py @@ -566,7 +566,7 @@ def GetInceptionFwdTest(input_size, filter_size, stride, padding, return Test -def CalculateCovolvedOutputDim(input_dim, filter_dim, stride, padding_type): +def CalculateConvolvedOutputDim(input_dim, filter_dim, stride, padding_type): """Calculates the size of an output dimension of a strided convolution. 
Given the sizes of the corresponding dimension of the input and filter shapes, @@ -827,10 +827,10 @@ class FusedConvInt8Tests(test.TestCase): maxval=1.0, dtype=dtypes.float32), -1.0, 1.0, dtypes.qint8) - output_height = CalculateCovolvedOutputDim(input_height, filter_height, - vertical_stride, padding_type) - output_width = CalculateCovolvedOutputDim(input_width, filter_width, - horizontal_stride, padding_type) + output_height = CalculateConvolvedOutputDim(input_height, filter_height, + vertical_stride, padding_type) + output_width = CalculateConvolvedOutputDim(input_width, filter_width, + horizontal_stride, padding_type) print("output_height=", output_height, ", output_width=", output_width) side_input, _, _ = gen_array_ops.quantize_v2( diff --git a/tensorflow/contrib/gan/BUILD b/tensorflow/contrib/gan/BUILD index 9e56d3c039c93fee3a5a26915a74d2f1cd1e2f48..b305f37791d71f5a6edeada2bb710a2e5f23087d 100644 --- a/tensorflow/contrib/gan/BUILD +++ b/tensorflow/contrib/gan/BUILD @@ -354,6 +354,7 @@ py_test( name = "classifier_metrics_test", srcs = ["python/eval/python/classifier_metrics_test.py"], srcs_version = "PY2AND3", + tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":classifier_metrics", "//tensorflow/core:protos_all_py", @@ -363,6 +364,7 @@ py_test( "//tensorflow/python:framework_ops", "//tensorflow/python:variables", "//third_party/py/numpy", + "@absl_py//absl/testing:parameterized", ], ) diff --git a/tensorflow/contrib/gan/python/eval/python/classifier_metrics_test.py b/tensorflow/contrib/gan/python/eval/python/classifier_metrics_test.py index 663e49bdca3cb2dd9257da326488c877fcc4256d..4fb8d58bc9125664d42260de72b83b2362eff9ba 100644 --- a/tensorflow/contrib/gan/python/eval/python/classifier_metrics_test.py +++ b/tensorflow/contrib/gan/python/eval/python/classifier_metrics_test.py @@ -22,6 +22,7 @@ import os import tarfile import tempfile +from absl.testing import parameterized import numpy as np from scipy import linalg as scp_linalg @@ 
-182,13 +183,20 @@ def _run_with_mock(function, *args, **kwargs): return function(*args, **kwargs) -class ClassifierMetricsTest(test.TestCase): +class ClassifierMetricsTest(test.TestCase, parameterized.TestCase): - def test_run_inception_graph(self): + @parameterized.named_parameters( + ('GraphDef', False), + ('DefaultGraphDefFn', True)) + def test_run_inception_graph(self, use_default_graph_def): """Test `run_inception` graph construction.""" batch_size = 7 img = array_ops.ones([batch_size, 299, 299, 3]) - logits = _run_with_mock(classifier_metrics.run_inception, img) + + if use_default_graph_def: + logits = _run_with_mock(classifier_metrics.run_inception, img) + else: + logits = classifier_metrics.run_inception(img, _get_dummy_graphdef()) self.assertTrue(isinstance(logits, ops.Tensor)) logits.shape.assert_is_compatible_with([batch_size, 1001]) @@ -196,14 +204,23 @@ class ClassifierMetricsTest(test.TestCase): # Check that none of the model variables are trainable. self.assertListEqual([], variables.trainable_variables()) - def test_run_inception_graph_pool_output(self): + @parameterized.named_parameters( + ('GraphDef', False), + ('DefaultGraphDefFn', True)) + def test_run_inception_graph_pool_output(self, use_default_graph_def): """Test `run_inception` graph construction with pool output.""" batch_size = 3 img = array_ops.ones([batch_size, 299, 299, 3]) - pool = _run_with_mock( - classifier_metrics.run_inception, - img, - output_tensor=classifier_metrics.INCEPTION_FINAL_POOL) + + if use_default_graph_def: + pool = _run_with_mock( + classifier_metrics.run_inception, + img, + output_tensor=classifier_metrics.INCEPTION_FINAL_POOL) + else: + pool = classifier_metrics.run_inception( + img, _get_dummy_graphdef(), + output_tensor=classifier_metrics.INCEPTION_FINAL_POOL) self.assertTrue(isinstance(pool, ops.Tensor)) pool.shape.assert_is_compatible_with([batch_size, 2048]) diff --git a/tensorflow/contrib/gan/python/eval/python/sliced_wasserstein_impl.py 
b/tensorflow/contrib/gan/python/eval/python/sliced_wasserstein_impl.py index 4b10bc0f8e607c02763d8ea622d6f8f2572c586d..4b1105f6bd4f21a0da02338b0fc9db87a41b145f 100644 --- a/tensorflow/contrib/gan/python/eval/python/sliced_wasserstein_impl.py +++ b/tensorflow/contrib/gan/python/eval/python/sliced_wasserstein_impl.py @@ -161,7 +161,7 @@ def _sliced_wasserstein(a, b, random_sampling_count, random_projection_dim): proj = random_ops.random_normal( [array_ops.shape(a)[1], random_projection_dim]) proj *= math_ops.rsqrt( - math_ops.reduce_sum(math_ops.square(proj), 0, keep_dims=True)) + math_ops.reduce_sum(math_ops.square(proj), 0, keepdims=True)) # Project both distributions and sort them. proj_a = math_ops.matmul(a, proj) proj_b = math_ops.matmul(b, proj) diff --git a/tensorflow/contrib/gan/python/eval/python/summaries_impl.py b/tensorflow/contrib/gan/python/eval/python/summaries_impl.py index 0d1afad72da8a8e087239868e25ddebe23490d1e..508f487722fba89cc8391a340f73673a526e86c4 100644 --- a/tensorflow/contrib/gan/python/eval/python/summaries_impl.py +++ b/tensorflow/contrib/gan/python/eval/python/summaries_impl.py @@ -31,6 +31,7 @@ __all__ = [ 'add_image_comparison_summaries', 'add_gan_model_summaries', 'add_regularization_loss_summaries', + 'add_cyclegan_image_summaries', ] @@ -51,14 +52,9 @@ def add_gan_model_image_summaries(gan_model, grid_size=4, model_summaries=True): ValueError: If real and generated data aren't images. """ if isinstance(gan_model, namedtuples.CycleGANModel): - saved_params = locals() - saved_params.pop('gan_model', None) - with ops.name_scope('cyclegan_x2y_image_summaries'): - add_gan_model_image_summaries(gan_model.model_x2y, **saved_params) - with ops.name_scope('cyclegan_y2x_image_summaries'): - add_gan_model_image_summaries(gan_model.model_y2x, **saved_params) - return - + raise ValueError( + '`add_gan_model_image_summaries` does not take CycleGANModels. 
Please ' + 'use `add_cyclegan_image_summaries` instead.') _assert_is_image(gan_model.real_data) _assert_is_image(gan_model.generated_data) @@ -89,6 +85,49 @@ def add_gan_model_image_summaries(gan_model, grid_size=4, model_summaries=True): add_gan_model_summaries(gan_model) +def add_cyclegan_image_summaries(cyclegan_model): + """Adds image summaries for CycleGAN. + + There are two summaries, one for each generator. The first image is the + generator input, the second is the generator output, and the third is G(F(x)). + + Args: + cyclegan_model: A CycleGANModel tuple. + + Raises: + ValueError: If `cyclegan_model` isn't a CycleGANModel. + ValueError: If generated data, generator inputs, and reconstructions aren't + images. + ValueError: If the generator input, generated data, and reconstructions + aren't all the same size. + """ + if not isinstance(cyclegan_model, namedtuples.CycleGANModel): + raise ValueError('`cyclegan_model` was not a CycleGANModel. Instead, was ' + '%s' % type(cyclegan_model)) + + _assert_is_image(cyclegan_model.model_x2y.generator_inputs) + _assert_is_image(cyclegan_model.model_x2y.generated_data) + _assert_is_image(cyclegan_model.reconstructed_x) + _assert_is_image(cyclegan_model.model_y2x.generator_inputs) + _assert_is_image(cyclegan_model.model_y2x.generated_data) + _assert_is_image(cyclegan_model.reconstructed_y) + + def _add_comparison_summary(gan_model, reconstructions): + image_list = (array_ops.unstack(gan_model.generator_inputs[:1]) + + array_ops.unstack(gan_model.generated_data[:1]) + + array_ops.unstack(reconstructions[:1])) + summary.image( + 'image_comparison', eval_utils.image_reshaper( + image_list, num_cols=len(image_list)), max_outputs=1) + + with ops.name_scope('x2y_image_comparison_summaries'): + _add_comparison_summary( + cyclegan_model.model_x2y, cyclegan_model.reconstructed_x) + with ops.name_scope('y2x_image_comparison_summaries'): + _add_comparison_summary( + cyclegan_model.model_y2x, cyclegan_model.reconstructed_y) + + 
def add_image_comparison_summaries(gan_model, num_comparisons=2, display_diffs=False): """Adds image summaries to compare triplets of images. @@ -109,15 +148,6 @@ def add_image_comparison_summaries(gan_model, num_comparisons=2, ValueError: If the generator input, real, and generated data aren't all the same size. """ - if isinstance(gan_model, namedtuples.CycleGANModel): - saved_params = locals() - saved_params.pop('gan_model', None) - with ops.name_scope('cyclegan_x2y_image_comparison_summaries'): - add_image_comparison_summaries(gan_model.model_x2y, **saved_params) - with ops.name_scope('cyclegan_y2x_image_comparison_summaries'): - add_image_comparison_summaries(gan_model.model_y2x, **saved_params) - return - _assert_is_image(gan_model.generator_inputs) _assert_is_image(gan_model.generated_data) _assert_is_image(gan_model.real_data) diff --git a/tensorflow/contrib/gan/python/eval/python/summaries_test.py b/tensorflow/contrib/gan/python/eval/python/summaries_test.py index 45eb108586bed07434ac29595164745eac6054c1..33d51bfc218ab93fb52439b1eefed98a4568c4a1 100644 --- a/tensorflow/contrib/gan/python/eval/python/summaries_test.py +++ b/tensorflow/contrib/gan/python/eval/python/summaries_test.py @@ -65,15 +65,14 @@ def get_cyclegan_model(): return namedtuples.CycleGANModel( model_x2y=model_x2y, model_y2x=model_y2x, - reconstructed_x=array_ops.zeros([3, 30, 35, 6]), - reconstructed_y=array_ops.zeros([3, 30, 35, 6])) + reconstructed_x=array_ops.zeros([4, 32, 32, 3]), + reconstructed_y=array_ops.zeros([4, 32, 32, 3])) class SummariesTest(test.TestCase): - def _test_add_gan_model_image_summaries_impl(self, get_model_fn, - expected_num_summary_ops, - model_summaries): + def _test_add_gan_model_image_summaries_impl( + self, get_model_fn, expected_num_summary_ops, model_summaries): summaries.add_gan_model_image_summaries(get_model_fn(), grid_size=2, model_summaries=model_summaries) @@ -89,8 +88,9 @@ class SummariesTest(test.TestCase): def 
test_add_gan_model_image_summaries_no_model(self): self._test_add_gan_model_image_summaries_impl(get_gan_model, 2, False) - def test_add_gan_model_image_summaries_for_cyclegan(self): - self._test_add_gan_model_image_summaries_impl(get_cyclegan_model, 10, True) + def test_cyclegan_image_summaries_dont_work(self): + with self.assertRaises(ValueError): + summaries.add_gan_model_image_summaries(get_cyclegan_model()) def _test_add_gan_model_summaries_impl(self, get_model_fn, expected_num_summary_ops): @@ -137,7 +137,11 @@ class SummariesTest(test.TestCase): self._test_add_image_comparison_summaries_impl(get_gan_model, 1) def test_add_image_comparison_summaries_for_cyclegan(self): - self._test_add_image_comparison_summaries_impl(get_cyclegan_model, 2) + summaries.add_cyclegan_image_summaries(get_cyclegan_model()) + + self.assertEquals(2, len(ops.get_collection(ops.GraphKeys.SUMMARIES))) + with self.test_session(use_gpu=True): + summary.merge_all().eval() if __name__ == '__main__': diff --git a/tensorflow/contrib/gan/python/features/python/virtual_batchnorm_impl.py b/tensorflow/contrib/gan/python/features/python/virtual_batchnorm_impl.py index f8b372546b60ec8fa5fd1d72b57adaf67596c059..650eab97a3952e9aec2b489fffcc83c3bc49f2dd 100644 --- a/tensorflow/contrib/gan/python/features/python/virtual_batchnorm_impl.py +++ b/tensorflow/contrib/gan/python/features/python/virtual_batchnorm_impl.py @@ -64,11 +64,11 @@ def _statistics(x, axes): y = math_ops.cast(x, dtypes.float32) if x.dtype == dtypes.float16 else x # Compute true mean while keeping the dims for proper broadcasting. 
- shift = array_ops.stop_gradient(math_ops.reduce_mean(y, axes, keep_dims=True)) + shift = array_ops.stop_gradient(math_ops.reduce_mean(y, axes, keepdims=True)) - shifted_mean = math_ops.reduce_mean(y - shift, axes, keep_dims=True) + shifted_mean = math_ops.reduce_mean(y - shift, axes, keepdims=True) mean = shifted_mean + shift - mean_squared = math_ops.reduce_mean(math_ops.square(y), axes, keep_dims=True) + mean_squared = math_ops.reduce_mean(math_ops.square(y), axes, keepdims=True) mean = array_ops.squeeze(mean, axes) mean_squared = array_ops.squeeze(mean_squared, axes) diff --git a/tensorflow/contrib/gan/python/train.py b/tensorflow/contrib/gan/python/train.py index 73acd05b60a5fb02601423fd9234a56a34f75276..6fa43059f3125daea080f780210223363d0a89f9 100644 --- a/tensorflow/contrib/gan/python/train.py +++ b/tensorflow/contrib/gan/python/train.py @@ -710,7 +710,10 @@ def gan_train_ops( be used to train a generator/discriminator pair. """ if isinstance(model, namedtuples.CycleGANModel): - saved_params = locals() + # Get and store all arguments other than model and loss from locals. + # Contents of locals should not be modified, may not affect values. So make + # a copy. https://docs.python.org/2/library/functions.html#locals. 
+ saved_params = dict(locals()) saved_params.pop('model', None) saved_params.pop('loss', None) kwargs = saved_params.pop('kwargs', {}) diff --git a/tensorflow/contrib/gdr/gdr_rendezvous_mgr.cc b/tensorflow/contrib/gdr/gdr_rendezvous_mgr.cc index 28f68cec8cce126f1b177a73e197ccd7ab749f4a..94f522c04e5a09ed2d9355fa675125c340407923 100644 --- a/tensorflow/contrib/gdr/gdr_rendezvous_mgr.cc +++ b/tensorflow/contrib/gdr/gdr_rendezvous_mgr.cc @@ -155,7 +155,7 @@ class GdrRemoteRendezvous : public BaseRemoteRendezvous { } Device* dst_device; - Status s = sess->device_mgr->LookupDevice(parsed.dst_device, &dst_device); + Status s = sess->device_mgr()->LookupDevice(parsed.dst_device, &dst_device); if (!s.ok()) { sess->worker_cache->ReleaseWorker(src_worker, rwi); done(s, Args(), recv_args, Tensor{}, false); diff --git a/tensorflow/contrib/graph_editor/select.py b/tensorflow/contrib/graph_editor/select.py index 3ea6ff4d6163b107ca0daaf3b9ad1daf0ccc1f6f..d700e6e1a7523622f845acbbc353eb0f438c9bc2 100644 --- a/tensorflow/contrib/graph_editor/select.py +++ b/tensorflow/contrib/graph_editor/select.py @@ -383,6 +383,7 @@ def get_within_boundary_ops(ops, def get_forward_walk_ops(seed_ops, inclusive=True, within_ops=None, + within_ops_fn=None, stop_at_ts=(), control_outputs=None): """Do a forward graph walk and return all the visited ops. @@ -395,6 +396,9 @@ def get_forward_walk_ops(seed_ops, within_ops: an iterable of `tf.Operation` within which the search is restricted. If `within_ops` is `None`, the search is performed within the whole graph. + within_ops_fn: if provided, a function on ops that should return True iff + the op is within the graph traversal. This can be used along within_ops, + in which case an op is within if it is also in within_ops. stop_at_ts: an iterable of tensors at which the graph walk stops. control_outputs: a `util.ControlOutputs` instance or None. If not `None`, it will be used while walking the graph forward. 
@@ -423,7 +427,8 @@ def get_forward_walk_ops(seed_ops, seed_ops &= within_ops def is_within(op): - return within_ops is None or op in within_ops + return (within_ops is None or op in within_ops) and ( + within_ops_fn is None or within_ops_fn(op)) result = list(seed_ops) wave = set(seed_ops) @@ -450,6 +455,7 @@ def get_forward_walk_ops(seed_ops, def get_backward_walk_ops(seed_ops, inclusive=True, within_ops=None, + within_ops_fn=None, stop_at_ts=(), control_inputs=False): """Do a backward graph walk and return all the visited ops. @@ -462,6 +468,9 @@ def get_backward_walk_ops(seed_ops, within_ops: an iterable of `tf.Operation` within which the search is restricted. If `within_ops` is `None`, the search is performed within the whole graph. + within_ops_fn: if provided, a function on ops that should return True iff + the op is within the graph traversal. This can be used along within_ops, + in which case an op is within if it is also in within_ops. stop_at_ts: an iterable of tensors at which the graph walk stops. control_inputs: if True, control inputs will be used while moving backward. Returns: @@ -488,7 +497,8 @@ def get_backward_walk_ops(seed_ops, seed_ops &= within_ops def is_within(op): - return within_ops is None or op in within_ops + return (within_ops is None or op in within_ops) and ( + within_ops_fn is None or within_ops_fn(op)) result = list(seed_ops) wave = set(seed_ops) @@ -516,6 +526,7 @@ def get_walks_intersection_ops(forward_seed_ops, forward_inclusive=True, backward_inclusive=True, within_ops=None, + within_ops_fn=None, control_inputs=False, control_outputs=None, control_ios=None): @@ -535,6 +546,9 @@ def get_walks_intersection_ops(forward_seed_ops, within_ops: an iterable of tf.Operation within which the search is restricted. If within_ops is None, the search is performed within the whole graph. + within_ops_fn: if provided, a function on ops that should return True iff + the op is within the graph traversal. 
This can be used along within_ops, + in which case an op is within if it is also in within_ops. control_inputs: A boolean indicating whether control inputs are enabled. control_outputs: An instance of util.ControlOutputs or None. If not None, control outputs are enabled. @@ -555,11 +569,13 @@ def get_walks_intersection_ops(forward_seed_ops, forward_seed_ops, inclusive=forward_inclusive, within_ops=within_ops, + within_ops_fn=within_ops_fn, control_outputs=control_outputs) backward_ops = get_backward_walk_ops( backward_seed_ops, inclusive=backward_inclusive, within_ops=within_ops, + within_ops_fn=within_ops_fn, control_inputs=control_inputs) return [op for op in forward_ops if op in backward_ops] @@ -569,6 +585,7 @@ def get_walks_union_ops(forward_seed_ops, forward_inclusive=True, backward_inclusive=True, within_ops=None, + within_ops_fn=None, control_inputs=False, control_outputs=None, control_ios=None): @@ -587,6 +604,9 @@ def get_walks_union_ops(forward_seed_ops, resulting set. within_ops: restrict the search within those operations. If within_ops is None, the search is done within the whole graph. + within_ops_fn: if provided, a function on ops that should return True iff + the op is within the graph traversal. This can be used along within_ops, + in which case an op is within if it is also in within_ops. control_inputs: A boolean indicating whether control inputs are enabled. control_outputs: An instance of util.ControlOutputs or None. If not None, control outputs are enabled. 
@@ -607,11 +627,13 @@ def get_walks_union_ops(forward_seed_ops, forward_seed_ops, inclusive=forward_inclusive, within_ops=within_ops, + within_ops_fn=within_ops_fn, control_outputs=control_outputs) backward_ops = get_backward_walk_ops( backward_seed_ops, inclusive=backward_inclusive, within_ops=within_ops, + within_ops_fn=within_ops_fn, control_inputs=control_inputs) return util.concatenate_unique(forward_ops, backward_ops) diff --git a/tensorflow/contrib/graph_editor/tests/select_test.py b/tensorflow/contrib/graph_editor/tests/select_test.py index 82f999637d0c1866a5a329974f021fe2e30fd33f..d12c6d3cbd11dde2b609a59154297a8907b0cadc 100644 --- a/tensorflow/contrib/graph_editor/tests/select_test.py +++ b/tensorflow/contrib/graph_editor/tests/select_test.py @@ -77,12 +77,10 @@ class SelectTest(test.TestCase): """Test for ge.get_ops_ios.""" control_outputs = ge.util.ControlOutputs(self.graph) self.assertEqual( - len(ge.get_ops_ios( - self.h.op, control_ios=control_outputs)), 3) + len(ge.get_ops_ios(self.h.op, control_ios=control_outputs)), 3) self.assertEqual(len(ge.get_ops_ios(self.h.op)), 2) self.assertEqual( - len(ge.get_ops_ios( - self.c.op, control_ios=control_outputs)), 6) + len(ge.get_ops_ios(self.c.op, control_ios=control_outputs)), 6) self.assertEqual(len(ge.get_ops_ios(self.c.op)), 5) def test_compute_boundary_ts_0(self): @@ -135,16 +133,49 @@ class SelectTest(test.TestCase): ops = ge.get_walks_intersection_ops([self.c.op], [self.g.op]) self.assertEqual(len(ops), 2) + ops = ge.get_walks_intersection_ops([self.a.op], [self.f.op]) + self.assertEqual(len(ops), 3) + self.assertTrue(self.a.op in ops) + self.assertTrue(self.c.op in ops) + self.assertTrue(self.f.op in ops) + + within_ops = [self.a.op, self.f.op] + ops = ge.get_walks_intersection_ops( + [self.a.op], [self.f.op], within_ops=within_ops) + self.assertEqual(len(ops), 0) + + within_ops_fn = lambda op: op in [self.a.op, self.f.op] + ops = ge.get_walks_intersection_ops( + [self.a.op], [self.f.op], 
within_ops_fn=within_ops_fn) + self.assertEqual(len(ops), 0) + def test_get_walks_union(self): """Test for ge.get_walks_union_ops.""" ops = ge.get_walks_union_ops([self.f.op], [self.g.op]) self.assertEqual(len(ops), 6) + ops = ge.get_walks_union_ops([self.a.op], [self.f.op]) + self.assertEqual(len(ops), 8) + + within_ops = [self.a.op, self.c.op, self.d.op, self.f.op] + ops = ge.get_walks_union_ops([self.a.op], [self.f.op], + within_ops=within_ops) + self.assertEqual(len(ops), 4) + self.assertTrue(self.b.op not in ops) + + within_ops_fn = lambda op: op in [self.a.op, self.c.op, self.f.op] + ops = ge.get_walks_union_ops([self.a.op], [self.f.op], + within_ops_fn=within_ops_fn) + self.assertEqual(len(ops), 3) + self.assertTrue(self.b.op not in ops) + self.assertTrue(self.d.op not in ops) + def test_select_ops(self): parameters = ( (("^foo/",), 7), (("^foo/bar/",), 4), - (("^foo/bar/", "a"), 5),) + (("^foo/bar/", "a"), 5), + ) for param, length in parameters: ops = ge.select_ops(*param, graph=self.graph) self.assertEqual(len(ops), length) @@ -152,7 +183,8 @@ class SelectTest(test.TestCase): def test_select_ts(self): parameters = ( (".*:0", 8), - (r".*/bar/\w+:0", 4),) + (r".*/bar/\w+:0", 4), + ) for regex, length in parameters: ts = ge.select_ts(regex, graph=self.graph) self.assertEqual(len(ts), length) @@ -160,12 +192,121 @@ class SelectTest(test.TestCase): def test_select_ops_and_ts(self): parameters = ( (("^foo/.*",), 7, 0), - (("^foo/.*", "(?#ts)^foo/bar/.*"), 7, 4),) + (("^foo/.*", "(?#ts)^foo/bar/.*"), 7, 4), + ) for param, l0, l1 in parameters: ops, ts = ge.select_ops_and_ts(*param, graph=self.graph) self.assertEqual(len(ops), l0) self.assertEqual(len(ts), l1) + def test_forward_walk_ops(self): + seed_ops = [self.a.op, self.d.op] + # Include all ops except for self.g.op + within_ops = [ + x.op for x in [self.a, self.b, self.c, self.d, self.e, self.f, self.h] + ] + # For the fn, exclude self.e.op. 
+ within_ops_fn = lambda op: op not in (self.e.op,) + stop_at_ts = (self.f,) + + with self.graph.as_default(): + # No b.op since it's an independent source node. + # No g.op from within_ops. + # No e.op from within_ops_fn. + # No h.op from stop_at_ts and within_ops. + ops = ge.select.get_forward_walk_ops( + seed_ops, + inclusive=True, + within_ops=within_ops, + within_ops_fn=within_ops_fn, + stop_at_ts=stop_at_ts) + self.assertEqual( + set(ops), set([self.a.op, self.c.op, self.d.op, self.f.op])) + + # Also no a.op and d.op when inclusive=False + ops = ge.select.get_forward_walk_ops( + seed_ops, + inclusive=False, + within_ops=within_ops, + within_ops_fn=within_ops_fn, + stop_at_ts=stop_at_ts) + self.assertEqual(set(ops), set([self.c.op, self.f.op])) + + # Not using within_ops_fn adds e.op. + ops = ge.select.get_forward_walk_ops( + seed_ops, + inclusive=False, + within_ops=within_ops, + stop_at_ts=stop_at_ts) + self.assertEqual(set(ops), set([self.c.op, self.e.op, self.f.op])) + + # Not using stop_at_ts adds back h.op. + ops = ge.select.get_forward_walk_ops( + seed_ops, inclusive=False, within_ops=within_ops) + self.assertEqual( + set(ops), set([self.c.op, self.e.op, self.f.op, self.h.op])) + + # Starting just form a (the tensor, not op) omits a, b, d. + ops = ge.select.get_forward_walk_ops([self.a], inclusive=True) + self.assertEqual( + set(ops), set([self.c.op, self.e.op, self.f.op, self.g.op, + self.h.op])) + + def test_backward_walk_ops(self): + seed_ops = [self.h.op] + # Include all ops except for self.g.op + within_ops = [ + x.op for x in [self.a, self.b, self.c, self.d, self.e, self.f, self.h] + ] + # For the fn, exclude self.c.op. + within_ops_fn = lambda op: op not in (self.c.op,) + stop_at_ts = (self.f,) + + with self.graph.as_default(): + # Backward walk only includes h since we stop at f and g is not within. 
+ ops = ge.select.get_backward_walk_ops( + seed_ops, + inclusive=True, + within_ops=within_ops, + within_ops_fn=within_ops_fn, + stop_at_ts=stop_at_ts) + self.assertEqual(set(ops), set([self.h.op])) + + # If we do inclusive=False, the result is empty. + ops = ge.select.get_backward_walk_ops( + seed_ops, + inclusive=False, + within_ops=within_ops, + within_ops_fn=within_ops_fn, + stop_at_ts=stop_at_ts) + self.assertEqual(set(ops), set()) + + # Removing stop_at_fs adds f.op, d.op. + ops = ge.select.get_backward_walk_ops( + seed_ops, + inclusive=True, + within_ops=within_ops, + within_ops_fn=within_ops_fn) + self.assertEqual(set(ops), set([self.d.op, self.f.op, self.h.op])) + + # Not using within_ops_fn adds back ops for a, b, c. + ops = ge.select.get_backward_walk_ops( + seed_ops, inclusive=True, within_ops=within_ops) + self.assertEqual( + set(ops), + set([ + self.a.op, self.b.op, self.c.op, self.d.op, self.f.op, self.h.op + ])) + + # Vanially backward search via self.h.op includes everything excpet e.op. 
+ ops = ge.select.get_backward_walk_ops(seed_ops, inclusive=True) + self.assertEqual( + set(ops), + set([ + self.a.op, self.b.op, self.c.op, self.d.op, self.f.op, self.g.op, + self.h.op + ])) + if __name__ == "__main__": test.main() diff --git a/tensorflow/contrib/graph_editor/tests/transform_test.py b/tensorflow/contrib/graph_editor/tests/transform_test.py index 2603de640735a612cbd883cc6227fe3cd9f11fca..97f38c923f4a19cedf3e16203ca1e66b7e5e45d2 100644 --- a/tensorflow/contrib/graph_editor/tests/transform_test.py +++ b/tensorflow/contrib/graph_editor/tests/transform_test.py @@ -18,9 +18,11 @@ from __future__ import division from __future__ import print_function import collections +import functools import numpy as np from tensorflow.contrib import graph_editor as ge from tensorflow.contrib.graph_editor.tests import match +from tensorflow.core.framework import attr_value_pb2 from tensorflow.python.client import session from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes @@ -42,6 +44,7 @@ class TransformTest(test.TestCase): self.graph = ops.Graph() with self.graph.as_default(): c0 = constant_op.constant(1.0, shape=[10], name="Const") + c0.op._set_attr("_foo", attr_value_pb2.AttrValue(s=b"foo")) c1 = constant_op.constant(1.0, shape=[10], name="Const") c2 = constant_op.constant(1.0, shape=[10], name="Const") i = constant_op.constant(1.0, shape=[10], name="Input") @@ -112,6 +115,32 @@ class TransformTest(test.TestCase): top = ge.select_ops("^AddNoise_2$", graph=graph)[0] self.assertTrue(matcher2(top)) + def test_transform_nodedef_fn(self): + transformer = ge.Transformer() + + def nodedef_fn(node_def): + if "_foo" in node_def.attr: + del node_def.attr["_foo"] + node_def.attr["_bar"].s = b"bar" + return node_def + + my_copy_op_handler = functools.partial( + ge.transform.copy_op_handler, nodedef_fn=nodedef_fn) + transformer.transform_op_handler = my_copy_op_handler + + graph = ops.Graph() + transformer(self.graph, graph, "", "") 
+ + c0_before = self.graph.get_operation_by_name("Const") + c0_after = graph.get_operation_by_name("Const") + self.assertEquals(c0_before.get_attr("_foo"), b"foo") + with self.assertRaises(ValueError): + c0_after.get_attr("_foo") + + all_ops = graph.get_operations() + for op in all_ops: + self.assertEquals(op.get_attr("_bar"), b"bar") + def test_copy_with_input_replacements(self): with self.graph.as_default(): ten = constant_op.constant(10.0, shape=[10], name="Input") diff --git a/tensorflow/contrib/graph_editor/transform.py b/tensorflow/contrib/graph_editor/transform.py index d8a48387a745e7d88cc6a74c96cb21a2ba1cfa1f..a320a3f232fc1dc8c9ccfd1d0f2a9a40225db5cb 100644 --- a/tensorflow/contrib/graph_editor/transform.py +++ b/tensorflow/contrib/graph_editor/transform.py @@ -129,7 +129,7 @@ def transform_op_if_inside_handler(info, op, keep_if_possible=True): return None -def copy_op_handler(info, op, new_inputs, copy_shape=True): +def copy_op_handler(info, op, new_inputs, copy_shape=True, nodedef_fn=None): """Copy a `tf.Operation`. Args: @@ -137,6 +137,11 @@ def copy_op_handler(info, op, new_inputs, copy_shape=True): op: the `tf.Operation` to be copied. new_inputs: The new inputs for this op. copy_shape: also copy the shape of the tensor + nodedef_fn: If provided, a function that will be run on the NodeDef + and should return a mutated NodeDef before a new Operation is created. + This is useful as certain features cannot be set on the Operation and + must be modified in NodeDef. + Returns: A `(op, op_outputs)` tuple containing the transformed op and its outputs. 
""" @@ -155,6 +160,10 @@ def copy_op_handler(info, op, new_inputs, copy_shape=True): name_ = info.graph_.unique_name(name_) node_def_.name = name_ + # Mutate NodeDef if requested: + if nodedef_fn is not None: + node_def_ = nodedef_fn(node_def_) + # Copy the other inputs needed for initialization output_types_ = op._output_types[:] input_types_ = op._input_types[:] diff --git a/tensorflow/contrib/hvx/README.md b/tensorflow/contrib/hvx/README.md index 163993a3f6bb1bedcdffb32944a98c7cc846878e..68e34f3b0938f795c8ad4c8c75226f6b0afe188d 100644 --- a/tensorflow/contrib/hvx/README.md +++ b/tensorflow/contrib/hvx/README.md @@ -42,11 +42,12 @@ If you've finished walking through the quick start guide, you may want to try bu ### Build libhexagon\_nn\_skel.so -Download Hexagon NN library from codeaurora.org and build it. +Download Hexagon NN library from codeaurora.org and build it. For Hexagon SDK 3.0, we need use the compatible version([721b2d58f](https://source.codeaurora.org/quic/hexagon_nn/nnlib/commit/?id=721b2d58f0f4e2d5b182f41e6b7c4db5356bf0fb)) of nnlib. ```shell git clone https://source.codeaurora.org/quic/hexagon_nn/nnlib cd nnlib +git reset 721b2d58f --hard ``` Just follow the instructions in `README.HOW_TO_BUILD`. You can find the file `libhexagon_nn_skel.so` in `hexagon_Release_dynamic_toolv72_v60/ship`. diff --git a/tensorflow/contrib/image/__init__.py b/tensorflow/contrib/image/__init__.py index e982030bc8959309e72d0f4e02b9755c48535a10..8f406ace1d5dcc13a018e56cc98c621a511da29b 100755 --- a/tensorflow/contrib/image/__init__.py +++ b/tensorflow/contrib/image/__init__.py @@ -25,6 +25,8 @@ projective transforms (including rotation) are supported. 
@@angles_to_projective_transforms @@compose_transforms @@adjust_yiq_hsv +@@flat_transforms_to_matrices +@@matrices_to_flat_transforms @@random_yiq_hsv @@rotate @@transform @@ -58,6 +60,8 @@ from tensorflow.contrib.image.python.ops.distort_image_ops import random_hsv_in_ from tensorflow.contrib.image.python.ops.image_ops import angles_to_projective_transforms from tensorflow.contrib.image.python.ops.image_ops import compose_transforms from tensorflow.contrib.image.python.ops.image_ops import connected_components +from tensorflow.contrib.image.python.ops.image_ops import flat_transforms_to_matrices +from tensorflow.contrib.image.python.ops.image_ops import matrices_to_flat_transforms from tensorflow.contrib.image.python.ops.image_ops import rotate from tensorflow.contrib.image.python.ops.image_ops import transform from tensorflow.contrib.image.python.ops.image_ops import translate diff --git a/tensorflow/contrib/image/kernels/adjust_hsv_in_yiq_op_gpu.cu.cc b/tensorflow/contrib/image/kernels/adjust_hsv_in_yiq_op_gpu.cu.cc index b71ff9cd507faac66b3a33d3c02ec9b5901d814a..645abbf0b0ea5465dadf55d065e997e16940c18d 100644 --- a/tensorflow/contrib/image/kernels/adjust_hsv_in_yiq_op_gpu.cu.cc +++ b/tensorflow/contrib/image/kernels/adjust_hsv_in_yiq_op_gpu.cu.cc @@ -53,7 +53,7 @@ void AdjustHsvInYiqGPU::operator()(OpKernelContext* ctx, int channel_count, OP_REQUIRES_OK(ctx, ctx->allocate_temp( DT_FLOAT, TensorShape({kChannelSize * kChannelSize}), &tranformation_matrix)); - // TODO(huangyp): It takes about 3.5 us to comute tranformation_matrix + // TODO(huangyp): It takes about 3.5 us to compute tranformation_matrix // with one thread. Improve its performance if necessary. 
internal::compute_tranformation_matrix_cuda<<<1, 1, 0, cu_stream>>>( delta_h, scale_s, scale_v, tranformation_matrix.flat().data(), diff --git a/tensorflow/contrib/image/ops/distort_image_ops.cc b/tensorflow/contrib/image/ops/distort_image_ops.cc index b169b0b2b22ad6432baed2cc96711da5ca995875..ca49635d5d0bc7bb84b19508a74be74362d96ddf 100644 --- a/tensorflow/contrib/image/ops/distort_image_ops.cc +++ b/tensorflow/contrib/image/ops/distort_image_ops.cc @@ -36,9 +36,9 @@ REGISTER_OP("AdjustHsvInYiq") Adjust the YIQ hue of one or more images. `images` is a tensor of at least 3 dimensions. The last dimension is -interpretted as channels, and must be three. +interpreted as channels, and must be three. -We used linear transfomation described in: +We used linear transformation described in: beesbuzz.biz/code/hsv_color_transforms.php The input image is considered in the RGB colorspace. Conceptually, the RGB colors are first mapped into YIQ space, rotated around the Y channel by diff --git a/tensorflow/contrib/image/ops/image_ops.cc b/tensorflow/contrib/image/ops/image_ops.cc index 68771b3d054a64ba94141c092e20df1ed6b2339b..ebdcaea7abae2a967786831b62b331897aa3f6a3 100644 --- a/tensorflow/contrib/image/ops/image_ops.cc +++ b/tensorflow/contrib/image/ops/image_ops.cc @@ -93,7 +93,7 @@ row_to_col_match_indices: A vector of length num_rows, which is the number of If `row_to_col_match_indices[i]` is not -1, row i is matched to column `row_to_col_match_indices[i]`. col_to_row_match_indices: A vector of length num_columns, which is the number - of columns of the input ditance matrix. + of columns of the input distance matrix. If `col_to_row_match_indices[j]` is not -1, column j is matched to row `col_to_row_match_indices[j]`. 
)doc"); diff --git a/tensorflow/contrib/image/ops/single_image_random_dot_stereograms_ops.cc b/tensorflow/contrib/image/ops/single_image_random_dot_stereograms_ops.cc index 8139d4272d6950815bd39a64e86e0f7422e6f799..bd784c6bda0344c092c1ae0af2c60be50fdff102 100755 --- a/tensorflow/contrib/image/ops/single_image_random_dot_stereograms_ops.cc +++ b/tensorflow/contrib/image/ops/single_image_random_dot_stereograms_ops.cc @@ -69,7 +69,7 @@ Outputs a single image random dot stereogram for export via encode_PNG/JPG OP. Given the 2-D tensor 'depth_values' with encoded Z values, this operation will encode 3-D data into a 2-D image. The output of this Op is suitable for the encode_PNG/JPG ops. Be careful with image compression as this may corrupt the -encode 3-D data witin the image. +encode 3-D data within the image. This Op is based upon: 'http://www.learningace.com/doc/4331582/b6ab058d1e206d68ab60e4e1ead2fe6e/sirds-paper' @@ -111,7 +111,7 @@ output_image_shape: Output size of returned image in X,Y, Channels 1-grayscale, output_data_window: Size of "DATA" window, must be equal to or smaller than 'output_image_shape', will be centered and use 'convergence_dots_size' for best fit to avoid overlap if possible -image:= A tensor of size 'output_image_shape' with the encloded 'depth_values' +image:= A tensor of size 'output_image_shape' with the encoded 'depth_values' )doc"); } // namespace tensorflow diff --git a/tensorflow/contrib/image/python/ops/image_ops.py b/tensorflow/contrib/image/python/ops/image_ops.py index c139ae89d8d682d6b87813c3a21703ffa762f28e..cd984c80543886be1f682933e2e003bd3374e425 100644 --- a/tensorflow/contrib/image/python/ops/image_ops.py +++ b/tensorflow/contrib/image/python/ops/image_ops.py @@ -433,7 +433,7 @@ def bipartite_match(distance_mat, of rows of the input `distance_matrix`. If `row_to_col_match_indices[i]` is not -1, row i is matched to column `row_to_col_match_indices[i]`. 
col_to_row_match_indices: A vector of length num_columns, which is the - number of columns of the input ditance matrix. + number of columns of the input distance matrix. If `col_to_row_match_indices[j]` is not -1, column j is matched to row `col_to_row_match_indices[j]`. """ diff --git a/tensorflow/contrib/image/python/ops/single_image_random_dot_stereograms.py b/tensorflow/contrib/image/python/ops/single_image_random_dot_stereograms.py index d4a6a5bcbb52511d4093587814100b2a0e8b2420..0ceb683ff4c6965a5ee4bcb04846a69d4d8ea0a5 100755 --- a/tensorflow/contrib/image/python/ops/single_image_random_dot_stereograms.py +++ b/tensorflow/contrib/image/python/ops/single_image_random_dot_stereograms.py @@ -45,7 +45,7 @@ def single_image_random_dot_stereograms(depth_values, Given the 2-D tensor 'depth_values' with encoded Z values, this operation will encode 3-D data into a 2-D image. The output of this Op is suitable for the encode_PNG/JPG ops. Be careful with image compression as this may - corrupt the encode 3-D data witin the image. + corrupt the encode 3-D data within the image. Based upon [this paper](http://www.learningace.com/doc/4331582/b6ab058d1e206d68ab60e4e1ead2fe6e/sirds-paper). diff --git a/tensorflow/contrib/integrate/__init__.py b/tensorflow/contrib/integrate/__init__.py index 68bf511099ab473d158108df6ff07827819297d9..694f0c14bd4e74535c70fab76c5f7ac58f452559 100644 --- a/tensorflow/contrib/integrate/__init__.py +++ b/tensorflow/contrib/integrate/__init__.py @@ -18,6 +18,7 @@ See the @{$python/contrib.integrate} guide. 
@@odeint +@@odeint_fixed """ from __future__ import absolute_import diff --git a/tensorflow/contrib/kernel_methods/python/losses.py b/tensorflow/contrib/kernel_methods/python/losses.py index f182fef067b7f523bc5ca63227265be40528b171..4ef0a66a52429233c6e6f70667a451466493629c 100644 --- a/tensorflow/contrib/kernel_methods/python/losses.py +++ b/tensorflow/contrib/kernel_methods/python/losses.py @@ -43,10 +43,10 @@ def sparse_multiclass_hinge_loss( This is a generalization of standard (binary) hinge loss. For a given instance with correct label c*, the loss is given by: - loss = max_{c != c*} logits_c - logits_{c*} + 1. + $$loss = max_{c != c*} logits_c - logits_{c*} + 1.$$ or equivalently - loss = max_c { logits_c - logits_{c*} + I_{c != c*} } - where I_{c != c*} = 1 if c != c* and 0 otherwise. + $$loss = max_c { logits_c - logits_{c*} + I_{c != c*} }$$ + where \\(I_{c != c*} = 1\ \text{if}\ c != c*\\) and 0 otherwise. Args: labels: `Tensor` of shape [batch_size] or [batch_size, 1]. Corresponds to diff --git a/tensorflow/contrib/kernel_methods/python/mappers/random_fourier_features.py b/tensorflow/contrib/kernel_methods/python/mappers/random_fourier_features.py index 9dc01124ab195ae17b8795a11e4ebefe3f2c746b..9a721a9d440e66eb30bb94daf2b6878318f1e75f 100644 --- a/tensorflow/contrib/kernel_methods/python/mappers/random_fourier_features.py +++ b/tensorflow/contrib/kernel_methods/python/mappers/random_fourier_features.py @@ -34,33 +34,31 @@ class RandomFourierFeatureMapper(dkm.DenseKernelMapper): r"""Class that implements Random Fourier Feature Mapping (RFFM) in TensorFlow. The RFFM mapping is used to approximate the Gaussian (RBF) kernel: - ``` - exp(-||x-y||_2^2 / (2 * sigma^2)) - ``` + $$(exp(-||x-y||_2^2 / (2 * \sigma^2))$$ The implementation of RFFM is based on the following paper: "Random Features for Large-Scale Kernel Machines" by Ali Rahimi and Ben Recht. 
(link: https://people.eecs.berkeley.edu/~brecht/papers/07.rah.rec.nips.pdf) - The mapping uses a matrix `Omega \in R^{d x D}` and a bias vector `b \in R^D` - where `d` is the input dimension (number of dense input features) and `D` is - the output dimension (i.e., dimension of the feature space the input is mapped - to). Each entry of `Omega` is sampled i.i.d. from a (scaled) Gaussian - distribution and each entry of `b` is sampled independently and uniformly from - [0, 2 * pi]. - - For a single input feature vector x in R^d, its RFFM is defined as: - ``` - sqrt(2/D) * cos(x * Omega + b) - ``` - where `cos` is the element-wise cosine function and `x, b` are represented as - row vectors. The aforementioned paper shows that the linear kernel of - RFFM-mapped vectors approximates the Gaussian kernel of the initial vectors. + The mapping uses a matrix \\(\Omega \in R^{d x D}\\) and a bias vector + \\(b \in R^D\\) where \\(d\\) is the input dimension (number of dense input + features) and \\(D\\) is the output dimension (i.e., dimension of the feature + space the input is mapped to). Each entry of \\(\Omega\\) is sampled i.i.d. + from a (scaled) Gaussian distribution and each entry of \\(b\\) is sampled + independently and uniformly from [0, \\(2 * \pi\\)]. + + For a single input feature vector \\(x \in R^d\\), its RFFM is defined as: + $$\sqrt(2/D) * cos(x * \Omega + b)$$ + + where \\(cos\\) is the element-wise cosine function and \\(x, b\\) are + represented as row vectors. The aforementioned paper shows that the linear + kernel of RFFM-mapped vectors approximates the Gaussian kernel of the initial + vectors. """ def __init__(self, input_dim, output_dim, stddev=1.0, seed=1, name=None): - """Constructs a RandomFourierFeatureMapper instance. + r"""Constructs a RandomFourierFeatureMapper instance. Args: input_dim: The dimension (number of features) of the tensors to be mapped. 
@@ -68,11 +66,11 @@ class RandomFourierFeatureMapper(dkm.DenseKernelMapper): stddev: The standard deviation of the Gaussian kernel to be approximated. The error of the classifier trained using this approximation is very sensitive to this parameter. - seed: An integer used to initialize the parameters (`Omega` and `b`) of - the mapper. For repeatable sequences across different invocations of the - mapper object (for instance, to ensure consistent mapping both at - training and eval/inference if these happen in different invocations), - set this to the same integer. + seed: An integer used to initialize the parameters (\\(\Omega\\) and + \\(b\\)) of the mapper. For repeatable sequences across different + invocations of the mapper object (for instance, to ensure consistent + mapping both at training and eval/inference if these happen in + different invocations), set this to the same integer. name: name for the mapper object. """ # TODO(sibyl-vie3Poto): Maybe infer input_dim and/or output_dim (if not explicitly diff --git a/tensorflow/contrib/kernel_methods/python/mappers/random_fourier_features_test.py b/tensorflow/contrib/kernel_methods/python/mappers/random_fourier_features_test.py index 6f4a264485993ab737723171409042b4a9673669..91929184a2e6f3cccae92cb819501a7c6ef81673 100644 --- a/tensorflow/contrib/kernel_methods/python/mappers/random_fourier_features_test.py +++ b/tensorflow/contrib/kernel_methods/python/mappers/random_fourier_features_test.py @@ -34,7 +34,7 @@ def _inner_product(x, y): """Inner product between tensors x and y. The input tensors are assumed to be in ROW representation, that is, the method - returns x * y^T. + returns \\(x * y^T\\). 
Args: x: input tensor in row format diff --git a/tensorflow/contrib/kfac/examples/BUILD b/tensorflow/contrib/kfac/examples/BUILD index 7dd40c19c510e600c5d300e5f30d918f316e53c2..8186fa1c62cb952f86614a96c3965bcddae1686e 100644 --- a/tensorflow/contrib/kfac/examples/BUILD +++ b/tensorflow/contrib/kfac/examples/BUILD @@ -28,8 +28,28 @@ py_library( ) py_binary( - name = "convnet_mnist_main", - srcs = ["convnet_mnist_main.py"], + name = "convnet_mnist_single_main", + srcs = ["convnet_mnist_single_main.py"], + srcs_version = "PY2AND3", + deps = [ + ":convnet", + "//tensorflow:tensorflow_py", + ], +) + +py_binary( + name = "convnet_mnist_multi_tower_main", + srcs = ["convnet_mnist_multi_tower_main.py"], + srcs_version = "PY2AND3", + deps = [ + ":convnet", + "//tensorflow:tensorflow_py", + ], +) + +py_binary( + name = "convnet_mnist_distributed_main", + srcs = ["convnet_mnist_distributed_main.py"], srcs_version = "PY2AND3", deps = [ ":convnet", diff --git a/tensorflow/contrib/kfac/examples/convnet.py b/tensorflow/contrib/kfac/examples/convnet.py index 39d80addaac1fe855a37255b32bf4412b99df46a..e8e3353091df25e135b1247bf976bb9ce177d1a7 100644 --- a/tensorflow/contrib/kfac/examples/convnet.py +++ b/tensorflow/contrib/kfac/examples/convnet.py @@ -37,6 +37,8 @@ import tensorflow as tf from tensorflow.contrib.kfac.examples import mlp from tensorflow.contrib.kfac.examples import mnist +from tensorflow.contrib.kfac.python.ops import optimizer as opt + lc = tf.contrib.kfac.layer_collection oq = tf.contrib.kfac.op_queue @@ -48,12 +50,18 @@ __all__ = [ "linear_layer", "build_model", "minimize_loss_single_machine", - "minimize_loss_distributed", + "distributed_grads_only_and_ops_chief_worker", + "distributed_grads_and_ops_dedicated_workers", "train_mnist_single_machine", - "train_mnist_distributed", + "train_mnist_distributed_sync_replicas", + "train_mnist_multitower" ] +# Inverse update ops will be run every _INVERT_EVRY iterations. 
+_INVERT_EVERY = 10 + + def conv_layer(layer_id, inputs, kernel_size, out_channels): """Builds a convolutional layer with ReLU non-linearity. @@ -161,8 +169,9 @@ def build_model(examples, labels, num_labels, layer_collection): accuracy = tf.reduce_mean( tf.cast(tf.equal(labels, tf.argmax(logits, axis=1)), dtype=tf.float32)) - tf.summary.scalar("loss", loss) - tf.summary.scalar("accuracy", accuracy) + with tf.device("/cpu:0"): + tf.summary.scalar("loss", loss) + tf.summary.scalar("accuracy", accuracy) # Register parameters. K-FAC needs to know about the inputs, outputs, and # parameters of each conv/fully connected layer and the logits powering the @@ -181,41 +190,59 @@ def build_model(examples, labels, num_labels, layer_collection): def minimize_loss_single_machine(loss, accuracy, layer_collection, + device="/gpu:0", session_config=None): """Minimize loss with K-FAC on a single machine. - A single Session is responsible for running all of K-FAC's ops. + A single Session is responsible for running all of K-FAC's ops. The covariance + and inverse update ops are placed on `device`. All model variables are on CPU. Args: loss: 0-D Tensor. Loss to be minimized. accuracy: 0-D Tensor. Accuracy of classifier on current minibatch. layer_collection: LayerCollection instance describing model architecture. Used by K-FAC to construct preconditioner. + device: string, Either '/cpu:0' or '/gpu:0'. The covaraince and invserse + update ops are run on this device. session_config: None or tf.ConfigProto. Configuration for tf.Session(). Returns: final value for 'accuracy'. """ # Train with K-FAC. 
- global_step = tf.train.get_or_create_global_step() + g_step = tf.train.get_or_create_global_step() optimizer = opt.KfacOptimizer( learning_rate=0.0001, cov_ema_decay=0.95, damping=0.001, layer_collection=layer_collection, + placement_strategy="round_robin", + cov_devices=[device], + inv_devices=[device], momentum=0.9) - train_op = optimizer.minimize(loss, global_step=global_step) + (cov_update_thunks, + inv_update_thunks) = optimizer.make_vars_and_create_op_thunks() + + with tf.device(device): + train_op = optimizer.minimize(loss, global_step=g_step) + + def make_update_op(update_thunks): + update_op = [thunk() for thunk in update_thunks] + return tf.group(*update_op) + + cov_update_op = make_update_op(cov_update_thunks) + with tf.control_dependencies([train_op, cov_update_op]): + inverse_op = tf.cond( + tf.equal(tf.mod(g_step + 1, _INVERT_EVERY), 0), + lambda: make_update_op(inv_update_thunks), tf.no_op) tf.logging.info("Starting training.") with tf.train.MonitoredTrainingSession(config=session_config) as sess: while not sess.should_stop(): - global_step_, loss_, accuracy_, _, _ = sess.run( - [global_step, loss, accuracy, train_op, optimizer.cov_update_op]) - - if global_step_ % 100 == 0: - sess.run(optimizer.inv_update_op) + global_step_, loss_, accuracy_, _ = sess.run( + [g_step, loss, accuracy, inverse_op]) - if global_step_ % 100 == 0: + if (global_step_ + 1) % _INVERT_EVERY == 0: tf.logging.info("global_step: %d | loss: %f | accuracy: %s", global_step_, loss_, accuracy_) @@ -250,16 +277,62 @@ def _num_gradient_tasks(num_tasks): return int(np.ceil(0.6 * num_tasks)) -def minimize_loss_distributed(task_id, num_worker_tasks, num_ps_tasks, master, - checkpoint_dir, loss, accuracy, layer_collection): - """Minimize loss with an synchronous implementation of K-FAC. +def _make_distributed_train_op( + task_id, + num_worker_tasks, + num_ps_tasks, + layer_collection +): + """Creates optimizer and distributed training op. 
- Different tasks are responsible for different parts of K-FAC's Ops. The first - 60% of tasks update weights; the next 20% accumulate covariance statistics; - the last 20% invert the matrices used to precondition gradients. + Constructs KFAC optimizer and wraps it in `sync_replicas` optimizer. Makes + the train op. + + Args: + task_id: int. Integer in [0, num_worker_tasks). ID for this worker. + num_worker_tasks: int. Number of workers in this distributed training setup. + num_ps_tasks: int. Number of parameter servers holding variables. If 0, + parameter servers are not used. + layer_collection: LayerCollection instance describing model architecture. + Used by K-FAC to construct preconditioner. + + Returns: + sync_optimizer: `tf.train.SyncReplicasOptimizer` instance which wraps KFAC + optimizer. + optimizer: Instance of `opt.KfacOptimizer`. + global_step: `tensor`, Global step. + """ + tf.logging.info("Task id : %d", task_id) + with tf.device(tf.train.replica_device_setter(num_ps_tasks)): + global_step = tf.train.get_or_create_global_step() + optimizer = opt.KfacOptimizer( + learning_rate=0.0001, + cov_ema_decay=0.95, + damping=0.001, + layer_collection=layer_collection, + momentum=0.9) + sync_optimizer = tf.train.SyncReplicasOptimizer( + opt=optimizer, + replicas_to_aggregate=_num_gradient_tasks(num_worker_tasks), + total_num_replicas=num_worker_tasks) + return sync_optimizer, optimizer, global_step + + +def distributed_grads_only_and_ops_chief_worker( + task_id, is_chief, num_worker_tasks, num_ps_tasks, master, checkpoint_dir, + loss, accuracy, layer_collection, invert_every=10): + """Minimize loss with a synchronous implementation of K-FAC. + + All workers perform gradient computation. Chief worker applies gradient after + averaging the gradients obtained from all the workers. All workers block + execution untill the update is applied. Chief worker runs covariance and + inverse update ops. 
Covariance and inverse matrices are placed on parameter + servers in a round robin manner. For further details on synchronous + distributed optimization check `tf.train.SyncReplicasOptimizer`. Args: task_id: int. Integer in [0, num_worker_tasks). ID for this worker. + is_chief: `boolean`, `True` if the worker is chief worker. num_worker_tasks: int. Number of workers in this distributed training setup. num_ps_tasks: int. Number of parameter servers holding variables. If 0, parameter servers are not used. @@ -271,6 +344,7 @@ def minimize_loss_distributed(task_id, num_worker_tasks, num_ps_tasks, master, run with each step. layer_collection: LayerCollection instance describing model architecture. Used by K-FAC to construct preconditioner. + invert_every: `int`, Number of steps between update the inverse. Returns: final value for 'accuracy'. @@ -278,19 +352,80 @@ def minimize_loss_distributed(task_id, num_worker_tasks, num_ps_tasks, master, Raises: ValueError: if task_id >= num_worker_tasks. 
""" - with tf.device(tf.train.replica_device_setter(num_ps_tasks)): - global_step = tf.train.get_or_create_global_step() - optimizer = opt.KfacOptimizer( - learning_rate=0.0001, - cov_ema_decay=0.95, - damping=0.001, - layer_collection=layer_collection, - momentum=0.9) - inv_update_queue = oq.OpQueue(optimizer.inv_update_ops) - sync_optimizer = tf.train.SyncReplicasOptimizer( - opt=optimizer, - replicas_to_aggregate=_num_gradient_tasks(num_worker_tasks)) - train_op = sync_optimizer.minimize(loss, global_step=global_step) + + sync_optimizer, optimizer, global_step = _make_distributed_train_op( + task_id, num_worker_tasks, num_ps_tasks, layer_collection) + (cov_update_thunks, + inv_update_thunks) = optimizer.make_vars_and_create_op_thunks() + train_op = sync_optimizer.minimize(loss, global_step=global_step) + + tf.logging.info("Starting training.") + hooks = [sync_optimizer.make_session_run_hook(is_chief)] + + def make_update_op(update_thunks): + update_op = [thunk() for thunk in update_thunks] + return tf.group(*update_op) + + if is_chief: + cov_update_op = make_update_op(cov_update_thunks) + with tf.control_dependencies([train_op, cov_update_op]): + update_op = tf.cond( + tf.equal(tf.mod(global_step + 1, invert_every), 0), + lambda: make_update_op(inv_update_thunks), + tf.no_op) + else: + update_op = train_op + + with tf.train.MonitoredTrainingSession( + master=master, + is_chief=is_chief, + checkpoint_dir=checkpoint_dir, + hooks=hooks, + stop_grace_period_secs=0) as sess: + while not sess.should_stop(): + global_step_, loss_, accuracy_, _ = sess.run( + [global_step, loss, accuracy, update_op]) + tf.logging.info("global_step: %d | loss: %f | accuracy: %s", global_step_, + loss_, accuracy_) + return accuracy_ + + +def distributed_grads_and_ops_dedicated_workers( + task_id, is_chief, num_worker_tasks, num_ps_tasks, master, checkpoint_dir, + loss, accuracy, layer_collection): + """Minimize loss with a synchronous implementation of K-FAC. 
+ + Different workers are responsible for different parts of K-FAC's Ops. The + first 60% of tasks compute gradients; the next 20% accumulate covariance + statistics; the last 20% invert the matrices used to precondition gradients. + The chief worker applies the gradient . + + Args: + task_id: int. Integer in [0, num_worker_tasks). ID for this worker. + is_chief: `boolean`, `True` if the worker is chief worker. + num_worker_tasks: int. Number of workers in this distributed training setup. + num_ps_tasks: int. Number of parameter servers holding variables. If 0, + parameter servers are not used. + master: string. IP and port of TensorFlow runtime process. Set to empty + string to run locally. + checkpoint_dir: string or None. Path to store checkpoints under. + loss: 0-D Tensor. Loss to be minimized. + accuracy: dict mapping strings to 0-D Tensors. Additional accuracy to + run with each step. + layer_collection: LayerCollection instance describing model architecture. + Used by K-FAC to construct preconditioner. + + Returns: + final value for 'accuracy'. + + Raises: + ValueError: if task_id >= num_worker_tasks. 
+ """ + sync_optimizer, optimizer, global_step = _make_distributed_train_op( + task_id, num_worker_tasks, num_ps_tasks, layer_collection) + _, cov_update_op, inv_update_ops, _, _, _ = optimizer.make_ops_and_vars() + train_op = sync_optimizer.minimize(loss, global_step=global_step) + inv_update_queue = oq.OpQueue(inv_update_ops) tf.logging.info("Starting training.") is_chief = (task_id == 0) @@ -306,7 +441,7 @@ def minimize_loss_distributed(task_id, num_worker_tasks, num_ps_tasks, master, if _is_gradient_task(task_id, num_worker_tasks): learning_op = train_op elif _is_cov_update_task(task_id, num_worker_tasks): - learning_op = optimizer.cov_update_op + learning_op = cov_update_op elif _is_inv_update_task(task_id, num_worker_tasks): # TODO(duckworthd): Running this op before cov_update_op has been run a # few times can result in "InvalidArgumentError: Cholesky decomposition @@ -324,13 +459,18 @@ def minimize_loss_distributed(task_id, num_worker_tasks, num_ps_tasks, master, return accuracy_ -def train_mnist_single_machine(data_dir, num_epochs, use_fake_data=False): +def train_mnist_single_machine(data_dir, + num_epochs, + use_fake_data=False, + device="/gpu:0"): """Train a ConvNet on MNIST. Args: data_dir: string. Directory to read MNIST examples from. num_epochs: int. Number of passes to make over the training set. use_fake_data: bool. If True, generate a synthetic dataset. + device: string, Either '/cpu:0' or '/gpu:0'. The covaraince and inverse + update ops are run on this device. Returns: accuracy of model on the final minibatch of training data. @@ -350,22 +490,38 @@ def train_mnist_single_machine(data_dir, num_epochs, use_fake_data=False): examples, labels, num_labels=10, layer_collection=layer_collection) # Fit model. 
- return minimize_loss_single_machine(loss, accuracy, layer_collection) + return minimize_loss_single_machine( + loss, accuracy, layer_collection, device=device) def train_mnist_multitower(data_dir, num_epochs, num_towers, - use_fake_data=True): + use_fake_data=True, devices=None): """Train a ConvNet on MNIST. + Training data is split equally among the towers. Each tower computes loss on + its own batch of data and the loss is aggregated on the CPU. The model + variables are placed on first tower. The covariance and inverse update ops + and variables are placed on GPUs in a round robin manner. + Args: data_dir: string. Directory to read MNIST examples from. num_epochs: int. Number of passes to make over the training set. num_towers: int. Number of CPUs to split inference across. use_fake_data: bool. If True, generate a synthetic dataset. + devices: string, Either list of CPU or GPU. The covaraince and inverse + update ops are run on this device. Returns: accuracy of model on the final minibatch of training data. """ + if devices: + device_count = {"GPU": num_towers} + else: + device_count = {"CPU": num_towers} + + devices = devices or [ + "/cpu:{}".format(tower_id) for tower_id in range(num_towers) + ] # Load a dataset. tf.logging.info("Loading MNIST into memory.") tower_batch_size = 128 @@ -388,7 +544,7 @@ def train_mnist_multitower(data_dir, num_epochs, num_towers, layer_collection = lc.LayerCollection() tower_results = [] for tower_id in range(num_towers): - with tf.device("/cpu:%d" % tower_id): + with tf.device(devices[tower_id]): with tf.name_scope("tower%d" % tower_id): with tf.variable_scope(tf.get_variable_scope(), reuse=(tower_id > 0)): tf.logging.info("Building tower %d." % tower_id) @@ -402,34 +558,79 @@ def train_mnist_multitower(data_dir, num_epochs, num_towers, accuracy = tf.reduce_mean(accuracies) # Fit model. 
+ session_config = tf.ConfigProto( - allow_soft_placement=False, device_count={ - "CPU": num_towers - }) - return minimize_loss_single_machine( - loss, accuracy, layer_collection, session_config=session_config) + allow_soft_placement=False, + device_count=device_count, + ) + + g_step = tf.train.get_or_create_global_step() + optimizer = opt.KfacOptimizer( + learning_rate=0.0001, + cov_ema_decay=0.95, + damping=0.001, + layer_collection=layer_collection, + placement_strategy="round_robin", + cov_devices=devices, + inv_devices=devices, + momentum=0.9) + (cov_update_thunks, + inv_update_thunks) = optimizer.make_vars_and_create_op_thunks() + train_op = optimizer.minimize(loss, global_step=g_step) -def train_mnist_distributed(task_id, - num_worker_tasks, - num_ps_tasks, - master, - data_dir, - num_epochs, - use_fake_data=False): - """Train a ConvNet on MNIST. + def make_update_op(update_thunks): + update_op = [thunk() for thunk in update_thunks] + return tf.group(*update_op) + + cov_update_op = make_update_op(cov_update_thunks) + with tf.control_dependencies([train_op, cov_update_op]): + inverse_op = tf.cond( + tf.equal(tf.mod(g_step + 1, _INVERT_EVERY), 0), + lambda: make_update_op(inv_update_thunks), tf.no_op) + + tf.logging.info("Starting training.") + with tf.train.MonitoredTrainingSession(config=session_config) as sess: + while not sess.should_stop(): + global_step_, loss_, accuracy_, _ = sess.run( + [g_step, loss, accuracy, inverse_op]) + + if (global_step_ + 1) % _INVERT_EVERY == 0: + tf.logging.info("global_step: %d | loss: %f | accuracy: %s", + global_step_, loss_, accuracy_) + + +def train_mnist_distributed_sync_replicas(task_id, + is_chief, + num_worker_tasks, + num_ps_tasks, + master, + data_dir, + num_epochs, + op_strategy, + use_fake_data=False): + """Train a ConvNet on MNIST using Sync replicas optimizer. Args: task_id: int. Integer in [0, num_worker_tasks). ID for this worker. + is_chief: `boolean`, `True` if the worker is chief worker. 
num_worker_tasks: int. Number of workers in this distributed training setup. num_ps_tasks: int. Number of parameter servers holding variables. master: string. IP and port of TensorFlow runtime process. data_dir: string. Directory to read MNIST examples from. num_epochs: int. Number of passes to make over the training set. + op_strategy: `string`, Strategy to run the covariance and inverse + ops. If op_strategy == `chief_worker` then covaraiance and inverse + update ops are run on chief worker otherwise they are run on dedicated + workers. + use_fake_data: bool. If True, generate a synthetic dataset. Returns: accuracy of model on the final minibatch of training data. + + Raises: + ValueError: If `op_strategy` not in ["chief_worker", "dedicated_workers"]. """ # Load a dataset. tf.logging.info("Loading MNIST into memory.") @@ -448,9 +649,17 @@ def train_mnist_distributed(task_id, # Fit model. checkpoint_dir = None if data_dir is None else os.path.join(data_dir, "kfac") - return minimize_loss_distributed(task_id, num_worker_tasks, num_ps_tasks, - master, checkpoint_dir, loss, accuracy, - layer_collection) + if op_strategy == "chief_worker": + return distributed_grads_only_and_ops_chief_worker( + task_id, is_chief, num_worker_tasks, num_ps_tasks, master, + checkpoint_dir, loss, accuracy, layer_collection) + elif op_strategy == "dedicated_workers": + return distributed_grads_and_ops_dedicated_workers( + task_id, is_chief, num_worker_tasks, num_ps_tasks, master, + checkpoint_dir, loss, accuracy, layer_collection) + else: + raise ValueError("Only supported op strategies are : {}, {}".format( + "chief_worker", "dedicated_workers")) if __name__ == "__main__": diff --git a/tensorflow/contrib/kfac/examples/convnet_mnist_distributed_main.py b/tensorflow/contrib/kfac/examples/convnet_mnist_distributed_main.py new file mode 100644 index 0000000000000000000000000000000000000000..b4c2d4a9e9bfcc4bfb55a25d2f23e66afe5b1375 --- /dev/null +++ 
b/tensorflow/contrib/kfac/examples/convnet_mnist_distributed_main.py @@ -0,0 +1,62 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +r"""Train a ConvNet on MNIST using K-FAC. + +Distributed training with sync replicas optimizer. See +`convnet.train_mnist_distributed_sync_replicas` for details. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + + +from absl import flags +import tensorflow as tf + +from tensorflow.contrib.kfac.examples import convnet + +FLAGS = flags.FLAGS +flags.DEFINE_integer("task", -1, "Task identifier") +flags.DEFINE_string("data_dir", "/tmp/mnist", "local mnist dir") +flags.DEFINE_string( + "cov_inv_op_strategy", "chief_worker", + "In dist training mode run the cov, inv ops on chief or dedicated workers." 
+) +flags.DEFINE_string("master", "local", "Session master.") +flags.DEFINE_integer("ps_tasks", 2, + "Number of tasks in the parameter server job.") +flags.DEFINE_integer("replicas_to_aggregate", 5, + "Number of replicas to aggregate.") +flags.DEFINE_integer("worker_replicas", 5, "Number of replicas in worker job.") +flags.DEFINE_integer("num_epochs", None, "Number of epochs.") + + +def _is_chief(): + """Determines whether a job is the chief worker.""" + if "chief_worker" in FLAGS.brain_jobs: + return FLAGS.brain_job_name == "chief_worker" + else: + return FLAGS.task == 0 + + +def main(unused_argv): + _ = unused_argv + convnet.train_mnist_distributed_sync_replicas( + FLAGS.task, _is_chief(), FLAGS.worker_replicas, FLAGS.ps_tasks, + FLAGS.master, FLAGS.data_dir, FLAGS.num_epochs, FLAGS.cov_inv_op_strategy) + +if __name__ == "__main__": + tf.app.run(main=main) diff --git a/tensorflow/contrib/kfac/examples/convnet_mnist_multi_tower_main.py b/tensorflow/contrib/kfac/examples/convnet_mnist_multi_tower_main.py new file mode 100644 index 0000000000000000000000000000000000000000..4249bf8a8d9d3a5beb87d4140a55b0ee6eadbc64 --- /dev/null +++ b/tensorflow/contrib/kfac/examples/convnet_mnist_multi_tower_main.py @@ -0,0 +1,48 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +r"""Train a ConvNet on MNIST using K-FAC. 
+ +Multi tower training mode. See `convnet.train_mnist_multitower` for details. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + + +from absl import flags +import tensorflow as tf + +from tensorflow.contrib.kfac.examples import convnet + +FLAGS = flags.FLAGS +flags.DEFINE_string("data_dir", "/tmp/multitower_1/mnist", "local mnist dir") +flags.DEFINE_integer("num_towers", 2, + "Number of towers for multi tower training.") + + +def main(unused_argv): + _ = unused_argv + assert FLAGS.num_towers > 1 + devices = ["/gpu:{}".format(tower_id) for tower_id in range(FLAGS.num_towers)] + convnet.train_mnist_multitower( + FLAGS.data_dir, + num_epochs=200, + num_towers=FLAGS.num_towers, + devices=devices) + + +if __name__ == "__main__": + tf.app.run(main=main) diff --git a/tensorflow/contrib/kfac/examples/convnet_mnist_main.py b/tensorflow/contrib/kfac/examples/convnet_mnist_single_main.py similarity index 57% rename from tensorflow/contrib/kfac/examples/convnet_mnist_main.py rename to tensorflow/contrib/kfac/examples/convnet_mnist_single_main.py index b0c6fbde198850c76af0bc1600dc23e926227229..2c1f09936073a34816da61d771f59e848b8787af 100644 --- a/tensorflow/contrib/kfac/examples/convnet_mnist_main.py +++ b/tensorflow/contrib/kfac/examples/convnet_mnist_single_main.py @@ -14,44 +14,26 @@ # ============================================================================== r"""Train a ConvNet on MNIST using K-FAC. -See convnet.py for details. +Train on single machine. See `convnet.train_mnist_single_machine` for details. 
""" from __future__ import absolute_import from __future__ import division from __future__ import print_function -import argparse -import sys +from absl import flags import tensorflow as tf from tensorflow.contrib.kfac.examples import convnet -FLAGS = None +FLAGS = flags.FLAGS +flags.DEFINE_string("data_dir", "/tmp/mnist", "local mnist dir") -def main(argv): - _ = argv - - if FLAGS.num_towers > 1: - convnet.train_mnist_multitower( - FLAGS.data_dir, num_epochs=200, num_towers=FLAGS.num_towers) - else: - convnet.train_mnist_single_machine(FLAGS.data_dir, num_epochs=200) +def main(unused_argv): + convnet.train_mnist_single_machine(FLAGS.data_dir, num_epochs=200) if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument( - "--data_dir", - type=str, - default="/tmp/mnist", - help="Directory to store dataset in.") - parser.add_argument( - "--num_towers", - type=int, - default=1, - help="Number of CPUs to split minibatch across.") - FLAGS, unparsed = parser.parse_known_args() - tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) + tf.app.run(main=main) diff --git a/tensorflow/contrib/kfac/examples/tests/convnet_test.py b/tensorflow/contrib/kfac/examples/tests/convnet_test.py index 8d86c2bb5150cd4bc8a2b21ba050e904929e0fe9..6de775cc79953ba548c766e861d6d88e0455a508 100644 --- a/tensorflow/contrib/kfac/examples/tests/convnet_test.py +++ b/tensorflow/contrib/kfac/examples/tests/convnet_test.py @@ -112,15 +112,16 @@ class ConvNetTest(tf.test.TestCase): def testMinimizeLossSingleMachine(self): with tf.Graph().as_default(): loss, accuracy, layer_collection = self._build_toy_problem() - accuracy_ = convnet.minimize_loss_single_machine(loss, accuracy, - layer_collection) - self.assertLess(accuracy_, 1.0) + accuracy_ = convnet.minimize_loss_single_machine( + loss, accuracy, layer_collection, device="/cpu:0") + self.assertLess(accuracy_, 2.0) def testMinimizeLossDistributed(self): with tf.Graph().as_default(): loss, accuracy, layer_collection = 
self._build_toy_problem() - accuracy_ = convnet.minimize_loss_distributed( + accuracy_ = convnet.distributed_grads_only_and_ops_chief_worker( task_id=0, + is_chief=True, num_worker_tasks=1, num_ps_tasks=0, master="", @@ -128,7 +129,7 @@ class ConvNetTest(tf.test.TestCase): loss=loss, accuracy=accuracy, layer_collection=layer_collection) - self.assertLess(accuracy_, 1.0) + self.assertLess(accuracy_, 2.0) def testTrainMnistSingleMachine(self): with tf.Graph().as_default(): @@ -138,7 +139,7 @@ class ConvNetTest(tf.test.TestCase): # but there are too few parameters for the model to effectively memorize # the training set the way an MLP can. convnet.train_mnist_single_machine( - data_dir=None, num_epochs=1, use_fake_data=True) + data_dir=None, num_epochs=1, use_fake_data=True, device="/cpu:0") def testTrainMnistMultitower(self): with tf.Graph().as_default(): @@ -149,13 +150,15 @@ class ConvNetTest(tf.test.TestCase): def testTrainMnistDistributed(self): with tf.Graph().as_default(): # Ensure model training doesn't crash. 
- convnet.train_mnist_distributed( + convnet.train_mnist_distributed_sync_replicas( task_id=0, + is_chief=True, num_worker_tasks=1, num_ps_tasks=0, master="", data_dir=None, num_epochs=1, + op_strategy="chief_worker", use_fake_data=True) diff --git a/tensorflow/contrib/kfac/python/kernel_tests/BUILD b/tensorflow/contrib/kfac/python/kernel_tests/BUILD index f73c24f8fbb195d2e2063dd1e3b0310b6d04866b..2477d2bfc12c2df64a672fd457e9634009ccd129 100644 --- a/tensorflow/contrib/kfac/python/kernel_tests/BUILD +++ b/tensorflow/contrib/kfac/python/kernel_tests/BUILD @@ -114,6 +114,7 @@ py_test( name = "utils_test", srcs = ["utils_test.py"], srcs_version = "PY2AND3", + tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ "//tensorflow/contrib/kfac/python/ops:utils", "//tensorflow/contrib/tpu", diff --git a/tensorflow/contrib/kfac/python/ops/estimator.py b/tensorflow/contrib/kfac/python/ops/estimator.py index ced1110676754b6c8bba813ace743b3f3daddb26..d11c9c828810742cd176e4c5b7b77cf9a5cf87d9 100644 --- a/tensorflow/contrib/kfac/python/ops/estimator.py +++ b/tensorflow/contrib/kfac/python/ops/estimator.py @@ -85,9 +85,9 @@ class FisherEstimator(object): """Create a FisherEstimator object. Args: - variables: A list of the variables for which to estimate the Fisher. This - must match the variables registered in layer_collection (if it is not - None). + variables: A `list` of variables or `callable` which returns the variables + for which to estimate the Fisher. This must match the variables + registered in layer_collection (if it is not None). cov_ema_decay: The decay factor used when calculating the covariance estimate moving averages. damping: float. 
The damping factor used to stabilize training due to @@ -147,7 +147,10 @@ class FisherEstimator(object): @property def variables(self): - return self._variables + if callable(self._variables): + return self._variables() + else: + return self._variables @property def damping(self): diff --git a/tensorflow/contrib/kfac/python/ops/fisher_blocks.py b/tensorflow/contrib/kfac/python/ops/fisher_blocks.py index e0d9cb5ea9d4237f03292c4c948056345b4dc5ce..00b3673a742e92057b0a1673d3f42a19379111fe 100644 --- a/tensorflow/contrib/kfac/python/ops/fisher_blocks.py +++ b/tensorflow/contrib/kfac/python/ops/fisher_blocks.py @@ -19,11 +19,11 @@ Information matrix. Suppose one has a model that parameterizes a posterior distribution over 'y' given 'x' with parameters 'params', p(y | x, params). Its Fisher Information matrix is given by, - F(params) = E[ v(x, y, params) v(x, y, params)^T ] + $$F(params) = E[ v(x, y, params) v(x, y, params)^T ]$$ where, - v(x, y, params) = (d / d params) log p(y | x, params) + $$v(x, y, params) = (d / d params) log p(y | x, params)$$ and the expectation is taken with respect to the data's distribution for 'x' and the model's posterior distribution for 'y', @@ -85,7 +85,7 @@ def normalize_damping(damping, num_replications): def compute_pi_tracenorm(left_cov, right_cov): """Computes the scalar constant pi for Tikhonov regularization/damping. - pi = sqrt( (trace(A) / dim(A)) / (trace(B) / dim(B)) ) + $$\pi = \sqrt{ (trace(A) / dim(A)) / (trace(B) / dim(B)) }$$ See section 6.3 of https://arxiv.org/pdf/1503.05671.pdf for details. Args: @@ -462,14 +462,14 @@ class FullyConnectedDiagonalFB(InputOutputMultiTower, FisherBlock): Let 'params' be a vector parameterizing a model and 'i' an arbitrary index into it. We are interested in Fisher(params)[i, i]. 
This is, - Fisher(params)[i, i] = E[ v(x, y, params) v(x, y, params)^T ][i, i] - = E[ v(x, y, params)[i] ^ 2 ] + $$Fisher(params)[i, i] = E[ v(x, y, params) v(x, y, params)^T ][i, i] + = E[ v(x, y, params)[i] ^ 2 ]$$ Consider fully connected layer in this model with (unshared) weight matrix 'w'. For an example 'x' that produces layer inputs 'a' and output preactivations 's', - v(x, y, w) = vec( a (d loss / d s)^T ) + $$v(x, y, w) = vec( a (d loss / d s)^T )$$ This FisherBlock tracks Fisher(params)[i, i] for all indices 'i' corresponding to the layer's parameters 'w'. @@ -532,14 +532,14 @@ class ConvDiagonalFB(InputOutputMultiTower, FisherBlock): Let 'params' be a vector parameterizing a model and 'i' an arbitrary index into it. We are interested in Fisher(params)[i, i]. This is, - Fisher(params)[i, i] = E[ v(x, y, params) v(x, y, params)^T ][i, i] - = E[ v(x, y, params)[i] ^ 2 ] + $$Fisher(params)[i, i] = E[ v(x, y, params) v(x, y, params)^T ][i, i] + = E[ v(x, y, params)[i] ^ 2 ]$$ Consider a convoluational layer in this model with (unshared) filter matrix 'w'. For an example image 'x' that produces layer inputs 'a' and output preactivations 's', - v(x, y, w) = vec( sum_{loc} a_{loc} (d loss / d s_{loc})^T ) + $$v(x, y, w) = vec( sum_{loc} a_{loc} (d loss / d s_{loc})^T )$$ where 'loc' is a single (x, y) location in an image. @@ -805,12 +805,12 @@ class ConvKFCBasicFB(InputOutputMultiTower, KroneckerProductFB): 'w'. For a minibatch that produces inputs 'a' and output preactivations 's', this FisherBlock estimates, - F(w) = #locations * kronecker(E[flat(a) flat(a)^T], - E[flat(ds) flat(ds)^T]) + $$F(w) = \#locations * kronecker(E[flat(a) flat(a)^T], + E[flat(ds) flat(ds)^T])$$ where - ds = (d / ds) log p(y | x, w) + $$ds = (d / ds) log p(y | x, w)$$ #locations = number of (x, y) locations where 'w' is applied. 
where the expectation is taken over all examples and locations and flat() @@ -1567,7 +1567,7 @@ class FullyConnectedSeriesFB(InputOutputMultiTowerMultiUse, if self._option == SeriesFBApproximation.option1: - # Note that L_A = A0^(-1/2) * U_A and L_G = G0^(-1/2) * U_G. + # Note that \\(L_A = A0^{-1/2} * U_A and L_G = G0^{-1/2} * U_G.\\) L_A, psi_A = self._input_factor.get_option1quants( self._input_damping_func) L_G, psi_G = self._output_factor.get_option1quants( @@ -1581,33 +1581,33 @@ class FullyConnectedSeriesFB(InputOutputMultiTowerMultiUse, T = self._num_timesteps return (1 - x)**2 / (T * (1 - x**2) - 2 * x * (1 - x**T)) - # Y = gamma( psi_G*psi_A^T ) (computed element-wise) + # \\(Y = \gamma( psi_G*psi_A^T )\\) (computed element-wise) # Even though Y is Z-independent we are recomputing it from the psi's # each since Y depends on both A and G quantities, and it is relatively # cheap to compute. Y = gamma(array_ops.reshape(psi_G, [int(psi_G.shape[0]), -1]) * psi_A) - # Z = L_G^T * Z * L_A + # \\(Z = L_G^T * Z * L_A\\) # This is equivalent to the following computation from the original # pseudo-code: - # Z = G0^(-1/2) * Z * A0^(-1/2) - # Z = U_G^T * Z * U_A + # \\(Z = G0^{-1/2} * Z * A0^{-1/2}\\) + # \\(Z = U_G^T * Z * U_A\\) Z = math_ops.matmul(L_G, math_ops.matmul(Z, L_A), transpose_a=True) - # Z = Z .* Y + # \\(Z = Z .* Y\\) Z *= Y - # Z = L_G * Z * L_A^T + # \\(Z = L_G * Z * L_A^T\\) # This is equivalent to the following computation from the original # pseudo-code: - # Z = U_G * Z * U_A^T - # Z = G0^(-1/2) * Z * A0^(-1/2) + # \\(Z = U_G * Z * U_A^T\\) + # \\(Z = G0^{-1/2} * Z * A0^{-1/2}\\) Z = math_ops.matmul(L_G, math_ops.matmul(Z, L_A, transpose_b=True)) elif self._option == SeriesFBApproximation.option2: - # Note that P_A = A_1^T * A_0^(-1) and P_G = G_1^T * G_0^(-1), - # and K_A = A_0^(-1/2) * E_A and K_G = G_0^(-1/2) * E_G. 
+ # Note that \\(P_A = A_1^T * A_0^{-1} and P_G = G_1^T * G_0^{-1}\\), + # and \\(K_A = A_0^{-1/2} * E_A\ and\ K_G = G_0^{-1/2} * E_G.\\) P_A, K_A, mu_A = self._input_factor.get_option2quants( self._input_damping_func) P_G, K_G, mu_G = self._output_factor.get_option2quants( @@ -1616,26 +1616,26 @@ class FullyConnectedSeriesFB(InputOutputMultiTowerMultiUse, # Our approach differs superficially from the pseudo-code in the paper # in order to reduce the total number of matrix-matrix multiplies. # In particular, the first three computations in the pseudo code are - # Z = G0^(-1/2) * Z * A0^(-1/2) - # Z = Z - hPsi_G^T * Z * hPsi_A - # Z = E_G^T * Z * E_A - # Noting that hPsi = C0^(-1/2) * C1 * C0^(-1/2), so that - # C0^(-1/2) * hPsi = C0^(-1) * C1 * C0^(-1/2) = P^T * C0^(-1/2) + # \\(Z = G0^{-1/2} * Z * A0^{-1/2}\\) + # \\(Z = Z - hPsi_G^T * Z * hPsi_A\\) + # \\(Z = E_G^T * Z * E_A\\) + # Noting that \\(hPsi = C0^{-1/2} * C1 * C0^{-1/2}\\), so that + # \\(C0^{-1/2} * hPsi = C0^{-1} * C1 * C0^{-1/2} = P^T * C0^{-1/2}\\) # the entire computation can be written as - # Z = E_G^T * (G0^(-1/2) * Z * A0^(-1/2) - # - hPsi_G^T * G0^(-1/2) * Z * A0^(-1/2) * hPsi_A) * E_A - # = E_G^T * (G0^(-1/2) * Z * A0^(-1/2) - # - G0^(-1/2) * P_G * Z * P_A^T * A0^(-1/2)) * E_A - # = E_G^T * G0^(-1/2) * Z * A0^(-1/2) * E_A - # - E_G^T* G0^(-1/2) * P_G * Z * P_A^T * A0^(-1/2) * E_A - # = K_G^T * Z * K_A - K_G^T * P_G * Z * P_A^T * K_A + # \\(Z = E_G^T * (G0^{-1/2} * Z * A0^{-1/2}\\) + # \\( - hPsi_G^T * G0^{-1/2} * Z * A0^{-1/2} * hPsi_A) * E_A\\) + # \\( = E_G^T * (G0^{-1/2} * Z * A0^{-1/2}\\) + # \\( - G0^{-1/2} * P_G * Z * P_A^T * A0^{-1/2}) * E_A\\) + # \\( = E_G^T * G0^{-1/2} * Z * A0^{-1/2} * E_A\\) + # \\( - E_G^T* G0^{-1/2} * P_G * Z * P_A^T * A0^{-1/2} * E_A\\) + # \\( = K_G^T * Z * K_A - K_G^T * P_G * Z * P_A^T * K_A\\) # This final expression is computed by the following two lines: - # Z = Z - P_G * Z * P_A^T + # \\(Z = Z - P_G * Z * P_A^T\\) Z -= math_ops.matmul(P_G, math_ops.matmul(Z, 
P_A, transpose_b=True)) - # Z = K_G^T * Z * K_A + # \\(Z = K_G^T * Z * K_A\\) Z = math_ops.matmul(K_G, math_ops.matmul(Z, K_A), transpose_a=True) - # Z = Z ./ (1*1^T - mu_G*mu_A^T) + # \\(Z = Z ./ (1*1^T - mu_G*mu_A^T)\\) # Be careful with the outer product. We don't want to accidentally # make it an inner-product instead. tmp = 1.0 - array_ops.reshape(mu_G, [int(mu_G.shape[0]), -1]) * mu_A @@ -1646,13 +1646,13 @@ class FullyConnectedSeriesFB(InputOutputMultiTowerMultiUse, # We now perform the transpose/reverse version of the operations # derived above, whose derivation from the original pseudo-code is # analgous. - # Z = K_G * Z * K_A^T + # \\(Z = K_G * Z * K_A^T\\) Z = math_ops.matmul(K_G, math_ops.matmul(Z, K_A, transpose_b=True)) - # Z = Z - P_G^T * Z * P_A + # \\(Z = Z - P_G^T * Z * P_A\\) Z -= math_ops.matmul(P_G, math_ops.matmul(Z, P_A), transpose_a=True) - # Z = normalize (1/E[T]) * Z + # \\(Z = normalize (1/E[T]) * Z\\) # Note that this normalization is done because we compute the statistics # by averaging, not summing, over time. (And the gradient is presumably # summed over time, not averaged, and thus their scales are different.) diff --git a/tensorflow/contrib/kfac/python/ops/layer_collection.py b/tensorflow/contrib/kfac/python/ops/layer_collection.py index 586a004f880e7bea2a772c53091285c2907ca31a..366e2a82d56602de0df706cbd382c21aba5540af 100644 --- a/tensorflow/contrib/kfac/python/ops/layer_collection.py +++ b/tensorflow/contrib/kfac/python/ops/layer_collection.py @@ -28,6 +28,7 @@ from collections import defaultdict from collections import OrderedDict from contextlib import contextmanager from functools import partial +import warnings import math import six @@ -84,7 +85,7 @@ _EMBEDDING_MULTI_APPROX_TO_BLOCK_TYPES = { APPROX_KRONECKER_INDEP_NAME: fb.EmbeddingKFACMultiIndepFB } -# Possible value for 'reuse' keyword argument. Sets 'reuse' to +# Possible value for `reuse` keyword argument. Sets `reuse` to # tf.get_variable_scope().reuse. 
VARIABLE_SCOPE = "VARIABLE_SCOPE" @@ -171,6 +172,9 @@ class LayerCollection(object): def __init__(self, graph=None, name="LayerCollection"): + warnings.warn( + "tf.contrib.kfac is deprecated and will be removed by 2018-11-01. " + "Use https://pypi.python.org/pypi/kfac instead.") self.fisher_blocks = LayerParametersDict() self.fisher_factors = OrderedDict() self._linked_parameters = dict( @@ -294,8 +298,8 @@ class LayerCollection(object): layer_key: A variable or tuple of variables. The key to check for in existing registrations and to register if valid. fisher_block: The associated `FisherBlock`. - reuse: Method to use for inserting new `FisherBlock`s. One of True, False, - or 'VARIABLE_SCOPE'. + reuse: Method to use for inserting new `FisherBlock`s. One of True, False, + or `VARIABLE_SCOPE`. Raises: ValueError: If `layer_key` was already registered and reuse is `False`, @@ -359,15 +363,14 @@ class LayerCollection(object): is None. name: (OPTIONAL) str or None. Unique name for this loss function. If None, a new name is generated. (Default: None) - reuse: (OPTIONAL) bool or str. If True, reuse an existing FisherBlock. - If False, create a new FisherBlock. If VARIABLE_SCOPE, use - tf.get_variable_scope().reuse. + reuse: (OPTIONAL) bool or str. If True, adds `loss` as an additional + tower for the existing loss function. Raises: ValueError: If reuse == True and name == None. ValueError: If reuse == True and seed != None. - KeyError: If reuse == True and no existing LossFunction with 'name' found. - KeyError: If reuse == False and existing LossFunction with 'name' found. + KeyError: If reuse == True and no existing LossFunction with `name` found. + KeyError: If reuse == False and existing LossFunction with `name` found. """ name = name or self._graph.unique_name(base_name) @@ -491,24 +494,24 @@ class LayerCollection(object): """ params = frozenset(utils.ensure_sequence(params)) - # Check if any of the variables in 'params' is already in - # 'self.fisher_blocks.keys()'. 
+ # Check if any of the variables in `params` is already in + # 'self.fisher_blocks.keys()`. for registered_params, fisher_block in self.fisher_blocks.items(): registered_params_set = set(utils.ensure_sequence(registered_params)) for variable in params: if (variable in registered_params_set and params != registered_params_set): raise ValueError( - "Can't link parameters {}, variable {} was already registered in " + "Can`t link parameters {}, variable {} was already registered in " "group {} with layer {}".format(params, variable, registered_params, fisher_block)) - # Check if any of the variables in 'params' is already in - # 'self.linked_parameters'. + # Check if any of the variables in `params` is already in + # 'self.linked_parameters`. for variable in params: for other_linked_params in self.linked_parameters: if variable in other_linked_params: - raise ValueError("Can't link parameters {}, variable {} was already " + raise ValueError("Can`t link parameters {}, variable {} was already " "linked in group {}.".format(params, variable, other_linked_params)) self._linked_parameters[params] = approximation @@ -576,15 +579,15 @@ class LayerCollection(object): produced by layer. approx: str or None. If not None must be "kron". The Fisher approximation to use. If None the default value is used. (Default: None) - reuse: bool or str. If True, this adds 'inputs' and 'outputs' as an + reuse: bool or str. If True, this adds `inputs` and `outputs` as an additional mini-batch/tower of data to use when estimating the Fisher block for this layer (which must have already been registered). If "VARIABLE_SCOPE", use tf.get_variable_scope().reuse. (Default: "VARIABLE_SCOPE") Raises: - ValueError: For improper value to 'approx'. - KeyError: If reuse == True but no FisherBlock found for 'params'. + ValueError: For improper value to `approx`. + KeyError: If reuse == True but no FisherBlock found for `params`. ValueError: If reuse == True and FisherBlock found but of the wrong type. 
""" block_type, approx = self._get_block_type( @@ -618,15 +621,15 @@ class LayerCollection(object): approx: str or None. If not None must be one of "kron" or "diagonal". The Fisher approximation to use. If None the default value is used. (Default: None) - reuse: bool or str. If True, this adds 'inputs' and 'outputs' as an + reuse: bool or str. If True, this adds `inputs` and `outputs` as an additional mini-batch/tower of data to use when estimating the Fisher block for this layer (which must have already been registered). If "VARIABLE_SCOPE", use tf.get_variable_scope().reuse. (Default: "VARIABLE_SCOPE") Raises: - ValueError: For improper value to 'approx'. - KeyError: If reuse == True but no FisherBlock found for 'params'. + ValueError: For improper value to `approx`. + KeyError: If reuse == True but no FisherBlock found for `params`. ValueError: If reuse == True and FisherBlock found but of the wrong type. """ @@ -669,15 +672,15 @@ class LayerCollection(object): approx: str or None. If not None must be one of "kron" or "diagonal". The Fisher approximation to use. If None the default value is used. (Default: None) - reuse: bool or str. If True, this adds 'inputs' and 'outputs' as an + reuse: bool or str. If True, this adds `inputs` and `outputs` as an additional mini-batch/tower of data to use when estimating the Fisher block for this layer (which must have already been registered). If "VARIABLE_SCOPE", use tf.get_variable_scope().reuse. (Default: "VARIABLE_SCOPE") Raises: - ValueError: For improper value to 'approx'. - KeyError: If reuse == True but no FisherBlock found for 'params'. + ValueError: For improper value to `approx`. + KeyError: If reuse == True but no FisherBlock found for `params`. ValueError: If reuse == True and FisherBlock found but of the wrong type. """ @@ -686,7 +689,7 @@ class LayerCollection(object): _CONV2D_APPROX_TO_BLOCK_TYPES) # It feels bad to pass in configuration that has to do with the internal - # implementation. 
And then we can't use the same constructor for both + # implementation. And then we can`t use the same constructor for both # anymore and are thus forced to use this ugly if-statement. # TODO(b/74793309): Clean this up? if approx == APPROX_KRONECKER_NAME: @@ -749,15 +752,15 @@ class LayerCollection(object): approx: str or None. If not None must be one of "kron" or "diagonal". The Fisher approximation to use. If None the default value is used. (Default: None) - reuse: bool or str. If True, this adds 'inputs' and 'outputs' as an + reuse: bool or str. If True, this adds `inputs` and `outputs` as an additional mini-batch/tower of data to use when estimating the Fisher block for this layer (which must have already been registered). If "VARIABLE_SCOPE", use tf.get_variable_scope().reuse. (Default: "VARIABLE_SCOPE") Raises: - ValueError: For improper value to 'approx'. - KeyError: If reuse == True but no FisherBlock found for 'params'. + ValueError: For improper value to `approx`. + KeyError: If reuse == True but no FisherBlock found for `params`. ValueError: If reuse == True and FisherBlock found but of the wrong type. """ # TODO(b/74793309): Have this use _get_block_type like the other @@ -804,15 +807,15 @@ class LayerCollection(object): data_format: str or None. Format of data. approx: str or None. If not None must "diagonal". The Fisher approximation to use. If None the default value is used. (Default: None) - reuse: bool or str. If True, this adds 'inputs' and 'outputs' as an + reuse: bool or str. If True, this adds `inputs` and `outputs` as an additional mini-batch/tower of data to use when estimating the Fisher block for this layer (which must have already been registered). If "VARIABLE_SCOPE", use tf.get_variable_scope().reuse. (Default: "VARIABLE_SCOPE") Raises: - ValueError: For improper value to 'approx'. - KeyError: If reuse == True but no FisherBlock found for 'params'. + ValueError: For improper value to `approx`. 
+ KeyError: If reuse == True but no FisherBlock found for `params`. ValueError: If reuse == True and FisherBlock found but of the wrong type. """ # TODO(b/74793309): Have this use _get_block_type like the other @@ -872,15 +875,15 @@ class LayerCollection(object): approx: str or None. If not None must be one of "kron" or "diagonal". The Fisher approximation to use. If None the default value is used. (Default: None) - reuse: bool or str. If True, this adds 'inputs' and 'outputs' as an + reuse: bool or str. If True, this adds `inputs` and `outputs` as an additional mini-batch/tower of data to use when estimating the Fisher block for this layer (which must have already been registered). If "VARIABLE_SCOPE", use tf.get_variable_scope().reuse. (Default: "VARIABLE_SCOPE") Raises: - ValueError: For improper value to 'approx'. - KeyError: If reuse == True but no FisherBlock found for 'params'. + ValueError: For improper value to `approx`. + KeyError: If reuse == True but no FisherBlock found for `params`. ValueError: If reuse == True and FisherBlock found but of the wrong type. """ self.register_depthwise_conv2d( @@ -917,14 +920,14 @@ class LayerCollection(object): approx: str or None. It not None, must be one of "full" or "diagonal". The Fisher approximation to use. If None the default value is used. (Default: None) - reuse: bool or str. If True, this adds 'batch_size' to the total + reuse: bool or str. If True, this adds `batch_size` to the total mini-batch size use when estimating the Fisher block for this layer (which must have already been registered). If "VARIABLE_SCOPE", use tf.get_variable_scope().reuse. (Default: "VARIABLE_SCOPE") Raises: - ValueError: For improper value to 'approx'. - KeyError: If reuse == True but no FisherBlock found for 'params'. + ValueError: For improper value to `approx`. + KeyError: If reuse == True but no FisherBlock found for `params`. ValueError: If reuse == True and FisherBlock found but of the wrong type. 
""" block_type, approx = self._get_block_type( @@ -954,10 +957,10 @@ class LayerCollection(object): correspond to a "time-step" in an RNN). OR, can be single Tensor, of shape [num_uses * batch_size , input_size], which is a reshaped version of a Tensor of shape [num_uses, batch_size, input_size]. - outputs: A list of Tensors, the same length as 'inputs', each of shape + outputs: A list of Tensors, the same length as `inputs`, each of shape [batch_size, output_size]. Outputs produced by layer. The list indexes each use in the graph (which might correspond to a "time-step" in an - RNN). Needs to correspond with the order used in 'inputs'. OR, can be + RNN). Needs to correspond with the order used in `inputs`. OR, can be a single Tensor of shape [num_uses * batch_size, output_size], which is a reshaped version of a Tensor of shape [num_uses, batch_size, output_size]. @@ -967,16 +970,16 @@ class LayerCollection(object): approx: str or None. If not None, must be of "kron_indep", "kron_series_1" or "kron_series_2". The Fisher approximation to use. If None the default value is used. (Default: None) - reuse: bool or str. If True, this adds inputs and outputs as an + reuse: bool or str. If True, this adds `inputs` and `outputs` as an additional mini-batch/tower of data to use when estimating the Fisher block for this layer (which must have already been registered). If "VARIABLE_SCOPE", use tf.get_variable_scope().reuse. (Note that the - word 'use' here has a completely different meaning to "use in the graph" - as it perturns to the 'inputs', 'outputs', and 'num_uses' arguments.) + word `use` here has a completely different meaning to "use in the graph" + as it perturns to the `inputs`, `outputs`, and `num_uses` arguments.) (Default: "VARIABLE_SCOPE") Raises: - ValueError: For improper value to 'approx'. + ValueError: For improper value to `approx`. 
""" block_type, approx = self._get_block_type( params, approx, self.default_fully_connected_multi_approximation, @@ -990,9 +993,11 @@ class LayerCollection(object): num_uses=num_uses), reuse=reuse) block.register_additional_tower(inputs, outputs) - - assert len(inputs) == len(outputs) - self._add_uses(params, len(inputs)) + if isinstance(inputs, (tuple, list)): + assert len(inputs) == len(outputs) + self._add_uses(params, len(inputs)) + else: + self._add_uses(params, 1) def register_conv2d_multi(self, params, @@ -1023,7 +1028,7 @@ class LayerCollection(object): outputs: A list of Tensors, each of shape [batch_size, height, width, out_channels]. Output produced by layer. The list indexes each use in the graph (which might correspond to a "time-step" in an RNN). - Needs to correspond with the order used in 'inputs'. OR, can be a + Needs to correspond with the order used in `inputs`. OR, can be a single Tensor, of shape [num_uses * batch_size, height, width, out_channels], which is a reshaped version of a Tensor of shape [num_uses, batch_size, height, width, out_channels]. @@ -1035,17 +1040,17 @@ class LayerCollection(object): approx: str or None. If not None must by "kron_indep". The Fisher approximation to use. If None the default value is used. (Default: None) - reuse: bool or str. If True, this adds inputs and outputs as an + reuse: bool or str. If True, this adds `inputs` and `outputs` as an additional mini-batch/tower of data to use when estimating the Fisher block for this layer (which must have already been registered). If "VARIABLE_SCOPE", use tf.get_variable_scope().reuse. (Note that the - word 'use' here has a completely different meaning to "use in the graph" - as it perturns to the 'inputs', 'outputs', and 'num_uses' arguments.) + word `use` here has a completely different meaning to "use in the graph" + as it perturns to the `inputs`, `outputs`, and `num_uses` arguments.) (Default: "VARIABLE_SCOPE") Raises: - ValueError: For improper value to 'approx'. 
- KeyError: If reuse == True but no FisherBlock found for 'params'. + ValueError: For improper value to `approx`. + KeyError: If reuse == True but no FisherBlock found for `params`. ValueError: If reuse == True and FisherBlock found but of the wrong type. """ block_type, approx = self._get_block_type( @@ -1066,9 +1071,11 @@ class LayerCollection(object): reuse=reuse) block.register_additional_tower(inputs, outputs) - - assert len(inputs) == len(outputs) - self._add_uses(params, len(inputs)) + if isinstance(inputs, (tuple, list)): + assert len(inputs) == len(outputs) + self._add_uses(params, len(inputs)) + else: + self._add_uses(params, 1) # TODO(b/74108452): change the loss registration functions names to refer # to "loss functions" instead of distributions. Following naming convention @@ -1088,13 +1095,13 @@ class LayerCollection(object): inputs: A list of Tensors, each of shape [batch_size, input_size] and dtype int32. Indices into embedding matrix. The list indexes each use in the graph (which might correspond to a "time-step" in an RNN). - OR, can be single Tensor, of shape [num_uses, batch_size, input_size], + OR, can be single Tensor, of shape [num_uses*batch_size, input_size], which is a reshaped version of a Tensor of shape [num_uses, batch_size, input_size]. outputs: A list of Tensors, each of shape [batch_size, embedding_size]. Outputs produced by layer. The list indexes each use in the graph (which might correspond to a "time-step" in an RNN). Needs to - correspond with the order used in 'inputs'. OR, can be a + correspond with the order used in `inputs`. OR, can be a single Tensor, of shape [num_uses * batch_size, embedding_size], which is a reshaped version of a Tensor of shape [num_uses, batch_size, embedding_size]. @@ -1104,17 +1111,17 @@ class LayerCollection(object): approx: str or None. If not None must by "kron_indep". The Fisher approximation to use. If None the default value is used. (Default: None) - reuse: bool or str. 
If True, this adds inputs and outputs as an + reuse: bool or str. If True, this adds `inputs` and `outputs` as an additional mini-batch/tower of data to use when estimating the Fisher block for this layer (which must have already been registered). If "VARIABLE_SCOPE", use tf.get_variable_scope().reuse. (Note that the - word 'use' here has a completely different meaning to "use in the graph" - as it perturns to the 'inputs', 'outputs', and 'num_uses' arguments.) + word `use` here has a completely different meaning to "use in the graph" + as it perturns to the `inputs`, `outputs`, and `num_uses` arguments.) (Default: "VARIABLE_SCOPE") Raises: - ValueError: For improper value to 'approx'. - KeyError: If reuse == True but no FisherBlock found for 'params'. + ValueError: For improper value to `approx`. + KeyError: If reuse == True but no FisherBlock found for `params`. ValueError: If reuse == True and FisherBlock found but of the wrong type. """ block_type, approx = self._get_block_type( @@ -1129,7 +1136,10 @@ class LayerCollection(object): params, block_type(self, vocab_size, num_uses=num_uses), reuse=reuse) block.register_additional_tower(inputs, outputs) - self._add_uses(params, len(inputs)) + if isinstance(inputs, (tuple, list)): + self._add_uses(params, len(inputs)) + else: + self._add_uses(params, 1) def register_categorical_predictive_distribution(self, logits, @@ -1149,7 +1159,7 @@ class LayerCollection(object): (Default: None) name: (OPTIONAL) str or None. Unique name for this loss function. If None, a new name is generated. (Default: None) - reuse: bool or str. If True, this adds 'logits' as an additional + reuse: bool or str. If True, this adds `logits` as an additional mini-batch/tower of inputs to the loss-function/predictive distribution (which must have already been registered). If "VARIABLE_SCOPE", use tf.get_variable_scope().reuse. 
(Default: "VARIABLE_SCOPE") @@ -1183,7 +1193,7 @@ class LayerCollection(object): (Default: None) name: (OPTIONAL) str or None. Unique name for this loss function. If None, a new name is generated. (Default: None) - reuse: bool or str. If True, this adds 'mean' and 'var' as an additional + reuse: bool or str. If True, this adds `mean` and `var` as an additional mini-batch/tower of inputs to the loss-function/predictive distribution (which must have already been registered). If "VARIABLE_SCOPE", use tf.get_variable_scope().reuse. (Default: "VARIABLE_SCOPE") @@ -1212,7 +1222,7 @@ class LayerCollection(object): (Default: None) name: (OPTIONAL) str or None. Unique name for this loss function. If None, a new name is generated. (Default: None) - reuse: bool or str. If True, this adds 'logits' as an additional + reuse: bool or str. If True, this adds `logits` as an additional mini-batch/tower of inputs to the loss-function/predictive distribution (which must have already been registered). If "VARIABLE_SCOPE", use tf.get_variable_scope().reuse. (Default: "VARIABLE_SCOPE") @@ -1224,18 +1234,18 @@ class LayerCollection(object): name=name, reuse=reuse) def make_or_get_factor(self, cls, args): - """Insert 'cls(args)' into 'self.fisher_factors' if not already present. + """Insert `cls(args)` into 'self.fisher_factors` if not already present. - Wraps constructor in 'tf.variable_scope()' to ensure variables constructed - in 'cls.__init__' are placed under this LayerCollection's scope. + Wraps constructor in `tf.variable_scope()` to ensure variables constructed + in `cls.__init__` are placed under this LayerCollection's scope. Args: cls: Class that implements FisherFactor. - args: Tuple of arguments to pass into 'cls's constructor. Must be + args: Tuple of arguments to pass into `cls's constructor. Must be hashable. Returns: - Instance of 'cls' found in self.fisher_factors. + Instance of `cls` found in self.fisher_factors. 
""" try: hash(args) diff --git a/tensorflow/contrib/kfac/python/ops/loss_functions.py b/tensorflow/contrib/kfac/python/ops/loss_functions.py index e7d4243fc3d1c2d860693f2f62447b1c9aeeee03..42d525c2c21f5ba3457cba041261dc3b225dc11e 100644 --- a/tensorflow/contrib/kfac/python/ops/loss_functions.py +++ b/tensorflow/contrib/kfac/python/ops/loss_functions.py @@ -613,19 +613,19 @@ class CategoricalLogitsNegativeLogProbLoss(DistributionNegativeLogProbLoss, def multiply_fisher(self, vector): probs = self._probs return vector * probs - probs * math_ops.reduce_sum( - vector * probs, axis=-1, keep_dims=True) + vector * probs, axis=-1, keepdims=True) def multiply_fisher_factor(self, vector): probs = self._probs sqrt_probs = self._sqrt_probs return sqrt_probs * vector - probs * math_ops.reduce_sum( - sqrt_probs * vector, axis=-1, keep_dims=True) + sqrt_probs * vector, axis=-1, keepdims=True) def multiply_fisher_factor_transpose(self, vector): probs = self._probs sqrt_probs = self._sqrt_probs return sqrt_probs * vector - sqrt_probs * math_ops.reduce_sum( - probs * vector, axis=-1, keep_dims=True) + probs * vector, axis=-1, keepdims=True) def multiply_fisher_factor_replicated_one_hot(self, index): assert len(index) == 1, "Length of index was {}".format(len(index)) diff --git a/tensorflow/contrib/kfac/python/ops/loss_functions_lib.py b/tensorflow/contrib/kfac/python/ops/loss_functions_lib.py index 705a871d482565897e7ac850327729a6186f1746..4279cb2792854249e3e076d200e2656bc615779d 100644 --- a/tensorflow/contrib/kfac/python/ops/loss_functions_lib.py +++ b/tensorflow/contrib/kfac/python/ops/loss_functions_lib.py @@ -33,7 +33,6 @@ _allowed_symbols = [ "CategoricalLogitsNegativeLogProbLoss", "OnehotCategoricalLogitsNegativeLogProbLoss", "MultiBernoulliNegativeLogProbLoss", - "MultiBernoulliNegativeLogProbLoss", "insert_slice_in_zeros", ] diff --git a/tensorflow/contrib/kfac/python/ops/optimizer.py b/tensorflow/contrib/kfac/python/ops/optimizer.py index 
843aeef7d82df064b757ab4618f2b0ccbbec4cbe..f01c5a832212f88d80529672b652ca04d45c0f0e 100644 --- a/tensorflow/contrib/kfac/python/ops/optimizer.py +++ b/tensorflow/contrib/kfac/python/ops/optimizer.py @@ -108,13 +108,8 @@ class KfacOptimizer(gradient_descent.GradientDescentOptimizer): ValueError: If momentum is non-zero and momentum_type is not 'regular' or 'adam'. """ - - variables = var_list - if variables is None: - variables = tf_variables.trainable_variables() - # Parameters to be passed to the Fisher estimator: - self._variables = variables + self._variables = var_list or tf_variables.trainable_variables self._cov_ema_decay = cov_ema_decay self._layers = layer_collection self._estimation_mode = estimation_mode @@ -235,7 +230,7 @@ class KfacOptimizer(gradient_descent.GradientDescentOptimizer): @property def variables(self): - return self._variables + return self._fisher_est.variables @property def damping(self): @@ -373,6 +368,7 @@ class KfacOptimizer(gradient_descent.GradientDescentOptimizer): else: kwargs["var_list"] = kwargs.get("var_list") or self.variables var_list = kwargs["var_list"] + if set(var_list) != set(self.variables): raise ValueError("var_list doesn't match with set of Fisher-estimating " "variables.") diff --git a/tensorflow/contrib/labeled_tensor/BUILD b/tensorflow/contrib/labeled_tensor/BUILD index 18b265ae80c13e470ad8d8adf8ea8b63d0fd0cde..c8812d4b23f94102d093db878a709b090a3318d6 100644 --- a/tensorflow/contrib/labeled_tensor/BUILD +++ b/tensorflow/contrib/labeled_tensor/BUILD @@ -70,6 +70,7 @@ py_test( "python/ops/core_test.py", ], srcs_version = "PY2AND3", + tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":_typecheck", ":core", diff --git a/tensorflow/contrib/labeled_tensor/python/ops/ops_test.py b/tensorflow/contrib/labeled_tensor/python/ops/ops_test.py index 0727f4cf88728dc3d919e662d65c93a658ac730b..39e9d65407f3b1e79804317023ea03dd81484ff5 100644 --- a/tensorflow/contrib/labeled_tensor/python/ops/ops_test.py +++ 
b/tensorflow/contrib/labeled_tensor/python/ops/ops_test.py @@ -660,7 +660,7 @@ class ReduceSumTest(Base): sum_lt = ops.reduce_sum(self.original_lt, {('channel', 'hihowareyou')}) golden_lt = core.LabeledTensor( math_ops.reduce_sum( - self.original_lt.tensor, 1, keep_dims=True), + self.original_lt.tensor, 1, keepdims=True), [self.a0, ('channel', ['hihowareyou']), self.a2, self.a3]) self.assertLabeledTensorsEqual(sum_lt, golden_lt) @@ -668,7 +668,7 @@ class ReduceSumTest(Base): sum_lt = ops.reduce_sum(self.original_lt, ('channel', 'hihowareyou')) golden_lt = core.LabeledTensor( math_ops.reduce_sum( - self.original_lt.tensor, 1, keep_dims=True), + self.original_lt.tensor, 1, keepdims=True), [self.a0, ('channel', ['hihowareyou']), self.a2, self.a3]) self.assertLabeledTensorsEqual(sum_lt, golden_lt) diff --git a/tensorflow/contrib/layers/BUILD b/tensorflow/contrib/layers/BUILD index 4be55468db235fb3e24bcf2d1f38ca62333c592f..d5b3b279a1b7327602790c0260349cb0c758aa86 100644 --- a/tensorflow/contrib/layers/BUILD +++ b/tensorflow/contrib/layers/BUILD @@ -188,6 +188,7 @@ py_test( size = "small", srcs = ["python/layers/normalization_test.py"], srcs_version = "PY2AND3", + tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":layers_py", "//tensorflow/contrib/framework:framework_py", @@ -353,6 +354,7 @@ py_test( size = "small", srcs = ["python/ops/sparse_ops_test.py"], srcs_version = "PY2AND3", + tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":layers_py", "//tensorflow/python:array_ops", diff --git a/tensorflow/contrib/layers/__init__.py b/tensorflow/contrib/layers/__init__.py index 337c9e06b870b2cca53fcdbf3d94225660e193c4..00f03a111ae8be7f49761ef5fb5a82810bcca182 100644 --- a/tensorflow/contrib/layers/__init__.py +++ b/tensorflow/contrib/layers/__init__.py @@ -104,6 +104,7 @@ See the @{$python/contrib.layers} guide. 
@@infer_real_valued_columns @@sequence_input_from_feature_columns +@@group_norm @@instance_norm """ @@ -122,6 +123,7 @@ _allowed_symbols = ['bias_add', 'conv3d', 'elu', 'feature_column', + 'group_norm', 'instance_norm', 'legacy_fully_connected', 'legacy_linear', diff --git a/tensorflow/contrib/layers/python/kernel_tests/sparse_feature_cross_op_test.py b/tensorflow/contrib/layers/python/kernel_tests/sparse_feature_cross_op_test.py index f701647c2b297015f025eb53bd191a1a8c54ec62..28ddaa69a14776e0c157c2e68105ee9e17bc3cbb 100644 --- a/tensorflow/contrib/layers/python/kernel_tests/sparse_feature_cross_op_test.py +++ b/tensorflow/contrib/layers/python/kernel_tests/sparse_feature_cross_op_test.py @@ -200,7 +200,7 @@ class SparseCrossOpTest(test.TestCase): self._assert_sparse_tensor_equals(expected_out, sess.run(op)) def test_large_batch(self): - """Tests with large batch size to force multithreding. + """Tests with large batch size to force multithreading. """ batch_size = 5000 col1 = [] diff --git a/tensorflow/contrib/layers/python/layers/embedding_ops.py b/tensorflow/contrib/layers/python/layers/embedding_ops.py index ffa208540dae975cb139ad6d76dcf392678ba0ee..49c3faf3b7f5eaa3b1542a1fdddcfaff99737a24 100644 --- a/tensorflow/contrib/layers/python/layers/embedding_ops.py +++ b/tensorflow/contrib/layers/python/layers/embedding_ops.py @@ -140,6 +140,9 @@ def safe_embedding_lookup_sparse(embedding_weights, # Prune invalid ids and weights. sparse_ids, sparse_weights = _prune_invalid_ids(sparse_ids, sparse_weights) + if combiner != "sum": + sparse_ids, sparse_weights = _prune_invalid_weights( + sparse_ids, sparse_weights) # Fill in dummy values for empty features, if necessary. 
sparse_ids, is_row_empty = sparse_ops.sparse_fill_empty_rows(sparse_ids, @@ -188,13 +191,23 @@ def _prune_invalid_ids(sparse_ids, sparse_weights): is_id_valid = math_ops.greater_equal(sparse_ids.values, 0) if sparse_weights is not None: is_id_valid = math_ops.logical_and( - is_id_valid, math_ops.greater(sparse_weights.values, 0)) + is_id_valid, + array_ops.ones_like(sparse_weights.values, dtype=dtypes.bool)) sparse_ids = sparse_ops.sparse_retain(sparse_ids, is_id_valid) if sparse_weights is not None: sparse_weights = sparse_ops.sparse_retain(sparse_weights, is_id_valid) return sparse_ids, sparse_weights +def _prune_invalid_weights(sparse_ids, sparse_weights): + """Prune invalid weights (< 0) from the input ids and weights.""" + if sparse_weights is not None: + is_weights_valid = math_ops.greater(sparse_weights.values, 0) + sparse_ids = sparse_ops.sparse_retain(sparse_ids, is_weights_valid) + sparse_weights = sparse_ops.sparse_retain(sparse_weights, is_weights_valid) + return sparse_ids, sparse_weights + + def scattered_embedding_lookup(params, values, dimension, diff --git a/tensorflow/contrib/layers/python/layers/feature_column.py b/tensorflow/contrib/layers/python/layers/feature_column.py index 9ccb589d698ad83c9654f5523ccdcb35b031b3da..3ae07cedab0be2da8ec633cfd84e07cfdfb11457 100644 --- a/tensorflow/contrib/layers/python/layers/feature_column.py +++ b/tensorflow/contrib/layers/python/layers/feature_column.py @@ -48,7 +48,7 @@ you should choose depends on (1) the feature type and (2) the model type. recommended. embedded_dept_column = embedding_column( - sparse_column_with_keys("department", ["math", "philosphy", ...]), + sparse_column_with_keys("department", ["math", "philosophy", ...]), dimension=10) * Wide (aka linear) models (`LinearClassifier`, `LinearRegressor`). 
diff --git a/tensorflow/contrib/layers/python/layers/feature_column_ops.py b/tensorflow/contrib/layers/python/layers/feature_column_ops.py index 78affea44cbfb92523063968dbc1be98841854db..06060b99e7e58787994f20f037ffa451abbc7459 100644 --- a/tensorflow/contrib/layers/python/layers/feature_column_ops.py +++ b/tensorflow/contrib/layers/python/layers/feature_column_ops.py @@ -815,7 +815,7 @@ class _Transformer(object): """ def __init__(self, columns_to_tensors): - """Initializes transfomer. + """Initializes transformer. Args: columns_to_tensors: A mapping from feature columns to tensors. 'string' @@ -908,7 +908,7 @@ def _gather_feature_columns(feature_columns): def _check_forbidden_sequence_columns(feature_columns): - """Recursively cecks `feature_columns` for `_FORBIDDEN_SEQUENCE_COLUMNS`.""" + """Recursively checks `feature_columns` for `_FORBIDDEN_SEQUENCE_COLUMNS`.""" all_feature_columns = _gather_feature_columns(feature_columns) for feature_column in all_feature_columns: if isinstance(feature_column, _FORBIDDEN_SEQUENCE_COLUMNS): diff --git a/tensorflow/contrib/layers/python/layers/layers.py b/tensorflow/contrib/layers/python/layers/layers.py index 10d7f6d076b4b4c6578d7adcffc4e9cc44d77ac6..2f3e57653c5d6d949c4dcc91635690322b7f90c4 100644 --- a/tensorflow/contrib/layers/python/layers/layers.py +++ b/tensorflow/contrib/layers/python/layers/layers.py @@ -932,7 +932,8 @@ def convolution(inputs, variables_collections=None, outputs_collections=None, trainable=True, - scope=None): + scope=None, + conv_dims=None): """Adds an N-D convolution followed by an optional batch_norm layer. It is required that 1 <= N <= 3. @@ -993,6 +994,10 @@ def convolution(inputs, trainable: If `True` also add variables to the graph collection `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable). scope: Optional scope for `variable_scope`. + conv_dims: Optional convolution dimensionality, when set it would use the + corresponding convolution (e.g. 2 for Conv 2D, 3 for Conv 3D, ..). 
When + leaved to None it would select the convolution dimensionality based on + the input rank (i.e. Conv ND, with N = input_rank - 2). Returns: A tensor representing the output of the operation. @@ -1015,6 +1020,9 @@ def convolution(inputs, inputs = ops.convert_to_tensor(inputs) input_rank = inputs.get_shape().ndims + if conv_dims is not None and conv_dims + 2 != input_rank: + raise ValueError('Convolution expects input with rank %d, got %d' % + (conv_dims + 2, input_rank)) if input_rank == 3: layer_class = convolutional_layers.Convolution1D elif input_rank == 4: @@ -1061,10 +1069,134 @@ def convolution(inputs, outputs = activation_fn(outputs) return utils.collect_named_outputs(outputs_collections, sc.name, outputs) +@add_arg_scope +def convolution1d(inputs, + num_outputs, + kernel_size, + stride=1, + padding='SAME', + data_format=None, + rate=1, + activation_fn=nn.relu, + normalizer_fn=None, + normalizer_params=None, + weights_initializer=initializers.xavier_initializer(), + weights_regularizer=None, + biases_initializer=init_ops.zeros_initializer(), + biases_regularizer=None, + reuse=None, + variables_collections=None, + outputs_collections=None, + trainable=True, + scope=None): + return convolution(inputs, + num_outputs, + kernel_size, + stride, + padding, + data_format, + rate, + activation_fn, + normalizer_fn, + normalizer_params, + weights_initializer, + weights_regularizer, + biases_initializer, + biases_regularizer, + reuse, + variables_collections, + outputs_collections, + trainable, + scope, + conv_dims=1) + +convolution1d.__doc__ = convolution.__doc__ -convolution2d = convolution -convolution3d = convolution +@add_arg_scope +def convolution2d(inputs, + num_outputs, + kernel_size, + stride=1, + padding='SAME', + data_format=None, + rate=1, + activation_fn=nn.relu, + normalizer_fn=None, + normalizer_params=None, + weights_initializer=initializers.xavier_initializer(), + weights_regularizer=None, + biases_initializer=init_ops.zeros_initializer(), + 
biases_regularizer=None, + reuse=None, + variables_collections=None, + outputs_collections=None, + trainable=True, + scope=None): + return convolution(inputs, + num_outputs, + kernel_size, + stride, + padding, + data_format, + rate, + activation_fn, + normalizer_fn, + normalizer_params, + weights_initializer, + weights_regularizer, + biases_initializer, + biases_regularizer, + reuse, + variables_collections, + outputs_collections, + trainable, + scope, + conv_dims=2) + +convolution2d.__doc__ = convolution.__doc__ +@add_arg_scope +def convolution3d(inputs, + num_outputs, + kernel_size, + stride=1, + padding='SAME', + data_format=None, + rate=1, + activation_fn=nn.relu, + normalizer_fn=None, + normalizer_params=None, + weights_initializer=initializers.xavier_initializer(), + weights_regularizer=None, + biases_initializer=init_ops.zeros_initializer(), + biases_regularizer=None, + reuse=None, + variables_collections=None, + outputs_collections=None, + trainable=True, + scope=None): + return convolution(inputs, + num_outputs, + kernel_size, + stride, + padding, + data_format, + rate, + activation_fn, + normalizer_fn, + normalizer_params, + weights_initializer, + weights_regularizer, + biases_initializer, + biases_regularizer, + reuse, + variables_collections, + outputs_collections, + trainable, + scope, + conv_dims=3) + +convolution3d.__doc__ = convolution.__doc__ @add_arg_scope def convolution2d_in_plane( @@ -1404,13 +1536,14 @@ def convolution3d_transpose( @add_arg_scope def dense_to_sparse(tensor, eos_token=0, outputs_collections=None, scope=None): """Converts a dense tensor into a sparse tensor. + An example use would be to convert dense labels to sparse ones so that they can be fed to the ctc_loss. Args: tensor: An `int` `Tensor` to be converted to a `Sparse`. eos_token: An integer. - It is part of the target label that signfies the end of a sentence. + It is part of the target label that signifies the end of a sentence. 
outputs_collections: Collection to add the outputs. scope: Optional scope for name_scope. """ @@ -1554,7 +1687,7 @@ def _inner_flatten(inputs, new_rank, output_collections=None, scope=None): output_collections: Collection to which the outputs will be added. scope: Optional scope for `name_scope`. Returns: - A `Tensor` or `SparseTensor` conataining the same values as `inputs`, but + A `Tensor` or `SparseTensor` containing the same values as `inputs`, but with innermost dimensions flattened to obtain rank `new_rank`. Raises: @@ -2191,11 +2324,16 @@ def images_to_sequence(inputs, outputs_collections=None, scope=None): """Convert a batch of images into a batch of sequences. + Args: inputs: a (num_images, height, width, depth) tensor data_format: A string. `NHWC` (default) and `NCHW` are supported. outputs_collections: The collections to which the outputs are added. scope: Optional scope for name_scope. + + Raises: + ValueError: If `data_format` is not either NCHW or NHWC. + Returns: (width, num_images*height, depth) sequence tensor """ @@ -2701,6 +2839,7 @@ def sequence_to_images(inputs, outputs_collections=None, scope=None): """Convert a batch of sequences into a batch of images. + Args: inputs: (num_steps, num_batches, depth) sequence tensor height: the height of the images @@ -2708,6 +2847,7 @@ def sequence_to_images(inputs, Currently supports `'channels_first'` and `'channels_last'`. outputs_collections: The collections to which the outputs are added. scope: Optional scope for name_scope. + Returns: A tensor representing the output of the operation. 
""" @@ -2717,7 +2857,7 @@ def sequence_to_images(inputs, if num_batches is None: num_batches = -1 else: - num_batches = num_batches // height + num_batches //= height reshaped = array_ops.reshape(inputs, [width, num_batches, height, depth]) if output_data_format == 'channels_first': diff --git a/tensorflow/contrib/layers/python/layers/layers_test.py b/tensorflow/contrib/layers/python/layers/layers_test.py index 997f910a2a97567adbd7ffa3e81a31d2ae0bad7e..b01fd5d5c95ac15c76f9dbe7c77f7e76f12149a9 100644 --- a/tensorflow/contrib/layers/python/layers/layers_test.py +++ b/tensorflow/contrib/layers/python/layers/layers_test.py @@ -310,6 +310,17 @@ class BiasAddTest(test.TestCase): class ConvolutionTest(test.TestCase): + def testInvalidShape(self): + with self.test_session(): + images_2d = random_ops.random_uniform((5, 7, 9, 3), seed=1) + with self.assertRaisesRegexp( + ValueError, 'Convolution expects input with rank 5, got 4'): + layers_lib.convolution3d(images_2d, 32, 3) + images_3d = random_ops.random_uniform((5, 6, 7, 9, 3), seed=1) + with self.assertRaisesRegexp( + ValueError, 'Convolution expects input with rank 4, got 5'): + layers_lib.convolution2d(images_3d, 32, 3) + def testInvalidDataFormat(self): height, width = 7, 9 with self.test_session(): @@ -3155,7 +3166,7 @@ class RepeatTests(test.TestCase): with self.test_session(): images = np.random.uniform(size=(5, height, width, 3)).astype(np.float32) output = _layers.repeat(images, 3, layers_lib.conv2d, 32, [3, 3]) - self.assertEqual(output.op.name, 'Repeat/convolution_3/Relu') + self.assertEqual(output.op.name, 'Repeat/convolution2d_3/Relu') self.assertListEqual(output.get_shape().as_list(), [5, 3, 3, 32]) def testRepeatWithScope(self): @@ -3749,7 +3760,7 @@ class StackTests(test.TestCase): layers_lib.convolution2d, [10, 20, 30], kernel_size=[3, 3], padding='SAME') - self.assertEqual(output.op.name, 'Stack/convolution_3/Relu') + self.assertEqual(output.op.name, 'Stack/convolution2d_3/Relu') 
self.assertListEqual(output.get_shape().as_list(), [5, 3, 3, 30]) def testStackWithScope(self): diff --git a/tensorflow/contrib/layers/python/layers/normalization.py b/tensorflow/contrib/layers/python/layers/normalization.py index e7d4080ff769327cc74b6629a7705ddfa552169b..c807ab0f2e5c8ac3ec2ae1d84a5b36b5f4ba76a4 100644 --- a/tensorflow/contrib/layers/python/layers/normalization.py +++ b/tensorflow/contrib/layers/python/layers/normalization.py @@ -24,11 +24,13 @@ from tensorflow.contrib.layers.python.layers import utils from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import init_ops +from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn from tensorflow.python.ops import variable_scope __all__ = [ + 'group_norm', 'instance_norm', ] @@ -158,3 +160,196 @@ def instance_norm(inputs, if activation_fn is not None: outputs = activation_fn(outputs) return utils.collect_named_outputs(outputs_collections, sc.name, outputs) + + +@add_arg_scope +def group_norm(inputs, + groups=32, + channels_axis=-1, + reduction_axes=(-3, -2), + center=True, + scale=True, + epsilon=1e-6, + activation_fn=None, + param_initializers=None, + reuse=None, + variables_collections=None, + outputs_collections=None, + trainable=True, + scope=None): + """Functional interface for the group normalization layer. + + Reference: https://arxiv.org/abs/1803.08494. + + "Group Normalization", Yuxin Wu, Kaiming He + + Args: + inputs: A Tensor with at least 2 dimensions one which is channels. All + shape dimensions must be fully defined. + groups: Integer. Divide the channels into this number of groups over which + normalization statistics are computed. This number must be commensurate + with the number of channels in `inputs`. + channels_axis: An integer. Specifies index of channels axis which will be + broken into `groups`, each of which whose statistics will be computed + across. 
Must be mutually exclusive with `reduction_axes`. Preferred usage + is to specify negative integers to be agnostic as to whether a batch + dimension is included. + reduction_axes: Tuple of integers. Specifies dimensions over which + statistics will be accumulated. Must be mutually exclusive with + `channels_axis`. Statistics will not be accumulated across axes not + specified in `reduction_axes` nor `channel_axis`. Preferred usage is to + specify negative integers to be agnostic to whether a batch dimension is + included. + + Some sample usage cases: + NHWC format: channels_axis=-1, reduction_axes=[-3, -2] + NCHW format: channels_axis=-3, reduction_axes=[-2, -1] + + center: If True, add offset of `beta` to normalized tensor. If False, `beta` + is ignored. + scale: If True, multiply by `gamma`. If False, `gamma` is + not used. When the next layer is linear (also e.g. `nn.relu`), this can be + disabled since the scaling can be done by the next layer. + epsilon: Small float added to variance to avoid dividing by zero. + activation_fn: Activation function, default set to None to skip it and + maintain a linear activation. + param_initializers: Optional initializers for beta, gamma, moving mean and + moving variance. + reuse: Whether or not the layer and its variables should be reused. To be + able to reuse the layer scope must be given. + variables_collections: Optional collections for the variables. + outputs_collections: Collections to add the outputs. + trainable: If `True` also add variables to the graph collection + `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`). + scope: Optional scope for `variable_scope`. + + Returns: + A `Tensor` representing the output of the operation. + + Raises: + ValueError: If the rank of `inputs` is undefined. + ValueError: If rank or channels dimension of `inputs` is undefined. + ValueError: If number of groups is not commensurate with number of channels. + ValueError: If reduction_axes or channels_axis are out of bounds. 
+ ValueError: If reduction_axes are not mutually exclusive with channels_axis. + """ + # TODO(shlens): Support partially defined shapes for the inputs. + inputs = ops.convert_to_tensor(inputs) + original_shape = inputs.shape + + if inputs.shape.ndims is None: + raise ValueError('Inputs %s has undefined rank.' % inputs.name) + if channels_axis > (inputs.shape.ndims - 1): + raise ValueError('Axis is out of bounds.') + + # Standardize the channels_axis to be positive and identify # of channels. + if channels_axis < 0: + channels_axis = inputs.shape.ndims + channels_axis + channels = inputs.shape[channels_axis].value + + if channels is None: + raise ValueError('Inputs %s has undefined channel dimension: %d.' % ( + inputs.name, channels_axis)) + + # Standardize the reduction_axes to be positive. + reduction_axes = list(reduction_axes) + for i in range(len(reduction_axes)): + if reduction_axes[i] < 0: + reduction_axes[i] += inputs.shape.ndims + + for a in reduction_axes: + if a > inputs.shape.ndims: + raise ValueError('Axis is out of bounds.') + if inputs.shape[a].value is None: + raise ValueError('Inputs %s has undefined dimensions %d.' % ( + inputs.name, a)) + if channels_axis == a: + raise ValueError('reduction_axis must be mutually exclusive ' + 'with channels_axis') + if groups > channels: + raise ValueError('Invalid groups %d for %d channels.' % (groups, channels)) + if channels % groups != 0: + raise ValueError('%d channels is not commensurate with %d groups.' % + (channels, groups)) + + # Determine axes before channels. Some examples of common image formats: + # 'NCHW': before = [N], after = [HW] + # 'NHWC': before = [NHW], after = [] + axes_before_channels = inputs.shape.as_list()[:channels_axis] + axes_after_channels = inputs.shape.as_list()[channels_axis+1:] + + # Manually broadcast the parameters to conform to the number of groups. 
+ params_shape_broadcast = ([1] * len(axes_before_channels) + + [groups, channels // groups] + + [1] * len(axes_after_channels)) + + # Reshape the input by the group within the channel dimension. + inputs_shape = (axes_before_channels + [groups, channels // groups] + + axes_after_channels) + inputs = array_ops.reshape(inputs, inputs_shape) + + # Determine the dimensions across which moments are calculated. + moments_axes = [channels_axis + 1] + for a in reduction_axes: + if a > channels_axis: + moments_axes.append(a + 1) + else: + moments_axes.append(a) + + with variable_scope.variable_scope( + scope, 'GroupNorm', [inputs], reuse=reuse) as sc: + # Note that the params_shape is the number of channels always. + params_shape = [channels] + + # Allocate parameters for the beta and gamma of the normalization. + beta, gamma = None, None + dtype = inputs.dtype.base_dtype + if param_initializers is None: + param_initializers = {} + if center: + beta_collections = utils.get_variable_collections( + variables_collections, 'beta') + beta_initializer = param_initializers.get( + 'beta', init_ops.zeros_initializer()) + beta = variables.model_variable('beta', + shape=params_shape, + dtype=dtype, + initializer=beta_initializer, + collections=beta_collections, + trainable=trainable) + beta = array_ops.reshape(beta, params_shape_broadcast) + + if scale: + gamma_collections = utils.get_variable_collections( + variables_collections, 'gamma') + gamma_initializer = param_initializers.get( + 'gamma', init_ops.ones_initializer()) + gamma = variables.model_variable('gamma', + shape=params_shape, + dtype=dtype, + initializer=gamma_initializer, + collections=gamma_collections, + trainable=trainable) + gamma = array_ops.reshape(gamma, params_shape_broadcast) + + # Calculate the moments. + mean, variance = nn.moments(inputs, moments_axes, keep_dims=True) + + # Compute normalization. 
+ # TODO(shlens): Fix nn.batch_normalization to handle the 5-D Tensor + # appropriately so that this operation may be faster. + gain = math_ops.rsqrt(variance + epsilon) + offset = -mean * gain + if gamma is not None: + gain *= gamma + offset *= gamma + if beta is not None: + offset += beta + outputs = inputs * gain + offset + + # Collapse the groups into the channel dimension. + outputs = array_ops.reshape(outputs, original_shape) + + if activation_fn is not None: + outputs = activation_fn(outputs) + return utils.collect_named_outputs(outputs_collections, sc.name, outputs) diff --git a/tensorflow/contrib/layers/python/layers/normalization_test.py b/tensorflow/contrib/layers/python/layers/normalization_test.py index 5cff1bf0ebb2fe8bc6933de882ecd47a9edf0f94..b6e96350db92baf4770683273be7e5dde73dbcec 100644 --- a/tensorflow/contrib/layers/python/layers/normalization_test.py +++ b/tensorflow/contrib/layers/python/layers/normalization_test.py @@ -166,5 +166,231 @@ class InstanceNormTest(test.TestCase): def testOutputBigInput5DNCHW(self): self.doOutputTest((1, 100, 100, 1, 1), 'NCHW', tol=1e-3) + +class GroupNormTest(test.TestCase): + + def testInvalidGroupSize(self): + inputs = array_ops.placeholder(dtypes.float32, shape=(5, 2, 10, 10)) + with self.assertRaisesRegexp(ValueError, + 'Invalid groups 10 for 2 channels.'): + normalization.group_norm(inputs, groups=10, + reduction_axes=[-2, -1], channels_axis=-3) + + def testBadCommensurateGroup(self): + inputs = array_ops.placeholder(dtypes.float32, shape=(5, 4, 10, 10)) + with self.assertRaisesRegexp(ValueError, + '4 channels is not commensurate with ' + '3 groups.'): + normalization.group_norm(inputs, groups=3, + reduction_axes=[-2, -1], channels_axis=-3) + + def testAxisIsBad(self): + inputs = array_ops.placeholder(dtypes.float32, shape=(1, 2, 4, 5)) + with self.assertRaisesRegexp(ValueError, + 'Axis is out of bounds.'): + normalization.group_norm(inputs, channels_axis=5) + with self.assertRaisesRegexp(ValueError, + 'Axis 
is out of bounds.'): + normalization.group_norm(inputs, reduction_axes=[1, 5]) + + def testNotMutuallyExclusiveAxis(self): + inputs = array_ops.placeholder(dtypes.float32, shape=(10, 32, 32, 32)) + # Specify axis with negative values. + with self.assertRaisesRegexp(ValueError, 'mutually exclusive'): + normalization.group_norm(inputs, channels_axis=-2, reduction_axes=[-2]) + # Specify axis with positive values. + with self.assertRaisesRegexp(ValueError, 'mutually exclusive'): + normalization.group_norm(inputs, channels_axis=1, reduction_axes=[1, 3]) + # Specify axis with mixed positive and negative values. + with self.assertRaisesRegexp(ValueError, 'mutually exclusive'): + normalization.group_norm(inputs, channels_axis=-2, reduction_axes=[2]) + + def testUnknownShape(self): + inputs = array_ops.placeholder(dtypes.float32) + with self.assertRaisesRegexp(ValueError, 'undefined rank'): + normalization.group_norm(inputs) + + def testParamsShapeNotFullyDefinedReductionAxes(self): + inputs = array_ops.placeholder(dtypes.float32, shape=(1, 32, None, 4)) + with self.assertRaisesRegexp(ValueError, 'undefined dimensions'): + normalization.group_norm(inputs) + + def testParamsShapeNotFullyDefinedChannelsAxis(self): + inputs = array_ops.placeholder(dtypes.float32, shape=(1, 3, 4, None)) + with self.assertRaisesRegexp(ValueError, 'undefined channel dimension'): + normalization.group_norm(inputs, channels_axis=-1, + reduction_axes=[-3, -2]) + + def testCreateOp(self): + height, width, groups = 3, 3, 4 + images = random_ops.random_uniform((5, height, width, 2*groups), seed=1) + output = normalization.group_norm(images, groups=groups, channels_axis=-1, + reduction_axes=[-3, -2]) + print('name: ', output.op.name) + self.assertListEqual([5, height, width, 2*groups], output.shape.as_list()) + + def testCreateOpFloat64(self): + height, width, groups = 3, 3, 5 + images = random_ops.random_uniform( + (5, height, width, 4*groups), dtype=dtypes.float64, seed=1) + output = 
normalization.group_norm(images, groups=groups) + self.assertEqual(dtypes.float64, output.dtype) + self.assertListEqual([5, height, width, 4*groups], output.shape.as_list()) + + def testCreateOpNoScaleCenter(self): + height, width, groups = 3, 3, 7 + images = random_ops.random_uniform( + (5, height, width, 3*groups), dtype=dtypes.float32, seed=1) + output = normalization.group_norm(images, groups=groups, center=False, + scale=False) + self.assertListEqual([5, height, width, 3*groups], output.shape.as_list()) + self.assertEqual(0, len(contrib_variables.get_variables_by_name('beta'))) + self.assertEqual(0, len(contrib_variables.get_variables_by_name('gamma'))) + + def testCreateVariables_NHWC(self): + height, width = 3, 3 + images = random_ops.random_uniform((5, height, width, 8), seed=1) + normalization.group_norm(images, groups=4, + channels_axis=-1, reduction_axes=(-3, -2), + center=True, scale=True) + beta = contrib_variables.get_variables_by_name('beta')[0] + gamma = contrib_variables.get_variables_by_name('gamma')[0] + self.assertEqual('GroupNorm/beta', beta.op.name) + self.assertEqual('GroupNorm/gamma', gamma.op.name) + + def testCreateVariables_NCHW(self): + height, width, groups = 3, 3, 4 + images = random_ops.random_uniform((5, 2*groups, height, width), seed=1) + normalization.group_norm(images, groups=4, + channels_axis=-3, reduction_axes=(-2, -1), + center=True, scale=True) + beta = contrib_variables.get_variables_by_name('beta')[0] + gamma = contrib_variables.get_variables_by_name('gamma')[0] + self.assertEqual('GroupNorm/beta', beta.op.name) + self.assertEqual('GroupNorm/gamma', gamma.op.name) + + def testReuseVariables(self): + height, width = 3, 3 + images = random_ops.random_uniform((5, height, width, 4), seed=1) + normalization.group_norm(images, groups=2, scale=True, scope='IN') + normalization.group_norm(images, groups=2, scale=True, scope='IN', + reuse=True) + beta = contrib_variables.get_variables_by_name('beta') + gamma = 
contrib_variables.get_variables_by_name('gamma') + self.assertEqual(1, len(beta)) + self.assertEqual(1, len(gamma)) + + def testValueCorrectWithReuseVars(self): + height, width = 3, 3 + image_shape = (10, height, width, 4) + images = random_ops.random_uniform(image_shape, seed=1) + output_train = normalization.group_norm(images, groups=2, scope='IN') + output_eval = normalization.group_norm(images, groups=2, scope='IN', + reuse=True) + with self.test_session() as sess: + sess.run(variables.global_variables_initializer()) + # output_train and output_eval should be the same. + train_np, eval_np = sess.run([output_train, output_eval]) + self.assertAllClose(train_np, eval_np) + + def doOutputTest(self, input_shape, channels_axis=None, reduction_axes=None, + groups=2, tol=1e-2): + # Select the axis for the channel and the dimensions along which statistics + # are accumulated. + if channels_axis < 0: + channels_axis += len(input_shape) + reduced_axes = [channels_axis + 1] + for a in reduction_axes: + if a < 0: + a += len(input_shape) + if a < channels_axis: + reduced_axes.append(a) + else: + reduced_axes.append(a+1) + reduced_axes = tuple(reduced_axes) + + # Calculate the final shape for the output Tensor. + axes_before_channels = input_shape[:channels_axis] + axes_after_channels = input_shape[channels_axis+1:] + channels = input_shape[channels_axis] + outputs_shape = (axes_before_channels + [groups, channels // groups] + + axes_after_channels) + + # Calculate the final shape for the output statistics. + reduced_shape = [] + for i, a in enumerate(outputs_shape): + if i not in reduced_axes: + reduced_shape.append(a) + + for mu in (0.0, 1e2): + for sigma in (1.0, 0.1): + # Determine shape of Tensor after normalization. 
+ expected_mean = np.zeros(reduced_shape) + expected_var = np.ones(reduced_shape) + + inputs = random_ops.random_uniform(input_shape, seed=0) * sigma + mu + output_op = normalization.group_norm( + inputs, groups=groups, center=False, scale=False, + channels_axis=channels_axis, + reduction_axes=reduction_axes) + with self.test_session() as sess: + sess.run(variables.global_variables_initializer()) + outputs = sess.run(output_op) + # Make sure that there are no NaNs + self.assertFalse(np.isnan(outputs).any()) + + outputs = np.reshape(outputs, outputs_shape) + mean = np.mean(outputs, axis=reduced_axes) + var = np.var(outputs, axis=reduced_axes) + # The mean and variance of each example should be close to 0 and 1 + # respectively. + self.assertAllClose(expected_mean, mean, rtol=tol, atol=tol) + self.assertAllClose(expected_var, var, rtol=tol, atol=tol) + + def testOutputSmallInput4D_NHWC(self): + input_shape = [10, 10, 10, 30] + # Specify axes with positive values. + self.doOutputTest(input_shape, channels_axis=3, reduction_axes=[1, 2]) + # Specify axes with negative values. + self.doOutputTest(input_shape, channels_axis=-1, reduction_axes=[-3, -2]) + + def testOutputSmallInput3D_NHWC(self): + input_shape = [10, 10, 30] + # Specify axes with positive values. + self.doOutputTest(input_shape, channels_axis=2, reduction_axes=[0, 1]) + # Specify axes with negative values. + self.doOutputTest(input_shape, channels_axis=-1, reduction_axes=[-3, -2]) + + def testOutputSmallInput4D_NCHW(self): + input_shape = [10, 10, 10, 30] + # Specify axes with positive values. + self.doOutputTest(input_shape, channels_axis=1, reduction_axes=[2, 3]) + # Specify axes with negative values. + self.doOutputTest(input_shape, channels_axis=-3, reduction_axes=[-2, -1]) + + def testOutputSmallInput3D_NCHW(self): + input_shape = [10, 10, 30] + # Specify axes with positive values. + self.doOutputTest(input_shape, channels_axis=0, reduction_axes=[1, 2]) + # Specify axes with negative values. 
+ self.doOutputTest(input_shape, channels_axis=-3, reduction_axes=[-2, -1]) + + def testOutputBigInput4D_NHWC(self): + self.doOutputTest([5, 100, 100, 1], channels_axis=3, reduction_axes=[1, 2], + groups=1) + + def testOutputBigInput4D_NCHW(self): + self.doOutputTest([1, 100, 100, 4], channels_axis=1, reduction_axes=[2, 3], + groups=4) + + def testOutputSmallInput2D_NC(self): + self.doOutputTest([10, 7*100], channels_axis=1, reduction_axes=[], groups=7) + + def testOutputSmallInput5D_NCXXX(self): + self.doOutputTest([10, 10, 20, 40, 5], + channels_axis=1, + reduction_axes=[2, 3, 4], + groups=5) + if __name__ == '__main__': test.main() diff --git a/tensorflow/contrib/layers/python/layers/rev_block_lib.py b/tensorflow/contrib/layers/python/layers/rev_block_lib.py index e49589ddf627aa456496cebb2d0fc72fcdad710f..02d294c68f1e10108d774c5fe23b6371a7a9f0e6 100644 --- a/tensorflow/contrib/layers/python/layers/rev_block_lib.py +++ b/tensorflow/contrib/layers/python/layers/rev_block_lib.py @@ -247,9 +247,7 @@ class RevBlock(base.Layer): f_vars_idxs = [[] for _ in range(self.num_layers)] g_vars_idxs = [[] for _ in range(self.num_layers)] - for i, t in enumerate(variables): - ref = _underlying_variable_ref(t) - + for i, ref in enumerate(variables): # Use the name to identify the layer number and function (f or g) regex = LAYER_RE.match(ref.name) layer_no = int(regex.group(1)) @@ -604,6 +602,7 @@ def _fn_with_custom_grad_internal(fn, inputs, grad_fn, use_global_vars=False): """Custom grad fn applying grad_fn for identity Defun.""" fn_inputs, fn_vars, fn_outputs = nest.pack_sequence_as( defun_inputs, list(op.inputs)) + fn_vars = [_underlying_variable_ref(v) for v in fn_vars] dys = list(dys) assert len(fn_outputs) == len(outputs) assert len(fn_outputs) == len(dys) diff --git a/tensorflow/contrib/layers/python/layers/rev_block_lib_test.py b/tensorflow/contrib/layers/python/layers/rev_block_lib_test.py index 
d1ad4e8c98de3e5c5ac212d55cc93707ba9c01cc..8c118402a4c85d4b0504754fcd0436ce8b00862d 100644 --- a/tensorflow/contrib/layers/python/layers/rev_block_lib_test.py +++ b/tensorflow/contrib/layers/python/layers/rev_block_lib_test.py @@ -60,8 +60,8 @@ class RevBlockTest(test.TestCase): sess.run(variables.global_variables_initializer()) x1, x2, x1_inv, x2_inv = sess.run([x1, x2, x1_inv, x2_inv]) - self.assertAllClose(x1, x1_inv) - self.assertAllClose(x2, x2_inv) + self.assertAllClose(x1, x1_inv, atol=1e-5) + self.assertAllClose(x2, x2_inv, atol=1e-5) def testBackwardForward(self): @@ -304,6 +304,20 @@ class RecomputeTest(test.TestCase): self.assertAllClose(current, g) current = g + def testResourceVariable(self): + @rev_block_lib.recompute_grad(tupleize_grads=True) + def layer_with_recompute(inputs): + var = variable_scope.get_variable("var", ()) + return var * inputs + + inputs = array_ops.ones((), dtypes.float32) + with variable_scope.variable_scope("layer", use_resource=True): + outputs = layer_with_recompute(inputs) + loss = math_ops.square(outputs) + grads = gradients_impl.gradients(loss, variables.trainable_variables()) + self.assertEqual(1, len(grads)) + self.assertTrue(grads[0] is not None) + class FnWithCustomGradTest(test.TestCase): diff --git a/tensorflow/contrib/layers/python/layers/target_column.py b/tensorflow/contrib/layers/python/layers/target_column.py index 3e639a180ef11af5f7f498c647eb25417f918eb9..69bb6be81453f5f5487f25547f017dc5f87c2f2c 100644 --- a/tensorflow/contrib/layers/python/layers/target_column.py +++ b/tensorflow/contrib/layers/python/layers/target_column.py @@ -270,7 +270,7 @@ class _RegressionTargetColumn(_TargetColumn): def logits_to_predictions(self, logits, proba=False): if self.num_label_columns == 1: - return array_ops.squeeze(logits, squeeze_dims=[1]) + return array_ops.squeeze(logits, axis=[1]) return logits def get_eval_ops(self, features, logits, labels, metrics=None): @@ -418,7 +418,7 @@ def _softmax_cross_entropy_loss(logits, 
target): "Instead got %s." % target.dtype) # sparse_softmax_cross_entropy_with_logits requires [batch_size] target. if len(target.get_shape()) == 2: - target = array_ops.squeeze(target, squeeze_dims=[1]) + target = array_ops.squeeze(target, axis=[1]) loss_vec = nn.sparse_softmax_cross_entropy_with_logits( labels=target, logits=logits) return loss_vec diff --git a/tensorflow/contrib/layers/python/layers/utils_test.py b/tensorflow/contrib/layers/python/layers/utils_test.py index 3409860add8f8c393ffd342633e7023931867dd9..645dc1291eb6370a5e504306fc00a5454dde77ed 100644 --- a/tensorflow/contrib/layers/python/layers/utils_test.py +++ b/tensorflow/contrib/layers/python/layers/utils_test.py @@ -294,7 +294,6 @@ class NPositiveIntegersTest(test.TestCase): self.assertEqual(utils.n_positive_integers(2, 2), (2, 2)) self.assertEqual(utils.n_positive_integers(2, (2, 3)), (2, 3)) self.assertEqual(utils.n_positive_integers(3, (2, 3, 1)), (2, 3, 1)) - self.assertEqual(utils.n_positive_integers(3, (2, 3, 1)), (2, 3, 1)) self.assertEqual( utils.n_positive_integers(3, tensor_shape.TensorShape([2, 3, 1])), (2, 3, 1)) diff --git a/tensorflow/contrib/learn/BUILD b/tensorflow/contrib/learn/BUILD index ba55365c14c46ae5d60955bb5f274b24a28bc958..3b053cd4c66952cf6c494186b16c17f38801bcaf 100644 --- a/tensorflow/contrib/learn/BUILD +++ b/tensorflow/contrib/learn/BUILD @@ -117,6 +117,7 @@ py_test( size = "small", srcs = ["python/learn/learn_io/data_feeder_test.py"], srcs_version = "PY2AND3", + tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":learn", "//tensorflow/python:client_testlib", @@ -172,6 +173,7 @@ tf_py_test( "//tensorflow/python:variables", "//tensorflow/python/estimator", ], + tags = ["no_windows"], # TODO: needs investigation on Windows ) py_test( @@ -190,6 +192,7 @@ py_test( size = "small", srcs = ["python/learn/graph_actions_test.py"], srcs_version = "PY2AND3", + tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":learn", 
"//tensorflow/contrib/framework:framework_py", @@ -278,7 +281,10 @@ py_test( size = "medium", srcs = ["python/learn/estimators/estimator_test.py"], srcs_version = "PY2AND3", - tags = ["manual"], + tags = [ + "manual", + "noasan", # times out + ], deps = [ ":learn", "//tensorflow/contrib/framework:framework_py", @@ -591,6 +597,7 @@ py_test( size = "small", srcs = ["python/learn/learn_io/io_test.py"], srcs_version = "PY2AND3", + tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":learn", "//tensorflow/contrib/learn/python/learn/datasets", @@ -820,6 +827,7 @@ py_test( size = "small", srcs = ["python/learn/utils/saved_model_export_utils_test.py"], srcs_version = "PY2AND3", + tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":learn", "//tensorflow/contrib/layers:layers_py", diff --git a/tensorflow/contrib/learn/python/learn/estimators/head.py b/tensorflow/contrib/learn/python/learn/estimators/head.py index 2b4b6eff39f4fc8a20a149edfc07d2f4f27a9bae..e28e6854a5097d66cb486be3e82f3726f5cc70fd 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/head.py +++ b/tensorflow/contrib/learn/python/learn/estimators/head.py @@ -777,7 +777,7 @@ class _RegressionHead(_SingleHead): key = prediction_key.PredictionKey.SCORES with ops.name_scope(None, "predictions", (logits,)): if self.logits_dimension == 1: - logits = array_ops.squeeze(logits, squeeze_dims=(1,), name=key) + logits = array_ops.squeeze(logits, axis=(1,), name=key) return {key: self._link_fn(logits)} def _metrics(self, eval_loss, predictions, labels, weights): @@ -974,7 +974,7 @@ def _softmax_cross_entropy_loss(labels, logits, weights=None): is_squeezed_labels = False # TODO(ptucker): This will break for dynamic shapes. 
if len(labels.get_shape()) == 2: - labels = array_ops.squeeze(labels, squeeze_dims=(1,)) + labels = array_ops.squeeze(labels, axis=(1,)) is_squeezed_labels = True loss = nn.sparse_softmax_cross_entropy_with_logits( diff --git a/tensorflow/contrib/learn/python/learn/estimators/kmeans_test.py b/tensorflow/contrib/learn/python/learn/estimators/kmeans_test.py index b28835a809736a099ad2f08d127dc68d7977a3c1..584556992a0db2345e182e92c4a7f7582d3cd8dc 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/kmeans_test.py +++ b/tensorflow/contrib/learn/python/learn/estimators/kmeans_test.py @@ -36,7 +36,6 @@ from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import data_flow_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import random_ops -from tensorflow.python.ops import random_ops from tensorflow.python.platform import benchmark from tensorflow.python.platform import flags from tensorflow.python.platform import test diff --git a/tensorflow/contrib/learn/python/learn/estimators/run_config.py b/tensorflow/contrib/learn/python/learn/estimators/run_config.py index f3500bf56f4f8e605312261519432133d7337ba0..14ee2ba6094760d52180d6de7763ea88b8ee98c8 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/run_config.py +++ b/tensorflow/contrib/learn/python/learn/estimators/run_config.py @@ -298,7 +298,8 @@ class RunConfig(ClusterConfig, core_run_config.RunConfig): # core_run_config.RunConfig.__init__(self) # so instead of breaking compatibility with that assumption, we # just manually initialize this field: - self._distribute = None + self._train_distribute = None + self._device_fn = None gpu_options = config_pb2.GPUOptions( per_process_gpu_memory_fraction=gpu_memory_fraction) diff --git a/tensorflow/contrib/learn/python/learn/learn_io/data_feeder_test.py b/tensorflow/contrib/learn/python/learn/learn_io/data_feeder_test.py index 82848be7df653dd60219317d28f233767746f544..1f439965daf956665bbedc919281df0ee07b5d62 
100644 --- a/tensorflow/contrib/learn/python/learn/learn_io/data_feeder_test.py +++ b/tensorflow/contrib/learn/python/learn/learn_io/data_feeder_test.py @@ -18,6 +18,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import os.path import numpy as np import six from six.moves import xrange # pylint: disable=redefined-builtin @@ -26,6 +27,7 @@ from six.moves import xrange # pylint: disable=redefined-builtin from tensorflow.contrib.learn.python.learn.learn_io import * from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops +from tensorflow.python.lib.io import file_io from tensorflow.python.platform import test # pylint: enable=wildcard-import @@ -35,6 +37,13 @@ class DataFeederTest(test.TestCase): # pylint: disable=undefined-variable """Tests for `DataFeeder`.""" + def setUp(self): + self._base_dir = os.path.join(self.get_temp_dir(), 'base_dir') + file_io.create_dir(self._base_dir) + + def tearDown(self): + file_io.delete_recursively(self._base_dir) + def _wrap_dict(self, data, prepend=''): return {prepend + '1': data, prepend + '2': data} @@ -45,14 +54,14 @@ class DataFeederTest(test.TestCase): def _assert_dtype(self, expected_np_dtype, expected_tf_dtype, input_data): feeder = data_feeder.DataFeeder(input_data, None, n_classes=0, batch_size=1) if isinstance(input_data, dict): - for k, v in list(feeder.input_dtype.items()): + for v in list(feeder.input_dtype.values()): self.assertEqual(expected_np_dtype, v) else: self.assertEqual(expected_np_dtype, feeder.input_dtype) with ops.Graph().as_default() as g, self.test_session(g): inp, _ = feeder.input_builder() if isinstance(inp, dict): - for k, v in list(inp.items()): + for v in list(inp.values()): self.assertEqual(expected_tf_dtype, v.dtype) else: self.assertEqual(expected_tf_dtype, inp.dtype) @@ -301,7 +310,10 @@ class DataFeederTest(test.TestCase): [0.60000002, 0.2]]) self.assertAllClose(feed_dict[out.name], [[0., 0., 
1.], [0., 1., 0.]]) - def test_hdf5_data_feeder(self): + # TODO(rohanj): Fix this test by fixing data_feeder. Currently, h5py doesn't + # support permutation based indexing lookups (More documentation at + # http://docs.h5py.org/en/latest/high/dataset.html#fancy-indexing) + def DISABLED_test_hdf5_data_feeder(self): def func(df): inp, out = df.input_builder() @@ -314,11 +326,12 @@ class DataFeederTest(test.TestCase): import h5py # pylint: disable=g-import-not-at-top x = np.matrix([[1, 2], [3, 4]]) y = np.array([1, 2]) - h5f = h5py.File('test_hdf5.h5', 'w') + file_path = os.path.join(self._base_dir, 'test_hdf5.h5') + h5f = h5py.File(file_path, 'w') h5f.create_dataset('x', data=x) h5f.create_dataset('y', data=y) h5f.close() - h5f = h5py.File('test_hdf5.h5', 'r') + h5f = h5py.File(file_path, 'r') x = h5f['x'] y = h5f['y'] func(data_feeder.DataFeeder(x, y, n_classes=0, batch_size=3)) diff --git a/tensorflow/contrib/learn/python/learn/ops/losses_ops.py b/tensorflow/contrib/learn/python/learn/ops/losses_ops.py index 92976d1539c7ddc226b81f903beee82b798ec8db..9f2cadb01747c5a8e4ee75ac38f423f85e11bbba 100644 --- a/tensorflow/contrib/learn/python/learn/ops/losses_ops.py +++ b/tensorflow/contrib/learn/python/learn/ops/losses_ops.py @@ -40,7 +40,7 @@ def mean_squared_error_regressor(tensor_in, labels, weights, biases, name=None): [tensor_in, labels]): predictions = nn.xw_plus_b(tensor_in, weights, biases) if len(labels.get_shape()) == 1 and len(predictions.get_shape()) == 2: - predictions = array_ops_.squeeze(predictions, squeeze_dims=[1]) + predictions = array_ops_.squeeze(predictions, axis=[1]) return predictions, losses.mean_squared_error(labels, predictions) diff --git a/tensorflow/contrib/legacy_seq2seq/BUILD b/tensorflow/contrib/legacy_seq2seq/BUILD index 8c2c4fd29c0502d4199f27a65e4827b2db973c3d..4ce91a140f816ddc8bdc60287e4cbc807172ec6d 100644 --- a/tensorflow/contrib/legacy_seq2seq/BUILD +++ b/tensorflow/contrib/legacy_seq2seq/BUILD @@ -58,5 +58,8 @@ cuda_py_tests( 
"//tensorflow/python:variable_scope", "//tensorflow/python:variables", ], - tags = ["noasan"], # times out b/63678675 + tags = [ + "noasan", # times out b/63678675 + "optonly", # times out (flaky) + ], ) diff --git a/tensorflow/contrib/linalg/BUILD b/tensorflow/contrib/linalg/BUILD index a7812f74d1e69276a4bba597b41e442bc4dbbc4a..2e92ad6eb39d8aa8876a34572f50d5b6aff0511a 100644 --- a/tensorflow/contrib/linalg/BUILD +++ b/tensorflow/contrib/linalg/BUILD @@ -58,6 +58,31 @@ cuda_py_test( "//tensorflow/python:math_ops", "//tensorflow/python:platform_test", ], - shard_count = 4, - tags = ["noasan"], + shard_count = 5, + tags = [ + "noasan", + "optonly", + ], +) + +cuda_py_test( + name = "linear_operator_kronecker_test", + size = "medium", + srcs = ["python/kernel_tests/linear_operator_kronecker_test.py"], + additional_deps = [ + ":linalg_py", + "//third_party/py/numpy", + "//tensorflow/python:array_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:math_ops", + "//tensorflow/python:platform_test", + ], + shard_count = 8, + tags = [ + "noasan", + "optonly", + ], ) diff --git a/tensorflow/contrib/linalg/__init__.py b/tensorflow/contrib/linalg/__init__.py index 14cc3b2b4971de1a31960ee33c2f304154b1f411..38bd66b13f79ee918acbdc2d33a6367e4e34d4b1 100644 --- a/tensorflow/contrib/linalg/__init__.py +++ b/tensorflow/contrib/linalg/__init__.py @@ -22,6 +22,7 @@ See the @{$python/contrib.linalg} guide. 
@@LinearOperatorIdentity @@LinearOperatorScaledIdentity @@LinearOperatorFullMatrix +@@LinearOperatorKronecker @@LinearOperatorLowerTriangular @@LinearOperatorLowRankUpdate @@LinearOperatorComposition @@ -36,6 +37,7 @@ from __future__ import print_function from tensorflow.contrib.linalg.python.ops.linear_operator_addition import * from tensorflow.contrib.linalg.python.ops.linear_operator_block_diag import * +from tensorflow.contrib.linalg.python.ops.linear_operator_kronecker import * from tensorflow.python.ops.linalg.linear_operator import * from tensorflow.python.ops.linalg.linear_operator_composition import * from tensorflow.python.ops.linalg.linear_operator_diag import * diff --git a/tensorflow/contrib/linalg/python/kernel_tests/linear_operator_block_diag_test.py b/tensorflow/contrib/linalg/python/kernel_tests/linear_operator_block_diag_test.py index cc1a047d6a2b6029080fad3f240aa00f50504f07..e7407ede11409a47f4d9db96ad5b5d801ef1625d 100644 --- a/tensorflow/contrib/linalg/python/kernel_tests/linear_operator_block_diag_test.py +++ b/tensorflow/contrib/linalg/python/kernel_tests/linear_operator_block_diag_test.py @@ -76,6 +76,8 @@ class SquareLinearOperatorBlockDiagTest( build_info((1, 1)), build_info((1, 3, 3)), build_info((5, 5), blocks=[(2, 2), (3, 3)]), + build_info((3, 7, 7), blocks=[(1, 2, 2), (3, 2, 2), (1, 3, 3)]), + build_info((2, 1, 5, 5), blocks=[(2, 1, 2, 2), (1, 3, 3)]), ] def _operator_and_mat_and_feed_dict(self, build_info, dtype, use_placeholder): @@ -184,70 +186,5 @@ class SquareLinearOperatorBlockDiagTest( block_diag.LinearOperatorBlockDiag([]) -# This test is for blocks with different batch dimensions. -# LinearOperatorFullMatrix doesn't broadcast matmul/solve. 
-class SquareDiagLinearOperatorBlockDiagTest( - linear_operator_test_util.SquareLinearOperatorDerivedClassTest): - """Most tests done in the base class LinearOperatorDerivedClassTest.""" - - def setUp(self): - # Increase from 1e-6 to 1e-4 - self._atol[dtypes.float32] = 1e-4 - self._atol[dtypes.complex64] = 1e-4 - self._rtol[dtypes.float32] = 1e-4 - self._rtol[dtypes.complex64] = 1e-4 - - @property - def _operator_build_infos(self): - build_info = linear_operator_test_util.OperatorBuildInfo - return [ - build_info((3, 7, 7), blocks=[(1, 2, 2), (3, 2, 2), (1, 3, 3)]), - build_info((2, 1, 6, 6), blocks=[(2, 1, 2, 2), (1, 1, 4, 4)]), - ] - - def _operator_and_mat_and_feed_dict(self, build_info, dtype, use_placeholder): - shape = list(build_info.shape) - expected_blocks = ( - build_info.__dict__["blocks"] if "blocks" in build_info.__dict__ - else [shape]) - diag_matrices = [ - linear_operator_test_util.random_uniform( - shape=block_shape[:-1], minval=1., maxval=20., dtype=dtype) - for block_shape in expected_blocks - ] - - if use_placeholder: - diag_matrices_ph = [ - array_ops.placeholder(dtype=dtype) for _ in expected_blocks - ] - diag_matrices = self.evaluate(diag_matrices) - # Evaluate here because (i) you cannot feed a tensor, and (ii) - # values are random and we want the same value used for both mat and - # feed_dict. - operator = block_diag.LinearOperatorBlockDiag( - [linalg.LinearOperatorDiag(m_ph) for m_ph in diag_matrices_ph]) - feed_dict = {m_ph: m for (m_ph, m) in zip( - diag_matrices_ph, diag_matrices)} - else: - operator = block_diag.LinearOperatorBlockDiag( - [linalg.LinearOperatorDiag(m) for m in diag_matrices]) - feed_dict = None - # Should be auto-set. - self.assertTrue(operator.is_square) - - # Broadcast the shapes. 
- expected_shape = list(build_info.shape) - - matrices = linear_operator_util.broadcast_matrix_batch_dims( - [array_ops.matrix_diag(diag_block) for diag_block in diag_matrices]) - - block_diag_dense = _block_diag_dense(expected_shape, matrices) - if not use_placeholder: - block_diag_dense.set_shape( - expected_shape[:-2] + [expected_shape[-1], expected_shape[-1]]) - - return operator, block_diag_dense, feed_dict - - if __name__ == "__main__": test.main() diff --git a/tensorflow/contrib/linalg/python/kernel_tests/linear_operator_kronecker_test.py b/tensorflow/contrib/linalg/python/kernel_tests/linear_operator_kronecker_test.py new file mode 100644 index 0000000000000000000000000000000000000000..6574da22a188c7aa25ad8426522d0b446af8f5f3 --- /dev/null +++ b/tensorflow/contrib/linalg/python/kernel_tests/linear_operator_kronecker_test.py @@ -0,0 +1,194 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.contrib.linalg.python.ops import linear_operator_kronecker as kronecker +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.framework import random_seed +from tensorflow.python.ops import array_ops +from tensorflow.python.ops.linalg import linalg as linalg_lib +from tensorflow.python.ops.linalg import linear_operator_test_util +from tensorflow.python.ops.linalg import linear_operator_util +from tensorflow.python.platform import test + +linalg = linalg_lib +random_seed.set_random_seed(23) +rng = np.random.RandomState(0) + + +def _kronecker_dense(factors): + """Convert a list of factors, into a dense Kronecker product.""" + product = factors[0] + for factor in factors[1:]: + product = product[..., array_ops.newaxis, :, array_ops.newaxis] + factor_to_mul = factor[..., array_ops.newaxis, :, array_ops.newaxis, :] + product *= factor_to_mul + product = array_ops.reshape( + product, + shape=array_ops.concat( + [array_ops.shape(product)[:-4], + [array_ops.shape(product)[-4] * array_ops.shape(product)[-3], + array_ops.shape(product)[-2] * array_ops.shape(product)[-1]] + ], axis=0)) + + return product + + +class KroneckerDenseTest(test.TestCase): + + def testKroneckerDenseMatrix(self): + x = ops.convert_to_tensor([[2., 3.], [1., 2.]], dtype=dtypes.float32) + y = ops.convert_to_tensor([[1., 2.], [5., -1.]], dtype=dtypes.float32) + # From explicitly writing out the kronecker product of x and y. + z = ops.convert_to_tensor([ + [2., 4., 3., 6.], + [10., -2., 15., -3.], + [1., 2., 2., 4.], + [5., -1., 10., -2.]], dtype=dtypes.float32) + # From explicitly writing out the kronecker product of y and x. 
+ w = ops.convert_to_tensor([ + [2., 3., 4., 6.], + [1., 2., 2., 4.], + [10., 15., -2., -3.], + [5., 10., -1., -2.]], dtype=dtypes.float32) + + with self.test_session(): + self.assertAllClose(_kronecker_dense([x, y]).eval(), z.eval()) + self.assertAllClose(_kronecker_dense([y, x]).eval(), w.eval()) + + +class SquareLinearOperatorKroneckerTest( + linear_operator_test_util.SquareLinearOperatorDerivedClassTest): + """Most tests done in the base class LinearOperatorDerivedClassTest.""" + + def setUp(self): + # Increase from 1e-6 to 1e-4 + self._atol[dtypes.float32] = 1e-4 + self._atol[dtypes.complex64] = 1e-4 + self._rtol[dtypes.float32] = 1e-4 + self._rtol[dtypes.complex64] = 1e-4 + + @property + def _operator_build_infos(self): + build_info = linear_operator_test_util.OperatorBuildInfo + return [ + build_info((1, 1), factors=[(1, 1), (1, 1)]), + build_info((8, 8), factors=[(2, 2), (2, 2), (2, 2)]), + build_info((12, 12), factors=[(2, 2), (3, 3), (2, 2)]), + build_info((1, 3, 3), factors=[(1, 1), (1, 3, 3)]), + build_info((3, 6, 6), factors=[(3, 1, 1), (1, 2, 2), (1, 3, 3)]), + ] + + def _operator_and_mat_and_feed_dict(self, build_info, dtype, use_placeholder): + shape = list(build_info.shape) + expected_factors = build_info.__dict__["factors"] + matrices = [ + linear_operator_test_util.random_positive_definite_matrix( + block_shape, dtype, force_well_conditioned=True) + for block_shape in expected_factors + ] + + if use_placeholder: + matrices_ph = [ + array_ops.placeholder(dtype=dtype) for _ in expected_factors + ] + # Evaluate here because (i) you cannot feed a tensor, and (ii) + # values are random and we want the same value used for both mat and + # feed_dict. 
+ matrices = self.evaluate(matrices) + operator = kronecker.LinearOperatorKronecker( + [linalg.LinearOperatorFullMatrix( + m_ph, is_square=True) for m_ph in matrices_ph], + is_square=True) + feed_dict = {m_ph: m for (m_ph, m) in zip(matrices_ph, matrices)} + else: + operator = kronecker.LinearOperatorKronecker( + [linalg.LinearOperatorFullMatrix( + m, is_square=True) for m in matrices]) + feed_dict = None + # Should be auto-set. + self.assertTrue(operator.is_square) + + matrices = linear_operator_util.broadcast_matrix_batch_dims(matrices) + + kronecker_dense = _kronecker_dense(matrices) + + if not use_placeholder: + kronecker_dense.set_shape(shape) + + return operator, kronecker_dense, feed_dict + + def test_is_x_flags(self): + # Matrix with two positive eigenvalues, 1, and 1. + # The matrix values do not affect auto-setting of the flags. + matrix = [[1., 0.], [1., 1.]] + operator = kronecker.LinearOperatorKronecker( + [linalg.LinearOperatorFullMatrix(matrix), + linalg.LinearOperatorFullMatrix(matrix)], + is_positive_definite=True, + is_non_singular=True, + is_self_adjoint=False) + self.assertTrue(operator.is_positive_definite) + self.assertTrue(operator.is_non_singular) + self.assertFalse(operator.is_self_adjoint) + + def test_is_non_singular_auto_set(self): + # Matrix with two positive eigenvalues, 11 and 8. + # The matrix values do not affect auto-setting of the flags. + matrix = [[11., 0.], [1., 8.]] + operator_1 = linalg.LinearOperatorFullMatrix(matrix, is_non_singular=True) + operator_2 = linalg.LinearOperatorFullMatrix(matrix, is_non_singular=True) + + operator = kronecker.LinearOperatorKronecker( + [operator_1, operator_2], + is_positive_definite=False, # No reason it HAS to be False...
+ is_non_singular=None) + self.assertFalse(operator.is_positive_definite) + self.assertTrue(operator.is_non_singular) + + with self.assertRaisesRegexp(ValueError, "always non-singular"): + kronecker.LinearOperatorKronecker( + [operator_1, operator_2], is_non_singular=False) + + def test_name(self): + matrix = [[11., 0.], [1., 8.]] + operator_1 = linalg.LinearOperatorFullMatrix(matrix, name="left") + operator_2 = linalg.LinearOperatorFullMatrix(matrix, name="right") + + operator = kronecker.LinearOperatorKronecker([operator_1, operator_2]) + + self.assertEqual("left_x_right", operator.name) + + def test_different_dtypes_raises(self): + operators = [ + linalg.LinearOperatorFullMatrix(rng.rand(2, 3, 3)), + linalg.LinearOperatorFullMatrix(rng.rand(2, 3, 3).astype(np.float32)) + ] + with self.assertRaisesRegexp(TypeError, "same dtype"): + kronecker.LinearOperatorKronecker(operators) + + def test_empty_or_one_operators_raises(self): + with self.assertRaisesRegexp(ValueError, ">=1 operators"): + kronecker.LinearOperatorKronecker([]) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/linalg/python/ops/linear_operator_kronecker.py b/tensorflow/contrib/linalg/python/ops/linear_operator_kronecker.py new file mode 100644 index 0000000000000000000000000000000000000000..79080d194f59b7ebce045ab3e3d262ca948d9391 --- /dev/null +++ b/tensorflow/contrib/linalg/python/ops/linear_operator_kronecker.py @@ -0,0 +1,560 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Construct the Kronecker product of one or more `LinearOperators`.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.framework import common_shapes +from tensorflow.python.framework import errors +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_shape +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import check_ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops.linalg import linalg_impl as linalg +from tensorflow.python.ops.linalg import linear_operator + + +def _vec(x): + """Stacks column of matrix to form a single column.""" + return array_ops.reshape( + array_ops.matrix_transpose(x), + array_ops.concat( + [array_ops.shape(x)[:-2], [-1]], axis=0)) + + +def _unvec_by(y, num_col): + """Unstack vector to form a matrix, with a specified amount of columns.""" + return array_ops.matrix_transpose( + array_ops.reshape( + y, + array_ops.concat( + [array_ops.shape(y)[:-1], [num_col, -1]], axis=0))) + + +def _rotate_last_dim(x, rotate_right=False): + """Rotate the last dimension either left or right.""" + ndims = array_ops.rank(x) + if rotate_right: + transpose_perm = array_ops.concat( + [[ndims - 1], math_ops.range(0, ndims - 1)], axis=0) + else: + transpose_perm = array_ops.concat( + [math_ops.range(1, ndims), [0]], axis=0) + return array_ops.transpose(x, transpose_perm) + + +class LinearOperatorKronecker(linear_operator.LinearOperator): + """Kronecker product between two `LinearOperators`. + + This operator composes one or more linear operators `[op1,...,opJ]`, + building a new `LinearOperator` representing the Kronecker product: + `op1 x op2 x .. 
opJ` (we omit parentheses as the Kronecker product is + associative). + + If `opj` has shape `batch_shape_j + [M_j, N_j]`, then the composed operator + will have shape equal to `broadcast_batch_shape + [prod M_j, prod N_j]`, + where the product is over all operators. + + ```python + # Create a 4 x 4 linear operator composed of two 2 x 2 operators. + operator_1 = LinearOperatorFullMatrix([[1., 2.], [3., 4.]]) + operator_2 = LinearOperatorFullMatrix([[1., 0.], [2., 1.]]) + operator = LinearOperatorKronecker([operator_1, operator_2]) + + operator.to_dense() + ==> [[1., 0., 2., 0.], + [2., 1., 4., 2.], + [3., 0., 4., 0.], + [6., 3., 8., 4.]] + + operator.shape + ==> [4, 4] + + operator.log_abs_determinant() + ==> scalar Tensor + + x = ... Shape [4, 2] Tensor + operator.matmul(x) + ==> Shape [4, 2] Tensor + + # Create a [2, 3] batch of 4 x 5 linear operators. + matrix_45 = tf.random_normal(shape=[2, 3, 4, 5]) + operator_45 = LinearOperatorFullMatrix(matrix_45) + + # Create a [2, 3] batch of 5 x 6 linear operators. + matrix_56 = tf.random_normal(shape=[2, 3, 5, 6]) + operator_56 = LinearOperatorFullMatrix(matrix_56) + + # Compose to create a [2, 3] batch of 20 x 30 operators. + operator_large = LinearOperatorKronecker([operator_45, operator_56]) + + # Create a shape [2, 3, 30, 2] Tensor. + x = tf.random_normal(shape=[2, 3, 30, 2]) + operator_large.matmul(x) + ==> Shape [2, 3, 20, 2] Tensor + ``` + + #### Performance + + The performance of `LinearOperatorKronecker` on any operation is equal to + the sum of the individual operators' operations. + + #### Matrix property hints + + This `LinearOperator` is initialized with boolean flags of the form `is_X`, + for `X = non_singular, self_adjoint, positive_definite, square`. + These have the following meaning: + + * If `is_X == True`, callers should expect the operator to have the + property `X`. This is a promise that should be fulfilled, but is *not* a + runtime assert. 
For example, finite floating point precision may result + in these promises being violated. + * If `is_X == False`, callers should expect the operator to not have `X`. + * If `is_X == None` (the default), callers should have no expectation either + way. + """ + + def __init__(self, + operators, + is_non_singular=None, + is_self_adjoint=None, + is_positive_definite=None, + is_square=None, + name=None): + r"""Initialize a `LinearOperatorKronecker`. + + `LinearOperatorKronecker` is initialized with a list of operators + `[op_1,...,op_J]`. + + Args: + operators: Iterable of `LinearOperator` objects, each with + the same `dtype` and composable shape, representing the Kronecker + factors. + is_non_singular: Expect that this operator is non-singular. + is_self_adjoint: Expect that this operator is equal to its hermitian + transpose. + is_positive_definite: Expect that this operator is positive definite, + meaning the quadratic form `x^H A x` has positive real part for all + nonzero `x`. Note that we do not require the operator to be + self-adjoint to be positive-definite. See: + https://en.wikipedia.org/wiki/Positive-definite_matrix\ + #Extension_for_non_symmetric_matrices + is_square: Expect that this operator acts like square [batch] matrices. + name: A name for this `LinearOperator`. Default is the individual + operators names joined with `_x_`. + + Raises: + TypeError: If all operators do not have the same `dtype`. + ValueError: If `operators` is empty. + """ + # Validate operators. + check_ops.assert_proper_iterable(operators) + operators = list(operators) + if not operators: + raise ValueError( + "Expected a list of >=1 operators. Found: %s" % operators) + self._operators = operators + + # Validate dtype. + dtype = operators[0].dtype + for operator in operators: + if operator.dtype != dtype: + name_type = (str((o.name, o.dtype)) for o in operators) + raise TypeError( + "Expected all operators to have the same dtype. 
Found %s" + % " ".join(name_type)) + + # Auto-set and check hints. + # A Kronecker product is invertible, if and only if all factors are + # invertible. + if all(operator.is_non_singular for operator in operators): + if is_non_singular is False: + raise ValueError( + "The Kronecker product of non-singular operators is always " + "non-singular.") + is_non_singular = True + + if all(operator.is_self_adjoint for operator in operators): + if is_self_adjoint is False: + raise ValueError( + "The Kronecker product of self-adjoint operators is always " + "self-adjoint.") + is_self_adjoint = True + + # The eigenvalues of a Kronecker product are equal to the products of eigen + # values of the corresponding factors. + if all(operator.is_positive_definite for operator in operators): + if is_positive_definite is False: + raise ValueError("The Kronecker product of positive-definite operators " + "is always positive-definite.") + is_positive_definite = True + + # Initialization. + graph_parents = [] + for operator in operators: + graph_parents.extend(operator.graph_parents) + + if name is None: + name = operators[0].name + for operator in operators[1:]: + name += "_x_" + operator.name + with ops.name_scope(name, values=graph_parents): + super(LinearOperatorKronecker, self).__init__( + dtype=dtype, + graph_parents=graph_parents, + is_non_singular=is_non_singular, + is_self_adjoint=is_self_adjoint, + is_positive_definite=is_positive_definite, + is_square=is_square, + name=name) + + @property + def operators(self): + return self._operators + + def _shape(self): + # Get final matrix shape. + domain_dimension = self.operators[0].domain_dimension + for operator in self.operators[1:]: + domain_dimension *= operator.domain_dimension + + range_dimension = self.operators[0].range_dimension + for operator in self.operators[1:]: + range_dimension *= operator.range_dimension + + matrix_shape = tensor_shape.TensorShape([ + range_dimension, domain_dimension]) + + # Get broadcast batch shape. 
+ # broadcast_shape checks for compatibility. + batch_shape = self.operators[0].batch_shape + for operator in self.operators[1:]: + batch_shape = common_shapes.broadcast_shape( + batch_shape, operator.batch_shape) + + return batch_shape.concatenate(matrix_shape) + + def _shape_tensor(self): + domain_dimension = self.operators[0].domain_dimension_tensor() + for operator in self.operators[1:]: + domain_dimension *= operator.domain_dimension_tensor() + + range_dimension = self.operators[0].range_dimension_tensor() + for operator in self.operators[1:]: + range_dimension *= operator.range_dimension_tensor() + + matrix_shape = [range_dimension, domain_dimension] + + # Get broadcast batch shape. + # broadcast_shape checks for compatibility. + batch_shape = self.operators[0].batch_shape_tensor() + for operator in self.operators[1:]: + batch_shape = array_ops.broadcast_dynamic_shape( + batch_shape, operator.batch_shape_tensor()) + + return array_ops.concat((batch_shape, matrix_shape), 0) + + def _matmul(self, x, adjoint=False, adjoint_arg=False): + # Here we heavily rely on Roth's column Lemma [1]: + # (A x B) * vec X = vec BXA^T, + # where vec stacks all the columns of the matrix under each other. In our + # case, x represents a batch of vec X (i.e. we think of x as a batch of + # column vectors, rather than a matrix). Each member of the batch can be + # reshaped to a matrix (hence we get a batch of matrices). + # We can iteratively apply this lemma by noting that if B is a Kronecker + # product, then we can apply the lemma again. + + # [1] W. E. Roth, "On direct product matrices," + # Bulletin of the American Mathematical Society, vol. 40, pp. 461-468, + # 1934 + + # Efficiency + + # Naively doing the Kronecker product, by calculating the dense matrix and + # applying it will can take cubic time in the size of domain_dimension + # (assuming a square matrix). 
The other issue is that calculating the dense + # matrix can be prohibitively expensive, in that it can take a large amount + # of memory. + # + # This implementation avoids this memory blow up by only computing matmuls + # with the factors. In this way, we don't have to realize the dense matrix. + # In terms of complexity, if we have Kronecker Factors of size: + # (n1, n1), (n2, n2), (n3, n3), ... (nJ, nJ), with N = \prod n_i, and we + # have as input a [N, M] matrix, the naive approach would take O(N^2 M). + # With this approach (ignoring reshaping of tensors and transposes for now), + # the time complexity can be O(M * (\sum n_i) * N). There is also the + # benefit of batched multiplication (In this example, the batch size is + # roughly M * N) so this can be much faster. However, not factored in are + # the costs of the several transposing of tensors, which can affect cache + # behavior. + + # Below we document the shape manipulation for adjoint=False, + # adjoint_arg=False, but the general case of different adjoints is still + # handled. + + if adjoint_arg: + x = linalg.adjoint(x) + + # Always add a batch dimension to enable broadcasting to work. + batch_shape = array_ops.concat( + [array_ops.ones_like(self.batch_shape_tensor()), [1, 1]], 0) + x += array_ops.zeros(batch_shape, dtype=x.dtype.base_dtype) + + # x has shape [B, R, C], where B represent some number of batch dimensions, + # R represents the number of rows, and C represents the number of columns. + # In order to apply Roth's column lemma, we need to operate on a batch of + # column vectors, so we reshape into a batch of column vectors. We put it + # at the front to ensure that broadcasting between operators to the batch + # dimensions B still works. + output = _rotate_last_dim(x, rotate_right=True) + + # Also expand the shape to be [A, C, B, R]. The first dimension will be + # used to accumulate dimensions from each operator matmul. + output = output[array_ops.newaxis, ...] 
+ + # In this loop, A is going to refer to the value of the accumulated + # dimension. A = 1 at the start, and will end up being self.range_dimension. + # V will refer to the last dimension. V = R at the start, and will end up + # being 1 in the end. + for operator in self.operators[:-1]: + # Reshape output from [A, C, B, V] to be + # [A, C, B, V / op.domain_dimension, op.domain_dimension] + if adjoint: + operator_dimension = operator.range_dimension_tensor() + else: + operator_dimension = operator.domain_dimension_tensor() + + output = _unvec_by(output, operator_dimension) + + # We are computing (XA^T) = (AX^T)^T. + # output has [A, C, B, V / op.domain_dimension, op.domain_dimension], + # which is being converted to: + # [A, C, B, V / op.domain_dimension, op.range_dimension] + output = array_ops.matrix_transpose(output) + output = operator.matmul(output, adjoint=adjoint, adjoint_arg=False) + output = array_ops.matrix_transpose(output) + # Rearrange it to [A * op.range_dimension, C, B, V / op.domain_dimension] + output = _rotate_last_dim(output, rotate_right=False) + output = _vec(output) + output = _rotate_last_dim(output, rotate_right=True) + + # After the loop, we will have + # A = self.range_dimension / op[-1].range_dimension + # V = op[-1].domain_dimension + + # We convert that using matvec to get: + # [A, C, B, op[-1].range_dimension] + output = self.operators[-1].matvec(output, adjoint=adjoint) + # Rearrange shape to be [B1, ... 
Bn, self.range_dimension, C] + output = _rotate_last_dim(output, rotate_right=False) + output = _vec(output) + output = _rotate_last_dim(output, rotate_right=False) + + # The reshapes above are built from dynamic shapes, so re-attach the static + # output shape whenever the static shape of x is fully known. + if x.shape.is_fully_defined(): + column_dim = x.shape[-1] + broadcast_batch_shape = common_shapes.broadcast_shape( + x.shape[:-2], self.batch_shape) + if adjoint: + matrix_dimensions = [self.domain_dimension, column_dim] + else: + matrix_dimensions = [self.range_dimension, column_dim] + + output.set_shape(broadcast_batch_shape.concatenate( + matrix_dimensions)) + + return output + + def _determinant(self): + # Note that we have |X1 x X2| = |X1| ** n * |X2| ** m, where X1 is an m x m + # matrix, and X2 is an n x n matrix. We can iteratively apply this property + # to get the determinant of |X1 x X2 x X3 ...|. If T is the product of the + # domain dimension of all operators, then we have: + # |X1 x X2 x X3 ...| = + # |X1| ** (T / m) * |X2 x X3 ... | ** m = + # |X1| ** (T / m) * |X2| ** (m * (T / m) / n) * ... = + # |X1| ** (T / m) * |X2| ** (T / n) * | X3 x X4... | ** (m * n) + # And by doing induction we have product(|X_i| ** (T / dim(X_i))). + total = self.domain_dimension_tensor() + determinant = 1. + for operator in self.operators: + determinant *= operator.determinant() ** math_ops.cast( + total / operator.domain_dimension_tensor(), + dtype=operator.dtype) + return determinant + + def _log_abs_determinant(self): + # This will be sum((total / dim(x_i)) * log |X_i|) + total = self.domain_dimension_tensor() + log_abs_det = 0. + for operator in self.operators: + log_abs_det += operator.log_abs_determinant() * math_ops.cast( + total / operator.domain_dimension_tensor(), + dtype=operator.dtype) + return log_abs_det + + def _trace(self): + # tr(A x B) = tr(A) * tr(B) + trace = 1. 
+ for operator in self.operators: + trace *= operator.trace() + return trace + + def _solve(self, rhs, adjoint=False, adjoint_arg=False): + # Here we follow the same use of Roth's column lemma as in `matmul`, with + # the key difference that we replace all `matmul` instances with `solve`. + # This follows from the property that inv(A x B) = inv(A) x inv(B). + + # Below we document the shape manipulation for adjoint=False, + # adjoint_arg=False, but the general case of different adjoints is still + # handled. + + if adjoint_arg: + rhs = linalg.adjoint(rhs) + + # Always add a batch dimension to enable broadcasting to work. + batch_shape = array_ops.concat( + [array_ops.ones_like(self.batch_shape_tensor()), [1, 1]], 0) + rhs += array_ops.zeros(batch_shape, dtype=rhs.dtype.base_dtype) + + # rhs has shape [B, R, C], where B represent some number of batch + # dimensions, + # R represents the number of rows, and C represents the number of columns. + # In order to apply Roth's column lemma, we need to operate on a batch of + # column vectors, so we reshape into a batch of column vectors. We put it + # at the front to ensure that broadcasting between operators to the batch + # dimensions B still works. + output = _rotate_last_dim(rhs, rotate_right=True) + + # Also expand the shape to be [A, C, B, R]. The first dimension will be + # used to accumulate dimensions from each operator matmul. + output = output[array_ops.newaxis, ...] + + # In this loop, A is going to refer to the value of the accumulated + # dimension. A = 1 at the start, and will end up being self.range_dimension. + # V will refer to the last dimension. V = R at the start, and will end up + # being 1 in the end. 
+ for operator in self.operators[:-1]: + # Reshape output from [A, C, B, V] to be + # [A, C, B, V / op.domain_dimension, op.domain_dimension] + if adjoint: + operator_dimension = operator.range_dimension_tensor() + else: + operator_dimension = operator.domain_dimension_tensor() + + output = _unvec_by(output, operator_dimension) + + # We are computing (XA^-1^T) = (A^-1 X^T)^T. + # output has [A, C, B, V / op.domain_dimension, op.domain_dimension], + # which is being converted to: + # [A, C, B, V / op.domain_dimension, op.range_dimension] + output = array_ops.matrix_transpose(output) + output = operator.solve(output, adjoint=adjoint, adjoint_arg=False) + output = array_ops.matrix_transpose(output) + # Rearrange it to [A * op.range_dimension, C, B, V / op.domain_dimension] + output = _rotate_last_dim(output, rotate_right=False) + output = _vec(output) + output = _rotate_last_dim(output, rotate_right=True) + + # After the loop, we will have + # A = self.range_dimension / op[-1].range_dimension + # V = op[-1].domain_dimension + + # We convert that using matvec to get: + # [A, C, B, op[-1].range_dimension] + output = self.operators[-1].solvevec(output, adjoint=adjoint) + # Rearrange shape to be [B1, ... 
Bn, self.range_dimension, C] + output = _rotate_last_dim(output, rotate_right=False) + output = _vec(output) + output = _rotate_last_dim(output, rotate_right=False) + + if rhs.shape.is_fully_defined(): + column_dim = rhs.shape[-1] + broadcast_batch_shape = common_shapes.broadcast_shape( + rhs.shape[:-2], self.batch_shape) + if adjoint: + matrix_dimensions = [self.domain_dimension, column_dim] + else: + matrix_dimensions = [self.range_dimension, column_dim] + + output.set_shape(broadcast_batch_shape.concatenate( + matrix_dimensions)) + + return output + + def _diag_part(self): + diag_part = self.operators[0].diag_part() + for operator in self.operators[1:]: + diag_part = diag_part[..., :, array_ops.newaxis] + op_diag_part = operator.diag_part()[..., array_ops.newaxis, :] + diag_part *= op_diag_part + diag_part = array_ops.reshape( + diag_part, + shape=array_ops.concat( + [array_ops.shape(diag_part)[:-2], [-1]], axis=0)) + if self.range_dimension > self.domain_dimension: + diag_dimension = self.domain_dimension + else: + diag_dimension = self.range_dimension + diag_part.set_shape( + self.batch_shape.concatenate(diag_dimension)) + return diag_part + + def _to_dense(self): + product = self.operators[0].to_dense() + for operator in self.operators[1:]: + # Product has shape [B, R1, 1, C1]. + product = product[ + ..., :, array_ops.newaxis, :, array_ops.newaxis] + # Operator has shape [B, 1, R2, 1, C2]. + op_to_mul = operator.to_dense()[ + ..., array_ops.newaxis, :, array_ops.newaxis, :] + # This is now [B, R1, R2, C1, C2]. + product *= op_to_mul + # Now merge together dimensions to get [B, R1 * R2, C1 * C2]. 
+ product = array_ops.reshape( + product, + shape=array_ops.concat( + [array_ops.shape(product)[:-4], + [array_ops.shape(product)[-4] * array_ops.shape(product)[-3], + array_ops.shape(product)[-2] * array_ops.shape(product)[-1]] + ], axis=0)) + product.set_shape(self.shape) + return product + + def _assert_non_singular(self): + if all(operator.is_square for operator in self.operators): + asserts = [operator.assert_non_singular() for operator in self.operators] + return control_flow_ops.group(asserts) + else: + raise errors.InvalidArgumentError( + node_def=None, op=None, message="All Kronecker factors must be " + "square for the product to be invertible.") + + def _assert_self_adjoint(self): + if all(operator.is_square for operator in self.operators): + asserts = [operator.assert_self_adjoint() for operator in self.operators] + return control_flow_ops.group(asserts) + else: + raise errors.InvalidArgumentError( + node_def=None, op=None, message="All Kronecker factors must be " + "square for the product to be self adjoint.") diff --git a/tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py b/tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py index cfe62fac43b35d863eb559b95057ae62a41bed49..6e6c812adcbaf7e90d7c10c05fdfc0e150829329 100644 --- a/tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py +++ b/tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py @@ -18,6 +18,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import random import threading from tensorflow.contrib.linear_optimizer.python.ops.sdca_ops import SdcaModel @@ -102,6 +103,35 @@ def make_example_dict(example_protos, example_weights): example_ids=['%d' % i for i in range(0, len(example_protos))]) +def make_random_examples_and_variables_dicts(num_examples, dim, num_non_zero): + random.seed(1) + + sparse_features = [ + SparseFeatureColumn( + [i for i in 
range(num_examples) for _ in range(num_non_zero)], [ + i for _ in range(num_examples) + for i in random.sample(range(dim), num_non_zero) + ], + [num_non_zero**(-0.5) for _ in range(num_examples * num_non_zero)]) + ] + examples_dict = dict( + sparse_features=sparse_features, + dense_features=[], + example_weights=[random.random() for _ in range(num_examples)], + example_labels=[ + 1. if random.random() > 0.5 else 0. for _ in range(num_examples) + ], + example_ids=[str(i) for i in range(num_examples)]) + + weights = variables_lib.Variable( + array_ops.zeros([dim], dtype=dtypes.float32)) + variables_dict = dict( + sparse_features_weights=[weights], + dense_features_weights=[]) + + return examples_dict, variables_dict + + def make_variable_dict(max_age, max_gender): # TODO(sibyl-toe9oF2e): Figure out how to derive max_age & max_gender from # examples_dict. @@ -235,6 +265,60 @@ class SdcaWithLogisticLossTest(SdcaModelTest): self.assertAllClose( 0.01, lr.approximate_duality_gap().eval(), rtol=1e-2, atol=1e-2) + def testSparseRandom(self): + dim = 20 + num_examples = 1000 + # Number of non-zero features per example. + non_zeros = 10 + # Setup test data. + with self._single_threaded_test_session(): + examples, variables = make_random_examples_and_variables_dicts( + num_examples, dim, non_zeros) + options = dict( + symmetric_l2_regularization=.1, + symmetric_l1_regularization=0, + num_table_shards=1, + adaptive=False, + loss_type='logistic_loss') + + lr = SdcaModel(examples, variables, options) + variables_lib.global_variables_initializer().run() + train_op = lr.minimize() + for _ in range(4): + train_op.run() + lr.update_weights(train_op).run() + # Duality gap is 1.4e-5. + # It would be 0.01 without shuffling and 0.02 with adaptive sampling. 
+ self.assertNear(0.0, lr.approximate_duality_gap().eval(), err=1e-3) + + def testSparseDuplicate(self): + # Setup test data + example_protos = [ + make_example_proto({ + 'age': [0] * 5, + 'gender': [0] * 5 + }, 0), + make_example_proto({ + 'age': [1] * 5, + 'gender': [1] * 5 + }, 1), + ] + example_weights = [1.0, 1.0] + with self._single_threaded_test_session(): + examples = make_example_dict(example_protos, example_weights) + variables = make_variable_dict(1, 1) + options = dict( + symmetric_l2_regularization=1, + symmetric_l1_regularization=0, + loss_type='logistic_loss') + + lr = SdcaModel(examples, variables, options) + variables_lib.global_variables_initializer().run() + train_op = lr.minimize() + with self.assertRaisesRegexp(errors_impl.InvalidArgumentError, + 'Duplicate'): + train_op.run() + def testDistributedSimple(self): # Setup test data example_protos = [ diff --git a/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py b/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py index 3f5fdc18bb8f47cceee8f81dd5ded02059344b8b..f980746a19fb8e0a02b9d023c127da7ab33e457f 100644 --- a/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py +++ b/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py @@ -168,6 +168,10 @@ class SdcaModel(object): # of workers return self._options.get('num_loss_partitions', 1) + def _adaptive(self): + # Perform adaptive sampling. + return self._options.get('adaptive', True) + def _num_table_shards(self): # Number of hash table shards. 
# Return 1 if not specified or if the value is 'None' @@ -344,7 +348,8 @@ class SdcaModel(object): l1=self._options['symmetric_l1_regularization'], l2=self._symmetric_l2_regularization(), num_loss_partitions=self._num_loss_partitions(), - num_inner_iterations=1) + num_inner_iterations=1, + adaptative=self._adaptive()) # pylint: enable=protected-access with ops.control_dependencies([esu]): diff --git a/tensorflow/contrib/linear_optimizer/python/ops/sharded_mutable_dense_hashtable.py b/tensorflow/contrib/linear_optimizer/python/ops/sharded_mutable_dense_hashtable.py index ec726bbed41a86eb314e3591ecaedaa6bf0e5e9b..5015fb0848107950dd27eb81431dd308f22858bc 100644 --- a/tensorflow/contrib/linear_optimizer/python/ops/sharded_mutable_dense_hashtable.py +++ b/tensorflow/contrib/linear_optimizer/python/ops/sharded_mutable_dense_hashtable.py @@ -49,6 +49,7 @@ class ShardedMutableDenseHashTable(lookup.LookupInterface): default_value, empty_key, num_shards=1, + checkpoint=True, name='ShardedMutableHashTable'): with ops.name_scope(name, 'sharded_mutable_hash_table') as scope: super(ShardedMutableDenseHashTable, self).__init__(key_dtype, @@ -61,6 +62,7 @@ class ShardedMutableDenseHashTable(lookup.LookupInterface): value_dtype=value_dtype, default_value=default_value, empty_key=empty_key, + checkpoint=checkpoint, name='%s-%d-of-%d' % (name, i + 1, num_shards))) self._table_shards = table_shards # TODO(andreasst): add a value_shape() method to LookupInterface diff --git a/tensorflow/contrib/linear_optimizer/python/sdca_estimator_test.py b/tensorflow/contrib/linear_optimizer/python/sdca_estimator_test.py index 79a5928a21cb9a2633b2aac178f185ba333790d6..bed3d5139fcbf9d9e8b85605c752736f26af6793 100644 --- a/tensorflow/contrib/linear_optimizer/python/sdca_estimator_test.py +++ b/tensorflow/contrib/linear_optimizer/python/sdca_estimator_test.py @@ -30,6 +30,13 @@ from tensorflow.python.platform import test class SDCALogisticClassifierTest(test.TestCase): + def 
_single_threaded_test_session(self): + # TODO(andreasst): figure out why SDCALinearRegressor needs a single + # threaded session to pass in tsan mode but SDCALogisticClassifier does not. + config = config_pb2.ConfigProto( + inter_op_parallelism_threads=1, intra_op_parallelism_threads=1) + return self.test_session(config=config) + def testRealValuedFeatures(self): """Tests SDCALogisticClassifier works with real valued features.""" @@ -41,7 +48,7 @@ class SDCALogisticClassifierTest(test.TestCase): 'weights': constant_op.constant([[1.0], [1.0]]) }, constant_op.constant([[0], [1]]) - with self.test_session(): + with self._single_threaded_test_session(): maintenance_cost = feature_column_lib.real_valued_column( 'maintenance_cost') sq_footage = feature_column_lib.real_valued_column('sq_footage') @@ -66,7 +73,7 @@ class SDCALogisticClassifierTest(test.TestCase): constant_op.constant([[500.0, 800.0], [200.0, 600.0]]) }, constant_op.constant([[0], [1]]) - with self.test_session(): + with self._single_threaded_test_session(): dense_feature = feature_column_lib.real_valued_column( 'dense_feature', dimension=2) classifier = sdca_estimator.SDCALogisticClassifier( @@ -86,7 +93,7 @@ class SDCALogisticClassifierTest(test.TestCase): 'weights': constant_op.constant([[1.0], [1.0], [1.0]]) }, constant_op.constant([[1], [0], [1]]) - with self.test_session(): + with self._single_threaded_test_session(): price_bucket = feature_column_lib.bucketized_column( feature_column_lib.real_valued_column('price'), boundaries=[500.0, 700.0]) @@ -120,7 +127,7 @@ class SDCALogisticClassifierTest(test.TestCase): constant_op.constant([[1.0], [1.0], [1.0]]) }, constant_op.constant([[1], [0], [1]]) - with self.test_session(): + with self._single_threaded_test_session(): price = feature_column_lib.real_valued_column('price') country = feature_column_lib.sparse_column_with_hash_bucket( 'country', hash_bucket_size=5) @@ -151,7 +158,7 @@ class SDCALogisticClassifierTest(test.TestCase): dense_shape=[3, 5]) }, 
constant_op.constant([[1], [0], [1]]) - with self.test_session(): + with self._single_threaded_test_session(): country = feature_column_lib.sparse_column_with_hash_bucket( 'country', hash_bucket_size=5) country_weighted_by_price = feature_column_lib.weighted_sparse_column( @@ -163,6 +170,38 @@ class SDCALogisticClassifierTest(test.TestCase): metrics = classifier.evaluate(input_fn=input_fn, steps=1) self.assertGreater(metrics['accuracy'], 0.9) + def testSparseFeaturesWithDuplicates(self): + """Tests SDCALogisticClassifier with duplicated sparse features.""" + + def input_fn(): + return { + 'example_id': + constant_op.constant(['1', '2']), + 'age': + sparse_tensor.SparseTensor( + values=['20-29'] * 5 + ['31-40'] * 5, + indices=[[0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [1, 0], + [1, 0], [1, 0], [1, 0], [1, 0]], + dense_shape=[2, 1]), + 'gender': + sparse_tensor.SparseTensor( + values=['m'] * 5 + ['f'] * 5, + indices=[[0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [1, 0], + [1, 0], [1, 0], [1, 0], [1, 0]], + dense_shape=[2, 1]), + }, constant_op.constant([[1], [0]]) + + with self._single_threaded_test_session(): + age = feature_column_lib.sparse_column_with_hash_bucket( + 'age', hash_bucket_size=10) + gender = feature_column_lib.sparse_column_with_hash_bucket( + 'gender', hash_bucket_size=10) + classifier = sdca_estimator.SDCALogisticClassifier( + example_id_column='example_id', feature_columns=[age, gender]) + classifier.fit(input_fn=input_fn, steps=50) + metrics = classifier.evaluate(input_fn=input_fn, steps=1) + self.assertLess(metrics['loss'], 0.060) + def testCrossedFeatures(self): """Tests SDCALogisticClassifier with crossed features.""" @@ -182,7 +221,7 @@ class SDCALogisticClassifierTest(test.TestCase): dense_shape=[3, 1]) }, constant_op.constant([[0], [0], [1]]) - with self.test_session(): + with self._single_threaded_test_session(): language = feature_column_lib.sparse_column_with_hash_bucket( 'language', hash_bucket_size=5) country = 
feature_column_lib.sparse_column_with_hash_bucket( @@ -215,7 +254,7 @@ class SDCALogisticClassifierTest(test.TestCase): constant_op.constant([[3.0], [1.0], [1.0]]) }, constant_op.constant([[1], [0], [1]]) - with self.test_session(): + with self._single_threaded_test_session(): price = feature_column_lib.real_valued_column('price') sq_footage_bucket = feature_column_lib.bucketized_column( feature_column_lib.real_valued_column('sq_footage'), diff --git a/tensorflow/contrib/linear_optimizer/python/sdca_optimizer.py b/tensorflow/contrib/linear_optimizer/python/sdca_optimizer.py index 92d022f2a30ffeb77e81d3bd01365afcd14826b5..5d4572bf6c761e0de2c9e6d7e17193abf0ebb170 100644 --- a/tensorflow/contrib/linear_optimizer/python/sdca_optimizer.py +++ b/tensorflow/contrib/linear_optimizer/python/sdca_optimizer.py @@ -19,6 +19,7 @@ from __future__ import print_function from tensorflow.contrib import layers from tensorflow.contrib.linear_optimizer.python.ops import sdca_ops from tensorflow.contrib.linear_optimizer.python.ops.sparse_feature_column import SparseFeatureColumn +from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops @@ -71,12 +72,14 @@ class SDCAOptimizer(object): num_loss_partitions=1, num_table_shards=None, symmetric_l1_regularization=0.0, - symmetric_l2_regularization=1.0): + symmetric_l2_regularization=1.0, + adaptive=True): self._example_id_column = example_id_column self._num_loss_partitions = num_loss_partitions self._num_table_shards = num_table_shards self._symmetric_l1_regularization = symmetric_l1_regularization self._symmetric_l2_regularization = symmetric_l2_regularization + self._adaptive = adaptive def get_name(self): return 'SDCAOptimizer' @@ -101,6 +104,10 @@ class SDCAOptimizer(object): def symmetric_l2_regularization(self): return self._symmetric_l2_regularization + @property + def adaptive(self): + return self._adaptive 
+ def get_train_step(self, columns_to_variables, weight_column_name, loss_type, features, targets, global_step): """Returns the training operation of an SdcaModel optimizer.""" @@ -175,28 +182,42 @@ class SDCAOptimizer(object): elif isinstance( column, ( + layers.feature_column._WeightedSparseColumn, # pylint: disable=protected-access layers.feature_column._CrossedColumn, # pylint: disable=protected-access layers.feature_column._SparseColumn)): # pylint: disable=protected-access - sparse_features.append( - SparseFeatureColumn( - array_ops.reshape( - array_ops.split( - value=transformed_tensor.indices, - num_or_size_splits=2, - axis=1)[0], [-1]), - array_ops.reshape(transformed_tensor.values, [-1]), None)) - sparse_feature_weights.append(columns_to_variables[column][0]) - elif isinstance(column, layers.feature_column._WeightedSparseColumn): # pylint: disable=protected-access - id_tensor = column.id_tensor(transformed_tensor) - weight_tensor = column.weight_tensor(transformed_tensor) + + if isinstance(column, layers.feature_column._WeightedSparseColumn): # pylint: disable=protected-access + id_tensor = column.id_tensor(transformed_tensor) + weight_tensor = array_ops.reshape( + column.weight_tensor(transformed_tensor).values, [-1]) + else: + id_tensor = transformed_tensor + weight_tensor = array_ops.ones( + [array_ops.shape(id_tensor.indices)[0]], dtypes.float32) + + example_ids = array_ops.reshape(id_tensor.indices[:, 0], [-1]) + + flat_ids = array_ops.reshape(id_tensor.values, [-1]) + projection_length = math_ops.reduce_max(flat_ids) + 1 + # project ids based on example ids so that we can dedup ids that + # occur multiple times for a single example. + projected_ids = projection_length * example_ids + flat_ids + + # Remove any redudant ids. + ids, idx = array_ops.unique(projected_ids) + # Keep only one example id per duplicated ids. 
+ example_ids_filtered = math_ops.unsorted_segment_min( + example_ids, idx, + array_ops.shape(ids)[0]) + + # reproject ids back feature id space. + reproject_ids = (ids - projection_length * example_ids_filtered) + + weights = array_ops.reshape( + math_ops.unsorted_segment_sum(weight_tensor, idx, + array_ops.shape(ids)[0]), [-1]) sparse_feature_with_values.append( - SparseFeatureColumn( - array_ops.reshape( - array_ops.split( - value=id_tensor.indices, num_or_size_splits=2, axis=1) - [0], [-1]), - array_ops.reshape(id_tensor.values, [-1]), - array_ops.reshape(weight_tensor.values, [-1]))) + SparseFeatureColumn(example_ids_filtered, reproject_ids, weights)) sparse_feature_with_values_weights.append( columns_to_variables[column][0]) else: @@ -228,6 +249,7 @@ class SDCAOptimizer(object): options=dict( symmetric_l1_regularization=self._symmetric_l1_regularization, symmetric_l2_regularization=self._symmetric_l2_regularization, + adaptive=self._adaptive, num_loss_partitions=self._num_loss_partitions, num_table_shards=self._num_table_shards, loss_type=loss_type)) diff --git a/tensorflow/contrib/lite/BUILD b/tensorflow/contrib/lite/BUILD index ac269d540a69ce6018fb24051a2c389a5b7f0f1f..1534f97d7600151e78c7fa7e8509d9e871240421 100644 --- a/tensorflow/contrib/lite/BUILD +++ b/tensorflow/contrib/lite/BUILD @@ -89,6 +89,7 @@ cc_library( hdrs = [ "builtin_op_data.h", ], + deps = [":context"], ) cc_library( @@ -136,6 +137,7 @@ cc_library( "//tensorflow/contrib/lite/kernels:eigen_support", "//tensorflow/contrib/lite/kernels:gemm_support", "//tensorflow/contrib/lite/nnapi:nnapi_lib", + "//tensorflow/contrib/lite/profiling:profiler", "//tensorflow/contrib/lite/schema:schema_fbs", ], ) diff --git a/tensorflow/contrib/lite/Makefile b/tensorflow/contrib/lite/Makefile index b4504f246a0f806d35d8c3d659717a86d2f2a4f5..65fba52d461461f4594e2222ef6df3849b741f99 100644 --- a/tensorflow/contrib/lite/Makefile +++ b/tensorflow/contrib/lite/Makefile @@ -90,7 +90,8 @@ $(wildcard 
tensorflow/contrib/lite/kernels/*.c) \ $(wildcard tensorflow/contrib/lite/kernels/internal/*.c) \ $(wildcard tensorflow/contrib/lite/kernels/internal/optimized/*.c) \ $(wildcard tensorflow/contrib/lite/kernels/internal/reference/*.c) \ -$(wildcard tensorflow/contrib/lite/downloads/farmhash/src/farmhash.cc) +$(wildcard tensorflow/contrib/lite/downloads/farmhash/src/farmhash.cc) \ +$(wildcard tensorflow/contrib/lite/downloads/fft2d/fftsg.c) # Remove any duplicates. CORE_CC_ALL_SRCS := $(sort $(CORE_CC_ALL_SRCS)) CORE_CC_EXCLUDE_SRCS := \ diff --git a/tensorflow/contrib/lite/allocation.cc b/tensorflow/contrib/lite/allocation.cc index 4b322e027d48f4bf9f90d5b873c449d1ec31cc49..a4772731ecda92431c412672610a39c188dabf27 100644 --- a/tensorflow/contrib/lite/allocation.cc +++ b/tensorflow/contrib/lite/allocation.cc @@ -22,6 +22,7 @@ limitations under the License. #include #include #include +#include #include "tensorflow/contrib/lite/allocation.h" #include "tensorflow/contrib/lite/context.h" diff --git a/tensorflow/contrib/lite/arena_planner.cc b/tensorflow/contrib/lite/arena_planner.cc index 8e47e2375e2e306c345a2b6caa2411abd9b3ceb0..4f836d367747e06de682b5764206d33f6e2fb983 100644 --- a/tensorflow/contrib/lite/arena_planner.cc +++ b/tensorflow/contrib/lite/arena_planner.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ #include "tensorflow/contrib/lite/arena_planner.h" +#include namespace tflite { diff --git a/tensorflow/contrib/lite/arena_planner.h b/tensorflow/contrib/lite/arena_planner.h index f84b3dad9550e789237c8e45971002c7d336b9d3..e9d0fbc5a9b5aec06e28da8757466b25f40da2f5 100644 --- a/tensorflow/contrib/lite/arena_planner.h +++ b/tensorflow/contrib/lite/arena_planner.h @@ -25,7 +25,7 @@ limitations under the License. 
namespace tflite { -class AllocationInfo; +struct AllocationInfo; // A memory planner that makes all the allocations using arenas. // diff --git a/tensorflow/contrib/lite/build_def.bzl b/tensorflow/contrib/lite/build_def.bzl index 2813d1c347163e67c70983d3dd49773f4a4b4544..85216776823eab2ab3ac2a3bc666f21e312acc6c 100644 --- a/tensorflow/contrib/lite/build_def.bzl +++ b/tensorflow/contrib/lite/build_def.bzl @@ -124,19 +124,19 @@ def tf_to_tflite(name, src, options, out): out: name of the output flatbuffer file. """ - toco = "//tensorflow/contrib/lite/toco:toco" + toco_cmdline = " ".join([ + "//tensorflow/contrib/lite/toco:toco", + "--input_format=TENSORFLOW_GRAPHDEF", + "--output_format=TFLITE", + ("--input_file=$(location %s)" % src), + ("--output_file=$(location %s)" % out), + ] + options ) native.genrule( name = name, - srcs=[src, options], + srcs=[src], outs=[out], - cmd = ("$(location %s) " + - " --input_file=$(location %s) " + - " --output_file=$(location %s) " + - " --input_format=TENSORFLOW_GRAPHDEF" + - " --output_format=TFLITE" + - " `cat $(location %s)`") - % (toco, src, out, options), - tools= [toco], + cmd = toco_cmdline, + tools= ["//tensorflow/contrib/lite/toco:toco"], ) def tflite_to_json(name, src, out): @@ -200,8 +200,7 @@ def gen_zipped_test_files(name, files): native.genrule( name = name + "_" + f + ".files", cmd = ("$(locations :generate_examples) --toco $(locations %s) " % toco - + " --zip_to_output " + f + - " $(@D) zipped"), + + " --zip_to_output " + f + " $(@D)"), outs = [out_file], tools = [ ":generate_examples", diff --git a/tensorflow/contrib/lite/build_ios_universal_lib.sh b/tensorflow/contrib/lite/build_ios_universal_lib.sh index 4a9023ff33de15dd384531d51e39de4ffeecdb8b..9f398f4a9f3dcafd7bd49fd5d95e9991b8b36b75 100755 --- a/tensorflow/contrib/lite/build_ios_universal_lib.sh +++ b/tensorflow/contrib/lite/build_ios_universal_lib.sh @@ -19,11 +19,16 @@ set -e SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" cd 
"$SCRIPT_DIR/../../.." -make -f tensorflow/contrib/lite/Makefile TARGET=IOS IOS_ARCH=x86_64 -j 8 -make -f tensorflow/contrib/lite/Makefile TARGET=IOS IOS_ARCH=i386 -j 8 -make -f tensorflow/contrib/lite/Makefile TARGET=IOS IOS_ARCH=armv7 -j 8 -make -f tensorflow/contrib/lite/Makefile TARGET=IOS IOS_ARCH=armv7s -j 8 -make -f tensorflow/contrib/lite/Makefile TARGET=IOS IOS_ARCH=arm64 -j 8 +make -f tensorflow/contrib/lite/Makefile TARGET=IOS IOS_ARCH=x86_64 -j 8 \ +$SCRIPT_DIR/gen/lib/ios_x86_64/libtensorflow-lite.a +make -f tensorflow/contrib/lite/Makefile TARGET=IOS IOS_ARCH=i386 -j 8 \ +$SCRIPT_DIR/gen/lib/ios_i386/libtensorflow-lite.a +make -f tensorflow/contrib/lite/Makefile TARGET=IOS IOS_ARCH=armv7 -j 8 \ +$SCRIPT_DIR/gen/lib/ios_armv7/libtensorflow-lite.a +make -f tensorflow/contrib/lite/Makefile TARGET=IOS IOS_ARCH=armv7s -j 8 \ +$SCRIPT_DIR/gen/lib/ios_armv7s/libtensorflow-lite.a +make -f tensorflow/contrib/lite/Makefile TARGET=IOS IOS_ARCH=arm64 -j 8 \ +$SCRIPT_DIR/gen/lib/ios_arm64/libtensorflow-lite.a lipo \ tensorflow/contrib/lite/gen/lib/ios_x86_64/libtensorflow-lite.a \ diff --git a/tensorflow/contrib/lite/builtin_op_data.h b/tensorflow/contrib/lite/builtin_op_data.h index 5fc8954743e5b3b458e5c2004f4378cbad6056c0..4910c89eaebabb7bd9a4e003b75fa6de4d5af69d 100644 --- a/tensorflow/contrib/lite/builtin_op_data.h +++ b/tensorflow/contrib/lite/builtin_op_data.h @@ -17,6 +17,8 @@ limitations under the License. 
#include +#include "tensorflow/contrib/lite/context.h" + #ifdef __cplusplus extern "C" { #endif // __cplusplus @@ -51,6 +53,8 @@ typedef struct { TfLitePadding padding; int stride_width; int stride_height; + int dilation_width_factor; + int dilation_height_factor; TfLiteFusedActivation activation; } TfLiteConvParams; @@ -174,6 +178,11 @@ typedef struct { int block_size; } TfLiteSpaceToDepthParams; +typedef struct { + TfLiteType in_data_type; + TfLiteType out_data_type; +} TfLiteCastParams; + typedef enum { kTfLiteCombinerTypeSum = 0, kTfLiteCombinerTypeMean = 1, @@ -214,6 +223,10 @@ typedef struct { int shrink_axis_mask; } TfLiteStridedSliceParams; +typedef struct { + TfLiteType output_type; +} TfLiteArgMaxParams; + #ifdef __cplusplus } // extern "C" #endif // __cplusplus diff --git a/tensorflow/contrib/lite/builtin_ops.h b/tensorflow/contrib/lite/builtin_ops.h index 17b791e4e2f38d9a1108d35d1298445a1c370727..859bc7ab70dc363e08800ca5c40eb0da6ca426b0 100644 --- a/tensorflow/contrib/lite/builtin_ops.h +++ b/tensorflow/contrib/lite/builtin_ops.h @@ -80,6 +80,9 @@ typedef enum { kTfLiteBuiltinCast = 53, kTfLiteBuiltinPrelu = 54, kTfLiteBuiltinMaximum = 55, + kTfLiteBuiltinArgMax = 56, + kTfLiteBuiltinMinimum = 57, + kTfLiteBuiltinLess = 58, } TfLiteBuiltinOperator; #ifdef __cplusplus diff --git a/tensorflow/contrib/lite/context.h b/tensorflow/contrib/lite/context.h index 45184b05ecefb504c75815ae900f3b605359a443..0b38f43cd32fbdfa0296eec7ef81aab76ebe5461 100644 --- a/tensorflow/contrib/lite/context.h +++ b/tensorflow/contrib/lite/context.h @@ -137,6 +137,7 @@ typedef enum { kTfLiteUInt8 = 3, kTfLiteInt64 = 4, kTfLiteString = 5, + kTfLiteBool = 6, } TfLiteType; // Parameters for asymmetric quantization. Quantized values can be converted @@ -155,6 +156,7 @@ typedef union { char* raw; const char* raw_const; uint8_t* uint8; + bool* b; } TfLitePtrUnion; // Memory allocation strategies. 
kTfLiteMmapRo is for read-only memory-mapped diff --git a/tensorflow/contrib/lite/download_dependencies.sh b/tensorflow/contrib/lite/download_dependencies.sh index a93ed201d647ddf2359a57254a959871c13fb94f..436c3e1d4cad5e6ee355d7e9cf8ee7da1a8385ce 100755 --- a/tensorflow/contrib/lite/download_dependencies.sh +++ b/tensorflow/contrib/lite/download_dependencies.sh @@ -30,12 +30,15 @@ if [ ! -f $BZL_FILE_PATH ]; then fi EIGEN_URL="$(grep -o 'http.*bitbucket.org/eigen/eigen/get/.*tar\.gz' "${BZL_FILE_PATH}" | grep -v mirror.bazel | head -n1)" -GEMMLOWP_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/gemmlowp/.*zip' "${BZL_FILE_PATH}" | head -n1)" +# TODO (yongtang): Replace the following with 'https://mirror.bazel.build/github.com/google/gemmlowp/.*zip' once +# the archive has been propagated in mirror.bazel.build. +GEMMLOWP_URL="$(grep -o 'https://github.com/google/gemmlowp/.*zip' "${BZL_FILE_PATH}" | head -n1)" GOOGLETEST_URL="https://github.com/google/googletest/archive/release-1.8.0.tar.gz" ABSL_URL="$(grep -o 'https://github.com/abseil/abseil-cpp/.*tar.gz' "${BZL_FILE_PATH}" | head -n1)" NEON_2_SSE_URL="https://github.com/intel/ARM_NEON_2_x86_SSE/archive/master.zip" FARMHASH_URL="https://mirror.bazel.build/github.com/google/farmhash/archive/816a4ae622e964763ca0862d9dbd19324a1eaf45.tar.gz" FLATBUFFERS_URL="https://github.com/google/flatbuffers/archive/master.zip" +FFT2D_URL="https://mirror.bazel.build/www.kurims.kyoto-u.ac.jp/~ooura/fft.tgz" # TODO(petewarden): Some new code in Eigen triggers a clang bug with iOS arm64, # so work around it by patching the source. 
@@ -91,6 +94,7 @@ download_and_extract "${ABSL_URL}" "${DOWNLOADS_DIR}/absl" download_and_extract "${NEON_2_SSE_URL}" "${DOWNLOADS_DIR}/neon_2_sse" download_and_extract "${FARMHASH_URL}" "${DOWNLOADS_DIR}/farmhash" download_and_extract "${FLATBUFFERS_URL}" "${DOWNLOADS_DIR}/flatbuffers" +download_and_extract "${FFT2D_URL}" "${DOWNLOADS_DIR}/fft2d" replace_by_sed 's#static uint32x4_t p4ui_CONJ_XOR = vld1q_u32( conj_XOR_DATA );#static uint32x4_t p4ui_CONJ_XOR; // = vld1q_u32( conj_XOR_DATA ); - Removed by script#' \ "${DOWNLOADS_DIR}/eigen/Eigen/src/Core/arch/NEON/Complex.h" diff --git a/tensorflow/contrib/lite/examples/ios/camera/tflite_camera_example.xcodeproj/project.pbxproj b/tensorflow/contrib/lite/examples/ios/camera/tflite_camera_example.xcodeproj/project.pbxproj index b0236e9c608ec35437bcfe79c51149a76f9f416e..98d3b5bb8ad45bf34f6996b3361291896a451a6f 100644 --- a/tensorflow/contrib/lite/examples/ios/camera/tflite_camera_example.xcodeproj/project.pbxproj +++ b/tensorflow/contrib/lite/examples/ios/camera/tflite_camera_example.xcodeproj/project.pbxproj @@ -326,10 +326,6 @@ GCC_WARN_UNUSED_VARIABLE = YES; HEADER_SEARCH_PATHS = ( "$(inherited)", - ../../../../../../, - ../../../downloads/flatbuffers/include/, - ../../../downloads/eigen/, - ../../../downloads/, ); IPHONEOS_DEPLOYMENT_TARGET = 8.0; MTL_ENABLE_DEBUG_INFO = YES; @@ -373,10 +369,6 @@ GCC_WARN_UNUSED_VARIABLE = YES; HEADER_SEARCH_PATHS = ( "$(inherited)", - ../../../../../../, - ../../../downloads/flatbuffers/include/, - ../../../downloads/eigen/, - ../../../downloads/, ); IPHONEOS_DEPLOYMENT_TARGET = 8.0; MTL_ENABLE_DEBUG_INFO = NO; diff --git a/tensorflow/contrib/lite/g3doc/apis.md b/tensorflow/contrib/lite/g3doc/apis.md index fe208e47d1ac10995881e55c8596ae14ff4242df..50cc146a87ee9ab94aea6a92fb2fb5c531f83369 100644 --- a/tensorflow/contrib/lite/g3doc/apis.md +++ b/tensorflow/contrib/lite/g3doc/apis.md @@ -29,7 +29,7 @@ interpreter->AllocateTensors(); float* input = interpreter->typed_input_tensor(0); 
// Fill `input`. interpreter->Invoke(); -float* output = interpreter->type_output_tensor(0); +float* output = interpreter->typed_output_tensor(0); ``` ### Data Alignment diff --git a/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md b/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md index 61ea5231e352f5e014f9200eccae69548574c034..203924f03d3101130049b9679328fac1e2da02bd 100644 --- a/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md +++ b/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md @@ -302,6 +302,19 @@ Options { } ``` +**LESS** + +``` +Inputs { + 0: a tensor + 1: a tensor +} +Outputs { + 0: a tensor of type bool, true whenever an element of the first tensor is less + than the corresponding element of the second tensor. +} +``` + **LOCAL_RESPONSE_NORMALIZATION** ``` diff --git a/tensorflow/contrib/lite/interpreter.cc b/tensorflow/contrib/lite/interpreter.cc index 4575fe884dc07963df5f0a26c5fe6680d92e409c..91b6c414bf036fbf57f53fc75f570b05449fa89e 100644 --- a/tensorflow/contrib/lite/interpreter.cc +++ b/tensorflow/contrib/lite/interpreter.cc @@ -14,10 +14,12 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/contrib/lite/interpreter.h" + #include #include #include #include + #include "tensorflow/contrib/lite/arena_planner.h" #include "tensorflow/contrib/lite/context.h" #include "tensorflow/contrib/lite/error_reporter.h" @@ -26,6 +28,7 @@ limitations under the License. #include "tensorflow/contrib/lite/kernels/gemm_support.h" #include "tensorflow/contrib/lite/memory_planner.h" #include "tensorflow/contrib/lite/nnapi_delegate.h" +#include "tensorflow/contrib/lite/profiling/profiler.h" #include "tensorflow/contrib/lite/schema/schema_generated.h" #include "tensorflow/contrib/lite/util.h" @@ -245,11 +248,8 @@ TfLiteStatus Interpreter::ReplaceSubgraphsWithDelegateKernels( // Initialize the output tensors's delegate-related fields. 
for (int tensor_index : subgraph.output_tensors) { TfLiteTensor* tensor = &tensors_[tensor_index]; - TF_LITE_ENSURE_EQ(&context_, tensor->delegate, nullptr); - TF_LITE_ENSURE_EQ(&context_, tensor->buffer_handle, - kTfLiteNullBufferHandle); - // buffer_handle will be filled in delegate's `Prepare` - // function. + TF_LITE_ENSURE(&context_, tensor->delegate == nullptr || + tensor->delegate == delegate); tensor->delegate = delegate; } @@ -337,9 +337,13 @@ TfLiteStatus Interpreter::BytesRequired(TfLiteType type, const int* dims, case kTfLiteInt64: *bytes = sizeof(int64_t) * count; break; + case kTfLiteBool: + *bytes = sizeof(bool) * count; + break; default: - ReportError(&context_, - "Only float32, int32, int64, uint8 supported currently."); + ReportError( + &context_, + "Only float32, int32, int64, uint8, bool supported currently."); return kTfLiteError; } return kTfLiteOk; @@ -543,6 +547,7 @@ TfLiteStatus Interpreter::Invoke() { TfLiteNode& node = nodes_and_registration_[node_index].first; const TfLiteRegistration& registration = nodes_and_registration_[node_index].second; + SCOPED_OPERATOR_PROFILE(profiler_, node_index); // TODO(ycling): This is an extra loop through inputs to check if the data // need to be copied from Delegate buffer to raw memory, which is often not @@ -566,6 +571,12 @@ TfLiteStatus Interpreter::Invoke() { } } + if (!allow_buffer_handle_output_) { + for (int tensor_index : outputs_) { + EnsureTensorDataIsReadable(tensor_index); + } + } + return status; } diff --git a/tensorflow/contrib/lite/interpreter.h b/tensorflow/contrib/lite/interpreter.h index 77db17878318276c6cf5067274a3af3be262c8e1..a49134b95ee47b97ad1e56d07f737fa44f89badc 100644 --- a/tensorflow/contrib/lite/interpreter.h +++ b/tensorflow/contrib/lite/interpreter.h @@ -20,10 +20,12 @@ limitations under the License. 
#include #include #include + #include "tensorflow/contrib/lite/allocation.h" #include "tensorflow/contrib/lite/context.h" #include "tensorflow/contrib/lite/error_reporter.h" #include "tensorflow/contrib/lite/memory_planner.h" +#include "tensorflow/contrib/lite/profiling/profiler.h" namespace tflite { @@ -48,6 +50,10 @@ template <> constexpr TfLiteType typeToTfLiteType() { return kTfLiteUInt8; } +template <> +constexpr TfLiteType typeToTfLiteType() { + return kTfLiteBool; +} // Forward declare since NNAPIDelegate uses Interpreter. class NNAPIDelegate; @@ -208,7 +214,7 @@ class Interpreter { // TODO(aselle): Create a safe ArrayHandle interface to avoid exposing this // read/write access to structure const std::pair* node_and_registration( - int node_index) { + int node_index) const { if (node_index >= nodes_and_registration_.size() || node_index < 0) return nullptr; return &nodes_and_registration_[node_index]; @@ -278,6 +284,7 @@ class Interpreter { // Ensure the data in `tensor.data` is readable. In case delegate is used, // it might require to copy the data from delegate buffer to raw memory. + // WARNING: This is an experimental API and subject to change. TfLiteStatus EnsureTensorDataIsReadable(int tensor_index) { TF_LITE_ENSURE(&context_, tensor_index < tensors_size()); TfLiteTensor* tensor = &tensors_[tensor_index]; @@ -316,6 +323,12 @@ class Interpreter { TfLiteBufferHandle* buffer_handle, TfLiteDelegate** delegate); + void SetProfiler(profiling::Profiler* profiler) { profiler_ = profiler; } + + profiling::Profiler* GetProfiler(profiling::Profiler* profiler) { + return profiler_; + } + // The default capacity of `tensors_` vector. static constexpr int kTensorsReservedCapacity = 128; // The capacity headroom of `tensors_` vector before calling ops' @@ -324,6 +337,18 @@ class Interpreter { // pointers to existing tensors. static constexpr int kTensorsCapacityHeadroom = 16; + // Set if buffer handle output is allowed. 
+ // + // When using hardware delegation, Interpreter will make the data of output + // tensors available in `tensor->data` by default. If the application can + // consume the buffer handle directly (e.g. reading output from OpenGL + // texture), it can set this flag to false, so Interpreter won't copy the data + // from buffer handle to CPU memory. + // WARNING: This is an experimental API and subject to change. + void SetAllowBufferHandleOutput(bool allow_buffer_handle_output) { + allow_buffer_handle_output_ = allow_buffer_handle_output; + } + private: // Give 'op_reg' a chance to initialize itself using the contents of // 'buffer'. @@ -514,6 +539,11 @@ class Interpreter { std::unique_ptr nnapi_delegate_; std::unique_ptr memory_planner_; + + bool allow_buffer_handle_output_ = false; + + // Profiler for this interpreter instance. + profiling::Profiler* profiler_; }; } // namespace tflite diff --git a/tensorflow/contrib/lite/java/BUILD b/tensorflow/contrib/lite/java/BUILD index 7f7a2632dd7858deb861ebc66b3348c9eb32e090..1dda55b8edf8f85293c473b51b8a19066bac5f73 100644 --- a/tensorflow/contrib/lite/java/BUILD +++ b/tensorflow/contrib/lite/java/BUILD @@ -46,6 +46,23 @@ android_library( ], ) +java_library( + name = "ovicbenchmarkerlib", + srcs = [ + "ovic/src/main/java/org/tensorflow/ovic/OvicClassifier.java", + "ovic/src/main/java/org/tensorflow/ovic/OvicSingleImageResult.java", + ], + javacopts = JAVACOPTS, + visibility = ["//visibility:public"], + deps = [ + ":libtensorflowlite_jni.so", + ":tensorflowlite_java", + "//tensorflow/contrib/lite/java/src/main/native", + "//tensorflow/contrib/lite/java/src/testhelper/java/org/tensorflow/lite:testhelper", + "@org_checkerframework_qual", + ], +) + java_library( name = "tensorflowlitelib", srcs = glob( @@ -100,6 +117,7 @@ java_test( "src/testdata/int64.bin", "src/testdata/invalid_model.bin", "src/testdata/uint8.bin", + "src/testdata/with_custom_op.lite", ], javacopts = JAVACOPTS, test_class = 
"org.tensorflow.lite.NativeInterpreterWrapperTest", @@ -147,6 +165,28 @@ java_test( ], ) +java_test( + name = "OvicClassifierTest", + size = "medium", + srcs = ["ovic/src/test/java/org/tensorflow/ovic/OvicClassifierTest.java"], + data = [ + "ovic/src/testdata/float_model.lite", + "ovic/src/testdata/labels.txt", + "ovic/src/testdata/low_res_model.lite", + "ovic/src/testdata/quantized_model.lite", + "ovic/src/testdata/test_image_128.jpg", + "ovic/src/testdata/test_image_224.jpg", + ], + javacopts = JAVACOPTS, + test_class = "org.tensorflow.ovic.OvicClassifierTest", + visibility = ["//visibility:public"], + deps = [ + ":ovicbenchmarkerlib", + "@com_google_truth", + "@junit", + ], +) + filegroup( name = "libtensorflowlite_jni", srcs = select({ diff --git a/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/Camera2BasicFragment.java b/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/Camera2BasicFragment.java index 300786c3ca01b12a46f7f9a6fe8fd720f97a79f4..18f64651889d7eeb4be961afc47554cbcc51a410 100644 --- a/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/Camera2BasicFragment.java +++ b/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/Camera2BasicFragment.java @@ -54,6 +54,9 @@ import android.view.Surface; import android.view.TextureView; import android.view.View; import android.view.ViewGroup; +import android.widget.CompoundButton; +import android.widget.NumberPicker; +import android.widget.ToggleButton; import android.widget.TextView; import android.widget.Toast; import java.io.IOException; @@ -82,6 +85,8 @@ public class Camera2BasicFragment extends Fragment private boolean runClassifier = false; private boolean checkedPermissions = false; private TextView textView; + private ToggleButton toggle; + private NumberPicker np; private ImageClassifier classifier; /** Max preview width that is guaranteed by Camera2 
API */ @@ -289,6 +294,24 @@ public class Camera2BasicFragment extends Fragment public void onViewCreated(final View view, Bundle savedInstanceState) { textureView = (AutoFitTextureView) view.findViewById(R.id.texture); textView = (TextView) view.findViewById(R.id.text); + toggle = (ToggleButton) view.findViewById(R.id.button); + + toggle.setOnCheckedChangeListener(new CompoundButton.OnCheckedChangeListener() { + public void onCheckedChanged(CompoundButton buttonView, boolean isChecked) { + classifier.setUseNNAPI(isChecked); + } + }); + + np = (NumberPicker) view.findViewById(R.id.np); + np.setMinValue(1); + np.setMaxValue(10); + np.setWrapSelectorWheel(true); + np.setOnValueChangedListener(new NumberPicker.OnValueChangeListener() { + @Override + public void onValueChange(NumberPicker picker, int oldVal, int newVal){ + classifier.setNumThreads(newVal); + } + }); } /** Load the model and labels. */ diff --git a/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifier.java b/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifier.java index c57bb348c5b386a59327c7b1bc769717ca755269..d32c0779101cf8e795ee9d7e970401c2c03bb33a 100644 --- a/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifier.java +++ b/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifier.java @@ -142,6 +142,16 @@ public abstract class ImageClassifier { } } + public void setUseNNAPI(Boolean nnapi) { + if (tflite != null) + tflite.setUseNNAPI(nnapi); + } + + public void setNumThreads(int num_threads) { + if (tflite != null) + tflite.setNumThreads(num_threads); + } + /** Closes tflite to release resources. 
*/ public void close() { tflite.close(); diff --git a/tensorflow/contrib/lite/java/demo/app/src/main/res/layout/fragment_camera2_basic.xml b/tensorflow/contrib/lite/java/demo/app/src/main/res/layout/fragment_camera2_basic.xml index 15305c436e0d997af15a326ab4027ea713ed8098..db557ad62f619e88f72426a48a74bffb0f57b818 100644 --- a/tensorflow/contrib/lite/java/demo/app/src/main/res/layout/fragment_camera2_basic.xml +++ b/tensorflow/contrib/lite/java/demo/app/src/main/res/layout/fragment_camera2_basic.xml @@ -22,24 +22,59 @@ android:layout_width="wrap_content" android:layout_height="wrap_content" android:layout_alignParentStart="true" + android:layout_alignParentLeft="true" android:layout_alignParentTop="true" /> - + + + + + + + diff --git a/tensorflow/contrib/lite/java/demo/app/src/main/res/values/strings.xml b/tensorflow/contrib/lite/java/demo/app/src/main/res/values/strings.xml index a08ec3eb629250a727cec49a822375fe5569f455..29a033bcd437c951ef6e8ba78f4fc3a0fcafac96 100644 --- a/tensorflow/contrib/lite/java/demo/app/src/main/res/values/strings.xml +++ b/tensorflow/contrib/lite/java/demo/app/src/main/res/values/strings.xml @@ -21,4 +21,6 @@ NN:On NN:Off Use NNAPI + tflite + NNAPI diff --git a/tensorflow/contrib/lite/java/ovic/README.md b/tensorflow/contrib/lite/java/ovic/README.md new file mode 100644 index 0000000000000000000000000000000000000000..76c33838bfe5b8596d78cae7d022c51d2a379e76 --- /dev/null +++ b/tensorflow/contrib/lite/java/ovic/README.md @@ -0,0 +1,83 @@ +# Benchmarker for LPIRC Workshop at CVPR 2018 + +This folder contains building code for track one of the [Low Power ImageNet Recognition Challenge workshop at CVPR 2018.](https://rebootingcomputing.ieee.org/home/sitemap/14-lpirc/80-low-power-image-recognition-challenge-lpirc-2018) + +## Prerequisites + +Follow the steps [here](https://www.tensorflow.org/mobile/tflite/demo_android) to install TensorFlow, Bazel, and the Android NDK and SDK.
+ +## To test the benchmarker: + +The testing utilities help the developers (you) to make sure that your submissions in TfLite format will be processed as expected in the competition's benchmarking system. + +Note: for now the tests only provide correctness checks, i.e. that the classifier predicts the correct category on the test image, but no on-device latency measurements. To test the latency measurement functionality, the tests will print the latency running on a desktop computer, which is not indicative of the on-device run-time. +We are releasing a benchmarker APK that would allow developers to measure latency on their own devices. + +### Obtain the sample models + +The test data (models and images) should be downloaded automatically for you by Bazel. In case they are not, you can manually install them as below. + +Note: all commands should be run from your tensorflow installation folder (under this folder you should find `tensorflow/contrib/lite`). + + +* Download the [testdata package](https://storage.googleapis.com/download.tensorflow.org/data/ovic.zip): + +```sh +curl -L https://storage.googleapis.com/download.tensorflow.org/data/ovic.zip -o /tmp/ovic.zip +``` + +* Unzip the package into the testdata folder: + +```sh +unzip -j /tmp/ovic.zip -d tensorflow/contrib/lite/java/ovic/src/testdata/ +``` + +### Run tests + +You can run the tests with Bazel as below. This helps to ensure that the installation is correct. + +```sh +bazel test --cxxopt=--std=c++11 //tensorflow/contrib/lite/java:OvicClassifierTest --test_output=all +``` + +### Test your submissions + +Once you have a submission that follows the instructions from the [competition site](https://rebootingcomputing.ieee.org/home/sitemap/14-lpirc/80-low-power-image-recognition-challenge-lpirc-2018), you can verify it as below.
+ +* Move your submission to the testdata folder: + +Let's say the submission file is located at `/tmp/my_model.lite`, then + +```sh +cp /tmp/my_model.lite tensorflow/contrib/lite/java/ovic/src/testdata/ +``` + +* Resize the test image to the resolution expected by your submission: + +The test images can be found at `tensorflow/contrib/lite/java/ovic/src/testdata/test_image_*.jpg`. You may reuse these images if your image resolutions are 128x128 or 224x224. + +* Add your model and test image to the BUILD rule: + +```python +java_test( + name = "OvicClassifierTest", + size = "medium", + srcs = ["ovic/src/test/java/org/tensorflow/ovic/OvicClassifierTest.java"], + data = [ + "ovic/src/testdata/float_model.lite", + "ovic/src/testdata/labels.txt", + "ovic/src/testdata/low_res_model.lite", + "ovic/src/testdata/quantized_model.lite", + "ovic/src/testdata/test_image_128.jpg", + "ovic/src/testdata/test_image_224.jpg", + "ovic/src/testdata/my_model.lite", # <--- Your submission. + "ovic/src/testdata/my_test_image.jpg", # <--- Your test image. + ], + ... +``` + +* Modify `OvicClassifierTest.java` to test your model. + +Change `TEST_IMAGE_PATH` to `testdata/my_test_image.jpg`. If your model runs inference in floating point, change `FLOAT_MODEL_PATH` to `testdata/my_model.lite`. If your model runs [quantized inference](https://www.tensorflow.org/performance/quantization), change `QUANTIZED_MODEL_PATH` to `testdata/my_model.lite`. + +Now you can run the Bazel tests to catch any runtime issues with the submission.
diff --git a/tensorflow/contrib/lite/java/ovic/src/test/java/org/tensorflow/ovic/OvicClassifierTest.java b/tensorflow/contrib/lite/java/ovic/src/test/java/org/tensorflow/ovic/OvicClassifierTest.java index 4fd23a99d25d715530cf36f398d949f7e70598de..098ed8ceba52b6d44868353b80cab0862e334d4d 100644 --- a/tensorflow/contrib/lite/java/ovic/src/test/java/org/tensorflow/ovic/OvicClassifierTest.java +++ b/tensorflow/contrib/lite/java/ovic/src/test/java/org/tensorflow/ovic/OvicClassifierTest.java @@ -26,7 +26,6 @@ import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.nio.MappedByteBuffer; import java.nio.channels.FileChannel; -import java.nio.file.Paths; import javax.imageio.ImageIO; import org.junit.Before; import org.junit.Test; @@ -45,27 +44,33 @@ public final class OvicClassifierTest { private ByteBuffer testImage = null; private ByteBuffer lowResTestImage = null; private OvicSingleImageResult testResult = null; - private static final String LABELS_PATH = "testdata/labels.txt"; - private static final String QUANTIZED_MODEL_PATH = "testdata/quantized_model.lite"; - private static final String LOW_RES_MODEL_PATH = "testdata/low_res_model.lite"; - private static final String FLOAT_MODEL_PATH = "testdata/float_model.lite"; - private static final String TEST_IMAGE_PATH = "testdata/test_image_224.jpg"; - private static final String TEST_LOW_RES_IMAGE_PATH = "testdata/test_image_128.jpg"; + private static final String LABELS_PATH = + "third_party/tensorflow/contrib/lite/java/ovic/src/testdata/labels.txt"; + private static final String QUANTIZED_MODEL_PATH = + "third_party/tensorflow/contrib/lite/java/ovic/src/testdata/quantized_model.lite"; + private static final String LOW_RES_MODEL_PATH = + "third_party/tensorflow/contrib/lite/java/ovic/src/testdata/low_res_model.lite"; + private static final String FLOAT_MODEL_PATH = + "third_party/tensorflow/contrib/lite/java/ovic/src/testdata/float_model.lite"; + private static final String TEST_IMAGE_PATH = + 
"third_party/tensorflow/contrib/lite/java/ovic/src/testdata/test_image_224.jpg"; + private static final String TEST_LOW_RES_IMAGE_PATH = + "third_party/tensorflow/contrib/lite/java/ovic/src/testdata/test_image_128.jpg"; private static final int TEST_IMAGE_GROUNDTRUTH = 653; // "military uniform" @Before public void setUp() { try { - File labelsfile = new File(getTestDir(LABELS_PATH)); + File labelsfile = new File(LABELS_PATH); labelsInputStream = new FileInputStream(labelsfile); - quantizedModel = loadModelFile(getTestDir(QUANTIZED_MODEL_PATH)); - floatModel = loadModelFile(getTestDir(FLOAT_MODEL_PATH)); - lowResModel = loadModelFile(getTestDir(LOW_RES_MODEL_PATH)); - File imageFile = new File(getTestDir(TEST_IMAGE_PATH)); + quantizedModel = loadModelFile(QUANTIZED_MODEL_PATH); + floatModel = loadModelFile(FLOAT_MODEL_PATH); + lowResModel = loadModelFile(LOW_RES_MODEL_PATH); + File imageFile = new File(TEST_IMAGE_PATH); BufferedImage img = ImageIO.read(imageFile); testImage = toByteBuffer(img); // Low res image and models. 
- imageFile = new File(getTestDir(TEST_LOW_RES_IMAGE_PATH)); + imageFile = new File(TEST_LOW_RES_IMAGE_PATH); img = ImageIO.read(imageFile); lowResTestImage = toByteBuffer(img); } catch (IOException e) { @@ -74,10 +79,6 @@ public final class OvicClassifierTest { System.out.println("Successful setup"); } - private static String getTestDir(String testfile) throws IOException { - return Paths.get("third_party/tensorflow/contrib/lite/java/ovic/src/", testfile).toString(); - } - @Test public void ovicClassifier_quantizedModelCreateSuccess() throws Exception { classifier = new OvicClassifier(labelsInputStream, quantizedModel); diff --git a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/DataType.java b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/DataType.java index fc16488a6459eb227fde712055d3e8ccfcce0070..75334cd96e8daadc356dadea063eee30ef6d5245 100644 --- a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/DataType.java +++ b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/DataType.java @@ -51,7 +51,11 @@ enum DataType { } } throw new IllegalArgumentException( - "DataType " + c + " is not recognized in Java (version " + TensorFlowLite.version() + ")"); + "DataType error: DataType " + + c + + " is not recognized in Java (version " + + TensorFlowLite.version() + + ")"); } /** Returns byte size of the type. */ @@ -68,7 +72,8 @@ enum DataType { case BYTEBUFFER: return 1; } - throw new IllegalArgumentException("DataType " + this + " is not supported yet"); + throw new IllegalArgumentException( + "DataType error: DataType " + this + " is not supported yet"); } /** Gets string names of the data type. 
*/ @@ -85,7 +90,8 @@ enum DataType { case BYTEBUFFER: return "ByteBuffer"; } - throw new IllegalArgumentException("DataType " + this + " is not supported yet"); + throw new IllegalArgumentException( + "DataType error: DataType " + this + " is not supported yet"); } // Cached to avoid copying it diff --git a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java index 14f461f5f9ba8c0755d2a1968533a79cce10750a..e84ee7112983ec584308b7cbcd919f119eccbcc9 100644 --- a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java +++ b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java @@ -67,6 +67,19 @@ public final class Interpreter implements AutoCloseable { wrapper = new NativeInterpreterWrapper(modelFile.getAbsolutePath()); } + /** + * Initializes a {@code Interpreter} and specifies the number of threads used for inference. + * + * @param modelFile: a file of a pre-trained TF Lite model + * @param numThreads: number of threads to use for inference + */ + public Interpreter(@NonNull File modelFile, int numThreads) { + if (modelFile == null) { + return; + } + wrapper = new NativeInterpreterWrapper(modelFile.getAbsolutePath(), numThreads); + } + /** * Initializes a {@code Interpreter} with a {@code MappedByteBuffer} to the model file. 
* @@ -124,17 +137,19 @@ public final class Interpreter implements AutoCloseable { public void runForMultipleInputsOutputs( @NonNull Object[] inputs, @NonNull Map outputs) { if (wrapper == null) { - throw new IllegalStateException("The Interpreter has already been closed."); + throw new IllegalStateException("Internal error: The Interpreter has already been closed."); } Tensor[] tensors = wrapper.run(inputs); if (outputs == null || tensors == null || outputs.size() > tensors.length) { - throw new IllegalArgumentException("Outputs do not match with model outputs."); + throw new IllegalArgumentException("Output error: Outputs do not match with model outputs."); } final int size = tensors.length; for (Integer idx : outputs.keySet()) { if (idx == null || idx < 0 || idx >= size) { throw new IllegalArgumentException( - String.format("Invalid index of output %d (should be in range [0, %d))", idx, size)); + String.format( + "Output error: Invalid index of output %d (should be in range [0, %d))", + idx, size)); } tensors[idx].copyTo(outputs.get(idx)); } @@ -147,7 +162,7 @@ public final class Interpreter implements AutoCloseable { */ public void resizeInput(int idx, @NonNull int[] dims) { if (wrapper == null) { - throw new IllegalStateException("The Interpreter has already been closed."); + throw new IllegalStateException("Internal error: The Interpreter has already been closed."); } wrapper.resizeInput(idx, dims); } @@ -160,7 +175,7 @@ public final class Interpreter implements AutoCloseable { */ public int getInputIndex(String opName) { if (wrapper == null) { - throw new IllegalStateException("The Interpreter has already been closed."); + throw new IllegalStateException("Internal error: The Interpreter has already been closed."); } return wrapper.getInputIndex(opName); } @@ -173,7 +188,7 @@ public final class Interpreter implements AutoCloseable { */ public int getOutputIndex(String opName) { if (wrapper == null) { - throw new IllegalStateException("The Interpreter has 
already been closed."); + throw new IllegalStateException("Internal error: The Interpreter has already been closed."); } return wrapper.getOutputIndex(opName); } @@ -185,7 +200,7 @@ public final class Interpreter implements AutoCloseable { */ public Long getLastNativeInferenceDurationNanoseconds() { if (wrapper == null) { - throw new IllegalStateException("The interpreter has already been closed."); + throw new IllegalStateException("Internal error: The interpreter has already been closed."); } return wrapper.getLastNativeInferenceDurationNanoseconds(); } @@ -195,8 +210,16 @@ public final class Interpreter implements AutoCloseable { if (wrapper != null) { wrapper.setUseNNAPI(useNNAPI); } else { - throw new IllegalStateException("NativeInterpreterWrapper has already been closed."); + throw new IllegalStateException( + "Internal error: NativeInterpreterWrapper has already been closed."); + } + } + + public void setNumThreads(int num_threads) { + if (wrapper == null) { + throw new IllegalStateException("The interpreter has already been closed."); } + wrapper.setNumThreads(num_threads); } /** Release resources associated with the {@code Interpreter}. 
*/ diff --git a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/NativeInterpreterWrapper.java b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/NativeInterpreterWrapper.java index dbf8f8f7cc2815a46130e342d7e45d4e471696de..2fc803715be5e5afbc5d548d46c1665785f6055d 100644 --- a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/NativeInterpreterWrapper.java +++ b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/NativeInterpreterWrapper.java @@ -32,9 +32,13 @@ import java.util.Map; final class NativeInterpreterWrapper implements AutoCloseable { NativeInterpreterWrapper(String modelPath) { + this(modelPath, /* numThreads= */ -1); + } + + NativeInterpreterWrapper(String modelPath, int numThreads) { errorHandle = createErrorReporter(ERROR_BUFFER_SIZE); modelHandle = createModel(modelPath, errorHandle); - interpreterHandle = createInterpreter(modelHandle, errorHandle, /* numThreads= */ -1); + interpreterHandle = createInterpreter(modelHandle, errorHandle, numThreads); isMemoryAllocated = true; } @@ -44,11 +48,7 @@ final class NativeInterpreterWrapper implements AutoCloseable { * NativeInterpreterWrapper}. */ NativeInterpreterWrapper(MappedByteBuffer mappedByteBuffer) { - modelByteBuffer = mappedByteBuffer; - errorHandle = createErrorReporter(ERROR_BUFFER_SIZE); - modelHandle = createModelWithBuffer(modelByteBuffer, errorHandle); - interpreterHandle = createInterpreter(modelHandle, errorHandle, /* numThreads= */ -1); - isMemoryAllocated = true; + this(mappedByteBuffer, /* numThreads= */ -1); } /** @@ -80,7 +80,7 @@ final class NativeInterpreterWrapper implements AutoCloseable { /** Sets inputs, runs model inference and returns outputs. */ Tensor[] run(Object[] inputs) { if (inputs == null || inputs.length == 0) { - throw new IllegalArgumentException("Invalid inputs. 
Inputs should not be null or empty."); + throw new IllegalArgumentException("Input error: Inputs should not be null or empty."); } int[] dataTypes = new int[inputs.length]; Object[] sizes = new Object[inputs.length]; @@ -92,7 +92,7 @@ final class NativeInterpreterWrapper implements AutoCloseable { ByteBuffer buffer = (ByteBuffer) inputs[i]; if (buffer.order() != ByteOrder.nativeOrder()) { throw new IllegalArgumentException( - "Invalid ByteBuffer. It shoud use ByteOrder.nativeOrder()."); + "Input error: ByteBuffer shoud use ByteOrder.nativeOrder()."); } numsOfBytes[i] = buffer.limit(); sizes[i] = getInputDims(interpreterHandle, i, numsOfBytes[i]); @@ -103,7 +103,7 @@ final class NativeInterpreterWrapper implements AutoCloseable { } else { throw new IllegalArgumentException( String.format( - "%d-th element of the %d inputs is not an array or a ByteBuffer.", + "Input error: %d-th element of the %d inputs is not an array or a ByteBuffer.", i, inputs.length)); } } @@ -119,7 +119,7 @@ final class NativeInterpreterWrapper implements AutoCloseable { this, isMemoryAllocated); if (outputsHandles == null || outputsHandles.length == 0) { - throw new IllegalStateException("Interpreter has no outputs."); + throw new IllegalStateException("Internal error: Interpreter has no outputs."); } isMemoryAllocated = true; Tensor[] outputs = new Tensor[outputsHandles.length]; @@ -153,6 +153,10 @@ final class NativeInterpreterWrapper implements AutoCloseable { useNNAPI(interpreterHandle, useNNAPI); } + void setNumThreads(int num_threads) { + numThreads(interpreterHandle, num_threads); + } + /** Gets index of an input given its name. */ int getInputIndex(String name) { if (inputsIndexes == null) { @@ -169,7 +173,8 @@ final class NativeInterpreterWrapper implements AutoCloseable { } else { throw new IllegalArgumentException( String.format( - "%s is not a valid name for any input. The indexes of the inputs are %s", + "Input error: %s is not a valid name for any input. 
" + + "The indexes of the inputs are %s", name, inputsIndexes.toString())); } } @@ -190,7 +195,8 @@ final class NativeInterpreterWrapper implements AutoCloseable { } else { throw new IllegalArgumentException( String.format( - "%s is not a valid name for any output. The indexes of the outputs are %s", + "Input error: %s is not a valid name for any output. " + + "The indexes of the outputs are %s", name, outputsIndexes.toString())); } } @@ -229,7 +235,8 @@ final class NativeInterpreterWrapper implements AutoCloseable { return DataType.BYTEBUFFER; } } - throw new IllegalArgumentException("cannot resolve DataType of " + o.getClass().getName()); + throw new IllegalArgumentException( + "DataType error: cannot resolve DataType of " + o.getClass().getName()); } /** Returns the shape of an object as an int array. */ @@ -245,7 +252,7 @@ final class NativeInterpreterWrapper implements AutoCloseable { return 0; } if (Array.getLength(o) == 0) { - throw new IllegalArgumentException("array lengths cannot be 0."); + throw new IllegalArgumentException("Array lengths cannot be 0."); } return 1 + numDimensions(Array.get(o, 0)); } @@ -259,7 +266,7 @@ final class NativeInterpreterWrapper implements AutoCloseable { shape[dim] = len; } else if (shape[dim] != len) { throw new IllegalArgumentException( - String.format("mismatched lengths (%d and %d) in dimension %d", shape[dim], len, dim)); + String.format("Mismatched lengths (%d and %d) in dimension %d", shape[dim], len, dim)); } for (int i = 0; i < len; ++i) { fillShape(Array.get(o, i), dim + 1, shape); @@ -321,6 +328,8 @@ final class NativeInterpreterWrapper implements AutoCloseable { private static native void useNNAPI(long interpreterHandle, boolean state); + private static native void numThreads(long interpreterHandle, int num_threads); + private static native long createErrorReporter(int size); private static native long createModel(String modelPathOrBuffer, long errorHandle); diff --git 
a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Tensor.java b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Tensor.java index 54ace6c63ce5bd1b38be744176d0378e3cc8a1d3..09e887aae3339e9f114c07d689c0d7b5e2fc384b 100644 --- a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Tensor.java +++ b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Tensor.java @@ -34,15 +34,16 @@ final class Tensor { if (NativeInterpreterWrapper.dataTypeOf(dst) != dtype) { throw new IllegalArgumentException( String.format( - "Cannot convert an TensorFlowLite tensor with type %s to a Java object of " - + "type %s (which is compatible with the TensorFlowLite type %s)", + "Output error: Cannot convert an TensorFlowLite tensor with type %s to a Java " + + "object of type %s (which is compatible with the TensorFlowLite type %s)", dtype, dst.getClass().getName(), NativeInterpreterWrapper.dataTypeOf(dst))); } int[] dstShape = NativeInterpreterWrapper.shapeOf(dst); if (!Arrays.equals(dstShape, shapeCopy)) { throw new IllegalArgumentException( String.format( - "Shape of output target %s does not match with the shape of the Tensor %s.", + "Output error: Shape of output target %s does not match with the shape of the " + + "Tensor %s.", Arrays.toString(dstShape), Arrays.toString(shapeCopy))); } readMultiDimensionalArray(nativeHandle, dst); diff --git a/tensorflow/contrib/lite/java/src/main/native/exception_jni.cc b/tensorflow/contrib/lite/java/src/main/native/exception_jni.cc index 1578c9e3ddd034ad9ce17c8c3ae6c942258e2a55..34d91be04cd6c855a2068510ca810c0b93637584 100644 --- a/tensorflow/contrib/lite/java/src/main/native/exception_jni.cc +++ b/tensorflow/contrib/lite/java/src/main/native/exception_jni.cc @@ -44,7 +44,8 @@ BufferErrorReporter::BufferErrorReporter(JNIEnv* env, int limit) { buffer_ = new char[limit]; if (!buffer_) { throwException(env, kNullPointerException, - "Malloc of BufferErrorReporter to hold %d char failed.", + "Internal 
error: Malloc of BufferErrorReporter to hold %d " + "char failed.", limit); return; } diff --git a/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc b/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc index 844226203bb02f4017b2f04da34ac81ac2b7a191..45f510da1d940a288e2794cb3e08f66451956b64 100644 --- a/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc +++ b/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc @@ -22,7 +22,7 @@ const int kBufferSize = 256; tflite::Interpreter* convertLongToInterpreter(JNIEnv* env, jlong handle) { if (handle == 0) { throwException(env, kIllegalArgumentException, - "Invalid handle to Interpreter."); + "Internal error: Invalid handle to Interpreter."); return nullptr; } return reinterpret_cast(handle); @@ -30,7 +30,8 @@ tflite::Interpreter* convertLongToInterpreter(JNIEnv* env, jlong handle) { tflite::FlatBufferModel* convertLongToModel(JNIEnv* env, jlong handle) { if (handle == 0) { - throwException(env, kIllegalArgumentException, "Invalid handle to model."); + throwException(env, kIllegalArgumentException, + "Internal error: Invalid handle to model."); return nullptr; } return reinterpret_cast(handle); @@ -39,7 +40,7 @@ tflite::FlatBufferModel* convertLongToModel(JNIEnv* env, jlong handle) { BufferErrorReporter* convertLongToErrorReporter(JNIEnv* env, jlong handle) { if (handle == 0) { throwException(env, kIllegalArgumentException, - "Invalid handle to ErrorReporter."); + "Internal error: Invalid handle to ErrorReporter."); return nullptr; } return reinterpret_cast(handle); @@ -51,7 +52,7 @@ std::vector convertJIntArrayToVector(JNIEnv* env, jintArray inputs) { jint* ptr = env->GetIntArrayElements(inputs, nullptr); if (ptr == nullptr) { throwException(env, kIllegalArgumentException, - "Empty dimensions of input array."); + "Array has empty dimensions."); return {}; } for (int i = 0; i < size; ++i) { @@ -113,7 +114,7 @@ TfLiteStatus 
checkInputs(JNIEnv* env, tflite::Interpreter* interpreter, jobjectArray sizes) { if (input_size != interpreter->inputs().size()) { throwException(env, kIllegalArgumentException, - "Expected num of inputs is %d but got %d", + "Input error: Expected num of inputs is %d but got %d", interpreter->inputs().size(), input_size); return kTfLiteError; } @@ -121,8 +122,9 @@ TfLiteStatus checkInputs(JNIEnv* env, tflite::Interpreter* interpreter, input_size != env->GetArrayLength(nums_of_bytes) || input_size != env->GetArrayLength(values)) { throwException(env, kIllegalArgumentException, - "Arrays in arguments should be of the same length, but got " - "%d sizes, %d data_types, %d nums_of_bytes, and %d values", + "Internal error: Arrays in arguments should be of the same " + "length, but got %d sizes, %d data_types, %d nums_of_bytes, " + "and %d values", input_size, env->GetArrayLength(data_types), env->GetArrayLength(nums_of_bytes), env->GetArrayLength(values)); @@ -136,8 +138,8 @@ TfLiteStatus checkInputs(JNIEnv* env, tflite::Interpreter* interpreter, int num_dims = static_cast(env->GetArrayLength(dims)); if (target->dims->size != num_dims) { throwException(env, kIllegalArgumentException, - "%d-th input should have %d dimensions, but found %d " - "dimensions", + "Input error: %d-th input should have %d dimensions, but " + "found %d dimensions", i, target->dims->size, num_dims); return kTfLiteError; } @@ -150,7 +152,8 @@ TfLiteStatus checkInputs(JNIEnv* env, tflite::Interpreter* interpreter, num_dims); printDims(obtained_dims.get(), kBufferSize, ptr, num_dims); throwException(env, kIllegalArgumentException, - "%d-th input dimension should be [%s], but found [%s]", + "Input error: %d-th input dimension should be [%s], but " + "found [%s]", i, expected_dims.get(), obtained_dims.get()); env->ReleaseIntArrayElements(dims, ptr, JNI_ABORT); return kTfLiteError; @@ -236,8 +239,8 @@ TfLiteStatus setInputs(JNIEnv* env, tflite::Interpreter* interpreter, TfLiteType type = 
resolveDataType(data_type[i]); if (type != target->type) { throwException(env, kIllegalArgumentException, - "DataType (%d) of input data does not match with the " - "DataType (%d) of model inputs.", + "Input error: DataType (%d) of input data does not " + "match with the DataType (%d) of model inputs.", type, target->type); return kTfLiteError; } @@ -270,7 +273,8 @@ Java_org_tensorflow_lite_NativeInterpreterWrapper_getInputNames(JNIEnv* env, jclass string_class = env->FindClass("java/lang/String"); if (string_class == nullptr) { throwException(env, kUnsupportedOperationException, - "Can not find java/lang/String class to get input names."); + "Internal error: Can not find java/lang/String class to get " + "input names."); return nullptr; } size_t size = interpreter->inputs().size(); @@ -292,7 +296,8 @@ Java_org_tensorflow_lite_NativeInterpreterWrapper_getOutputNames(JNIEnv* env, jclass string_class = env->FindClass("java/lang/String"); if (string_class == nullptr) { throwException(env, kUnsupportedOperationException, - "Can not find java/lang/String class to get output names."); + "Internal error: Can not find java/lang/String class to get " + "output names."); return nullptr; } size_t size = interpreter->outputs().size(); @@ -315,6 +320,16 @@ Java_org_tensorflow_lite_NativeInterpreterWrapper_useNNAPI(JNIEnv* env, interpreter->UseNNAPI(static_cast(state)); } +JNIEXPORT void JNICALL +Java_org_tensorflow_lite_NativeInterpreterWrapper_numThreads(JNIEnv* env, + jclass clazz, + jlong handle, + jint num_threads) { + tflite::Interpreter* interpreter = convertLongToInterpreter(env, handle); + if (interpreter == nullptr) return; + interpreter->SetNumThreads(static_cast(num_threads)); +} + JNIEXPORT jlong JNICALL Java_org_tensorflow_lite_NativeInterpreterWrapper_createErrorReporter( JNIEnv* env, jclass clazz, jint size) { @@ -351,8 +366,8 @@ Java_org_tensorflow_lite_NativeInterpreterWrapper_createModel( path, verifier.get(), error_reporter); if (!model) { throwException(env, 
kIllegalArgumentException, - "Contents of %s does not encode a valid TensorFlowLite " - "model: %s", + "Contents of %s does not encode a valid " + "TensorFlowLite model: %s", path, error_reporter->CachedErrorMessage()); env->ReleaseStringUTFChars(model_file, path); return 0; @@ -380,8 +395,8 @@ Java_org_tensorflow_lite_NativeInterpreterWrapper_createModelWithBuffer( buf, static_cast(capacity), error_reporter); if (!model) { throwException(env, kIllegalArgumentException, - "MappedByteBuffer does not encode a valid TensorFlowLite " - "model: %s", + "MappedByteBuffer does not encode a valid " + "TensorFlowLite model: %s", error_reporter->CachedErrorMessage()); return 0; } @@ -403,7 +418,7 @@ Java_org_tensorflow_lite_NativeInterpreterWrapper_createInterpreter( &interpreter, static_cast(num_threads)); if (status != kTfLiteOk) { throwException(env, kIllegalArgumentException, - "Cannot create interpreter: %s", + "Internal error: Cannot create interpreter: %s", error_reporter->CachedErrorMessage()); return 0; } @@ -411,7 +426,7 @@ Java_org_tensorflow_lite_NativeInterpreterWrapper_createInterpreter( status = interpreter->AllocateTensors(); if (status != kTfLiteOk) { throwException(env, kNullPointerException, - "Can not allocate memory for the interpreter", + "Internal error: Cannot allocate memory for the interpreter", error_reporter->CachedErrorMessage()); return 0; } @@ -440,7 +455,8 @@ Java_org_tensorflow_lite_NativeInterpreterWrapper_run( // resizes inputs status = resizeInputs(env, interpreter, input_size, sizes); if (status != kTfLiteOk) { - throwException(env, kNullPointerException, "Can not resize the input: %s", + throwException(env, kNullPointerException, + "Internal error: Can not resize the input: %s", error_reporter->CachedErrorMessage()); return nullptr; } @@ -448,7 +464,8 @@ Java_org_tensorflow_lite_NativeInterpreterWrapper_run( status = interpreter->AllocateTensors(); if (status != kTfLiteOk) { throwException(env, kNullPointerException, - "Can not allocate 
memory for the given inputs: %s", + "Internal error: Can not allocate memory for the given " + "inputs: %s", error_reporter->CachedErrorMessage()); return nullptr; } @@ -461,7 +478,7 @@ Java_org_tensorflow_lite_NativeInterpreterWrapper_run( // runs inference if (interpreter->Invoke() != kTfLiteOk) { throwException(env, kIllegalArgumentException, - "Failed to run on the given Interpreter: %s", + "Internal error: Failed to run on the given Interpreter: %s", error_reporter->CachedErrorMessage()); return nullptr; } @@ -479,8 +496,9 @@ Java_org_tensorflow_lite_NativeInterpreterWrapper_run( // returns outputs const std::vector& results = interpreter->outputs(); if (results.empty()) { - throwException(env, kIllegalArgumentException, - "The Interpreter does not have any outputs."); + throwException( + env, kIllegalArgumentException, + "Internal error: The Interpreter does not have any outputs."); return nullptr; } jlongArray outputs = env->NewLongArray(results.size()); @@ -501,7 +519,8 @@ Java_org_tensorflow_lite_NativeInterpreterWrapper_getInputDims( const int idx = static_cast(input_idx); if (input_idx < 0 || input_idx >= interpreter->inputs().size()) { throwException(env, kIllegalArgumentException, - "Out of range: Failed to get %d-th input out of %d inputs", + "Input error: Out of range: Failed to get %d-th input out of" + " %d inputs", input_idx, interpreter->inputs().size()); return nullptr; } @@ -514,8 +533,8 @@ Java_org_tensorflow_lite_NativeInterpreterWrapper_getInputDims( } if (num_bytes != expected_num_bytes) { throwException(env, kIllegalArgumentException, - "Failed to get input dimensions. %d-th input should have" - " %d bytes, but found %d bytes.", + "Input error: Failed to get input dimensions. 
%d-th input " + "should have %d bytes, but found %d bytes.", idx, expected_num_bytes, num_bytes); return nullptr; } @@ -533,8 +552,8 @@ Java_org_tensorflow_lite_NativeInterpreterWrapper_getOutputDataType( const int idx = static_cast(output_idx); if (output_idx < 0 || output_idx >= interpreter->outputs().size()) { throwException(env, kIllegalArgumentException, - "Out of range: Failed to get %d-th output out of %d outputs", - output_idx, interpreter->outputs().size()); + "Failed to get %d-th output out of %d outputs", output_idx, + interpreter->outputs().size()); return -1; } TfLiteTensor* target = interpreter->tensor(interpreter->outputs()[idx]); @@ -555,7 +574,8 @@ Java_org_tensorflow_lite_NativeInterpreterWrapper_resizeInput( const int idx = static_cast(input_idx); if (idx < 0 || idx >= interpreter->inputs().size()) { throwException(env, kIllegalArgumentException, - "Can not resize %d-th input for a model having %d inputs.", + "Input error: Can not resize %d-th input for a model having " + "%d inputs.", idx, interpreter->inputs().size()); return JNI_FALSE; } @@ -567,7 +587,7 @@ Java_org_tensorflow_lite_NativeInterpreterWrapper_resizeInput( interpreter->inputs()[idx], convertJIntArrayToVector(env, dims)); if (status != kTfLiteOk) { throwException(env, kIllegalArgumentException, - "Failed to resize %d-th input: %s", idx, + "Internal error: Failed to resize %d-th input: %s", idx, error_reporter->CachedErrorMessage()); return JNI_FALSE; } diff --git a/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.h b/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.h index 0e28a77feea41d72be126d6e60fffbe7ce374a76..eaa765cb343e9764bd0ef018d636a76f4b8a13e4 100644 --- a/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.h +++ b/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.h @@ -61,7 +61,7 @@ Java_org_tensorflow_lite_NativeInterpreterWrapper_getOutputNames(JNIEnv* env, /* * Class: 
org_tensorflow_lite_NativeInterpreterWrapper * Method: - * Signature: (JZ) + * Signature: (JZ)V */ JNIEXPORT void JNICALL Java_org_tensorflow_lite_NativeInterpreterWrapper_useNNAPI(JNIEnv* env, @@ -69,6 +69,16 @@ Java_org_tensorflow_lite_NativeInterpreterWrapper_useNNAPI(JNIEnv* env, jlong handle, jboolean state); +/* + * Class: org_tensorflow_lite_NativeInterpreterWrapper + * Method: + * Signature: (JI)V + */ +JNIEXPORT void JNICALL +Java_org_tensorflow_lite_NativeInterpreterWrapper_numThreads(JNIEnv* env, + jclass clazz, + jlong handle, + jint num_threads); /* * Class: org_tensorflow_lite_NativeInterpreterWrapper * Method: diff --git a/tensorflow/contrib/lite/java/src/main/native/tensor_jni.cc b/tensorflow/contrib/lite/java/src/main/native/tensor_jni.cc index 65126e78a3003f8a69c69326124d613e878c0f9d..17f4be09c63a9e80feda9d0324d49cc0418fe66a 100644 --- a/tensorflow/contrib/lite/java/src/main/native/tensor_jni.cc +++ b/tensorflow/contrib/lite/java/src/main/native/tensor_jni.cc @@ -23,7 +23,7 @@ namespace { TfLiteTensor* convertLongToTensor(JNIEnv* env, jlong handle) { if (handle == 0) { throwException(env, kIllegalArgumentException, - "Invalid handle to TfLiteTensor."); + "Internal error: Invalid handle to TfLiteTensor."); return nullptr; } return reinterpret_cast(handle); @@ -36,7 +36,8 @@ size_t writeOneDimensionalArray(JNIEnv* env, jobject object, TfLiteType type, size_t to_copy = num_elements * elementByteSize(type); if (to_copy > dst_size) { throwException(env, kIllegalStateException, - "cannot write Java array of %d bytes to Tensor of %d bytes", + "Internal error: cannot write Java array of %d bytes to " + "Tensor of %d bytes", to_copy, dst_size); return 0; } @@ -71,10 +72,10 @@ size_t writeOneDimensionalArray(JNIEnv* env, jobject object, TfLiteType type, } default: { throwException(env, kUnsupportedOperationException, - "TensorFlowLite currently supports float (32 bits), " - "int (32 bits), byte (8 bits), and long (64 bits), " - "support for other types 
(DataType %d in this case) will " - "be added in the future", + "DataType error: TensorFlowLite currently supports float " + "(32 bits), int (32 bits), byte (8 bits), and long " + "(64 bits), support for other types (DataType %d in this " + "case) will be added in the future", kTfLiteFloat32, type); return 0; } @@ -88,8 +89,9 @@ size_t readOneDimensionalArray(JNIEnv* env, TfLiteType data_type, if (size > src_size) { throwException( env, kIllegalStateException, - "cannot fill a Java array of %d bytes with a Tensor of %d bytes", size, - src_size); + "Internal error: cannot fill a Java array of %d bytes with a Tensor of " + "%d bytes", + size, src_size); return 0; } switch (data_type) { @@ -117,8 +119,8 @@ size_t readOneDimensionalArray(JNIEnv* env, TfLiteType data_type, return size; } default: { - throwException(env, kIllegalStateException, "invalid DataType(%d)", - data_type); + throwException(env, kIllegalStateException, + "DataType error: invalid DataType(%d)", data_type); } } return 0; @@ -152,19 +154,22 @@ size_t elementByteSize(TfLiteType data_type) { switch (data_type) { case kTfLiteFloat32: static_assert(sizeof(jfloat) == 4, - "Java float not compatible with kTfLiteFloat"); + "Interal error: Java float not compatible with " + "kTfLiteFloat"); return 4; case kTfLiteInt32: static_assert(sizeof(jint) == 4, - "Java int not compatible with kTfLiteInt"); + "Interal error: Java int not compatible with kTfLiteInt"); return 4; case kTfLiteUInt8: static_assert(sizeof(jbyte) == 1, - "Java byte not compatible with kTfLiteUInt8"); + "Interal error: Java byte not compatible with " + "kTfLiteUInt8"); return 1; case kTfLiteInt64: static_assert(sizeof(jlong) == 8, - "Java long not compatible with kTfLiteInt64"); + "Interal error: Java long not compatible with " + "kTfLiteInt64"); return 8; default: return 0; @@ -212,7 +217,7 @@ Java_org_tensorflow_lite_Tensor_readMultiDimensionalArray(JNIEnv* env, int num_dims = tensor->dims->size; if (num_dims == 0) { throwException(env, 
kIllegalArgumentException, - "copyTo() is not meant for scalar Tensors."); + "Internal error: Cannot copy empty/scalar Tensors."); return; } readMultiDimensionalArray(env, tensor->type, tensor->data.raw, tensor->bytes, diff --git a/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/NativeInterpreterWrapperTest.java b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/NativeInterpreterWrapperTest.java index dbe45e5a05b8227b441de7ca6747f61d010ae210..7c00d3196fd001a288d77d4e01f0b30978d72afe 100644 --- a/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/NativeInterpreterWrapperTest.java +++ b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/NativeInterpreterWrapperTest.java @@ -321,9 +321,7 @@ public final class NativeInterpreterWrapperTest { wrapper.run(inputs); fail(); } catch (IllegalArgumentException e) { - assertThat(e) - .hasMessageThat() - .contains("Invalid inputs. Inputs should not be null or empty."); + assertThat(e).hasMessageThat().contains("Inputs should not be null or empty."); } wrapper.close(); } @@ -440,7 +438,7 @@ public final class NativeInterpreterWrapperTest { NativeInterpreterWrapper.numDimensions(emptyArray); fail(); } catch (IllegalArgumentException e) { - assertThat(e).hasMessageThat().contains("array lengths cannot be 0."); + assertThat(e).hasMessageThat().contains("Array lengths cannot be 0."); } } diff --git a/tensorflow/contrib/lite/kernels/BUILD b/tensorflow/contrib/lite/kernels/BUILD index df0f3cbeb0e99c4a7cb6a9c610ce660f06454744..80cefe83b29192d3ed8f37fc38c4b6d8a9a039a9 100644 --- a/tensorflow/contrib/lite/kernels/BUILD +++ b/tensorflow/contrib/lite/kernels/BUILD @@ -12,10 +12,7 @@ tf_cc_test( name = "optional_tensor_test", size = "small", srcs = ["optional_tensor_test.cc"], - tags = [ - "tflite_not_portable_ios_arm64", - "tflite_not_portable_ios_x86_64", - ], + tags = ["tflite_not_portable_ios"], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:framework", @@ -108,10 +105,7 @@ 
tf_cc_test( name = "kernel_util_test", size = "small", srcs = ["kernel_util_test.cc"], - tags = [ - "tflite_not_portable_ios_arm64", - "tflite_not_portable_ios_x86_64", - ], + tags = ["tflite_not_portable_ios"], deps = [ ":kernel_util", "//tensorflow/contrib/lite/testing:util", @@ -135,12 +129,14 @@ cc_library( srcs = [ "activations.cc", "add.cc", + "arg_max.cc", "audio_spectrogram.cc", "basic_rnn.cc", "batch_to_space_nd.cc", "bidirectional_sequence_lstm.cc", "bidirectional_sequence_rnn.cc", "cast.cc", + "comparisons.cc", "concatenation.cc", "conv.cc", "depthwise_conv.cc", @@ -156,7 +152,7 @@ cc_library( "local_response_norm.cc", "lsh_projection.cc", "lstm.cc", - "maximum.cc", + "maximum_minimum.cc", "mean.cc", "mfcc.cc", "mul.cc", @@ -216,6 +212,7 @@ tf_cc_test( name = "audio_spectrogram_test", size = "small", srcs = ["audio_spectrogram_test.cc"], + tags = ["tflite_not_portable_ios"], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:framework", @@ -229,6 +226,7 @@ tf_cc_test( name = "mfcc_test", size = "small", srcs = ["mfcc_test.cc"], + tags = ["tflite_not_portable_ios"], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:framework", @@ -242,10 +240,7 @@ tf_cc_test( name = "activations_test", size = "small", srcs = ["activations_test.cc"], - tags = [ - "tflite_not_portable_ios_arm64", - "tflite_not_portable_ios_x86_64", - ], + tags = ["tflite_not_portable_ios"], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:framework", @@ -258,9 +253,21 @@ tf_cc_test( name = "add_test", size = "small", srcs = ["add_test.cc"], + tags = ["tflite_not_portable_ios"], + deps = [ + ":builtin_ops", + "//tensorflow/contrib/lite:framework", + "//tensorflow/contrib/lite/kernels:test_util", + "@com_google_googletest//:gtest", + ], +) + +tf_cc_test( + name = "arg_max_test", + size = "small", + srcs = ["arg_max_test.cc"], tags = [ - "tflite_not_portable_ios_arm64", - "tflite_not_portable_ios_x86_64", + "tflite_not_portable_ios", ], deps = [ ":builtin_ops", @@ -274,10 +281,7 @@ 
tf_cc_test( name = "div_test", size = "small", srcs = ["div_test.cc"], - tags = [ - "tflite_not_portable_ios_arm64", - "tflite_not_portable_ios_x86_64", - ], + tags = ["tflite_not_portable_ios"], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:framework", @@ -290,10 +294,7 @@ tf_cc_test( name = "sub_test", size = "small", srcs = ["sub_test.cc"], - tags = [ - "tflite_not_portable_ios_arm64", - "tflite_not_portable_ios_x86_64", - ], + tags = ["tflite_not_portable_ios"], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:framework", @@ -306,10 +307,7 @@ tf_cc_test( name = "transpose_test", size = "small", srcs = ["transpose_test.cc"], - tags = [ - "tflite_not_portable_ios_arm64", - "tflite_not_portable_ios_x86_64", - ], + tags = ["tflite_not_portable_ios"], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:framework", @@ -324,10 +322,7 @@ tf_cc_test( name = "space_to_batch_nd_test", size = "small", srcs = ["space_to_batch_nd_test.cc"], - tags = [ - "tflite_not_portable_ios_arm64", - "tflite_not_portable_ios_x86_64", - ], + tags = ["tflite_not_portable_ios"], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:framework", @@ -340,10 +335,7 @@ tf_cc_test( name = "batch_to_space_nd_test", size = "small", srcs = ["batch_to_space_nd_test.cc"], - tags = [ - "tflite_not_portable_ios_arm64", - "tflite_not_portable_ios_x86_64", - ], + tags = ["tflite_not_portable_ios"], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:framework", @@ -356,6 +348,7 @@ tf_cc_test( name = "cast_test", size = "small", srcs = ["cast_test.cc"], + tags = ["tflite_not_portable_ios"], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:framework", @@ -368,10 +361,7 @@ tf_cc_test( name = "concatenation_test", size = "small", srcs = ["concatenation_test.cc"], - tags = [ - "tflite_not_portable_ios_arm64", - "tflite_not_portable_ios_x86_64", - ], + tags = ["tflite_not_portable_ios"], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:framework", @@ -384,10 +374,7 @@ tf_cc_test( name = 
"conv_test", size = "small", srcs = ["conv_test.cc"], - tags = [ - "tflite_not_portable_ios_arm64", - "tflite_not_portable_ios_x86_64", - ], + tags = ["tflite_not_portable_ios"], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:framework", @@ -401,10 +388,7 @@ tf_cc_test( name = "depthwise_conv_test", size = "small", srcs = ["depthwise_conv_test.cc"], - tags = [ - "tflite_not_portable_ios_arm64", - "tflite_not_portable_ios_x86_64", - ], + tags = ["tflite_not_portable_ios"], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:framework", @@ -417,6 +401,7 @@ tf_cc_test( name = "dequantize_test", size = "small", srcs = ["dequantize_test.cc"], + tags = ["tflite_not_portable_ios"], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:framework", @@ -430,10 +415,7 @@ tf_cc_test( name = "basic_rnn_test", size = "small", srcs = ["basic_rnn_test.cc"], - tags = [ - "tflite_not_portable_ios_arm64", - "tflite_not_portable_ios_x86_64", - ], + tags = ["tflite_not_portable_ios"], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:framework", @@ -446,10 +428,7 @@ tf_cc_test( name = "bidirectional_sequence_lstm_test", size = "small", srcs = ["bidirectional_sequence_lstm_test.cc"], - tags = [ - "tflite_not_portable_ios_arm64", - "tflite_not_portable_ios_x86_64", - ], + tags = ["tflite_not_portable_ios"], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:framework", @@ -462,10 +441,7 @@ tf_cc_test( name = "unidirectional_sequence_lstm_test", size = "small", srcs = ["unidirectional_sequence_lstm_test.cc"], - tags = [ - "tflite_not_portable_ios_arm64", - "tflite_not_portable_ios_x86_64", - ], + tags = ["tflite_not_portable_ios"], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:framework", @@ -493,10 +469,7 @@ tf_cc_test( name = "unidirectional_sequence_rnn_test", size = "small", srcs = ["unidirectional_sequence_rnn_test.cc"], - tags = [ - "tflite_not_portable_ios_arm64", - "tflite_not_portable_ios_x86_64", - ], + tags = ["tflite_not_portable_ios"], deps = [ ":builtin_ops", 
"//tensorflow/contrib/lite:framework", @@ -509,10 +482,7 @@ tf_cc_test( name = "l2norm_test", size = "small", srcs = ["l2norm_test.cc"], - tags = [ - "tflite_not_portable_ios_arm64", - "tflite_not_portable_ios_x86_64", - ], + tags = ["tflite_not_portable_ios"], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:framework", @@ -525,10 +495,7 @@ tf_cc_test( name = "exp_test", size = "small", srcs = ["exp_test.cc"], - tags = [ - "tflite_not_portable_ios_arm64", - "tflite_not_portable_ios_x86_64", - ], + tags = ["tflite_not_portable_ios"], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:framework", @@ -538,9 +505,10 @@ tf_cc_test( ) tf_cc_test( - name = "maximum_test", + name = "maximum_minimum_test", size = "small", - srcs = ["maximum_test.cc"], + srcs = ["maximum_minimum_test.cc"], + tags = ["tflite_not_portable_ios"], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:framework", @@ -553,10 +521,7 @@ tf_cc_test( name = "mean_test", size = "small", srcs = ["mean_test.cc"], - tags = [ - "tflite_not_portable_ios_arm64", - "tflite_not_portable_ios_x86_64", - ], + tags = ["tflite_not_portable_ios"], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:framework", @@ -569,10 +534,7 @@ tf_cc_test( name = "mul_test", size = "small", srcs = ["mul_test.cc"], - tags = [ - "tflite_not_portable_ios_arm64", - "tflite_not_portable_ios_x86_64", - ], + tags = ["tflite_not_portable_ios"], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:framework", @@ -585,10 +547,7 @@ tf_cc_test( name = "pad_test", size = "small", srcs = ["pad_test.cc"], - tags = [ - "tflite_not_portable_ios_arm64", - "tflite_not_portable_ios_x86_64", - ], + tags = ["tflite_not_portable_ios"], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:framework", @@ -601,10 +560,7 @@ tf_cc_test( name = "reshape_test", size = "small", srcs = ["reshape_test.cc"], - tags = [ - "tflite_not_portable_ios_arm64", - "tflite_not_portable_ios_x86_64", - ], + tags = ["tflite_not_portable_ios"], deps = [ ":builtin_ops", 
"//tensorflow/contrib/lite:framework", @@ -617,10 +573,7 @@ tf_cc_test( name = "gather_test", size = "small", srcs = ["gather_test.cc"], - tags = [ - "tflite_not_portable_ios_arm64", - "tflite_not_portable_ios_x86_64", - ], + tags = ["tflite_not_portable_ios"], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:builtin_op_data", @@ -634,10 +587,7 @@ tf_cc_test( name = "topk_v2_test", size = "small", srcs = ["topk_v2_test.cc"], - tags = [ - "tflite_not_portable_ios_arm64", - "tflite_not_portable_ios_x86_64", - ], + tags = ["tflite_not_portable_ios"], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:builtin_op_data", @@ -651,10 +601,7 @@ tf_cc_test( name = "resize_bilinear_test", size = "small", srcs = ["resize_bilinear_test.cc"], - tags = [ - "tflite_not_portable_ios_arm64", - "tflite_not_portable_ios_x86_64", - ], + tags = ["tflite_not_portable_ios"], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:framework", @@ -667,10 +614,7 @@ tf_cc_test( name = "svdf_test", size = "small", srcs = ["svdf_test.cc"], - tags = [ - "tflite_not_portable_ios_arm64", - "tflite_not_portable_ios_x86_64", - ], + tags = ["tflite_not_portable_ios"], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:framework", @@ -683,10 +627,7 @@ tf_cc_test( name = "embedding_lookup_test", size = "small", srcs = ["embedding_lookup_test.cc"], - tags = [ - "tflite_not_portable_ios_arm64", - "tflite_not_portable_ios_x86_64", - ], + tags = ["tflite_not_portable_ios"], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:framework", @@ -699,10 +640,7 @@ tf_cc_test( name = "embedding_lookup_sparse_test", size = "small", srcs = ["embedding_lookup_sparse_test.cc"], - tags = [ - "tflite_not_portable_ios_arm64", - "tflite_not_portable_ios_x86_64", - ], + tags = ["tflite_not_portable_ios"], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:framework", @@ -715,10 +653,7 @@ tf_cc_test( name = "fully_connected_test", size = "small", srcs = ["fully_connected_test.cc"], - tags = [ - 
"tflite_not_portable_ios_arm64", - "tflite_not_portable_ios_x86_64", - ], + tags = ["tflite_not_portable_ios"], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:framework", @@ -732,10 +667,7 @@ tf_cc_test( name = "local_response_norm_test", size = "small", srcs = ["local_response_norm_test.cc"], - tags = [ - "tflite_not_portable_ios_arm64", - "tflite_not_portable_ios_x86_64", - ], + tags = ["tflite_not_portable_ios"], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:framework", @@ -748,10 +680,7 @@ tf_cc_test( name = "pooling_test", size = "small", srcs = ["pooling_test.cc"], - tags = [ - "tflite_not_portable_ios_arm64", - "tflite_not_portable_ios_x86_64", - ], + tags = ["tflite_not_portable_ios"], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:framework", @@ -764,10 +693,7 @@ tf_cc_test( name = "softmax_test", size = "small", srcs = ["softmax_test.cc"], - tags = [ - "tflite_not_portable_ios_arm64", - "tflite_not_portable_ios_x86_64", - ], + tags = ["tflite_not_portable_ios"], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:framework", @@ -781,10 +707,7 @@ tf_cc_test( name = "log_softmax_test", size = "small", srcs = ["log_softmax_test.cc"], - tags = [ - "tflite_not_portable_ios_arm64", - "tflite_not_portable_ios_x86_64", - ], + tags = ["tflite_not_portable_ios"], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:framework", @@ -798,10 +721,7 @@ tf_cc_test( name = "lsh_projection_test", size = "small", srcs = ["lsh_projection_test.cc"], - tags = [ - "tflite_not_portable_ios_arm64", - "tflite_not_portable_ios_x86_64", - ], + tags = ["tflite_not_portable_ios"], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:framework", @@ -814,10 +734,7 @@ tf_cc_test( name = "hashtable_lookup_test", size = "small", srcs = ["hashtable_lookup_test.cc"], - tags = [ - "tflite_not_portable_ios_arm64", - "tflite_not_portable_ios_x86_64", - ], + tags = ["tflite_not_portable_ios"], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:framework", @@ -831,10 +748,7 @@ 
tf_cc_test( name = "lstm_test", size = "small", srcs = ["lstm_test.cc"], - tags = [ - "tflite_not_portable_ios_arm64", - "tflite_not_portable_ios_x86_64", - ], + tags = ["tflite_not_portable_ios"], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:framework", @@ -847,10 +761,7 @@ tf_cc_test( name = "skip_gram_test", size = "small", srcs = ["skip_gram_test.cc"], - tags = [ - "tflite_not_portable_ios_arm64", - "tflite_not_portable_ios_x86_64", - ], + tags = ["tflite_not_portable_ios"], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:framework", @@ -864,10 +775,7 @@ tf_cc_test( name = "space_to_depth_test", size = "small", srcs = ["space_to_depth_test.cc"], - tags = [ - "tflite_not_portable_ios_arm64", - "tflite_not_portable_ios_x86_64", - ], + tags = ["tflite_not_portable_ios"], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:framework", @@ -880,10 +788,7 @@ tf_cc_test( name = "split_test", size = "small", srcs = ["split_test.cc"], - tags = [ - "tflite_not_portable_ios_arm64", - "tflite_not_portable_ios_x86_64", - ], + tags = ["tflite_not_portable_ios"], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:framework", @@ -896,10 +801,7 @@ tf_cc_test( name = "squeeze_test", size = "small", srcs = ["squeeze_test.cc"], - tags = [ - "tflite_not_portable_ios_arm64", - "tflite_not_portable_ios_x86_64", - ], + tags = ["tflite_not_portable_ios"], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:framework", @@ -912,9 +814,23 @@ tf_cc_test( name = "strided_slice_test", size = "small", srcs = ["strided_slice_test.cc"], + tags = ["tflite_not_portable_ios"], + deps = [ + ":builtin_ops", + "//tensorflow/contrib/lite:framework", + "//tensorflow/contrib/lite/kernels:test_util", + "@com_google_googletest//:gtest", + ], +) + +tf_cc_test( + name = "comparisons_test", + size = "small", + srcs = [ + "comparisons_test.cc", + ], tags = [ - "tflite_not_portable_ios_arm64", - "tflite_not_portable_ios_x86_64", + "tflite_not_portable_ios", ], deps = [ ":builtin_ops", @@ -924,4 
+840,16 @@ tf_cc_test( ], ) +filegroup( + name = "all_files", + srcs = glob( + ["**/*"], + exclude = [ + "**/METADATA", + "**/OWNERS", + ], + ), + visibility = ["//tensorflow:__subpackages__"], +) + tflite_portable_test_suite() diff --git a/tensorflow/contrib/lite/kernels/add.cc b/tensorflow/contrib/lite/kernels/add.cc index 63ea89df56bafa995950afec3a58267681af304f..e0aa070e2d02cecb9c6ff500ab32b8ad2159db6e 100644 --- a/tensorflow/contrib/lite/kernels/add.cc +++ b/tensorflow/contrib/lite/kernels/add.cc @@ -176,7 +176,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { output); } else { context->ReportError(context, - "Inputs and outputs not all float|unit8 types."); + "Inputs and outputs not all float|uint8 types."); return kTfLiteError; } diff --git a/tensorflow/contrib/lite/kernels/arg_max.cc b/tensorflow/contrib/lite/kernels/arg_max.cc new file mode 100644 index 0000000000000000000000000000000000000000..a2c5e4ceadbc905d22eb02b450c88745a351f58f --- /dev/null +++ b/tensorflow/contrib/lite/kernels/arg_max.cc @@ -0,0 +1,178 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include "tensorflow/contrib/lite/builtin_op_data.h" +#include "tensorflow/contrib/lite/context.h" +#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h" +#include "tensorflow/contrib/lite/kernels/internal/quantization_util.h" +#include "tensorflow/contrib/lite/kernels/internal/tensor.h" +#include "tensorflow/contrib/lite/kernels/kernel_util.h" +#include "tensorflow/contrib/lite/kernels/op_macros.h" + +namespace tflite { +namespace ops { +namespace builtin { +namespace arg_max { + +constexpr int kInputTensor = 0; +constexpr int kAxis = 1; +constexpr int kOutputTensor = 0; + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + TF_LITE_ENSURE_EQ(context, NumInputs(node), 2); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + + TfLiteTensor* input = GetInput(context, node, kInputTensor); + TfLiteTensor* axis = GetInput(context, node, kAxis); + // Make sure the axis is only 1 dimension. + TF_LITE_ENSURE_EQ(context, NumElements(axis), 1); + + // Make sure the axis is only either int32 or int64. + TF_LITE_ENSURE(context, + axis->type == kTfLiteInt32 || axis->type == kTfLiteInt64); + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + + auto* params = reinterpret_cast(node->builtin_data); + switch (params->output_type) { + case kTfLiteInt32: + output->type = kTfLiteInt32; + break; + case kTfLiteInt64: + output->type = kTfLiteInt64; + break; + default: + context->ReportError(context, "Unknown index output data type"); + return kTfLiteError; + } + + // Check conditions for different types. + switch (input->type) { + case kTfLiteFloat32: + case kTfLiteUInt8: + case kTfLiteInt32: + break; + + default: + context->ReportError(context, "Only float32 and int types are supported"); + return kTfLiteError; + } + + // Copy the input dimensions to output except make the last dimension 1. 
+ TF_LITE_ENSURE(context, NumDimensions(input) >= 1); + TfLiteIntArray* output_size = TfLiteIntArrayCopy(input->dims); + output_size->data[NumDimensions(input) - 1] = 1; + + return context->ResizeTensor(context, output, output_size); +} + +// The current impl actually ignores the axis argument. +// Only determine the index of the maximum value in the last dimension. +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + TfLiteTensor* input = GetInput(context, node, kInputTensor); + TfLiteTensor* axis = GetInput(context, node, kAxis); + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + +#define TF_LITE_ARG_MAX(data_type, axis_type, output_type) \ + TF_LITE_ENSURE_EQ(context, GetTensorData(axis)[0], 3); \ + optimized_ops::ArgMax(GetTensorData(axis), \ + GetTensorData(input), GetTensorDims(input), \ + GetTensorData(output), \ + GetTensorDims(output)) + if (axis->type == kTfLiteInt32) { + switch (output->type) { + case kTfLiteInt32: { + switch (input->type) { + case kTfLiteFloat32: + TF_LITE_ARG_MAX(float, int32_t, int32_t); + break; + case kTfLiteUInt8: + TF_LITE_ARG_MAX(uint8_t, int32_t, int32_t); + break; + case kTfLiteInt32: + TF_LITE_ARG_MAX(int32_t, int32_t, int32_t); + break; + default: + return kTfLiteError; + } + } break; + case kTfLiteInt64: { + switch (input->type) { + case kTfLiteFloat32: + TF_LITE_ARG_MAX(float, int32_t, int64_t); + break; + case kTfLiteUInt8: + TF_LITE_ARG_MAX(uint8_t, int32_t, int64_t); + break; + case kTfLiteInt32: + TF_LITE_ARG_MAX(int32_t, int32_t, int64_t); + break; + default: + return kTfLiteError; + } + } break; + default: + return kTfLiteError; + } + } else { + switch (output->type) { + case kTfLiteInt32: { + switch (input->type) { + case kTfLiteFloat32: + TF_LITE_ARG_MAX(float, int64_t, int32_t); + break; + case kTfLiteUInt8: + TF_LITE_ARG_MAX(uint8_t, int64_t, int32_t); + break; + case kTfLiteInt32: + TF_LITE_ARG_MAX(int32_t, int64_t, int32_t); + break; + default: + return kTfLiteError; + } + } break; + 
case kTfLiteInt64: { + switch (input->type) { + case kTfLiteFloat32: + TF_LITE_ARG_MAX(float, int64_t, int64_t); + break; + case kTfLiteUInt8: + TF_LITE_ARG_MAX(uint8_t, int64_t, int64_t); + break; + case kTfLiteInt32: + TF_LITE_ARG_MAX(int32_t, int64_t, int64_t); + break; + default: + return kTfLiteError; + } + } break; + default: + return kTfLiteError; + } + } +#undef TF_LITE_ARG_MAX + + return kTfLiteOk; +} + +} // namespace arg_max + +TfLiteRegistration* Register_ARG_MAX() { + static TfLiteRegistration r = {nullptr, nullptr, arg_max::Prepare, + arg_max::Eval}; + return &r; +} + +} // namespace builtin +} // namespace ops +} // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/arg_max_test.cc b/tensorflow/contrib/lite/kernels/arg_max_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..31b15fe19ab87027c28bde9eaff7d88d03b2c213 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/arg_max_test.cc @@ -0,0 +1,106 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include +#include "tensorflow/contrib/lite/interpreter.h" +#include "tensorflow/contrib/lite/kernels/register.h" +#include "tensorflow/contrib/lite/kernels/test_util.h" +#include "tensorflow/contrib/lite/model.h" + +namespace tflite { +namespace { + +using ::testing::ElementsAreArray; + +template +class ArgMaxOpModel : public SingleOpModel { + public: + ArgMaxOpModel(std::initializer_list input_shape, TensorType input_type, + TensorType output_type, TensorType index_output_type) { + input_ = AddInput(input_type); + axis_ = AddInput(TensorType_INT32); + output_ = AddOutput(output_type); + SetBuiltinOp(BuiltinOperator_ARG_MAX, BuiltinOptions_ArgMaxOptions, + CreateArgMaxOptions(builder_, index_output_type).Union()); + BuildInterpreter({input_shape, {1, 1, 1, 1}}); + } + + int input() { return input_; } + int axis() { return axis_; } + + std::vector GetOutput() { return ExtractVector(output_); } + std::vector GetOutputShape() { return GetTensorShape(output_); } + + private: + int input_; + int axis_; + int output_; +}; + +TEST(ArgMaxOpTest, GetMaxArgFloat) { + ArgMaxOpModel model({1, 1, 1, 4}, TensorType_FLOAT32, + TensorType_INT32, TensorType_INT32); + model.PopulateTensor(model.input(), {0.1, 0.9, 0.7, 0.3}); + // Currently only support the last dimension. + model.PopulateTensor(model.axis(), {3}); + model.Invoke(); + + EXPECT_THAT(model.GetOutput(), ElementsAreArray({1})); + EXPECT_THAT(model.GetOutputShape(), ElementsAreArray({1, 1, 1, 1})); +} + +TEST(ArgMaxOpTest, GetMaxArgInt) { + ArgMaxOpModel model({1, 1, 1, 4}, TensorType_INT32, TensorType_INT32, + TensorType_INT32); + model.PopulateTensor(model.input(), {1, 9, 7, 3}); + // Currently only support the last dimension. 
+ model.PopulateTensor(model.axis(), {3}); + model.Invoke(); + + EXPECT_THAT(model.GetOutput(), ElementsAreArray({1})); + EXPECT_THAT(model.GetOutputShape(), ElementsAreArray({1, 1, 1, 1})); +} + +TEST(ArgMaxOpTest, GetMaxArgMulDimensions) { + ArgMaxOpModel model({1, 1, 2, 4}, TensorType_INT32, TensorType_INT32, + TensorType_INT32); + model.PopulateTensor(model.input(), {1, 2, 7, 8, 1, 9, 7, 3}); + // Currently only support the last dimension. + model.PopulateTensor(model.axis(), {3}); + model.Invoke(); + + EXPECT_THAT(model.GetOutput(), ElementsAreArray({3, 1})); + EXPECT_THAT(model.GetOutputShape(), ElementsAreArray({1, 1, 2, 1})); +} + +TEST(ArgMaxOpTest, GetMaxArgOutput64) { + ArgMaxOpModel model({1, 1, 2, 4}, TensorType_INT32, TensorType_INT64, + TensorType_INT64); + model.PopulateTensor(model.input(), {10, 2, 7, 8, 1, 9, 7, 3}); + // Currently only support the last dimension. + model.PopulateTensor(model.axis(), {3}); + model.Invoke(); + + EXPECT_THAT(model.GetOutput(), ElementsAreArray({0, 1})); + EXPECT_THAT(model.GetOutputShape(), ElementsAreArray({1, 1, 2, 1})); +} + +} // namespace +} // namespace tflite + +int main(int argc, char** argv) { + ::tflite::LogToStderr(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/contrib/lite/kernels/batch_to_space_nd.cc b/tensorflow/contrib/lite/kernels/batch_to_space_nd.cc index bc438f99c6a72fdbc2794dee03524db6a7523834..90edf4f9e3683f2987dd8299a1cd5233caa24479 100644 --- a/tensorflow/contrib/lite/kernels/batch_to_space_nd.cc +++ b/tensorflow/contrib/lite/kernels/batch_to_space_nd.cc @@ -123,6 +123,8 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { GetTensorDims(op_context.input), \ GetTensorData(op_context.block_shape), \ GetTensorDims(op_context.block_shape), \ + GetTensorData(op_context.crops), \ + GetTensorDims(op_context.crops), \ GetTensorData(op_context.output), \ GetTensorDims(op_context.output)) switch (op_context.input->type) { // Already know 
in/out types are same. diff --git a/tensorflow/contrib/lite/kernels/cast.cc b/tensorflow/contrib/lite/kernels/cast.cc index 19942de7bc0c083f192a4b337b224b778d991140..17ef2c572ebbfa54ba6856f7eebbcd6fd9e63868 100644 --- a/tensorflow/contrib/lite/kernels/cast.cc +++ b/tensorflow/contrib/lite/kernels/cast.cc @@ -34,6 +34,15 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); TfLiteTensor* input = GetInput(context, node, kInputTensor); TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + + // TODO(ahentz): these two checks would make the new implementation + // incompatible with some existing models, where params is not specified. It + // is OK not to have them because toco would have set input and output types + // to match the parameters. + // auto* params = reinterpret_cast(node->builtin_data); + // TF_LITE_ENSURE_EQ(context, input->type, params->in_data_type); + // TF_LITE_ENSURE_EQ(context, output->type, params->out_data_type); + return context->ResizeTensor(context, output, TfLiteIntArrayCopy(input->dims)); } diff --git a/tensorflow/contrib/lite/kernels/comparisons.cc b/tensorflow/contrib/lite/kernels/comparisons.cc new file mode 100644 index 0000000000000000000000000000000000000000..87c413cb982dafd239818040d067738e786d43ff --- /dev/null +++ b/tensorflow/contrib/lite/kernels/comparisons.cc @@ -0,0 +1,119 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/contrib/lite/context.h" +#include "tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h" +#include "tensorflow/contrib/lite/kernels/internal/tensor.h" +#include "tensorflow/contrib/lite/kernels/kernel_util.h" +#include "tensorflow/contrib/lite/kernels/op_macros.h" +#include "tensorflow/contrib/lite/string_util.h" + +namespace tflite { +namespace ops { +namespace builtin { +namespace comparisons { + +constexpr int kInputTensor1 = 0; +constexpr int kInputTensor2 = 1; +constexpr int kOutputTensor = 0; + +TfLiteStatus LessPrepare(TfLiteContext* context, TfLiteNode* node) { + TF_LITE_ENSURE_EQ(context, NumInputs(node), 2); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + + TfLiteTensor* input1 = GetInput(context, node, kInputTensor1); + TfLiteTensor* input2 = GetInput(context, node, kInputTensor2); + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + + // Don't support string and bool. + TF_LITE_ENSURE(context, + input1->type != kTfLiteString || input1->type != kTfLiteBool); + // Currently only support tensors have the same type. 
+ TF_LITE_ENSURE_EQ(context, input1->type, input2->type); + output->type = kTfLiteBool; + + bool requires_broadcast = !HaveSameShapes(input1, input2); + + TfLiteIntArray* output_size = nullptr; + if (requires_broadcast) { + TF_LITE_ENSURE_OK(context, CalculateShapeForBroadcast( + context, input1, input2, &output_size)); + } else { + output_size = TfLiteIntArrayCopy(input1->dims); + } + + return context->ResizeTensor(context, output, output_size); +} + +TfLiteStatus LessEval(TfLiteContext* context, TfLiteNode* node) { + TfLiteTensor* input1 = GetInput(context, node, kInputTensor1); + TfLiteTensor* input2 = GetInput(context, node, kInputTensor2); + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + + bool requires_broadcast = !HaveSameShapes(input1, input2); + +#define TF_LITE_LESS(type, opname) \ + reference_ops::opname(GetTensorData(input1), GetTensorDims(input1), \ + GetTensorData(input2), GetTensorDims(input2), \ + GetTensorData(output), GetTensorDims(output)); + + // TODO(renjieliu): Support quantized data. 
+ if (requires_broadcast) { + switch (input1->type) { + case kTfLiteFloat32: + TF_LITE_LESS(float, BroadcastLess); + break; + case kTfLiteInt32: + TF_LITE_LESS(int32_t, BroadcastLess); + break; + case kTfLiteInt64: + TF_LITE_LESS(int64_t, BroadcastLess); + break; + default: + context->ReportError(context, + "Does not support type other than float|int"); + return kTfLiteError; + } + } else { + switch (input1->type) { + case kTfLiteFloat32: + TF_LITE_LESS(float, Less); + break; + case kTfLiteInt32: + TF_LITE_LESS(int32_t, Less); + break; + case kTfLiteInt64: + TF_LITE_LESS(int64_t, Less); + break; + default: + context->ReportError(context, + "Does not support type other than float|int"); + return kTfLiteError; + } + } +#undef TF_LITE_LESS + return kTfLiteOk; +} + +} // namespace comparisons + +TfLiteRegistration* Register_LESS() { + static TfLiteRegistration r = {nullptr, nullptr, comparisons::LessPrepare, + comparisons::LessEval}; + return &r; +} + +} // namespace builtin +} // namespace ops +} // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/comparisons_test.cc b/tensorflow/contrib/lite/kernels/comparisons_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..da2d7f858984a4d3bb09ca8e485fe1599bea7ded --- /dev/null +++ b/tensorflow/contrib/lite/kernels/comparisons_test.cc @@ -0,0 +1,98 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include +#include "tensorflow/contrib/lite/interpreter.h" +#include "tensorflow/contrib/lite/kernels/register.h" +#include "tensorflow/contrib/lite/kernels/test_util.h" +#include "tensorflow/contrib/lite/model.h" + +namespace tflite { +namespace { + +using ::testing::ElementsAreArray; + +class LessOpModel : public SingleOpModel { + public: + LessOpModel(std::initializer_list input1_shape, + std::initializer_list input2_shape, TensorType input_type) { + input1_ = AddInput(input_type); + input2_ = AddInput(input_type); + output_ = AddOutput(TensorType_BOOL); + SetBuiltinOp(BuiltinOperator_LESS, BuiltinOptions_LessOptions, + CreateLessOptions(builder_).Union()); + BuildInterpreter({input1_shape, input2_shape}); + } + + int input1() { return input1_; } + int input2() { return input2_; } + + std::vector GetOutput() { return ExtractVector(output_); } + std::vector GetOutputShape() { return GetTensorShape(output_); } + + private: + int input1_; + int input2_; + int output_; +}; + +TEST(ArgMaxOpTest, LessFloat) { + LessOpModel model({1, 1, 1, 4}, {1, 1, 1, 4}, TensorType_FLOAT32); + model.PopulateTensor(model.input1(), {0.1, 0.9, 0.7, 0.3}); + model.PopulateTensor(model.input2(), {0.1, 0.2, 0.6, 0.5}); + model.Invoke(); + + EXPECT_THAT(model.GetOutput(), ElementsAreArray({false, false, false, true})); + EXPECT_THAT(model.GetOutputShape(), ElementsAreArray({1, 1, 1, 4})); +} + +TEST(ArgMaxOpTest, LessInt) { + LessOpModel model({1, 1, 1, 4}, {1, 1, 1, 4}, TensorType_INT32); + model.PopulateTensor(model.input1(), {-1, 9, 7, 3}); + model.PopulateTensor(model.input2(), {1, 2, 6, 5}); + model.Invoke(); + + EXPECT_THAT(model.GetOutput(), ElementsAreArray({true, false, false, true})); + EXPECT_THAT(model.GetOutputShape(), ElementsAreArray({1, 1, 1, 4})); +} + +TEST(ArgMaxOpTest, LessBroadcast) { + LessOpModel model({1, 1, 1, 4}, {1, 1, 1, 1}, TensorType_INT32); + 
model.PopulateTensor(model.input1(), {-1, 9, 7, 3}); + model.PopulateTensor(model.input2(), {7}); + model.Invoke(); + + EXPECT_THAT(model.GetOutput(), ElementsAreArray({true, false, false, true})); + EXPECT_THAT(model.GetOutputShape(), ElementsAreArray({1, 1, 1, 4})); +} + +TEST(ArgMaxOpTest, LessBroadcastTwoD) { + LessOpModel model({1, 1, 2, 4}, {1, 1, 1, 4}, TensorType_INT32); + model.PopulateTensor(model.input1(), {-1, 9, 7, 3, 2, 4, 6, 8}); + model.PopulateTensor(model.input2(), {7, 1, 2, 4}); + model.Invoke(); + + EXPECT_THAT(model.GetOutput(), ElementsAreArray({true, false, false, true, + true, false, false, false})); + EXPECT_THAT(model.GetOutputShape(), ElementsAreArray({1, 1, 2, 4})); +} + +} // namespace +} // namespace tflite + +int main(int argc, char** argv) { + ::tflite::LogToStderr(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/contrib/lite/kernels/concatenation.cc b/tensorflow/contrib/lite/kernels/concatenation.cc index a619ada86af64c299f8e518a7493db20f1011a50..45ea8d00498455be98467f2f1addc8ad7dcf35fa 100644 --- a/tensorflow/contrib/lite/kernels/concatenation.cc +++ b/tensorflow/contrib/lite/kernels/concatenation.cc @@ -67,10 +67,6 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TfLiteTensor* t = &context->tensors[node->inputs->data[i]]; TF_LITE_ENSURE_EQ(context, t->dims->size, t0->dims->size); TF_LITE_ENSURE_EQ(context, t->type, input_type); - if (input_type == kTfLiteUInt8) { - TF_LITE_ENSURE_EQ(context, t->params.zero_point, t0->params.zero_point); - TF_LITE_ENSURE_EQ(context, t->params.scale, t0->params.scale); - } for (int d = 0; d < t0->dims->size; ++d) { if (d == axis) { sum_axis += t->dims->data[axis]; @@ -87,11 +83,6 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TfLiteTensor* output = &context->tensors[node->outputs->data[0]]; TF_LITE_ENSURE_EQ(context, output->type, input_type); - if (input_type == kTfLiteUInt8) { - TF_LITE_ENSURE_EQ(context, 
output->params.zero_point, - t0->params.zero_point); - TF_LITE_ENSURE_EQ(context, output->params.scale, t0->params.scale); - } return context->ResizeTensor(context, output, output_size); } @@ -115,6 +106,14 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { all_inputs.dims(), node->inputs->size, GetTensorData(output), \ GetTensorDims(output)) +#define TF_LITE_CONCATENATION_QUANTIZED(type) \ + VectorOfQuantizedTensors all_inputs(*context, *node->inputs); \ + type::Concatenation( \ + RemapDim(NumDimensions(output), axis), all_inputs.data(), \ + all_inputs.dims(), all_inputs.zero_point(), all_inputs.scale(), \ + node->inputs->size, GetTensorData(output), GetTensorDims(output), \ + output->params.zero_point, output->params.scale) + switch (output->type) { // Already know in/outtypes are same. case kTfLiteFloat32: if (kernel_type == kReference) { @@ -125,9 +124,9 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { break; case kTfLiteUInt8: if (kernel_type == kReference) { - TF_LITE_CONCATENATION(reference_ops, uint8_t); + TF_LITE_CONCATENATION_QUANTIZED(reference_ops); } else { - TF_LITE_CONCATENATION(optimized_ops, uint8_t); + TF_LITE_CONCATENATION_QUANTIZED(optimized_ops); } break; default: @@ -136,6 +135,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { return kTfLiteError; } +#undef TF_LITE_CONCATENATION_QUANTIZED #undef TF_LITE_CONCATENATION return kTfLiteOk; diff --git a/tensorflow/contrib/lite/kernels/concatenation_test.cc b/tensorflow/contrib/lite/kernels/concatenation_test.cc index ba1ffc5f8423b9626c9c8e2a1086ea0dcca43f50..467ff6f7e149e35ae1fd11031c10d7087c4b398c 100644 --- a/tensorflow/contrib/lite/kernels/concatenation_test.cc +++ b/tensorflow/contrib/lite/kernels/concatenation_test.cc @@ -28,6 +28,7 @@ class BaseConcatenationOpModel : public SingleOpModel { public: // TODO(ahentz): Also test different activation types, axis, input // dimensions. 
+ BaseConcatenationOpModel() {} BaseConcatenationOpModel(const TensorData& input_template, int axis, int num_inputs) { std::vector> all_input_shapes; @@ -60,6 +61,23 @@ class ConcatenationOpModel : public BaseConcatenationOpModel { class QuantizedConcatenationOpModel : public BaseConcatenationOpModel { public: using BaseConcatenationOpModel::BaseConcatenationOpModel; + QuantizedConcatenationOpModel(const std::vector& input_template, + int axis, int num_inputs, + const TensorData& output_template) { + std::vector> all_input_shapes; + CHECK_EQ(input_template.size(), num_inputs); + for (int i = 0; i < num_inputs; ++i) { + all_input_shapes.push_back(input_template[i].shape); + AddInput(input_template[i]); + } + output_ = AddOutput({output_template.type, /*shape=*/{}, + output_template.min, output_template.max}); + SetBuiltinOp( + BuiltinOperator_CONCATENATION, BuiltinOptions_ConcatenationOptions, + CreateConcatenationOptions(builder_, axis, ActivationFunctionType_NONE) + .Union()); + BuildInterpreter(all_input_shapes); + } void SetInput(int index, std::initializer_list data) { QuantizeAndPopulate(index, data); } @@ -168,6 +186,56 @@ TEST(ConcatenationOpTest, FourInputsQuantized) { })); } +TEST(ConcatenationOpTest, FourInputsQuantizedMixedRange) { + QuantizedConcatenationOpModel m0({{TensorType_UINT8, {2, 1, 2}, -10.7, 10.8}, + {TensorType_UINT8, {2, 1, 2}, 0, 12.8}, + {TensorType_UINT8, {2, 1, 2}, -11, 11.8}, + {TensorType_UINT8, {2, 1, 2}, 0, 7.4}}, + /*axis=*/2, /*num_inputs=*/4, + {TensorType_UINT8, {2, 1, 2}, -12.7, 12.8}); + + m0.SetInput(0, {1.0f, 3.0f, 4.0f, 7.0f}); + m0.SetInput(1, {1.1f, 3.1f, 4.1f, 7.1f}); + m0.SetInput(2, {1.2f, 3.2f, 4.2f, 7.2f}); + m0.SetInput(3, {1.3f, 3.3f, 4.3f, 7.3f}); + m0.Invoke(); + EXPECT_THAT(m0.GetDequantizedOutput(), + ElementsAreArray(ArrayFloatNear({ + 1.0f, 3.0f, 1.1f, 3.1f, 1.2f, 3.2f, 1.3f, 3.3f, // + 4.0f, 7.0f, 4.1f, 7.1f, 4.2f, 7.2f, 4.3f, 7.3f, // + }))); + EXPECT_THAT(m0.GetOutput(), ElementsAreArray({ + 137, 157, 138, 
158, 139, 159, 140, 160, // + 167, 197, 168, 198, 169, 199, 170, 200, // + })); +} + +TEST(ConcatenationOpTest, FourInputsQuantizedMixedRangeClampingLogic) { + QuantizedConcatenationOpModel m0({{TensorType_UINT8, {2, 1, 2}, -10.7, 10.8}, + {TensorType_UINT8, {2, 1, 2}, 0, 12.8}, + {TensorType_UINT8, {2, 1, 2}, -11, 11.8}, + {TensorType_UINT8, {2, 1, 2}, 0, 7.4}}, + /*axis=*/2, /*num_inputs=*/4, + {TensorType_UINT8, {2, 1, 2}, -1., 1.}); + + m0.SetInput(0, {1.0f, -3.0f, -4.0f, -7.0f}); + m0.SetInput(1, {1.1f, 3.1f, 4.1f, 7.1f}); + m0.SetInput(2, {1.2f, -3.2f, -4.2f, 7.2f}); + m0.SetInput(3, {1.3f, 3.3f, 4.3f, 7.3f}); + m0.Invoke(); + EXPECT_THAT(m0.GetDequantizedOutput(), + ElementsAreArray(ArrayFloatNear( + { + 1.0f, -1.0f, 1.0f, 1.0f, 1.0f, -1.0f, 1.0f, 1.0f, // + -1.0f, -1.0f, 1.0f, 1.0f, -1.0f, 1.0f, 1.0f, 1.0f, // + }, + 4e-3))); + EXPECT_THAT(m0.GetOutput(), ElementsAreArray({ + 255, 0, 255, 255, 255, 0, 255, 255, // + 0, 0, 255, 255, 0, 255, 255, 255, // + })); +} + } // namespace } // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/conv.cc b/tensorflow/contrib/lite/kernels/conv.cc index 18ff33bf9f55ac1d25bb3392e714686c5305c2b8..3b467b3aa284586ab8e67ede55583adffbe06cc7 100644 --- a/tensorflow/contrib/lite/kernels/conv.cc +++ b/tensorflow/contrib/lite/kernels/conv.cc @@ -225,22 +225,27 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { // Matching GetWindowedOutputSize in TensorFlow. auto padding = params->padding; - auto computeOutSize = [padding](int imageSize, int filterSize, - int stride) -> int { + auto computeOutSize = [padding](int imageSize, int filterSize, int stride, + int dilationRate) -> int { + int effectiveFilterSize = (filterSize - 1) * dilationRate + 1; return padding == kTfLitePaddingSame ? (imageSize + stride - 1) / stride : padding == kTfLitePaddingValid - ? (imageSize - filterSize + stride) / stride + ? 
(imageSize - effectiveFilterSize + stride) / stride : 0; }; - int outWidth = computeOutSize(width, filter_width, params->stride_width); - int outHeight = computeOutSize(height, filter_height, params->stride_height); + int outWidth = computeOutSize(width, filter_width, params->stride_width, + params->dilation_width_factor); + int outHeight = computeOutSize(height, filter_height, params->stride_height, + params->dilation_height_factor); data->padding.height = - ComputePadding(params->stride_height, height, filter_height, outHeight); + ComputePadding(params->stride_height, params->dilation_height_factor, + height, filter_height, outHeight); data->padding.width = - ComputePadding(params->stride_width, width, filter_width, outWidth); + ComputePadding(params->stride_width, params->dilation_width_factor, width, + filter_width, outWidth); TF_LITE_ENSURE(context, hasBias); @@ -375,28 +380,40 @@ void EvalFloat(TfLiteContext* context, TfLiteNode* node, float output_activation_min, output_activation_max; CalculateActivationRangeFloat(params->activation, &output_activation_min, &output_activation_max); - - switch (kernel_type) { + KernelType effective_kernel_type; + if (((kernel_type == kMultithreadOptimized) || + (kernel_type == kCblasOptimized)) && + ((params->dilation_width_factor != 1) || + (params->dilation_height_factor != 1))) { + // kMultithreadOptimized and kCblasOptimized do not support dilation. + // Therefore, fallback to optimized. 
+ effective_kernel_type = kGenericOptimized; + } else { + effective_kernel_type = kernel_type; + } + switch (effective_kernel_type) { case kReference: { - reference_ops::Conv(GetTensorData(input), GetTensorDims(input), - GetTensorData(filter), GetTensorDims(filter), - GetTensorData(bias), GetTensorDims(bias), - params->stride_width, params->stride_height, 1, 1, - data->padding.width, data->padding.height, - output_activation_min, output_activation_max, - GetTensorData(output), GetTensorDims(output), - GetTensorData(im2col), GetTensorDims(im2col)); + reference_ops::Conv( + GetTensorData(input), GetTensorDims(input), + GetTensorData(filter), GetTensorDims(filter), + GetTensorData(bias), GetTensorDims(bias), params->stride_width, + params->stride_height, params->dilation_width_factor, + params->dilation_height_factor, data->padding.width, + data->padding.height, output_activation_min, output_activation_max, + GetTensorData(output), GetTensorDims(output), + GetTensorData(im2col), GetTensorDims(im2col)); break; } case kGenericOptimized: { - optimized_ops::Conv(GetTensorData(input), GetTensorDims(input), - GetTensorData(filter), GetTensorDims(filter), - GetTensorData(bias), GetTensorDims(bias), - params->stride_width, params->stride_height, 1, 1, - data->padding.width, data->padding.height, - output_activation_min, output_activation_max, - GetTensorData(output), GetTensorDims(output), - GetTensorData(im2col), GetTensorDims(im2col)); + optimized_ops::Conv( + GetTensorData(input), GetTensorDims(input), + GetTensorData(filter), GetTensorDims(filter), + GetTensorData(bias), GetTensorDims(bias), params->stride_width, + params->stride_height, params->dilation_width_factor, + params->dilation_height_factor, data->padding.width, + data->padding.height, output_activation_min, output_activation_max, + GetTensorData(output), GetTensorDims(output), + GetTensorData(im2col), GetTensorDims(im2col)); break; } case kMultithreadOptimized: { diff --git 
a/tensorflow/contrib/lite/kernels/conv_test.cc b/tensorflow/contrib/lite/kernels/conv_test.cc index d2393c3c97bb9516e2b8a6c8ae037dc0dfdfe64b..0dcfc826fd218d2d2dfbf89201d2c13fbfe6f0e1 100644 --- a/tensorflow/contrib/lite/kernels/conv_test.cc +++ b/tensorflow/contrib/lite/kernels/conv_test.cc @@ -46,7 +46,8 @@ class BaseConvolutionOpModel : public SingleOpModel { TfLiteRegistration* registration, const TensorData& input, const TensorData& filter, const TensorData& output, int stride_width = 2, int stride_height = 2, enum Padding padding = Padding_VALID, - enum ActivationFunctionType activation = ActivationFunctionType_NONE) { + enum ActivationFunctionType activation = ActivationFunctionType_NONE, + int dilation_width_factor = 1, int dilation_height_factor = 1) { input_ = AddInput(input); filter_ = AddInput(filter); @@ -71,8 +72,9 @@ class BaseConvolutionOpModel : public SingleOpModel { } SetBuiltinOp(BuiltinOperator_CONV_2D, BuiltinOptions_Conv2DOptions, - CreateConv2DOptions(builder_, padding, stride_width, - stride_height, activation) + CreateConv2DOptions( + builder_, padding, stride_width, stride_height, activation, + dilation_width_factor, dilation_height_factor) .Union()); resolver_ = absl::make_unique(BuiltinOperator_CONV_2D, diff --git a/tensorflow/contrib/lite/kernels/depthwise_conv.cc b/tensorflow/contrib/lite/kernels/depthwise_conv.cc index cad9ce114c8387047af2b63bee704035fd329330..eeda1bc3c5ba2da5b6546ce36925a6f20fc9cbae 100644 --- a/tensorflow/contrib/lite/kernels/depthwise_conv.cc +++ b/tensorflow/contrib/lite/kernels/depthwise_conv.cc @@ -140,10 +140,10 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { int out_height = compute_out_size(height, filter_height, params->stride_height); - data->padding.height = - ComputePadding(params->stride_height, height, filter_height, out_height); + data->padding.height = ComputePadding(params->stride_height, 1, height, + filter_height, out_height); data->padding.width = - 
ComputePadding(params->stride_width, width, filter_width, out_width); + ComputePadding(params->stride_width, 1, width, filter_width, out_width); // Note that quantized inference requires that all tensors have their // parameters set. This is usually done during quantized training. diff --git a/tensorflow/contrib/lite/kernels/div.cc b/tensorflow/contrib/lite/kernels/div.cc index 6dd243ad62ece3e094529d923ce80d1d4a0c19ca..ec380c8e4956e5bcd0d7559bfd8f89a52d9d233c 100644 --- a/tensorflow/contrib/lite/kernels/div.cc +++ b/tensorflow/contrib/lite/kernels/div.cc @@ -106,6 +106,8 @@ void EvalFloat(TfLiteContext* context, TfLiteNode* node, #undef TF_LITE_DIV } + + template TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { auto* params = reinterpret_cast(node->builtin_data); @@ -118,7 +120,8 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { if (output->type == kTfLiteFloat32) { EvalFloat(context, node, params, data, input1, input2, output); } else { - context->ReportError(context, "Inputs and outputs not all float types."); + context->ReportError(context, + "Div only supports FLOAT32 and quantized UINT8 now."); return kTfLiteError; } diff --git a/tensorflow/contrib/lite/kernels/internal/BUILD b/tensorflow/contrib/lite/kernels/internal/BUILD index 167c0f1fde9202452a915cea69cbb935fa1af7b6..67dd1884966d8addafa54e80f9923aa66354dff9 100644 --- a/tensorflow/contrib/lite/kernels/internal/BUILD +++ b/tensorflow/contrib/lite/kernels/internal/BUILD @@ -154,6 +154,7 @@ cc_library( ], copts = tflite_copts(), deps = [ + ":quantization_util", ":types", ":round", "//third_party/eigen3", @@ -238,6 +239,7 @@ cc_library( "reference/reference_ops.h", ], deps = [ + ":quantization_util", ":round", ":types", "//third_party/eigen3", @@ -430,4 +432,13 @@ cc_library( ), ) +cc_test( + name = "batch_to_space_nd_test", + srcs = ["batch_to_space_nd_test.cc"], + deps = [ + ":optimized_base", + "@com_google_googletest//:gtest_main", + ], +) + 
exports_files(["optimized/eigen_tensor_reduced_instantiations_oss.h"]) diff --git a/tensorflow/contrib/lite/kernels/internal/batch_to_space_nd_test.cc b/tensorflow/contrib/lite/kernels/internal/batch_to_space_nd_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..5a2901ac8c297265e542cc30d3127fe774c19e78 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/internal/batch_to_space_nd_test.cc @@ -0,0 +1,98 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h" + +#include + +namespace tflite { +namespace { + +// A light wrapper of GetIndexRange which returns a pair of start / end +// indices. +std::pair GetIndexRange(int spatial_index_dim, int block_shape_dim, + int input_dim, int output_dim) { + int index_start = 0; + int index_end = 0; + optimized_ops::GetIndexRange(spatial_index_dim, block_shape_dim, input_dim, + output_dim, &index_start, &index_end); + return {index_start, index_end}; +} + +TEST(BatchToSpaceNDTest, TestIndexRange) { + // Simple test case, no cropping. + EXPECT_EQ(GetIndexRange(/*spatial_index_dim=*/3, /*block_shape_dim=*/6, + /*input_dim=*/1, /*output_dim=*/6), + std::make_pair(0, 1)); + + // No cropping and input_dim > 1. 
+ EXPECT_EQ(GetIndexRange(/*spatial_index_dim=*/2, /*block_shape_dim=*/6, + /*input_dim=*/5, /*output_dim=*/30), + std::make_pair(0, 5)); + + // With small cropping values (can be either at the beginning or at the end). + EXPECT_EQ(GetIndexRange(/*spatial_index_dim=*/0, /*block_shape_dim=*/2, + /*input_dim=*/3, /*output_dim=*/4), + std::make_pair(0, 2)); + + // With positive cropping values at the beginning. + EXPECT_EQ(GetIndexRange(/*spatial_index_dim=*/-2, /*block_shape_dim=*/2, + /*input_dim=*/3, /*output_dim=*/4), + std::make_pair(1, 3)); + + // Large crop at the beginning. + EXPECT_EQ(GetIndexRange(/*spatial_index_dim=*/-30, /*block_shape_dim=*/5, + /*input_dim=*/7, /*output_dim=*/5), + std::make_pair(6, 7)); + + EXPECT_EQ(GetIndexRange(/*spatial_index_dim=*/-26, /*block_shape_dim=*/5, + /*input_dim=*/7, /*output_dim=*/5), + std::make_pair(6, 7)); + + // Large crop at the end. + EXPECT_EQ(GetIndexRange(/*spatial_index_dim=*/0, /*block_shape_dim=*/5, + /*input_dim=*/7, /*output_dim=*/5), + std::make_pair(0, 1)); + + EXPECT_EQ(GetIndexRange(/*spatial_index_dim=*/4, /*block_shape_dim=*/5, + /*input_dim=*/7, /*output_dim=*/5), + std::make_pair(0, 1)); + + // Rounding up incorrectly will fail this test. + EXPECT_EQ(GetIndexRange(/*spatial_index_dim=*/3, /*block_shape_dim=*/5, + /*input_dim=*/7, /*output_dim=*/5), + std::make_pair(0, 1)); + + // Extreme cropping with output of a single spatial location. + // Valid position 1, when large crop at the end. + EXPECT_EQ(GetIndexRange(/*spatial_index_dim=*/0, /*block_shape_dim=*/5, + /*input_dim=*/7, /*output_dim=*/1), + std::make_pair(0, 1)); + + // Valid position 2, when large crop at the beginning. + EXPECT_EQ(GetIndexRange(/*spatial_index_dim=*/-30, /*block_shape_dim=*/5, + /*input_dim=*/7, /*output_dim=*/1), + std::make_pair(6, 7)); + + // Invalid positions. 
+ EXPECT_EQ(GetIndexRange(/*spatial_index_dim=*/1, /*block_shape_dim=*/5, + /*input_dim=*/7, /*output_dim=*/1), + std::make_pair(0, 0)); + EXPECT_EQ(GetIndexRange(/*spatial_index_dim=*/-29, /*block_shape_dim=*/5, + /*input_dim=*/7, /*output_dim=*/1), + std::make_pair(6, 6)); +} + +} // namespace +} // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/internal/compatibility.h b/tensorflow/contrib/lite/kernels/internal/compatibility.h index 51426bb1c584b82af7b1a2ffaf5a675a1dd9a6fd..93fc6b6a76f67e2e75ba3a766e5ea6fb6bada77a 100644 --- a/tensorflow/contrib/lite/kernels/internal/compatibility.h +++ b/tensorflow/contrib/lite/kernels/internal/compatibility.h @@ -77,6 +77,7 @@ limitations under the License. #endif // TODO(ahentz): Clean up. +using int8 = std::int8_t; using uint8 = std::uint8_t; using int16 = std::int16_t; using uint16 = std::uint16_t; diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h index 0f78e0f728585ab27a8116a4707ac9614a6ea060..dd6932ffe7b7a6f1101f146ce6472b0df4cbda3b 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h @@ -1696,15 +1696,15 @@ inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims, #ifdef __aarch64__ // Call kernel optimized for depthwise convolutions using 3x3 filters if // parameters are supported. 
- if (Fast3by3FilterKernelSupported(input_dims, filter_dims, stride_width, - stride_height, pad_width, pad_height, - depth_multiplier, output_dims)) { - DepthwiseConv3by3FilterDepth16( - input_data, input_dims, input_offset, filter_data, filter_dims, - filter_offset, bias_data, bias_dims, stride_width, stride_height, - pad_width, pad_height, depth_multiplier, output_offset, - output_multiplier, output_shift, output_activation_min, - output_activation_max, output_data, output_dims); + if (Fast3x3FilterKernelSupported(input_dims, filter_dims, stride_width, + stride_height, pad_width, pad_height, + depth_multiplier, output_dims)) { + DepthwiseConv3x3Filter(input_data, input_dims, input_offset, filter_data, + filter_dims, filter_offset, bias_data, bias_dims, + stride_width, stride_height, pad_width, pad_height, + depth_multiplier, output_offset, output_multiplier, + output_shift, output_activation_min, + output_activation_max, output_data, output_dims); return; } #endif diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h index a349892076fcc4989e2f4cad188b383d2b31d470..55e0d5c3aa9ebb8b46403550e190b00a54cb53e5 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h @@ -1,4 +1,4 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -40,412 +40,4380 @@ inline void preload_l1_keep(const uint8* ptr) { // NEON intrinsics vector data types. 
// See: https://bugs.llvm.org/show_bug.cgi?id=34945 -struct Int32x16 { - int32x4_t v0, v1, v2, v3; +struct Int32x8 { + int32x4_t low, high; }; -struct Int16x16 { - int16x8_t low, high; +struct Filter3x3x8 { + int16x8_t f0, f1, f2, f3, f4, f5, f6, f7, f8; }; -struct Int16x16x3 { - Int16x16 v0, v1, v2; +// Loads 3x3 filter of depth 8 and adds filter offsets. +inline Filter3x3x8 Load3x3Filter(const uint8* filter_ptr, int32 filter_offset, + int output_depth) { + Filter3x3x8 filter; + + uint8x8_t temp_u8_0, temp_u8_1, temp_u8_2, temp_u8_3, temp_u8_4, temp_u8_5, + temp_u8_6, temp_u8_7, temp_u8_8; + int16x8_t filter_offset_vec = vdupq_n_s16(filter_offset); + + temp_u8_0 = vld1_u8(filter_ptr + 0 * output_depth); + temp_u8_1 = vld1_u8(filter_ptr + 1 * output_depth); + temp_u8_2 = vld1_u8(filter_ptr + 2 * output_depth); + temp_u8_3 = vld1_u8(filter_ptr + 3 * output_depth); + temp_u8_4 = vld1_u8(filter_ptr + 4 * output_depth); + temp_u8_5 = vld1_u8(filter_ptr + 5 * output_depth); + temp_u8_6 = vld1_u8(filter_ptr + 6 * output_depth); + temp_u8_7 = vld1_u8(filter_ptr + 7 * output_depth); + temp_u8_8 = vld1_u8(filter_ptr + 8 * output_depth); + + filter.f0 = vreinterpretq_s16_u16(vmovl_u8(temp_u8_0)); + filter.f1 = vreinterpretq_s16_u16(vmovl_u8(temp_u8_1)); + filter.f2 = vreinterpretq_s16_u16(vmovl_u8(temp_u8_2)); + filter.f3 = vreinterpretq_s16_u16(vmovl_u8(temp_u8_3)); + filter.f4 = vreinterpretq_s16_u16(vmovl_u8(temp_u8_4)); + filter.f5 = vreinterpretq_s16_u16(vmovl_u8(temp_u8_5)); + filter.f6 = vreinterpretq_s16_u16(vmovl_u8(temp_u8_6)); + filter.f7 = vreinterpretq_s16_u16(vmovl_u8(temp_u8_7)); + filter.f8 = vreinterpretq_s16_u16(vmovl_u8(temp_u8_8)); + + filter.f0 = vaddq_s16(filter.f0, filter_offset_vec); + filter.f1 = vaddq_s16(filter.f1, filter_offset_vec); + filter.f2 = vaddq_s16(filter.f2, filter_offset_vec); + filter.f3 = vaddq_s16(filter.f3, filter_offset_vec); + filter.f4 = vaddq_s16(filter.f4, filter_offset_vec); + filter.f5 = vaddq_s16(filter.f5, 
filter_offset_vec); + filter.f6 = vaddq_s16(filter.f6, filter_offset_vec); + filter.f7 = vaddq_s16(filter.f7, filter_offset_vec); + filter.f8 = vaddq_s16(filter.f8, filter_offset_vec); + + return filter; +} + +// Applies activation, offset and downquantize on a set of accumulator +// registers that correspond to a 2x2 output of depth 8. +// Stores results to output. +inline void DownquantizeAndStore2x2Output( + Int32x8 acc_0, Int32x8 acc_1, Int32x8 acc_2, Int32x8 acc_3, + int32 output_offset, int32 output_multiplier, int output_shift, + int32 output_activation_min, int32 output_activation_max, uint8* output_ptr, + int output_depth, int output_width) { + using gemmlowp::RoundingDivideByPOT; + const int32x4_t output_offset_vec = vdupq_n_s32(output_offset); + const int32x4_t output_activation_min_vec = + vdupq_n_s32(output_activation_min); + const int32x4_t output_activation_max_vec = + vdupq_n_s32(output_activation_max); + + // Fixed-point multiplication. + acc_0.low = vqrdmulhq_n_s32(acc_0.low, output_multiplier); + acc_0.high = vqrdmulhq_n_s32(acc_0.high, output_multiplier); + acc_1.low = vqrdmulhq_n_s32(acc_1.low, output_multiplier); + acc_1.high = vqrdmulhq_n_s32(acc_1.high, output_multiplier); + acc_2.low = vqrdmulhq_n_s32(acc_2.low, output_multiplier); + acc_2.high = vqrdmulhq_n_s32(acc_2.high, output_multiplier); + acc_3.low = vqrdmulhq_n_s32(acc_3.low, output_multiplier); + acc_3.high = vqrdmulhq_n_s32(acc_3.high, output_multiplier); + + acc_0.low = RoundingDivideByPOT(acc_0.low, output_shift); + acc_0.high = RoundingDivideByPOT(acc_0.high, output_shift); + acc_1.low = RoundingDivideByPOT(acc_1.low, output_shift); + acc_1.high = RoundingDivideByPOT(acc_1.high, output_shift); + acc_2.low = RoundingDivideByPOT(acc_2.low, output_shift); + acc_2.high = RoundingDivideByPOT(acc_2.high, output_shift); + acc_3.low = RoundingDivideByPOT(acc_3.low, output_shift); + acc_3.high = RoundingDivideByPOT(acc_3.high, output_shift); + + // Add the output offset. 
+ acc_0.low = vaddq_s32(acc_0.low, output_offset_vec); + acc_0.high = vaddq_s32(acc_0.high, output_offset_vec); + acc_1.low = vaddq_s32(acc_1.low, output_offset_vec); + acc_1.high = vaddq_s32(acc_1.high, output_offset_vec); + acc_2.low = vaddq_s32(acc_2.low, output_offset_vec); + acc_2.high = vaddq_s32(acc_2.high, output_offset_vec); + acc_3.low = vaddq_s32(acc_3.low, output_offset_vec); + acc_3.high = vaddq_s32(acc_3.high, output_offset_vec); + + // Apply the activation function. + acc_0.low = vmaxq_s32(acc_0.low, output_activation_min_vec); + acc_0.high = vmaxq_s32(acc_0.high, output_activation_min_vec); + acc_1.low = vmaxq_s32(acc_1.low, output_activation_min_vec); + acc_1.high = vmaxq_s32(acc_1.high, output_activation_min_vec); + acc_2.low = vmaxq_s32(acc_2.low, output_activation_min_vec); + acc_2.high = vmaxq_s32(acc_2.high, output_activation_min_vec); + acc_3.low = vmaxq_s32(acc_3.low, output_activation_min_vec); + acc_3.high = vmaxq_s32(acc_3.high, output_activation_min_vec); + + acc_0.low = vminq_s32(acc_0.low, output_activation_max_vec); + acc_0.high = vminq_s32(acc_0.high, output_activation_max_vec); + acc_1.low = vminq_s32(acc_1.low, output_activation_max_vec); + acc_1.high = vminq_s32(acc_1.high, output_activation_max_vec); + acc_2.low = vminq_s32(acc_2.low, output_activation_max_vec); + acc_2.high = vminq_s32(acc_2.high, output_activation_max_vec); + acc_3.low = vminq_s32(acc_3.low, output_activation_max_vec); + acc_3.high = vminq_s32(acc_3.high, output_activation_max_vec); + + // Saturating cast to uint8 and store to destination. 
+  int16x4_t acc_0_low_s16 = vqmovn_s32(acc_0.low);
+  int16x4_t acc_0_high_s16 = vqmovn_s32(acc_0.high);
+  int16x4_t acc_1_low_s16 = vqmovn_s32(acc_1.low);
+  int16x4_t acc_1_high_s16 = vqmovn_s32(acc_1.high);
+  int16x4_t acc_2_low_s16 = vqmovn_s32(acc_2.low);
+  int16x4_t acc_2_high_s16 = vqmovn_s32(acc_2.high);
+  int16x4_t acc_3_low_s16 = vqmovn_s32(acc_3.low);
+  int16x4_t acc_3_high_s16 = vqmovn_s32(acc_3.high);
+
+  int16x8_t res_0_s16 = vcombine_s16(acc_0_low_s16, acc_0_high_s16);
+  int16x8_t res_1_s16 = vcombine_s16(acc_1_low_s16, acc_1_high_s16);
+  int16x8_t res_2_s16 = vcombine_s16(acc_2_low_s16, acc_2_high_s16);
+  int16x8_t res_3_s16 = vcombine_s16(acc_3_low_s16, acc_3_high_s16);
+
+  uint8x8_t res_0_u8 = vqmovun_s16(res_0_s16);
+  uint8x8_t res_1_u8 = vqmovun_s16(res_1_s16);
+  uint8x8_t res_2_u8 = vqmovun_s16(res_2_s16);
+  uint8x8_t res_3_u8 = vqmovun_s16(res_3_s16);
+
+  vst1_u8(output_ptr, res_0_u8);
+  vst1_u8(output_ptr + output_depth, res_1_u8);
+  vst1_u8(output_ptr + output_depth * output_width, res_2_u8);
+  vst1_u8(output_ptr + output_depth * output_width + output_depth, res_3_u8);
+}
+
+// Requantizes one 8-lane int32 accumulator and writes it out as 8 uint8
+// values at output_ptr. Each 4-lane half of `acc` is multiplied by
+// output_multiplier (vqrdmulhq_n_s32), passed through
+// gemmlowp::RoundingDivideByPOT with output_shift, offset by output_offset,
+// clamped to [output_activation_min, output_activation_max], and finally
+// narrowed with saturation: int32 -> int16 -> uint8.
+inline void DownquantizeAndStore(Int32x8 acc, int32 output_offset,
+                                 int32 output_multiplier, int output_shift,
+                                 int32 output_activation_min,
+                                 int32 output_activation_max,
+                                 uint8* output_ptr) {
+  using gemmlowp::RoundingDivideByPOT;
+  const int32x4_t offset_v = vdupq_n_s32(output_offset);
+  const int32x4_t act_min_v = vdupq_n_s32(output_activation_min);
+  const int32x4_t act_max_v = vdupq_n_s32(output_activation_max);
+
+  // The same five steps are applied to each half of the accumulator.
+  const auto requantize = [&](int32x4_t v) -> int32x4_t {
+    v = vqrdmulhq_n_s32(v, output_multiplier);
+    v = RoundingDivideByPOT(v, output_shift);
+    v = vaddq_s32(v, offset_v);
+    v = vmaxq_s32(v, act_min_v);
+    return vminq_s32(v, act_max_v);
+  };
+  acc.low = requantize(acc.low);
+  acc.high = requantize(acc.high);
+
+  // Saturating narrow to int16, then to uint8, then store 8 bytes.
+  const int16x8_t narrowed_s16 =
+      vcombine_s16(vqmovn_s32(acc.low), vqmovn_s32(acc.high));
+  vst1_u8(output_ptr, vqmovun_s16(narrowed_s16));
+}
+
+// Two-accumulator form of the requantize-and-store sequence: acc_0 and acc_1
+// go through the same multiply / rounding shift / offset / clamp / saturating
+// narrow steps. acc_0 is stored at output_ptr and acc_1 at
+// output_ptr + output_ptr_offset (8 uint8 values each).
+inline void DownquantizeAndStore2Output(
+    Int32x8 acc_0, Int32x8 acc_1, int32 output_offset, int32 output_multiplier,
+    int output_shift, int32 output_activation_min, int32 output_activation_max,
+    uint8* output_ptr, int output_ptr_offset) {
+  using gemmlowp::RoundingDivideByPOT;
+  const int32x4_t offset_v = vdupq_n_s32(output_offset);
+  const int32x4_t act_min_v = vdupq_n_s32(output_activation_min);
+  const int32x4_t act_max_v = vdupq_n_s32(output_activation_max);
+
+  // Fixed-point multiplication, rounding shift, output offset, activation.
+  const auto requantize = [&](int32x4_t v) -> int32x4_t {
+    v = vqrdmulhq_n_s32(v, output_multiplier);
+    v = RoundingDivideByPOT(v, output_shift);
+    v = vaddq_s32(v, offset_v);
+    v = vmaxq_s32(v, act_min_v);
+    return vminq_s32(v, act_max_v);
+  };
+  acc_0.low = requantize(acc_0.low);
+  acc_0.high = requantize(acc_0.high);
+  acc_1.low = requantize(acc_1.low);
+  acc_1.high = requantize(acc_1.high);
+
+  // Saturating cast to uint8.
+  const auto narrow_to_u8 = [](const Int32x8& a) -> uint8x8_t {
+    return vqmovun_s16(vcombine_s16(vqmovn_s32(a.low), vqmovn_s32(a.high)));
+  };
+  const uint8x8_t out_0 = narrow_to_u8(acc_0);
+  const uint8x8_t out_1 = narrow_to_u8(acc_1);
+
+  // Store to destination.
+  vst1_u8(output_ptr, out_0);
+  vst1_u8(output_ptr + output_ptr_offset, out_1);
+}
+
+// Performs multiply accumulate on 3 inputs of depth 8: adds f0*i0, f1*i1 and
+// f2*i2 lane by lane into `accum` (widening int16 products to int32) and
+// returns the updated accumulator.
+inline Int32x8 MultiplyAccumulateRow(Int32x8 accum, int16x8_t f0, int16x8_t f1,
+                                     int16x8_t f2, int16x8_t i0, int16x8_t i1,
+                                     int16x8_t i2) {
+  // One filter/input pair: lanes 0-3 feed accum.low, lanes 4-7 accum.high.
+  const auto mac = [&accum](int16x8_t f, int16x8_t in) {
+    accum.low = vmlal_s16(accum.low, vget_low_s16(f), vget_low_s16(in));
+    accum.high = vmlal_s16(accum.high, vget_high_s16(f), vget_high_s16(in));
+  };
+  mac(f0, i0);
+  mac(f1, i1);
+  mac(f2, i2);
+  return accum;
+}
+
+// Performs multiply accumulate of a whole 3x3 filter: 9 inputs of depth 8.
+// Taps f.f0..f.f8 are paired with i0..i8 in order and the products are added
+// to `accum`, one row of three taps at a time via MultiplyAccumulateRow.
+inline Int32x8 MultiplyAccumulate3x3Filter(const Filter3x3x8& f, int16x8_t i0,
+                                           int16x8_t i1, int16x8_t i2,
+                                           int16x8_t i3, int16x8_t i4,
+                                           int16x8_t i5, int16x8_t i6,
+                                           int16x8_t i7, int16x8_t i8,
+                                           Int32x8 accum) {
+  accum = MultiplyAccumulateRow(accum, f.f0, f.f1, f.f2, i0, i1, i2);
+  accum = MultiplyAccumulateRow(accum, f.f3, f.f4, f.f5, i3, i4, i5);
+  accum = MultiplyAccumulateRow(accum, f.f6, f.f7, f.f8, i6, i7, i8);
+  return accum;
+}
+
+// Computes a single output: seeds an accumulator with the 8 int32 values at
+// bias_ptr, adds the 3x3 filter dot product over i0..i8, then requantizes and
+// stores the result at output_ptr through DownquantizeAndStore.
+inline void DotProductAndStore(const Filter3x3x8& filter, int16x8_t i0,
+                               int16x8_t i1, int16x8_t i2, int16x8_t i3,
+                               int16x8_t i4, int16x8_t i5, int16x8_t i6,
+                               int16x8_t i7, int16x8_t i8,
+                               const int32* bias_ptr, int32 output_offset,
+                               int32 output_multiplier, int output_shift,
+                               int32 output_activation_min,
+                               int32 output_activation_max, uint8* output_ptr) {
+  Int32x8 sum;
+  sum.low = vld1q_s32(bias_ptr);
+  sum.high = vld1q_s32(bias_ptr + 4);
+
+  sum = MultiplyAccumulate3x3Filter(filter, i0, i1, i2, i3, i4, i5, i6, i7, i8,
+                                    sum);
+
+  DownquantizeAndStore(sum, output_offset, output_multiplier, output_shift,
+                       output_activation_min, output_activation_max,
+                       output_ptr);
+}
+
+// Performs multiply-accumulate on a 3x4 input for 2 horizontal outputs.
+// The inputs are read as three rows of four (i0..i3, i4..i7, i8..i11): the
+// first output uses the first three entries of each row, the second output
+// uses the last three. Results are stored at output_ptr and
+// output_ptr + output_ptr_offset.
+inline void DotProductAndStore2xStride1(
+    const Filter3x3x8& filter, int16x8_t i0, int16x8_t i1, int16x8_t i2,
+    int16x8_t i3, int16x8_t i4, int16x8_t i5, int16x8_t i6, int16x8_t i7,
+    int16x8_t i8, int16x8_t i9, int16x8_t i10, int16x8_t i11,
+    const int32* bias_ptr, int32 output_offset, int32 output_multiplier,
+    int output_shift, int32 output_activation_min, int32 output_activation_max,
+    uint8* output_ptr, int output_ptr_offset) {
+  // Both accumulators start from the same 8 bias values.
+  const int32x4_t bias_low = vld1q_s32(bias_ptr);
+  const int32x4_t bias_high = vld1q_s32(bias_ptr + 4);
+
+  Int32x8 left_acc, right_acc;
+  left_acc.low = bias_low;
+  left_acc.high = bias_high;
+  right_acc.low = bias_low;
+  right_acc.high = bias_high;
+
+  left_acc = MultiplyAccumulate3x3Filter(filter, i0, i1, i2, i4, i5, i6, i8,
+                                         i9, i10, left_acc);
+  right_acc = MultiplyAccumulate3x3Filter(filter, i1, i2, i3, i5, i6, i7, i9,
+                                          i10, i11, right_acc);
+
+  DownquantizeAndStore2Output(left_acc, right_acc, output_offset,
+                              output_multiplier, output_shift,
+                              output_activation_min, output_activation_max,
+                              output_ptr, output_ptr_offset);
+}
+
+// Performs multiply-accumulate on a 4x3 input for 2 vertical outputs.
+// The inputs are read as four rows of three (i0..i2, i3..i5, i6..i8,
+// i9..i11): the first output uses rows 0-2 (i0..i8), the second output uses
+// rows 1-3 (i3..i11). Both accumulators start from the 8 int32 values at
+// bias_ptr; results are stored at output_ptr and
+// output_ptr + output_ptr_offset.
+inline void DotProductAndStore2yStride1(
+    const Filter3x3x8& filter, int16x8_t i0, int16x8_t i1, int16x8_t i2,
+    int16x8_t i3, int16x8_t i4, int16x8_t i5, int16x8_t i6, int16x8_t i7,
+    int16x8_t i8, int16x8_t i9, int16x8_t i10, int16x8_t i11,
+    const int32* bias_ptr, int32 output_offset, int32 output_multiplier,
+    int output_shift, int32 output_activation_min, int32 output_activation_max,
+    uint8* output_ptr, int output_ptr_offset) {
+  Int32x8 acc_0, acc_1;
+  acc_0.low = vld1q_s32(bias_ptr);
+  acc_1.low = vld1q_s32(bias_ptr);
+  acc_0.high = vld1q_s32(bias_ptr + 4);
+  acc_1.high = vld1q_s32(bias_ptr + 4);
+
+  acc_0 = MultiplyAccumulate3x3Filter(filter, i0, i1, i2, i3, i4, i5, i6, i7,
+                                      i8, acc_0);
+  acc_1 = MultiplyAccumulate3x3Filter(filter, i3, i4, i5, i6, i7, i8, i9, i10,
+                                      i11, acc_1);
+  DownquantizeAndStore2Output(acc_0, acc_1, output_offset, output_multiplier,
+                              output_shift, output_activation_min,
+                              output_activation_max, output_ptr,
+                              output_ptr_offset);
+}
+
+// A kernel that is optimized on the number of output cells in the x and y
+// direction, and the stride. Assumes 3x3 filters of 8 depth.
+//
+// The primary template is intentionally empty; the work is done by explicit
+// specializations such as <8, 8, 1, 1> below.
+// NOTE(review): the template parameter list was missing from this line. The
+// four int parameters are implied by the <8, 8, 1, 1> specialization; their
+// names are reconstructed -- confirm against the upstream header.
+template <int kFixedOutputY, int kFixedOutputX, int kFixedStrideWidth,
+          int kFixedStrideHeight>
+struct ConvKernel3x3FilterDepth8 {};
+
+template <>
+struct ConvKernel3x3FilterDepth8<8, 8, 1, 1> {
+  static inline void Run(const uint8* input_ptr, int input_depth,
+                         int32 input_offset, int input_row_size,
+                         const uint8* filter_ptr, int32 filter_offset,
+                         const int32* bias_ptr, int32 output_offset,
+                         int32 output_multiplier, int output_shift,
+                         int32 output_activation_min,
+                         int32 output_activation_max, uint8* output_ptr,
+                         int output_depth, int output_width) {
+    Filter3x3x8 filter = Load3x3Filter(filter_ptr, filter_offset, output_depth);
+
+    const int16x8_t input_offset_vec = vdupq_n_s16(input_offset);
+    const int output_row_size = output_depth * output_width;
+
+    // To process 8x8 outputs using a 3x3 filter, we require 10x10 inputs.
+    // Load inputs for the first 2 filters on the top left, then slide to
+    // the right, down, left, down, right, etc.
in a snake-like path. This + // minimizes the total number of loads. + // + // INPUT OUTPUT + // |\----------------\ |\------------\ + // | \ \ | \ \ + // | \----------------\ | \------------\ + // | | 0 ... 9 | | | 0 ... 7 | + // | | 10 ... 19 | ---> | | 8 ... 15 | + // | | 20 ... 29 | \ | .. ... .. | + // \ | .. ... .. | \| 56 ... 63 | + // \| 90 ... 109 | |------------| + // |----------------| + // + // The first set of loads corresponds to: + // + // INPUT OUTPUT + // |\----------------- |\----------- + // | \ | \ + // | \----------------- | \---------- + // | | 0 1 2 3 ... | | 0 1 ... + // | | 10 11 12 13 ... ---> | | .. ... + // | | 20 21 22 23 ... | .. ... + // | | .. ... ... + // + // The next set of loads correspond to a sliding window to the right. + // It loads inputs 4, 5, 14, 15, 23, 24 and keeps 2, 3, 12, 13, and 22: + // + // INPUT OUTPUT + // |\------------------- |\------------- + // | \ | \ + // | \------------------- | \------------ + // | | .. 2 3 4 5 ... | | .. 2 3 ... + // | | .. 12 13 14 15 ... ---> | | .. ... + // | | .. 21 22 23 24 ... | .. ... + // | | .. ... ... + // + // And so on... + + int16x8_t input_0, input_1, input_2, input_3, input_4, input_5, input_6, + input_7, input_8, input_9, input_10, input_11; + + // Load inputs for 1x2 outputs starting from the top left. Referring to the + // indexes in the diagram above, this corresponds to outputs (0) and (1). 
+ { + uint8x8_t temp_0, temp_1, temp_2, temp_3; + + const uint8* ptr = input_ptr; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + temp_2 = vld1_u8(ptr + 2 * input_depth); + temp_3 = vld1_u8(ptr + 3 * input_depth); + + input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + + input_0 = vaddq_s16(input_0, input_offset_vec); + input_1 = vaddq_s16(input_1, input_offset_vec); + input_2 = vaddq_s16(input_2, input_offset_vec); + input_3 = vaddq_s16(input_3, input_offset_vec); + + ptr += input_row_size; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + temp_2 = vld1_u8(ptr + 2 * input_depth); + temp_3 = vld1_u8(ptr + 3 * input_depth); + + input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_6 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_7 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + + input_4 = vaddq_s16(input_4, input_offset_vec); + input_5 = vaddq_s16(input_5, input_offset_vec); + input_6 = vaddq_s16(input_6, input_offset_vec); + input_7 = vaddq_s16(input_7, input_offset_vec); + + ptr += input_row_size; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + temp_2 = vld1_u8(ptr + 2 * input_depth); + temp_3 = vld1_u8(ptr + 3 * input_depth); + + input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_10 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_11 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + + input_8 = vaddq_s16(input_8, input_offset_vec); + input_9 = vaddq_s16(input_9, input_offset_vec); + input_10 = vaddq_s16(input_10, input_offset_vec); + input_11 = vaddq_s16(input_11, input_offset_vec); + } + + DotProductAndStore2xStride1( + filter, input_0, input_1, input_2, input_3, input_4, input_5, input_6, + input_7, input_8, input_9, input_10, input_11, 
bias_ptr, output_offset, + output_multiplier, output_shift, output_activation_min, + output_activation_max, output_ptr, output_depth); + + // Slide to the right for outputs x = [2, 3], y = 0. Referring to the + // indexes in the diagram above, this corresponds to outputs (2) and (3). + { + uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5; + + const uint8* ptr = input_ptr + 4 * input_depth; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + + ptr += input_row_size; + temp_2 = vld1_u8(ptr); + temp_3 = vld1_u8(ptr + input_depth); + + ptr += input_row_size; + temp_4 = vld1_u8(ptr); + temp_5 = vld1_u8(ptr + input_depth); + + input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); + input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); + + input_0 = vaddq_s16(input_0, input_offset_vec); + input_1 = vaddq_s16(input_1, input_offset_vec); + input_4 = vaddq_s16(input_4, input_offset_vec); + input_5 = vaddq_s16(input_5, input_offset_vec); + input_8 = vaddq_s16(input_8, input_offset_vec); + input_9 = vaddq_s16(input_9, input_offset_vec); + } + + DotProductAndStore2xStride1( + filter, input_2, input_3, input_0, input_1, input_6, input_7, input_4, + input_5, input_10, input_11, input_8, input_9, bias_ptr, output_offset, + output_multiplier, output_shift, output_activation_min, + output_activation_max, output_ptr + 2 * output_depth, output_depth); + + // Slide to the right again for outputs x = [4, 5], y = 0. Referring to the + // indexes in the diagram above, this corresponds to outputs (4) and (5). 
+ { + uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5; + + const uint8* ptr = input_ptr + 6 * input_depth; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + + ptr += input_row_size; + temp_2 = vld1_u8(ptr); + temp_3 = vld1_u8(ptr + input_depth); + + ptr += input_row_size; + temp_4 = vld1_u8(ptr); + temp_5 = vld1_u8(ptr + input_depth); + + input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_6 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_7 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + input_10 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); + input_11 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); + + input_2 = vaddq_s16(input_2, input_offset_vec); + input_3 = vaddq_s16(input_3, input_offset_vec); + input_6 = vaddq_s16(input_6, input_offset_vec); + input_7 = vaddq_s16(input_7, input_offset_vec); + input_10 = vaddq_s16(input_10, input_offset_vec); + input_11 = vaddq_s16(input_11, input_offset_vec); + } + + DotProductAndStore2xStride1( + filter, input_0, input_1, input_2, input_3, input_4, input_5, input_6, + input_7, input_8, input_9, input_10, input_11, bias_ptr, output_offset, + output_multiplier, output_shift, output_activation_min, + output_activation_max, output_ptr + 4 * output_depth, output_depth); + + // Slide to the right one last time for outputs x = [6, 7], y = 0. + // Referring to the indexes in the diagram above, this corresponds to + // outputs (6) and (7). 
+ { + uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5; + + const uint8* ptr = input_ptr + 8 * input_depth; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + + ptr += input_row_size; + temp_2 = vld1_u8(ptr); + temp_3 = vld1_u8(ptr + input_depth); + + ptr += input_row_size; + temp_4 = vld1_u8(ptr); + temp_5 = vld1_u8(ptr + input_depth); + + input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); + input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); + + input_0 = vaddq_s16(input_0, input_offset_vec); + input_1 = vaddq_s16(input_1, input_offset_vec); + input_4 = vaddq_s16(input_4, input_offset_vec); + input_5 = vaddq_s16(input_5, input_offset_vec); + input_8 = vaddq_s16(input_8, input_offset_vec); + input_9 = vaddq_s16(input_9, input_offset_vec); + } + + DotProductAndStore2xStride1( + filter, input_2, input_3, input_0, input_1, input_6, input_7, input_4, + input_5, input_10, input_11, input_8, input_9, bias_ptr, output_offset, + output_multiplier, output_shift, output_activation_min, + output_activation_max, output_ptr + 6 * output_depth, output_depth); + + // Slide to down for outputs x = [6, 7], y = 1. Referring to the indexes in + // the diagram above, this corresponds to outputs (14) and (15). 
+ { + uint8x8_t temp_0, temp_1, temp_2, temp_3; + + const uint8* ptr = input_ptr + 6 * input_depth + 3 * input_row_size; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + temp_2 = vld1_u8(ptr + 2 * input_depth); + temp_3 = vld1_u8(ptr + 3 * input_depth); + + input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + + input_2 = vaddq_s16(input_2, input_offset_vec); + input_3 = vaddq_s16(input_3, input_offset_vec); + input_0 = vaddq_s16(input_0, input_offset_vec); + input_1 = vaddq_s16(input_1, input_offset_vec); + } + + DotProductAndStore2xStride1( + filter, input_6, input_7, input_4, input_5, input_10, input_11, input_8, + input_9, input_2, input_3, input_0, input_1, bias_ptr, output_offset, + output_multiplier, output_shift, output_activation_min, + output_activation_max, output_ptr + 6 * output_depth + output_row_size, + output_depth); + + // Slide left for outputs x = [4, 5], y = 1. Referring to the indexes in + // the diagram above, this corresponds to outputs (12) and (13). 
+ { + uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5; + + const uint8* ptr = input_ptr + 4 * input_depth + input_row_size; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + + ptr += input_row_size; + temp_2 = vld1_u8(ptr); + temp_3 = vld1_u8(ptr + input_depth); + + ptr += input_row_size; + temp_4 = vld1_u8(ptr); + temp_5 = vld1_u8(ptr + input_depth); + + input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); + input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); + + input_4 = vaddq_s16(input_4, input_offset_vec); + input_5 = vaddq_s16(input_5, input_offset_vec); + input_8 = vaddq_s16(input_8, input_offset_vec); + input_9 = vaddq_s16(input_9, input_offset_vec); + input_0 = vaddq_s16(input_0, input_offset_vec); + input_1 = vaddq_s16(input_1, input_offset_vec); + } + + DotProductAndStore2xStride1( + filter, input_4, input_5, input_6, input_7, input_8, input_9, input_10, + input_11, input_0, input_1, input_2, input_3, bias_ptr, output_offset, + output_multiplier, output_shift, output_activation_min, + output_activation_max, output_ptr + 4 * output_depth + output_row_size, + output_depth); + + // Slide left again for outputs x = [2, 3], y = 1. Referring to the indexes + // in the diagram above, this corresponds to outputs (10) and (11). 
+ { + uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5; + + const uint8* ptr = input_ptr + 2 * input_depth + input_row_size; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + + ptr += input_row_size; + temp_2 = vld1_u8(ptr); + temp_3 = vld1_u8(ptr + input_depth); + + ptr += input_row_size; + temp_4 = vld1_u8(ptr); + temp_5 = vld1_u8(ptr + input_depth); + + input_6 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_7 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_10 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_11 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); + input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); + + input_6 = vaddq_s16(input_6, input_offset_vec); + input_7 = vaddq_s16(input_7, input_offset_vec); + input_10 = vaddq_s16(input_10, input_offset_vec); + input_11 = vaddq_s16(input_11, input_offset_vec); + input_2 = vaddq_s16(input_2, input_offset_vec); + input_3 = vaddq_s16(input_3, input_offset_vec); + } + + DotProductAndStore2xStride1( + filter, input_6, input_7, input_4, input_5, input_10, input_11, input_8, + input_9, input_2, input_3, input_0, input_1, bias_ptr, output_offset, + output_multiplier, output_shift, output_activation_min, + output_activation_max, output_ptr + 2 * output_depth + output_row_size, + output_depth); + + // Slide left one more time for outputs x = [0, 1], y = 1. Referring to the + // indexes in the diagram above, this corresponds to outputs (8) and (9). 
+ { + uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5; + + const uint8* ptr = input_ptr + input_row_size; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + + ptr += input_row_size; + temp_2 = vld1_u8(ptr); + temp_3 = vld1_u8(ptr + input_depth); + + ptr += input_row_size; + temp_4 = vld1_u8(ptr); + temp_5 = vld1_u8(ptr + input_depth); + + input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); + input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); + + input_4 = vaddq_s16(input_4, input_offset_vec); + input_5 = vaddq_s16(input_5, input_offset_vec); + input_8 = vaddq_s16(input_8, input_offset_vec); + input_9 = vaddq_s16(input_9, input_offset_vec); + input_0 = vaddq_s16(input_0, input_offset_vec); + input_1 = vaddq_s16(input_1, input_offset_vec); + } + + DotProductAndStore2xStride1( + filter, input_4, input_5, input_6, input_7, input_8, input_9, input_10, + input_11, input_0, input_1, input_2, input_3, bias_ptr, output_offset, + output_multiplier, output_shift, output_activation_min, + output_activation_max, output_ptr + output_row_size, output_depth); + + // Slide down for outputs x = [0, 1], y = 2. Referring to the + // indexes in the diagram above, this corresponds to outputs (16) and (17). 
+ { + uint8x8_t temp_0, temp_1, temp_2, temp_3; + + const uint8* ptr = input_ptr + 4 * input_row_size; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + temp_2 = vld1_u8(ptr + 2 * input_depth); + temp_3 = vld1_u8(ptr + 3 * input_depth); + + input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_6 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_7 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + + input_4 = vaddq_s16(input_4, input_offset_vec); + input_5 = vaddq_s16(input_5, input_offset_vec); + input_6 = vaddq_s16(input_6, input_offset_vec); + input_7 = vaddq_s16(input_7, input_offset_vec); + } + + DotProductAndStore2xStride1( + filter, input_8, input_9, input_10, input_11, input_0, input_1, input_2, + input_3, input_4, input_5, input_6, input_7, bias_ptr, output_offset, + output_multiplier, output_shift, output_activation_min, + output_activation_max, output_ptr + 2 * output_row_size, output_depth); + + // Slide right for outputs x = [2, 3], y = 2. Referring to the + // indexes in the diagram above, this corresponds to outputs (18) and (19). 
+ { + uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5; + + const uint8* ptr = input_ptr + 4 * input_depth + 2 * input_row_size; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + + ptr += input_row_size; + temp_2 = vld1_u8(ptr); + temp_3 = vld1_u8(ptr + input_depth); + + ptr += input_row_size; + temp_4 = vld1_u8(ptr); + temp_5 = vld1_u8(ptr + input_depth); + + input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); + input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); + + input_8 = vaddq_s16(input_8, input_offset_vec); + input_9 = vaddq_s16(input_9, input_offset_vec); + input_0 = vaddq_s16(input_0, input_offset_vec); + input_1 = vaddq_s16(input_1, input_offset_vec); + input_4 = vaddq_s16(input_4, input_offset_vec); + input_5 = vaddq_s16(input_5, input_offset_vec); + } + + DotProductAndStore2xStride1( + filter, input_10, input_11, input_8, input_9, input_2, input_3, input_0, + input_1, input_6, input_7, input_4, input_5, bias_ptr, output_offset, + output_multiplier, output_shift, output_activation_min, + output_activation_max, + output_ptr + 2 * output_depth + 2 * output_row_size, output_depth); + + // Slide right for outputs x = [4, 5], y = 2. Referring to the + // indexes in the diagram above, this corresponds to outputs (20) and (21). 
+ { + uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5; + + const uint8* ptr = input_ptr + 6 * input_depth + 2 * input_row_size; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + + ptr += input_row_size; + temp_2 = vld1_u8(ptr); + temp_3 = vld1_u8(ptr + input_depth); + + ptr += input_row_size; + temp_4 = vld1_u8(ptr); + temp_5 = vld1_u8(ptr + input_depth); + + input_10 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_11 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + input_6 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); + input_7 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); + + input_10 = vaddq_s16(input_10, input_offset_vec); + input_11 = vaddq_s16(input_11, input_offset_vec); + input_2 = vaddq_s16(input_2, input_offset_vec); + input_3 = vaddq_s16(input_3, input_offset_vec); + input_6 = vaddq_s16(input_6, input_offset_vec); + input_7 = vaddq_s16(input_7, input_offset_vec); + } + + DotProductAndStore2xStride1( + filter, input_8, input_9, input_10, input_11, input_0, input_1, input_2, + input_3, input_4, input_5, input_6, input_7, bias_ptr, output_offset, + output_multiplier, output_shift, output_activation_min, + output_activation_max, + output_ptr + 4 * output_depth + 2 * output_row_size, output_depth); + + // Slide right one more time for outputs x = [6, 7], y = 2. Referring to the + // indexes in the diagram above, this corresponds to outputs (22) and (23). 
+ { + uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5; + + const uint8* ptr = input_ptr + 8 * input_depth + 2 * input_row_size; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + + ptr += input_row_size; + temp_2 = vld1_u8(ptr); + temp_3 = vld1_u8(ptr + input_depth); + + ptr += input_row_size; + temp_4 = vld1_u8(ptr); + temp_5 = vld1_u8(ptr + input_depth); + + input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); + input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); + + input_8 = vaddq_s16(input_8, input_offset_vec); + input_9 = vaddq_s16(input_9, input_offset_vec); + input_0 = vaddq_s16(input_0, input_offset_vec); + input_1 = vaddq_s16(input_1, input_offset_vec); + input_4 = vaddq_s16(input_4, input_offset_vec); + input_5 = vaddq_s16(input_5, input_offset_vec); + } + + DotProductAndStore2xStride1( + filter, input_10, input_11, input_8, input_9, input_2, input_3, input_0, + input_1, input_6, input_7, input_4, input_5, bias_ptr, output_offset, + output_multiplier, output_shift, output_activation_min, + output_activation_max, + output_ptr + 6 * output_depth + 2 * output_row_size, output_depth); + + // Slide down for outputs x = [6, 7], y = 3. Referring to the indexes in + // the diagram above, this corresponds to outputs (30) and (31). 
+ { + uint8x8_t temp_0, temp_1, temp_2, temp_3; + + const uint8* ptr = input_ptr + 6 * input_depth + 5 * input_row_size; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + temp_2 = vld1_u8(ptr + 2 * input_depth); + temp_3 = vld1_u8(ptr + 3 * input_depth); + + input_10 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_11 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + + input_10 = vaddq_s16(input_10, input_offset_vec); + input_11 = vaddq_s16(input_11, input_offset_vec); + input_8 = vaddq_s16(input_8, input_offset_vec); + input_9 = vaddq_s16(input_9, input_offset_vec); + } + + DotProductAndStore2xStride1( + filter, input_2, input_3, input_0, input_1, input_6, input_7, input_4, + input_5, input_10, input_11, input_8, input_9, bias_ptr, output_offset, + output_multiplier, output_shift, output_activation_min, + output_activation_max, + output_ptr + 6 * output_depth + 3 * output_row_size, output_depth); + + // Slide left for outputs x = [4, 5], y = 3. Referring to the indexes in + // the diagram above, this corresponds to outputs (28) and (29). 
+ { + uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5; + + const uint8* ptr = input_ptr + 4 * input_depth + 3 * input_row_size; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + + ptr += input_row_size; + temp_2 = vld1_u8(ptr); + temp_3 = vld1_u8(ptr + input_depth); + + ptr += input_row_size; + temp_4 = vld1_u8(ptr); + temp_5 = vld1_u8(ptr + input_depth); + + input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); + input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); + + input_0 = vaddq_s16(input_0, input_offset_vec); + input_1 = vaddq_s16(input_1, input_offset_vec); + input_4 = vaddq_s16(input_4, input_offset_vec); + input_5 = vaddq_s16(input_5, input_offset_vec); + input_8 = vaddq_s16(input_8, input_offset_vec); + input_9 = vaddq_s16(input_9, input_offset_vec); + } + + DotProductAndStore2xStride1( + filter, input_0, input_1, input_2, input_3, input_4, input_5, input_6, + input_7, input_8, input_9, input_10, input_11, bias_ptr, output_offset, + output_multiplier, output_shift, output_activation_min, + output_activation_max, + output_ptr + 4 * output_depth + 3 * output_row_size, output_depth); + + // Slide left for outputs x = [2, 3], y = 3. Referring to the indexes in + // the diagram above, this corresponds to outputs (26) and (27). 
+ { + uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5; + + const uint8* ptr = input_ptr + 2 * input_depth + 3 * input_row_size; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + + ptr += input_row_size; + temp_2 = vld1_u8(ptr); + temp_3 = vld1_u8(ptr + input_depth); + + ptr += input_row_size; + temp_4 = vld1_u8(ptr); + temp_5 = vld1_u8(ptr + input_depth); + + input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_6 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_7 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + input_10 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); + input_11 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); + + input_2 = vaddq_s16(input_2, input_offset_vec); + input_3 = vaddq_s16(input_3, input_offset_vec); + input_6 = vaddq_s16(input_6, input_offset_vec); + input_7 = vaddq_s16(input_7, input_offset_vec); + input_10 = vaddq_s16(input_10, input_offset_vec); + input_11 = vaddq_s16(input_11, input_offset_vec); + } + + DotProductAndStore2xStride1( + filter, input_2, input_3, input_0, input_1, input_6, input_7, input_4, + input_5, input_10, input_11, input_8, input_9, bias_ptr, output_offset, + output_multiplier, output_shift, output_activation_min, + output_activation_max, + output_ptr + 2 * output_depth + 3 * output_row_size, output_depth); + + // Slide left one more time for outputs x = [0, 1], y = 3. Referring to the + // indexes in the diagram above, this corresponds to outputs (24) and (25). 
+ { + uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5; + + const uint8* ptr = input_ptr + 3 * input_row_size; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + + ptr += input_row_size; + temp_2 = vld1_u8(ptr); + temp_3 = vld1_u8(ptr + input_depth); + + ptr += input_row_size; + temp_4 = vld1_u8(ptr); + temp_5 = vld1_u8(ptr + input_depth); + + input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); + input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); + + input_0 = vaddq_s16(input_0, input_offset_vec); + input_1 = vaddq_s16(input_1, input_offset_vec); + input_4 = vaddq_s16(input_4, input_offset_vec); + input_5 = vaddq_s16(input_5, input_offset_vec); + input_8 = vaddq_s16(input_8, input_offset_vec); + input_9 = vaddq_s16(input_9, input_offset_vec); + } + + DotProductAndStore2xStride1( + filter, input_0, input_1, input_2, input_3, input_4, input_5, input_6, + input_7, input_8, input_9, input_10, input_11, bias_ptr, output_offset, + output_multiplier, output_shift, output_activation_min, + output_activation_max, output_ptr + 3 * output_row_size, output_depth); + + // Slide down for outputs x = [0, 1], y = 4. Referring to the indexes in + // the diagram above, this corresponds to outputs (32) and (33). 
+ { + uint8x8_t temp_0, temp_1, temp_2, temp_3; + + const uint8* ptr = input_ptr + 6 * input_row_size; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + temp_2 = vld1_u8(ptr + 2 * input_depth); + temp_3 = vld1_u8(ptr + 3 * input_depth); + + input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + + input_0 = vaddq_s16(input_0, input_offset_vec); + input_1 = vaddq_s16(input_1, input_offset_vec); + input_2 = vaddq_s16(input_2, input_offset_vec); + input_3 = vaddq_s16(input_3, input_offset_vec); + } + + DotProductAndStore2xStride1( + filter, input_4, input_5, input_6, input_7, input_8, input_9, input_10, + input_11, input_0, input_1, input_2, input_3, bias_ptr, output_offset, + output_multiplier, output_shift, output_activation_min, + output_activation_max, output_ptr + 4 * output_row_size, output_depth); + + // Slide right for outputs x = [2, 3], y = 4. Referring to the indexes in + // the diagram above, this corresponds to outputs (34) and (35). 
+ { + uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5; + + const uint8* ptr = input_ptr + 4 * input_depth + 4 * input_row_size; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + + ptr += input_row_size; + temp_2 = vld1_u8(ptr); + temp_3 = vld1_u8(ptr + input_depth); + + ptr += input_row_size; + temp_4 = vld1_u8(ptr); + temp_5 = vld1_u8(ptr + input_depth); + + input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); + input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); + + input_4 = vaddq_s16(input_4, input_offset_vec); + input_5 = vaddq_s16(input_5, input_offset_vec); + input_8 = vaddq_s16(input_8, input_offset_vec); + input_9 = vaddq_s16(input_9, input_offset_vec); + input_0 = vaddq_s16(input_0, input_offset_vec); + input_1 = vaddq_s16(input_1, input_offset_vec); + } + + DotProductAndStore2xStride1( + filter, input_6, input_7, input_4, input_5, input_10, input_11, input_8, + input_9, input_2, input_3, input_0, input_1, bias_ptr, output_offset, + output_multiplier, output_shift, output_activation_min, + output_activation_max, + output_ptr + 2 * output_depth + 4 * output_row_size, output_depth); + + // Slide right for outputs x = [4, 5], y = 4. Referring to the indexes in + // the diagram above, this corresponds to outputs (36) and (37). 
+ { + uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5; + + const uint8* ptr = input_ptr + 6 * input_depth + 4 * input_row_size; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + + ptr += input_row_size; + temp_2 = vld1_u8(ptr); + temp_3 = vld1_u8(ptr + input_depth); + + ptr += input_row_size; + temp_4 = vld1_u8(ptr); + temp_5 = vld1_u8(ptr + input_depth); + + input_6 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_7 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_10 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_11 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); + input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); + + input_6 = vaddq_s16(input_6, input_offset_vec); + input_7 = vaddq_s16(input_7, input_offset_vec); + input_10 = vaddq_s16(input_10, input_offset_vec); + input_11 = vaddq_s16(input_11, input_offset_vec); + input_2 = vaddq_s16(input_2, input_offset_vec); + input_3 = vaddq_s16(input_3, input_offset_vec); + } + + DotProductAndStore2xStride1( + filter, input_4, input_5, input_6, input_7, input_8, input_9, input_10, + input_11, input_0, input_1, input_2, input_3, bias_ptr, output_offset, + output_multiplier, output_shift, output_activation_min, + output_activation_max, + output_ptr + 4 * output_depth + 4 * output_row_size, output_depth); + + // Slide right one more time for outputs x = [6, 7], y = 4. Referring to the + // indexes in the diagram above, this corresponds to outputs (38) and (39). 
+ { + uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5; + + const uint8* ptr = input_ptr + 8 * input_depth + 4 * input_row_size; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + + ptr += input_row_size; + temp_2 = vld1_u8(ptr); + temp_3 = vld1_u8(ptr + input_depth); + + ptr += input_row_size; + temp_4 = vld1_u8(ptr); + temp_5 = vld1_u8(ptr + input_depth); + + input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); + input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); + + input_4 = vaddq_s16(input_4, input_offset_vec); + input_5 = vaddq_s16(input_5, input_offset_vec); + input_8 = vaddq_s16(input_8, input_offset_vec); + input_9 = vaddq_s16(input_9, input_offset_vec); + input_0 = vaddq_s16(input_0, input_offset_vec); + input_1 = vaddq_s16(input_1, input_offset_vec); + } + + DotProductAndStore2xStride1( + filter, input_6, input_7, input_4, input_5, input_10, input_11, input_8, + input_9, input_2, input_3, input_0, input_1, bias_ptr, output_offset, + output_multiplier, output_shift, output_activation_min, + output_activation_max, + output_ptr + 6 * output_depth + 4 * output_row_size, output_depth); + + // Slide down for outputs x = [6, 7], y = 5. Referring to the indexes in + // the diagram above, this corresponds to outputs (46) and (47). 
+ { + uint8x8_t temp_0, temp_1, temp_2, temp_3; + + const uint8* ptr = input_ptr + 6 * input_depth + 7 * input_row_size; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + temp_2 = vld1_u8(ptr + 2 * input_depth); + temp_3 = vld1_u8(ptr + 3 * input_depth); + + input_6 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_7 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + + input_6 = vaddq_s16(input_6, input_offset_vec); + input_7 = vaddq_s16(input_7, input_offset_vec); + input_4 = vaddq_s16(input_4, input_offset_vec); + input_5 = vaddq_s16(input_5, input_offset_vec); + } + + DotProductAndStore2xStride1( + filter, input_10, input_11, input_8, input_9, input_2, input_3, input_0, + input_1, input_6, input_7, input_4, input_5, bias_ptr, output_offset, + output_multiplier, output_shift, output_activation_min, + output_activation_max, + output_ptr + 6 * output_depth + 5 * output_row_size, output_depth); + + // Slide left for outputs x = [4, 5], y = 5. Referring to the indexes in + // the diagram above, this corresponds to outputs (44) and (45). 
+ { + uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5; + + const uint8* ptr = input_ptr + 4 * input_depth + 5 * input_row_size; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + + ptr += input_row_size; + temp_2 = vld1_u8(ptr); + temp_3 = vld1_u8(ptr + input_depth); + + ptr += input_row_size; + temp_4 = vld1_u8(ptr); + temp_5 = vld1_u8(ptr + input_depth); + + input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); + input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); + + input_8 = vaddq_s16(input_8, input_offset_vec); + input_9 = vaddq_s16(input_9, input_offset_vec); + input_0 = vaddq_s16(input_0, input_offset_vec); + input_1 = vaddq_s16(input_1, input_offset_vec); + input_4 = vaddq_s16(input_4, input_offset_vec); + input_5 = vaddq_s16(input_5, input_offset_vec); + } + + DotProductAndStore2xStride1( + filter, input_8, input_9, input_10, input_11, input_0, input_1, input_2, + input_3, input_4, input_5, input_6, input_7, bias_ptr, output_offset, + output_multiplier, output_shift, output_activation_min, + output_activation_max, + output_ptr + 4 * output_depth + 5 * output_row_size, output_depth); + + // Slide left for outputs x = [2, 3], y = 5. Referring to the indexes in + // the diagram above, this corresponds to outputs (42) and (43). 
+ { + uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5; + + const uint8* ptr = input_ptr + 2 * input_depth + 5 * input_row_size; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + + ptr += input_row_size; + temp_2 = vld1_u8(ptr); + temp_3 = vld1_u8(ptr + input_depth); + + ptr += input_row_size; + temp_4 = vld1_u8(ptr); + temp_5 = vld1_u8(ptr + input_depth); + + input_10 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_11 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + input_6 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); + input_7 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); + + input_10 = vaddq_s16(input_10, input_offset_vec); + input_11 = vaddq_s16(input_11, input_offset_vec); + input_2 = vaddq_s16(input_2, input_offset_vec); + input_3 = vaddq_s16(input_3, input_offset_vec); + input_6 = vaddq_s16(input_6, input_offset_vec); + input_7 = vaddq_s16(input_7, input_offset_vec); + } + + DotProductAndStore2xStride1( + filter, input_10, input_11, input_8, input_9, input_2, input_3, input_0, + input_1, input_6, input_7, input_4, input_5, bias_ptr, output_offset, + output_multiplier, output_shift, output_activation_min, + output_activation_max, + output_ptr + 2 * output_depth + 5 * output_row_size, output_depth); + + // Slide left one more time for outputs x = [0, 1], y = 5. Referring to the + // indexes in the diagram above, this corresponds to outputs (40) and (41). 
+ { + uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5; + + const uint8* ptr = input_ptr + 5 * input_row_size; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + + ptr += input_row_size; + temp_2 = vld1_u8(ptr); + temp_3 = vld1_u8(ptr + input_depth); + + ptr += input_row_size; + temp_4 = vld1_u8(ptr); + temp_5 = vld1_u8(ptr + input_depth); + + input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); + input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); + + input_8 = vaddq_s16(input_8, input_offset_vec); + input_9 = vaddq_s16(input_9, input_offset_vec); + input_0 = vaddq_s16(input_0, input_offset_vec); + input_1 = vaddq_s16(input_1, input_offset_vec); + input_4 = vaddq_s16(input_4, input_offset_vec); + input_5 = vaddq_s16(input_5, input_offset_vec); + } + + DotProductAndStore2xStride1( + filter, input_8, input_9, input_10, input_11, input_0, input_1, input_2, + input_3, input_4, input_5, input_6, input_7, bias_ptr, output_offset, + output_multiplier, output_shift, output_activation_min, + output_activation_max, output_ptr + 5 * output_row_size, output_depth); + + // Slide down for outputs x = [0, 1], y = 6. Referring to the indexes in + // the diagram above, this corresponds to outputs (48) and (49). 
+ { + uint8x8_t temp_0, temp_1, temp_2, temp_3; + + const uint8* ptr = input_ptr + 8 * input_row_size; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + temp_2 = vld1_u8(ptr + 2 * input_depth); + temp_3 = vld1_u8(ptr + 3 * input_depth); + + input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_10 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_11 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + + input_8 = vaddq_s16(input_8, input_offset_vec); + input_9 = vaddq_s16(input_9, input_offset_vec); + input_10 = vaddq_s16(input_10, input_offset_vec); + input_11 = vaddq_s16(input_11, input_offset_vec); + } + + DotProductAndStore2xStride1( + filter, input_0, input_1, input_2, input_3, input_4, input_5, input_6, + input_7, input_8, input_9, input_10, input_11, bias_ptr, output_offset, + output_multiplier, output_shift, output_activation_min, + output_activation_max, output_ptr + 6 * output_row_size, output_depth); + + // Slide right for outputs x = [2, 3], y = 6. Referring to the indexes in + // the diagram above, this corresponds to outputs (50) and (51). 
+ { + uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5; + + const uint8* ptr = input_ptr + 4 * input_depth + 6 * input_row_size; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + + ptr += input_row_size; + temp_2 = vld1_u8(ptr); + temp_3 = vld1_u8(ptr + input_depth); + + ptr += input_row_size; + temp_4 = vld1_u8(ptr); + temp_5 = vld1_u8(ptr + input_depth); + + input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); + input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); + + input_0 = vaddq_s16(input_0, input_offset_vec); + input_1 = vaddq_s16(input_1, input_offset_vec); + input_4 = vaddq_s16(input_4, input_offset_vec); + input_5 = vaddq_s16(input_5, input_offset_vec); + input_8 = vaddq_s16(input_8, input_offset_vec); + input_9 = vaddq_s16(input_9, input_offset_vec); + } + + DotProductAndStore2xStride1( + filter, input_2, input_3, input_0, input_1, input_6, input_7, input_4, + input_5, input_10, input_11, input_8, input_9, bias_ptr, output_offset, + output_multiplier, output_shift, output_activation_min, + output_activation_max, + output_ptr + 2 * output_depth + 6 * output_row_size, output_depth); + + // Slide right for outputs x = [4, 5], y = 6. Referring to the indexes in + // the diagram above, this corresponds to outputs (52) and (53). 
+ { + uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5; + + const uint8* ptr = input_ptr + 6 * input_depth + 6 * input_row_size; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + + ptr += input_row_size; + temp_2 = vld1_u8(ptr); + temp_3 = vld1_u8(ptr + input_depth); + + ptr += input_row_size; + temp_4 = vld1_u8(ptr); + temp_5 = vld1_u8(ptr + input_depth); + + input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_6 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_7 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + input_10 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); + input_11 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); + + input_2 = vaddq_s16(input_2, input_offset_vec); + input_3 = vaddq_s16(input_3, input_offset_vec); + input_6 = vaddq_s16(input_6, input_offset_vec); + input_7 = vaddq_s16(input_7, input_offset_vec); + input_10 = vaddq_s16(input_10, input_offset_vec); + input_11 = vaddq_s16(input_11, input_offset_vec); + } + + DotProductAndStore2xStride1( + filter, input_0, input_1, input_2, input_3, input_4, input_5, input_6, + input_7, input_8, input_9, input_10, input_11, bias_ptr, output_offset, + output_multiplier, output_shift, output_activation_min, + output_activation_max, + output_ptr + 4 * output_depth + 6 * output_row_size, output_depth); + + // Slide right one more time for outputs x = [6, 7], y = 6. Referring to the + // indexes in the diagram above, this corresponds to outputs (54) and (55). 
+ { + uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5; + + const uint8* ptr = input_ptr + 8 * input_depth + 6 * input_row_size; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + + ptr += input_row_size; + temp_2 = vld1_u8(ptr); + temp_3 = vld1_u8(ptr + input_depth); + + ptr += input_row_size; + temp_4 = vld1_u8(ptr); + temp_5 = vld1_u8(ptr + input_depth); + + input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); + input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); + + input_0 = vaddq_s16(input_0, input_offset_vec); + input_1 = vaddq_s16(input_1, input_offset_vec); + input_4 = vaddq_s16(input_4, input_offset_vec); + input_5 = vaddq_s16(input_5, input_offset_vec); + input_8 = vaddq_s16(input_8, input_offset_vec); + input_9 = vaddq_s16(input_9, input_offset_vec); + } + + DotProductAndStore2xStride1( + filter, input_2, input_3, input_0, input_1, input_6, input_7, input_4, + input_5, input_10, input_11, input_8, input_9, bias_ptr, output_offset, + output_multiplier, output_shift, output_activation_min, + output_activation_max, + output_ptr + 6 * output_depth + 6 * output_row_size, output_depth); + + // Slide down for outputs x = [6, 7], y = 7. Referring to the indexes in the + // diagram above, this corresponds to outputs (62) and (63). 
+ { + uint8x8_t temp_0, temp_1, temp_2, temp_3; + + const uint8* ptr = input_ptr + 6 * input_depth + 9 * input_row_size; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + temp_2 = vld1_u8(ptr + 2 * input_depth); + temp_3 = vld1_u8(ptr + 3 * input_depth); + + input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + + input_2 = vaddq_s16(input_2, input_offset_vec); + input_3 = vaddq_s16(input_3, input_offset_vec); + input_0 = vaddq_s16(input_0, input_offset_vec); + input_1 = vaddq_s16(input_1, input_offset_vec); + } + + DotProductAndStore2xStride1( + filter, input_6, input_7, input_4, input_5, input_10, input_11, input_8, + input_9, input_2, input_3, input_0, input_1, bias_ptr, output_offset, + output_multiplier, output_shift, output_activation_min, + output_activation_max, + output_ptr + 6 * output_depth + 7 * output_row_size, output_depth); + + // Slide left for outputs x = [4, 5], y = 7. Referring to the indexes in the + // diagram above, this corresponds to outputs (60) and (61). 
+ { + uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5; + + const uint8* ptr = input_ptr + 4 * input_depth + 7 * input_row_size; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + + ptr += input_row_size; + temp_2 = vld1_u8(ptr); + temp_3 = vld1_u8(ptr + input_depth); + + ptr += input_row_size; + temp_4 = vld1_u8(ptr); + temp_5 = vld1_u8(ptr + input_depth); + + input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); + input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); + + input_4 = vaddq_s16(input_4, input_offset_vec); + input_5 = vaddq_s16(input_5, input_offset_vec); + input_8 = vaddq_s16(input_8, input_offset_vec); + input_9 = vaddq_s16(input_9, input_offset_vec); + input_0 = vaddq_s16(input_0, input_offset_vec); + input_1 = vaddq_s16(input_1, input_offset_vec); + } + + DotProductAndStore2xStride1( + filter, input_4, input_5, input_6, input_7, input_8, input_9, input_10, + input_11, input_0, input_1, input_2, input_3, bias_ptr, output_offset, + output_multiplier, output_shift, output_activation_min, + output_activation_max, + output_ptr + 4 * output_depth + 7 * output_row_size, output_depth); + + // Slide left for outputs x = [2, 3], y = 7. Referring to the indexes in the + // diagram above, this corresponds to outputs (58) and (59). 
+ { + uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5; + + const uint8* ptr = input_ptr + 2 * input_depth + 7 * input_row_size; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + + ptr += input_row_size; + temp_2 = vld1_u8(ptr); + temp_3 = vld1_u8(ptr + input_depth); + + ptr += input_row_size; + temp_4 = vld1_u8(ptr); + temp_5 = vld1_u8(ptr + input_depth); + + input_6 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_7 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_10 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_11 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); + input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); + + input_6 = vaddq_s16(input_6, input_offset_vec); + input_7 = vaddq_s16(input_7, input_offset_vec); + input_10 = vaddq_s16(input_10, input_offset_vec); + input_11 = vaddq_s16(input_11, input_offset_vec); + input_2 = vaddq_s16(input_2, input_offset_vec); + input_3 = vaddq_s16(input_3, input_offset_vec); + } + + DotProductAndStore2xStride1( + filter, input_6, input_7, input_4, input_5, input_10, input_11, input_8, + input_9, input_2, input_3, input_0, input_1, bias_ptr, output_offset, + output_multiplier, output_shift, output_activation_min, + output_activation_max, + output_ptr + 2 * output_depth + 7 * output_row_size, output_depth); + + // Slide left one more time for outputs x = [0, 1], y = 7. Referring to the + // indexes in the diagram above, this corresponds to outputs (56) and (57). 
+ { + uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5; + + const uint8* ptr = input_ptr + 7 * input_row_size; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + + ptr += input_row_size; + temp_2 = vld1_u8(ptr); + temp_3 = vld1_u8(ptr + input_depth); + + ptr += input_row_size; + temp_4 = vld1_u8(ptr); + temp_5 = vld1_u8(ptr + input_depth); + + input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); + input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); + + input_4 = vaddq_s16(input_4, input_offset_vec); + input_5 = vaddq_s16(input_5, input_offset_vec); + input_8 = vaddq_s16(input_8, input_offset_vec); + input_9 = vaddq_s16(input_9, input_offset_vec); + input_0 = vaddq_s16(input_0, input_offset_vec); + input_1 = vaddq_s16(input_1, input_offset_vec); + } + + DotProductAndStore2xStride1( + filter, input_4, input_5, input_6, input_7, input_8, input_9, input_10, + input_11, input_0, input_1, input_2, input_3, bias_ptr, output_offset, + output_multiplier, output_shift, output_activation_min, + output_activation_max, output_ptr + 7 * output_row_size, output_depth); + } +}; + +template <> +struct ConvKernel3x3FilterDepth8<4, 4, 1, 1> { + static inline void Run(const uint8* input_ptr, int input_depth, + int32 input_offset, int input_row_size, + const uint8* filter_ptr, int32 filter_offset, + const int32* bias_ptr, int32 output_offset, + int32 output_multiplier, int output_shift, + int32 output_activation_min, + int32 output_activation_max, uint8* output_ptr, + int output_depth, int output_width) { + Filter3x3x8 filter = Load3x3Filter(filter_ptr, filter_offset, output_depth); + + const int16x8_t input_offset_vec = vdupq_n_s16(input_offset); + const int output_row_size = output_depth * output_width; + + // To process 4x4 outputs using a 3x3 filter, we 
require 6x6 inputs. + // Load inputs for the first 2 filters on the top left, then slide to + // the right, down, left, down, right, etc. in a snake-like path. This + // minimizes the total number of loads. + int16x8_t input_0, input_1, input_2, input_3, input_4, input_5, input_6, + input_7, input_8, input_9, input_10, input_11; + + // Load inputs for 1x2 outputs starting from the top left. + { + uint8x8_t temp_0, temp_1, temp_2, temp_3; + + const uint8* ptr = input_ptr; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + temp_2 = vld1_u8(ptr + 2 * input_depth); + temp_3 = vld1_u8(ptr + 3 * input_depth); + + input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + + input_0 = vaddq_s16(input_0, input_offset_vec); + input_1 = vaddq_s16(input_1, input_offset_vec); + input_2 = vaddq_s16(input_2, input_offset_vec); + input_3 = vaddq_s16(input_3, input_offset_vec); + + ptr += input_row_size; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + temp_2 = vld1_u8(ptr + 2 * input_depth); + temp_3 = vld1_u8(ptr + 3 * input_depth); + + input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_6 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_7 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + + input_4 = vaddq_s16(input_4, input_offset_vec); + input_5 = vaddq_s16(input_5, input_offset_vec); + input_6 = vaddq_s16(input_6, input_offset_vec); + input_7 = vaddq_s16(input_7, input_offset_vec); + + ptr += input_row_size; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + temp_2 = vld1_u8(ptr + 2 * input_depth); + temp_3 = vld1_u8(ptr + 3 * input_depth); + + input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_10 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_11 = 
vreinterpretq_s16_u16(vmovl_u8(temp_3)); + + input_8 = vaddq_s16(input_8, input_offset_vec); + input_9 = vaddq_s16(input_9, input_offset_vec); + input_10 = vaddq_s16(input_10, input_offset_vec); + input_11 = vaddq_s16(input_11, input_offset_vec); + } + + DotProductAndStore2xStride1( + filter, input_0, input_1, input_2, input_3, input_4, input_5, input_6, + input_7, input_8, input_9, input_10, input_11, bias_ptr, output_offset, + output_multiplier, output_shift, output_activation_min, + output_activation_max, output_ptr, output_depth); + + // Now load 1x2 inputs on the top right. + { + uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5; + + const uint8* ptr = input_ptr + 4 * input_depth; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + + ptr += input_row_size; + temp_2 = vld1_u8(ptr); + temp_3 = vld1_u8(ptr + input_depth); + + ptr += input_row_size; + temp_4 = vld1_u8(ptr); + temp_5 = vld1_u8(ptr + input_depth); + + input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); + input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); + + input_0 = vaddq_s16(input_0, input_offset_vec); + input_1 = vaddq_s16(input_1, input_offset_vec); + input_4 = vaddq_s16(input_4, input_offset_vec); + input_5 = vaddq_s16(input_5, input_offset_vec); + input_8 = vaddq_s16(input_8, input_offset_vec); + input_9 = vaddq_s16(input_9, input_offset_vec); + } + + DotProductAndStore2xStride1( + filter, input_2, input_3, input_0, input_1, input_6, input_7, input_4, + input_5, input_10, input_11, input_8, input_9, bias_ptr, output_offset, + output_multiplier, output_shift, output_activation_min, + output_activation_max, output_ptr + 2 * output_depth, output_depth); + + // Now load next inputs when sliding window down. 
+ { + uint8x8_t temp_0, temp_1, temp_2, temp_3; + + const uint8* ptr = input_ptr + 2 * input_depth + 3 * input_row_size; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + temp_2 = vld1_u8(ptr + 2 * input_depth); + temp_3 = vld1_u8(ptr + 3 * input_depth); + + input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + + input_2 = vaddq_s16(input_2, input_offset_vec); + input_3 = vaddq_s16(input_3, input_offset_vec); + input_0 = vaddq_s16(input_0, input_offset_vec); + input_1 = vaddq_s16(input_1, input_offset_vec); + } + + DotProductAndStore2xStride1( + filter, input_6, input_7, input_4, input_5, input_10, input_11, input_8, + input_9, input_2, input_3, input_0, input_1, bias_ptr, output_offset, + output_multiplier, output_shift, output_activation_min, + output_activation_max, output_ptr + 2 * output_depth + output_row_size, + output_depth); + + // Now load next inputs when sliding window left. 
+ { + uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5; + + const uint8* ptr = input_ptr + input_row_size; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + + ptr += input_row_size; + temp_2 = vld1_u8(ptr); + temp_3 = vld1_u8(ptr + input_depth); + + ptr += input_row_size; + temp_4 = vld1_u8(ptr); + temp_5 = vld1_u8(ptr + input_depth); + + input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); + input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); + + input_4 = vaddq_s16(input_4, input_offset_vec); + input_5 = vaddq_s16(input_5, input_offset_vec); + input_8 = vaddq_s16(input_8, input_offset_vec); + input_9 = vaddq_s16(input_9, input_offset_vec); + input_0 = vaddq_s16(input_0, input_offset_vec); + input_1 = vaddq_s16(input_1, input_offset_vec); + } + + DotProductAndStore2xStride1( + filter, input_4, input_5, input_6, input_7, input_8, input_9, input_10, + input_11, input_0, input_1, input_2, input_3, bias_ptr, output_offset, + output_multiplier, output_shift, output_activation_min, + output_activation_max, output_ptr + output_row_size, output_depth); + + // Now load next inputs when sliding window down. 
+ { + uint8x8_t temp_0, temp_1, temp_2, temp_3; + + const uint8* ptr = input_ptr + 4 * input_row_size; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + temp_2 = vld1_u8(ptr + 2 * input_depth); + temp_3 = vld1_u8(ptr + 3 * input_depth); + + input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_6 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_7 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + + input_4 = vaddq_s16(input_4, input_offset_vec); + input_5 = vaddq_s16(input_5, input_offset_vec); + input_6 = vaddq_s16(input_6, input_offset_vec); + input_7 = vaddq_s16(input_7, input_offset_vec); + } + + DotProductAndStore2xStride1( + filter, input_8, input_9, input_10, input_11, input_0, input_1, input_2, + input_3, input_4, input_5, input_6, input_7, bias_ptr, output_offset, + output_multiplier, output_shift, output_activation_min, + output_activation_max, output_ptr + 2 * output_row_size, output_depth); + + // Now load next inputs when sliding window right. 
+ { + uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5; + + const uint8* ptr = input_ptr + 4 * input_depth + 2 * input_row_size; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + + ptr += input_row_size; + temp_2 = vld1_u8(ptr); + temp_3 = vld1_u8(ptr + input_depth); + + ptr += input_row_size; + temp_4 = vld1_u8(ptr); + temp_5 = vld1_u8(ptr + input_depth); + + input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); + input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); + + input_8 = vaddq_s16(input_8, input_offset_vec); + input_9 = vaddq_s16(input_9, input_offset_vec); + input_0 = vaddq_s16(input_0, input_offset_vec); + input_1 = vaddq_s16(input_1, input_offset_vec); + input_4 = vaddq_s16(input_4, input_offset_vec); + input_5 = vaddq_s16(input_5, input_offset_vec); + } + + DotProductAndStore2xStride1( + filter, input_10, input_11, input_8, input_9, input_2, input_3, input_0, + input_1, input_6, input_7, input_4, input_5, bias_ptr, output_offset, + output_multiplier, output_shift, output_activation_min, + output_activation_max, + output_ptr + 2 * output_depth + 2 * output_row_size, output_depth); + + // Now load next inputs when sliding window down. 
+ { + uint8x8_t temp_0, temp_1, temp_2, temp_3; + + const uint8* ptr = input_ptr + 2 * input_depth + 5 * input_row_size; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + temp_2 = vld1_u8(ptr + 2 * input_depth); + temp_3 = vld1_u8(ptr + 3 * input_depth); + + input_10 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_11 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + + input_10 = vaddq_s16(input_10, input_offset_vec); + input_11 = vaddq_s16(input_11, input_offset_vec); + input_8 = vaddq_s16(input_8, input_offset_vec); + input_9 = vaddq_s16(input_9, input_offset_vec); + } + + DotProductAndStore2xStride1( + filter, input_2, input_3, input_0, input_1, input_6, input_7, input_4, + input_5, input_10, input_11, input_8, input_9, bias_ptr, output_offset, + output_multiplier, output_shift, output_activation_min, + output_activation_max, + output_ptr + 2 * output_depth + 3 * output_row_size, output_depth); + + // Now load next inputs when sliding window left. 
+ { + uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5; + + const uint8* ptr = input_ptr + 3 * input_row_size; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + + ptr += input_row_size; + temp_2 = vld1_u8(ptr); + temp_3 = vld1_u8(ptr + input_depth); + + ptr += input_row_size; + temp_4 = vld1_u8(ptr); + temp_5 = vld1_u8(ptr + input_depth); + + input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); + input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); + + input_0 = vaddq_s16(input_0, input_offset_vec); + input_1 = vaddq_s16(input_1, input_offset_vec); + input_4 = vaddq_s16(input_4, input_offset_vec); + input_5 = vaddq_s16(input_5, input_offset_vec); + input_8 = vaddq_s16(input_8, input_offset_vec); + input_9 = vaddq_s16(input_9, input_offset_vec); + } + + DotProductAndStore2xStride1( + filter, input_0, input_1, input_2, input_3, input_4, input_5, input_6, + input_7, input_8, input_9, input_10, input_11, bias_ptr, output_offset, + output_multiplier, output_shift, output_activation_min, + output_activation_max, output_ptr + 3 * output_row_size, output_depth); + } +}; + /* ConvKernel3x3FilterDepth8<4, 2, 1, 1>: Run() makes four DotProductAndStore2xStride1 calls, advancing output_ptr by output_row_size (output_depth * output_width) after each of the first three. It starts from input rows 0-2 (four pixels per row; every pixel is one 8-byte vld1_u8 load widened to 16 bits with input_offset added), then reloads a single new input row (3, then 4, then 5) per step and rotates which register group is passed as the top, middle and bottom row. NOTE(review): the template arguments presumably mean <output height, output width, stride width, stride height>; the primary template is not visible here, so confirm. */ +template <> +struct ConvKernel3x3FilterDepth8<4, 2, 1, 1> { + static inline void Run(const uint8* input_ptr, int input_depth, + int32 input_offset, int input_row_size, + const uint8* filter_ptr, int32 filter_offset, + const int32* bias_ptr, int32 output_offset, + int32 output_multiplier, int output_shift, + int32 output_activation_min, + int32 output_activation_max, uint8* output_ptr, + int output_depth, int output_width) { + Filter3x3x8 filter = Load3x3Filter(filter_ptr, filter_offset, output_depth); + + const int16x8_t input_offset_vec = vdupq_n_s16(input_offset); + const int output_row_size = output_depth * output_width; + + int16x8_t input_0, input_1, input_2, input_3, 
input_4, input_5, input_6, + input_7, input_8, input_9, input_10, input_11; + + // Load inputs for 1x2 outputs starting from the top. + { + uint8x8_t temp_0, temp_1, temp_2, temp_3; + + const uint8* ptr = input_ptr; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + temp_2 = vld1_u8(ptr + 2 * input_depth); + temp_3 = vld1_u8(ptr + 3 * input_depth); + + input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + + input_0 = vaddq_s16(input_0, input_offset_vec); + input_1 = vaddq_s16(input_1, input_offset_vec); + input_2 = vaddq_s16(input_2, input_offset_vec); + input_3 = vaddq_s16(input_3, input_offset_vec); + + ptr += input_row_size; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + temp_2 = vld1_u8(ptr + 2 * input_depth); + temp_3 = vld1_u8(ptr + 3 * input_depth); + + input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_6 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_7 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + + input_4 = vaddq_s16(input_4, input_offset_vec); + input_5 = vaddq_s16(input_5, input_offset_vec); + input_6 = vaddq_s16(input_6, input_offset_vec); + input_7 = vaddq_s16(input_7, input_offset_vec); + + ptr += input_row_size; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + temp_2 = vld1_u8(ptr + 2 * input_depth); + temp_3 = vld1_u8(ptr + 3 * input_depth); + + input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_10 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_11 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + + input_8 = vaddq_s16(input_8, input_offset_vec); + input_9 = vaddq_s16(input_9, input_offset_vec); + input_10 = vaddq_s16(input_10, input_offset_vec); + input_11 = vaddq_s16(input_11, input_offset_vec); + } + + 
DotProductAndStore2xStride1( + filter, input_0, input_1, input_2, input_3, input_4, input_5, input_6, + input_7, input_8, input_9, input_10, input_11, bias_ptr, output_offset, + output_multiplier, output_shift, output_activation_min, + output_activation_max, output_ptr, output_depth); + + output_ptr += output_row_size; + + // Now load next inputs one row down. + { + uint8x8_t temp_0, temp_1, temp_2, temp_3; + + const uint8* ptr = input_ptr + 3 * input_row_size; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + temp_2 = vld1_u8(ptr + 2 * input_depth); + temp_3 = vld1_u8(ptr + 3 * input_depth); + + input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + + input_0 = vaddq_s16(input_0, input_offset_vec); + input_1 = vaddq_s16(input_1, input_offset_vec); + input_2 = vaddq_s16(input_2, input_offset_vec); + input_3 = vaddq_s16(input_3, input_offset_vec); + } + + DotProductAndStore2xStride1( + filter, input_4, input_5, input_6, input_7, input_8, input_9, input_10, + input_11, input_0, input_1, input_2, input_3, bias_ptr, output_offset, + output_multiplier, output_shift, output_activation_min, + output_activation_max, output_ptr, output_depth); + + output_ptr += output_row_size; + + // Now load next row. 
+ { + uint8x8_t temp_0, temp_1, temp_2, temp_3; + + const uint8* ptr = input_ptr + 4 * input_row_size; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + temp_2 = vld1_u8(ptr + 2 * input_depth); + temp_3 = vld1_u8(ptr + 3 * input_depth); + + input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_6 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_7 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + + input_4 = vaddq_s16(input_4, input_offset_vec); + input_5 = vaddq_s16(input_5, input_offset_vec); + input_6 = vaddq_s16(input_6, input_offset_vec); + input_7 = vaddq_s16(input_7, input_offset_vec); + } + + DotProductAndStore2xStride1( + filter, input_8, input_9, input_10, input_11, input_0, input_1, input_2, + input_3, input_4, input_5, input_6, input_7, bias_ptr, output_offset, + output_multiplier, output_shift, output_activation_min, + output_activation_max, output_ptr, output_depth); + + output_ptr += output_row_size; + + // Now load last row. 
+ { + uint8x8_t temp_0, temp_1, temp_2, temp_3; + + const uint8* ptr = input_ptr + 5 * input_row_size; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + temp_2 = vld1_u8(ptr + 2 * input_depth); + temp_3 = vld1_u8(ptr + 3 * input_depth); + + input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_10 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_11 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + + input_8 = vaddq_s16(input_8, input_offset_vec); + input_9 = vaddq_s16(input_9, input_offset_vec); + input_10 = vaddq_s16(input_10, input_offset_vec); + input_11 = vaddq_s16(input_11, input_offset_vec); + } + + DotProductAndStore2xStride1( + filter, input_0, input_1, input_2, input_3, input_4, input_5, input_6, + input_7, input_8, input_9, input_10, input_11, bias_ptr, output_offset, + output_multiplier, output_shift, output_activation_min, + output_activation_max, output_ptr, output_depth); + } +}; + /* ConvKernel3x3FilterDepth8<4, 1, 1, 1>: Run() loads input rows 0-3 (three pixels per row) into input_0..input_11 and calls DotProductAndStore2yStride1 at output_ptr, then reloads input_0..input_5 from input rows 4-5 and calls it again two output rows further down, passing the registers that hold rows 2-5. NOTE(review): each call presumably stores two vertically adjacent outputs, output_row_size apart; confirm against DotProductAndStore2yStride1, which is defined outside this excerpt. */ +template <> +struct ConvKernel3x3FilterDepth8<4, 1, 1, 1> { + static inline void Run(const uint8* input_ptr, int input_depth, + int32 input_offset, int input_row_size, + const uint8* filter_ptr, int32 filter_offset, + const int32* bias_ptr, int32 output_offset, + int32 output_multiplier, int output_shift, + int32 output_activation_min, + int32 output_activation_max, uint8* output_ptr, + int output_depth, int output_width) { + Filter3x3x8 filter = Load3x3Filter(filter_ptr, filter_offset, output_depth); + + const int16x8_t input_offset_vec = vdupq_n_s16(input_offset); + const int output_row_size = output_depth * output_width; + + int16x8_t input_0, input_1, input_2, input_3, input_4, input_5, input_6, + input_7, input_8, input_9, input_10, input_11; + + // Load inputs for 2x1 outputs starting from the top. 
+ { + uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5; + + const uint8* ptr = input_ptr; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + temp_2 = vld1_u8(ptr + 2 * input_depth); + ptr += input_row_size; + temp_3 = vld1_u8(ptr); + temp_4 = vld1_u8(ptr + input_depth); + temp_5 = vld1_u8(ptr + 2 * input_depth); + + input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); + input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); + + input_0 = vaddq_s16(input_0, input_offset_vec); + input_1 = vaddq_s16(input_1, input_offset_vec); + input_2 = vaddq_s16(input_2, input_offset_vec); + input_3 = vaddq_s16(input_3, input_offset_vec); + input_4 = vaddq_s16(input_4, input_offset_vec); + input_5 = vaddq_s16(input_5, input_offset_vec); + + ptr += input_row_size; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + temp_2 = vld1_u8(ptr + 2 * input_depth); + ptr += input_row_size; + temp_3 = vld1_u8(ptr); + temp_4 = vld1_u8(ptr + input_depth); + temp_5 = vld1_u8(ptr + 2 * input_depth); + + input_6 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_7 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + input_10 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); + input_11 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); + + input_6 = vaddq_s16(input_6, input_offset_vec); + input_7 = vaddq_s16(input_7, input_offset_vec); + input_8 = vaddq_s16(input_8, input_offset_vec); + input_9 = vaddq_s16(input_9, input_offset_vec); + input_10 = vaddq_s16(input_10, input_offset_vec); + input_11 = vaddq_s16(input_11, input_offset_vec); + } + + DotProductAndStore2yStride1( + filter, input_0, input_1, input_2, input_3, input_4, input_5, input_6, + input_7, input_8, input_9, 
input_10, input_11, bias_ptr, output_offset, + output_multiplier, output_shift, output_activation_min, + output_activation_max, output_ptr, output_row_size); + + // Load inputs for bottom 2 rows. + { + uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5; + + const uint8* ptr = input_ptr + 4 * input_row_size; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + temp_2 = vld1_u8(ptr + 2 * input_depth); + ptr += input_row_size; + temp_3 = vld1_u8(ptr); + temp_4 = vld1_u8(ptr + input_depth); + temp_5 = vld1_u8(ptr + 2 * input_depth); + + input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); + input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); + + input_0 = vaddq_s16(input_0, input_offset_vec); + input_1 = vaddq_s16(input_1, input_offset_vec); + input_2 = vaddq_s16(input_2, input_offset_vec); + input_3 = vaddq_s16(input_3, input_offset_vec); + input_4 = vaddq_s16(input_4, input_offset_vec); + input_5 = vaddq_s16(input_5, input_offset_vec); + } + + DotProductAndStore2yStride1( + filter, input_6, input_7, input_8, input_9, input_10, input_11, input_0, + input_1, input_2, input_3, input_4, input_5, bias_ptr, output_offset, + output_multiplier, output_shift, output_activation_min, + output_activation_max, output_ptr + 2 * output_row_size, + output_row_size); + } +}; + /* ConvKernel3x3FilterDepth8<2, 2, 1, 1>: Run() seeds four Int32x8 accumulators from the eight int32 values at bias_ptr, loads a patch of four input rows by four pixels (rows 0-2 first, row 3 later into input_0..input_3), runs MultiplyAccumulate3x3Filter once per output (top-left, top-right, bottom-left, bottom-right) and passes all four accumulators to DownquantizeAndStore2x2Output. */ +template <> +struct ConvKernel3x3FilterDepth8<2, 2, 1, 1> { + static inline void Run(const uint8* input_ptr, int input_depth, + int32 input_offset, int input_row_size, + const uint8* filter_ptr, int32 filter_offset, + const int32* bias_ptr, int32 output_offset, + int32 output_multiplier, int output_shift, + int32 output_activation_min, + int32 output_activation_max, uint8* output_ptr, + int output_depth, int output_width) { + Filter3x3x8 filter = Load3x3Filter(filter_ptr, filter_offset, 
output_depth); + + Int32x8 acc_0, acc_1, acc_2, acc_3; + + acc_0.low = vld1q_s32(bias_ptr); + acc_1.low = vld1q_s32(bias_ptr); + acc_2.low = vld1q_s32(bias_ptr); + acc_3.low = vld1q_s32(bias_ptr); + + bias_ptr += 4; + acc_0.high = vld1q_s32(bias_ptr); + acc_1.high = vld1q_s32(bias_ptr); + acc_2.high = vld1q_s32(bias_ptr); + acc_3.high = vld1q_s32(bias_ptr); + + const int16x8_t input_offset_vec = vdupq_n_s16(input_offset); + + // Add scope for input registers to help the compiler know that it is + // not needed. + { + // To process 2x2 outputs using a 3x3 filter, we require 4x4 inputs. + // Load inputs for the top two filters first. + int16x8_t input_0, input_1, input_2, input_3, input_4, input_5, input_6, + input_7, input_8, input_9, input_10, input_11; + + const uint8* ptr = input_ptr; + + // Load top 3 rows. + { + uint8x8_t temp_0, temp_1, temp_2, temp_3; + + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + temp_2 = vld1_u8(ptr + 2 * input_depth); + temp_3 = vld1_u8(ptr + 3 * input_depth); + + input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + + input_0 = vaddq_s16(input_0, input_offset_vec); + input_1 = vaddq_s16(input_1, input_offset_vec); + input_2 = vaddq_s16(input_2, input_offset_vec); + input_3 = vaddq_s16(input_3, input_offset_vec); + + ptr += input_row_size; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + temp_2 = vld1_u8(ptr + 2 * input_depth); + temp_3 = vld1_u8(ptr + 3 * input_depth); + + input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_6 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_7 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + + input_4 = vaddq_s16(input_4, input_offset_vec); + input_5 = vaddq_s16(input_5, input_offset_vec); + input_6 = vaddq_s16(input_6, input_offset_vec); + input_7 = 
vaddq_s16(input_7, input_offset_vec); + + ptr += input_row_size; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + temp_2 = vld1_u8(ptr + 2 * input_depth); + temp_3 = vld1_u8(ptr + 3 * input_depth); + + input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_10 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_11 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + + input_8 = vaddq_s16(input_8, input_offset_vec); + input_9 = vaddq_s16(input_9, input_offset_vec); + input_10 = vaddq_s16(input_10, input_offset_vec); + input_11 = vaddq_s16(input_11, input_offset_vec); + } + + // Multiply-accum for top-left output. + acc_0 = MultiplyAccumulate3x3Filter(filter, input_0, input_1, input_2, + input_4, input_5, input_6, input_8, + input_9, input_10, acc_0); + + // Multiply-accum for top-right output. + acc_1 = MultiplyAccumulate3x3Filter(filter, input_1, input_2, input_3, + input_5, input_6, input_7, input_9, + input_10, input_11, acc_1); + + // Now load the bottom row. + { + uint8x8_t temp_0, temp_1, temp_2, temp_3; + + ptr += input_row_size; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + temp_2 = vld1_u8(ptr + 2 * input_depth); + temp_3 = vld1_u8(ptr + 3 * input_depth); + + input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + + input_0 = vaddq_s16(input_0, input_offset_vec); + input_1 = vaddq_s16(input_1, input_offset_vec); + input_2 = vaddq_s16(input_2, input_offset_vec); + input_3 = vaddq_s16(input_3, input_offset_vec); + } + + // Multiply-accum for bottom-left output. + acc_2 = MultiplyAccumulate3x3Filter(filter, input_4, input_5, input_6, + input_8, input_9, input_10, input_0, + input_1, input_2, acc_2); + + // Multiply-accum for bottom-right output. 
+ acc_3 = MultiplyAccumulate3x3Filter(filter, input_5, input_6, input_7, + input_9, input_10, input_11, input_1, + input_2, input_3, acc_3); + } + + DownquantizeAndStore2x2Output(acc_0, acc_1, acc_2, acc_3, output_offset, + output_multiplier, output_shift, + output_activation_min, output_activation_max, + output_ptr, output_depth, output_width); + } +}; + /* ConvKernel3x3FilterDepth8<2, 4, 1, 1>: Run() makes four DotProductAndStore2xStride1 calls whose destinations are, in order, output_ptr, two pixels to the right (2 * output_depth), that same column one output row down, and the first column one output row down. Between calls only the pixels not already in registers are loaded (pixel columns 4-5 of rows 0-2, then pixel columns 2-5 of row 3, then pixel columns 0-1 of rows 1-3) and the register arguments are permuted to match. */ +template <> +struct ConvKernel3x3FilterDepth8<2, 4, 1, 1> { + static inline void Run(const uint8* input_ptr, int input_depth, + int32 input_offset, int input_row_size, + const uint8* filter_ptr, int32 filter_offset, + const int32* bias_ptr, int32 output_offset, + int32 output_multiplier, int output_shift, + int32 output_activation_min, + int32 output_activation_max, uint8* output_ptr, + int output_depth, int output_width) { + Filter3x3x8 filter = Load3x3Filter(filter_ptr, filter_offset, output_depth); + + const int16x8_t input_offset_vec = vdupq_n_s16(input_offset); + const int output_row_size = output_depth * output_width; + + int16x8_t input_0, input_1, input_2, input_3, input_4, input_5, input_6, + input_7, input_8, input_9, input_10, input_11; + + // Load inputs for 1x2 outputs starting from the top left. 
+ { + uint8x8_t temp_0, temp_1, temp_2, temp_3; + + const uint8* ptr = input_ptr; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + temp_2 = vld1_u8(ptr + 2 * input_depth); + temp_3 = vld1_u8(ptr + 3 * input_depth); + + input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + + input_0 = vaddq_s16(input_0, input_offset_vec); + input_1 = vaddq_s16(input_1, input_offset_vec); + input_2 = vaddq_s16(input_2, input_offset_vec); + input_3 = vaddq_s16(input_3, input_offset_vec); + + ptr += input_row_size; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + temp_2 = vld1_u8(ptr + 2 * input_depth); + temp_3 = vld1_u8(ptr + 3 * input_depth); + + input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_6 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_7 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + + input_4 = vaddq_s16(input_4, input_offset_vec); + input_5 = vaddq_s16(input_5, input_offset_vec); + input_6 = vaddq_s16(input_6, input_offset_vec); + input_7 = vaddq_s16(input_7, input_offset_vec); + + ptr += input_row_size; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + temp_2 = vld1_u8(ptr + 2 * input_depth); + temp_3 = vld1_u8(ptr + 3 * input_depth); + + input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_10 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_11 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + + input_8 = vaddq_s16(input_8, input_offset_vec); + input_9 = vaddq_s16(input_9, input_offset_vec); + input_10 = vaddq_s16(input_10, input_offset_vec); + input_11 = vaddq_s16(input_11, input_offset_vec); + } + + DotProductAndStore2xStride1( + filter, input_0, input_1, input_2, input_3, input_4, input_5, input_6, + input_7, input_8, input_9, input_10, input_11, 
bias_ptr, output_offset, + output_multiplier, output_shift, output_activation_min, + output_activation_max, output_ptr, output_depth); + + // Now load 1x2 inputs on the top right. + { + uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5; + + const uint8* ptr = input_ptr + 4 * input_depth; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + + ptr += input_row_size; + temp_2 = vld1_u8(ptr); + temp_3 = vld1_u8(ptr + input_depth); + + ptr += input_row_size; + temp_4 = vld1_u8(ptr); + temp_5 = vld1_u8(ptr + input_depth); + + input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); + input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); + + input_0 = vaddq_s16(input_0, input_offset_vec); + input_1 = vaddq_s16(input_1, input_offset_vec); + input_4 = vaddq_s16(input_4, input_offset_vec); + input_5 = vaddq_s16(input_5, input_offset_vec); + input_8 = vaddq_s16(input_8, input_offset_vec); + input_9 = vaddq_s16(input_9, input_offset_vec); + } + + DotProductAndStore2xStride1( + filter, input_2, input_3, input_0, input_1, input_6, input_7, input_4, + input_5, input_10, input_11, input_8, input_9, bias_ptr, output_offset, + output_multiplier, output_shift, output_activation_min, + output_activation_max, output_ptr + 2 * output_depth, output_depth); + + // Now load next inputs when sliding window down. 
+ { + uint8x8_t temp_0, temp_1, temp_2, temp_3; + + const uint8* ptr = input_ptr + 2 * input_depth + 3 * input_row_size; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + temp_2 = vld1_u8(ptr + 2 * input_depth); + temp_3 = vld1_u8(ptr + 3 * input_depth); + + input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + + input_2 = vaddq_s16(input_2, input_offset_vec); + input_3 = vaddq_s16(input_3, input_offset_vec); + input_0 = vaddq_s16(input_0, input_offset_vec); + input_1 = vaddq_s16(input_1, input_offset_vec); + } + + DotProductAndStore2xStride1( + filter, input_6, input_7, input_4, input_5, input_10, input_11, input_8, + input_9, input_2, input_3, input_0, input_1, bias_ptr, output_offset, + output_multiplier, output_shift, output_activation_min, + output_activation_max, output_ptr + 2 * output_depth + output_row_size, + output_depth); + + // Now load next inputs when sliding window left. 
+ { + uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5; + + const uint8* ptr = input_ptr + input_row_size; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + + ptr += input_row_size; + temp_2 = vld1_u8(ptr); + temp_3 = vld1_u8(ptr + input_depth); + + ptr += input_row_size; + temp_4 = vld1_u8(ptr); + temp_5 = vld1_u8(ptr + input_depth); + + input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); + input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); + + input_4 = vaddq_s16(input_4, input_offset_vec); + input_5 = vaddq_s16(input_5, input_offset_vec); + input_8 = vaddq_s16(input_8, input_offset_vec); + input_9 = vaddq_s16(input_9, input_offset_vec); + input_0 = vaddq_s16(input_0, input_offset_vec); + input_1 = vaddq_s16(input_1, input_offset_vec); + } + + DotProductAndStore2xStride1( + filter, input_4, input_5, input_6, input_7, input_8, input_9, input_10, + input_11, input_0, input_1, input_2, input_3, bias_ptr, output_offset, + output_multiplier, output_shift, output_activation_min, + output_activation_max, output_ptr + output_row_size, output_depth); + } +}; + /* ConvKernel3x3FilterDepth8<1, 4, 1, 1>: Run() loads input rows 0-2 (pixel columns 0-3) and calls DotProductAndStore2xStride1 at output_ptr, then reloads input_0/1, input_4/5 and input_8/9 from pixel columns 4-5 of the same rows and calls it again 2 * output_depth further along, passing the registers that hold pixel columns 2-5. The output_width parameter is not read by this specialization. */ +template <> +struct ConvKernel3x3FilterDepth8<1, 4, 1, 1> { + static inline void Run(const uint8* input_ptr, int input_depth, + int32 input_offset, int input_row_size, + const uint8* filter_ptr, int32 filter_offset, + const int32* bias_ptr, int32 output_offset, + int32 output_multiplier, int output_shift, + int32 output_activation_min, + int32 output_activation_max, uint8* output_ptr, + int output_depth, int output_width) { + Filter3x3x8 filter = Load3x3Filter(filter_ptr, filter_offset, output_depth); + + const int16x8_t input_offset_vec = vdupq_n_s16(input_offset); + + int16x8_t input_0, input_1, input_2, input_3, input_4, input_5, input_6, + input_7, input_8, input_9, input_10, 
input_11; + + // Load inputs for 1x2 outputs starting from the left. + { + uint8x8_t temp_0, temp_1, temp_2, temp_3; + + const uint8* ptr = input_ptr; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + temp_2 = vld1_u8(ptr + 2 * input_depth); + temp_3 = vld1_u8(ptr + 3 * input_depth); + + input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + + input_0 = vaddq_s16(input_0, input_offset_vec); + input_1 = vaddq_s16(input_1, input_offset_vec); + input_2 = vaddq_s16(input_2, input_offset_vec); + input_3 = vaddq_s16(input_3, input_offset_vec); + + ptr += input_row_size; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + temp_2 = vld1_u8(ptr + 2 * input_depth); + temp_3 = vld1_u8(ptr + 3 * input_depth); + + input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_6 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_7 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + + input_4 = vaddq_s16(input_4, input_offset_vec); + input_5 = vaddq_s16(input_5, input_offset_vec); + input_6 = vaddq_s16(input_6, input_offset_vec); + input_7 = vaddq_s16(input_7, input_offset_vec); + + ptr += input_row_size; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + temp_2 = vld1_u8(ptr + 2 * input_depth); + temp_3 = vld1_u8(ptr + 3 * input_depth); + + input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_10 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_11 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + + input_8 = vaddq_s16(input_8, input_offset_vec); + input_9 = vaddq_s16(input_9, input_offset_vec); + input_10 = vaddq_s16(input_10, input_offset_vec); + input_11 = vaddq_s16(input_11, input_offset_vec); + } + + DotProductAndStore2xStride1( + filter, input_0, input_1, input_2, input_3, 
input_4, input_5, input_6, + input_7, input_8, input_9, input_10, input_11, bias_ptr, output_offset, + output_multiplier, output_shift, output_activation_min, + output_activation_max, output_ptr, output_depth); + + // Now load 1x2 inputs on the right. + { + uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5; + + const uint8* ptr = input_ptr + input_depth * 4; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + + ptr += input_row_size; + temp_2 = vld1_u8(ptr); + temp_3 = vld1_u8(ptr + input_depth); + + ptr += input_row_size; + temp_4 = vld1_u8(ptr); + temp_5 = vld1_u8(ptr + input_depth); + + input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); + input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); + + input_0 = vaddq_s16(input_0, input_offset_vec); + input_1 = vaddq_s16(input_1, input_offset_vec); + input_4 = vaddq_s16(input_4, input_offset_vec); + input_5 = vaddq_s16(input_5, input_offset_vec); + input_8 = vaddq_s16(input_8, input_offset_vec); + input_9 = vaddq_s16(input_9, input_offset_vec); + } + + DotProductAndStore2xStride1( + filter, input_2, input_3, input_0, input_1, input_6, input_7, input_4, + input_5, input_10, input_11, input_8, input_9, bias_ptr, output_offset, + output_multiplier, output_shift, output_activation_min, + output_activation_max, output_ptr + 2 * output_depth, output_depth); + } +}; + /* ConvKernel3x3FilterDepth8<2, 1, 1, 1>: Run() loads a patch of four input rows by three pixels into input_0..input_11 up front and makes one DotProductAndStore2yStride1 call at output_ptr, passing output_depth * output_width as the last argument (presumably the distance between the two stored output rows; confirm against the helper). */ +template <> +struct ConvKernel3x3FilterDepth8<2, 1, 1, 1> { + static inline void Run(const uint8* input_ptr, int input_depth, + int32 input_offset, int input_row_size, + const uint8* filter_ptr, int32 filter_offset, + const int32* bias_ptr, int32 output_offset, + int32 output_multiplier, int output_shift, + int32 output_activation_min, + int32 output_activation_max, uint8* output_ptr, + int output_depth, int output_width) { + 
Filter3x3x8 filter = Load3x3Filter(filter_ptr, filter_offset, output_depth); + + // To process 2x1 outputs using a 3x3 filter, we require 4x3 inputs. + // Load all inputs at the beginning. + int16x8_t input_0, input_1, input_2, input_3, input_4, input_5, input_6, + input_7, input_8, input_9, input_10, input_11; + + // Load the 4x3 inputs for the 2x1 outputs, starting from the top row. + { + const int16x8_t input_offset_vec = vdupq_n_s16(input_offset); + uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5; + + const uint8* ptr = input_ptr; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + temp_2 = vld1_u8(ptr + 2 * input_depth); + ptr += input_row_size; + temp_3 = vld1_u8(ptr); + temp_4 = vld1_u8(ptr + input_depth); + temp_5 = vld1_u8(ptr + 2 * input_depth); + + input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); + input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); + + input_0 = vaddq_s16(input_0, input_offset_vec); + input_1 = vaddq_s16(input_1, input_offset_vec); + input_2 = vaddq_s16(input_2, input_offset_vec); + input_3 = vaddq_s16(input_3, input_offset_vec); + input_4 = vaddq_s16(input_4, input_offset_vec); + input_5 = vaddq_s16(input_5, input_offset_vec); + + ptr += input_row_size; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + temp_2 = vld1_u8(ptr + 2 * input_depth); + ptr += input_row_size; + temp_3 = vld1_u8(ptr); + temp_4 = vld1_u8(ptr + input_depth); + temp_5 = vld1_u8(ptr + 2 * input_depth); + + input_6 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_7 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + input_10 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); + input_11 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); + + input_6 
= vaddq_s16(input_6, input_offset_vec); + input_7 = vaddq_s16(input_7, input_offset_vec); + input_8 = vaddq_s16(input_8, input_offset_vec); + input_9 = vaddq_s16(input_9, input_offset_vec); + input_10 = vaddq_s16(input_10, input_offset_vec); + input_11 = vaddq_s16(input_11, input_offset_vec); + } + + DotProductAndStore2yStride1( + filter, input_0, input_1, input_2, input_3, input_4, input_5, input_6, + input_7, input_8, input_9, input_10, input_11, bias_ptr, output_offset, + output_multiplier, output_shift, output_activation_min, + output_activation_max, output_ptr, output_depth * output_width); + } +}; + +template <> +struct ConvKernel3x3FilterDepth8<4, 2, 2, 2> { + static inline void Run(const uint8* input_ptr, int input_depth, + int32 input_offset, int input_row_size, + const uint8* filter_ptr, int32 filter_offset, + const int32* bias_ptr, int32 output_offset, + int32 output_multiplier, int output_shift, + int32 output_activation_min, + int32 output_activation_max, uint8* output_ptr, + int output_depth, int output_width) { + const int output_row_size = output_depth * output_width; + + Filter3x3x8 filter = Load3x3Filter(filter_ptr, filter_offset, output_depth); + + Int32x8 acc_0, acc_1; + acc_0.low = vld1q_s32(bias_ptr); + acc_1.low = vld1q_s32(bias_ptr); + acc_0.high = vld1q_s32(bias_ptr + 4); + acc_1.high = vld1q_s32(bias_ptr + 4); + + const int16x8_t input_offset_vec = vdupq_n_s16(input_offset); + + int16x8_t input_0, input_1, input_2, input_3, input_4, input_5, input_6, + input_7, input_8, input_9; + + const uint8* ptr = input_ptr; + uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4; + + // Load first 2 rows. 
+ temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + temp_2 = vld1_u8(ptr + 2 * input_depth); + temp_3 = vld1_u8(ptr + 3 * input_depth); + temp_4 = vld1_u8(ptr + 4 * input_depth); + + input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); + + input_0 = vaddq_s16(input_0, input_offset_vec); + input_1 = vaddq_s16(input_1, input_offset_vec); + input_2 = vaddq_s16(input_2, input_offset_vec); + input_3 = vaddq_s16(input_3, input_offset_vec); + input_4 = vaddq_s16(input_4, input_offset_vec); + + ptr += input_row_size; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + temp_2 = vld1_u8(ptr + 2 * input_depth); + temp_3 = vld1_u8(ptr + 3 * input_depth); + temp_4 = vld1_u8(ptr + 4 * input_depth); + + input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_6 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_7 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); + + input_5 = vaddq_s16(input_5, input_offset_vec); + input_6 = vaddq_s16(input_6, input_offset_vec); + input_7 = vaddq_s16(input_7, input_offset_vec); + input_8 = vaddq_s16(input_8, input_offset_vec); + input_9 = vaddq_s16(input_9, input_offset_vec); + + acc_0 = MultiplyAccumulateRow(acc_0, filter.f0, filter.f1, filter.f2, + input_0, input_1, input_2); + + acc_1 = MultiplyAccumulateRow(acc_1, filter.f0, filter.f1, filter.f2, + input_2, input_3, input_4); + + acc_0 = MultiplyAccumulateRow(acc_0, filter.f3, filter.f4, filter.f5, + input_5, input_6, input_7); + + acc_1 = MultiplyAccumulateRow(acc_1, filter.f3, filter.f4, filter.f5, + input_7, input_8, input_9); + + // Load next 2 rows. 
+ ptr += input_row_size; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + temp_2 = vld1_u8(ptr + 2 * input_depth); + temp_3 = vld1_u8(ptr + 3 * input_depth); + temp_4 = vld1_u8(ptr + 4 * input_depth); + + input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); + + input_0 = vaddq_s16(input_0, input_offset_vec); + input_1 = vaddq_s16(input_1, input_offset_vec); + input_2 = vaddq_s16(input_2, input_offset_vec); + input_3 = vaddq_s16(input_3, input_offset_vec); + input_4 = vaddq_s16(input_4, input_offset_vec); + + ptr += input_row_size; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + temp_2 = vld1_u8(ptr + 2 * input_depth); + temp_3 = vld1_u8(ptr + 3 * input_depth); + temp_4 = vld1_u8(ptr + 4 * input_depth); + + input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_6 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_7 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); + + input_5 = vaddq_s16(input_5, input_offset_vec); + input_6 = vaddq_s16(input_6, input_offset_vec); + input_7 = vaddq_s16(input_7, input_offset_vec); + input_8 = vaddq_s16(input_8, input_offset_vec); + input_9 = vaddq_s16(input_9, input_offset_vec); + + acc_0 = MultiplyAccumulateRow(acc_0, filter.f6, filter.f7, filter.f8, + input_0, input_1, input_2); + + acc_1 = MultiplyAccumulateRow(acc_1, filter.f6, filter.f7, filter.f8, + input_2, input_3, input_4); + + DownquantizeAndStore2Output( + acc_0, acc_1, output_offset, output_multiplier, output_shift, + output_activation_min, output_activation_max, output_ptr, output_depth); + + output_ptr += output_row_size; + + // Moving onto the next row of outputs. 
+ acc_0.low = vld1q_s32(bias_ptr); + acc_1.low = vld1q_s32(bias_ptr); + acc_0.high = vld1q_s32(bias_ptr + 4); + acc_1.high = vld1q_s32(bias_ptr + 4); + + acc_0 = MultiplyAccumulateRow(acc_0, filter.f0, filter.f1, filter.f2, + input_0, input_1, input_2); + + acc_1 = MultiplyAccumulateRow(acc_1, filter.f0, filter.f1, filter.f2, + input_2, input_3, input_4); + + acc_0 = MultiplyAccumulateRow(acc_0, filter.f3, filter.f4, filter.f5, + input_5, input_6, input_7); + + acc_1 = MultiplyAccumulateRow(acc_1, filter.f3, filter.f4, filter.f5, + input_7, input_8, input_9); + + // Load next 2 rows. + ptr += input_row_size; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + temp_2 = vld1_u8(ptr + 2 * input_depth); + temp_3 = vld1_u8(ptr + 3 * input_depth); + temp_4 = vld1_u8(ptr + 4 * input_depth); + + input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); + + input_0 = vaddq_s16(input_0, input_offset_vec); + input_1 = vaddq_s16(input_1, input_offset_vec); + input_2 = vaddq_s16(input_2, input_offset_vec); + input_3 = vaddq_s16(input_3, input_offset_vec); + input_4 = vaddq_s16(input_4, input_offset_vec); + + ptr += input_row_size; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + temp_2 = vld1_u8(ptr + 2 * input_depth); + temp_3 = vld1_u8(ptr + 3 * input_depth); + temp_4 = vld1_u8(ptr + 4 * input_depth); + + input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_6 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_7 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); + + input_5 = vaddq_s16(input_5, input_offset_vec); + input_6 = vaddq_s16(input_6, input_offset_vec); + input_7 = vaddq_s16(input_7, input_offset_vec); + input_8 = vaddq_s16(input_8, 
input_offset_vec); + input_9 = vaddq_s16(input_9, input_offset_vec); + + acc_0 = MultiplyAccumulateRow(acc_0, filter.f6, filter.f7, filter.f8, + input_0, input_1, input_2); + + acc_1 = MultiplyAccumulateRow(acc_1, filter.f6, filter.f7, filter.f8, + input_2, input_3, input_4); + + DownquantizeAndStore2Output( + acc_0, acc_1, output_offset, output_multiplier, output_shift, + output_activation_min, output_activation_max, output_ptr, output_depth); + + output_ptr += output_row_size; + + // Moving onto the next row of outputs. + acc_0.low = vld1q_s32(bias_ptr); + acc_1.low = vld1q_s32(bias_ptr); + acc_0.high = vld1q_s32(bias_ptr + 4); + acc_1.high = vld1q_s32(bias_ptr + 4); + + acc_0 = MultiplyAccumulateRow(acc_0, filter.f0, filter.f1, filter.f2, + input_0, input_1, input_2); + + acc_1 = MultiplyAccumulateRow(acc_1, filter.f0, filter.f1, filter.f2, + input_2, input_3, input_4); + + acc_0 = MultiplyAccumulateRow(acc_0, filter.f3, filter.f4, filter.f5, + input_5, input_6, input_7); + + acc_1 = MultiplyAccumulateRow(acc_1, filter.f3, filter.f4, filter.f5, + input_7, input_8, input_9); + + // Load next 2 rows. 
+ ptr += input_row_size; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + temp_2 = vld1_u8(ptr + 2 * input_depth); + temp_3 = vld1_u8(ptr + 3 * input_depth); + temp_4 = vld1_u8(ptr + 4 * input_depth); + + input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); + + input_0 = vaddq_s16(input_0, input_offset_vec); + input_1 = vaddq_s16(input_1, input_offset_vec); + input_2 = vaddq_s16(input_2, input_offset_vec); + input_3 = vaddq_s16(input_3, input_offset_vec); + input_4 = vaddq_s16(input_4, input_offset_vec); + + ptr += input_row_size; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + temp_2 = vld1_u8(ptr + 2 * input_depth); + temp_3 = vld1_u8(ptr + 3 * input_depth); + temp_4 = vld1_u8(ptr + 4 * input_depth); + + input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_6 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_7 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); + + input_5 = vaddq_s16(input_5, input_offset_vec); + input_6 = vaddq_s16(input_6, input_offset_vec); + input_7 = vaddq_s16(input_7, input_offset_vec); + input_8 = vaddq_s16(input_8, input_offset_vec); + input_9 = vaddq_s16(input_9, input_offset_vec); + + acc_0 = MultiplyAccumulateRow(acc_0, filter.f6, filter.f7, filter.f8, + input_0, input_1, input_2); + + acc_1 = MultiplyAccumulateRow(acc_1, filter.f6, filter.f7, filter.f8, + input_2, input_3, input_4); + + DownquantizeAndStore2Output( + acc_0, acc_1, output_offset, output_multiplier, output_shift, + output_activation_min, output_activation_max, output_ptr, output_depth); + + output_ptr += output_row_size; + + // Moving onto the next row of outputs. 
+ acc_0.low = vld1q_s32(bias_ptr); + acc_1.low = vld1q_s32(bias_ptr); + acc_0.high = vld1q_s32(bias_ptr + 4); + acc_1.high = vld1q_s32(bias_ptr + 4); + + acc_0 = MultiplyAccumulateRow(acc_0, filter.f0, filter.f1, filter.f2, + input_0, input_1, input_2); + + acc_1 = MultiplyAccumulateRow(acc_1, filter.f0, filter.f1, filter.f2, + input_2, input_3, input_4); + + acc_0 = MultiplyAccumulateRow(acc_0, filter.f3, filter.f4, filter.f5, + input_5, input_6, input_7); + + acc_1 = MultiplyAccumulateRow(acc_1, filter.f3, filter.f4, filter.f5, + input_7, input_8, input_9); + + // Load last row. + ptr += input_row_size; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + temp_2 = vld1_u8(ptr + 2 * input_depth); + temp_3 = vld1_u8(ptr + 3 * input_depth); + temp_4 = vld1_u8(ptr + 4 * input_depth); + + input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); + + input_0 = vaddq_s16(input_0, input_offset_vec); + input_1 = vaddq_s16(input_1, input_offset_vec); + input_2 = vaddq_s16(input_2, input_offset_vec); + input_3 = vaddq_s16(input_3, input_offset_vec); + input_4 = vaddq_s16(input_4, input_offset_vec); + + acc_0 = MultiplyAccumulateRow(acc_0, filter.f6, filter.f7, filter.f8, + input_0, input_1, input_2); + + acc_1 = MultiplyAccumulateRow(acc_1, filter.f6, filter.f7, filter.f8, + input_2, input_3, input_4); + + DownquantizeAndStore2Output( + acc_0, acc_1, output_offset, output_multiplier, output_shift, + output_activation_min, output_activation_max, output_ptr, output_depth); + } +}; + +template <> +struct ConvKernel3x3FilterDepth8<4, 4, 2, 2> { + static inline void Run(const uint8* input_ptr, int input_depth, + int32 input_offset, int input_row_size, + const uint8* filter_ptr, int32 filter_offset, + const int32* bias_ptr, int32 output_offset, + int32 
output_multiplier, int output_shift, + int32 output_activation_min, + int32 output_activation_max, uint8* output_ptr, + int output_depth, int output_width) { + // Reuse 4x2 kernel twice. + ConvKernel3x3FilterDepth8<4, 2, 2, 2>::Run( + input_ptr, input_depth, input_offset, input_row_size, filter_ptr, + filter_offset, bias_ptr, output_offset, output_multiplier, output_shift, + output_activation_min, output_activation_max, output_ptr, output_depth, + output_width); + + ConvKernel3x3FilterDepth8<4, 2, 2, 2>::Run( + input_ptr + 4 * input_depth, input_depth, input_offset, input_row_size, + filter_ptr, filter_offset, bias_ptr, output_offset, output_multiplier, + output_shift, output_activation_min, output_activation_max, + output_ptr + 2 * output_depth, output_depth, output_width); + } +}; + +template <> +struct ConvKernel3x3FilterDepth8<4, 1, 2, 2> { + static inline void Run(const uint8* input_ptr, int input_depth, + int32 input_offset, int input_row_size, + const uint8* filter_ptr, int32 filter_offset, + const int32* bias_ptr, int32 output_offset, + int32 output_multiplier, int output_shift, + int32 output_activation_min, + int32 output_activation_max, uint8* output_ptr, + int output_depth, int output_width) { + const int output_row_size = output_depth * output_width; + + Filter3x3x8 filter = Load3x3Filter(filter_ptr, filter_offset, output_depth); + + const int16x8_t input_offset_vec = vdupq_n_s16(input_offset); + int16x8_t input_0, input_1, input_2, input_3, input_4, input_5, input_6, + input_7, input_8; + uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5, temp_6, temp_7, + temp_8; + + const uint8* ptr = input_ptr; + + // Load all inputs for top output. 
+ temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + temp_2 = vld1_u8(ptr + 2 * input_depth); + ptr += input_row_size; + temp_3 = vld1_u8(ptr); + temp_4 = vld1_u8(ptr + input_depth); + temp_5 = vld1_u8(ptr + 2 * input_depth); + ptr += input_row_size; + temp_6 = vld1_u8(ptr); + temp_7 = vld1_u8(ptr + input_depth); + temp_8 = vld1_u8(ptr + 2 * input_depth); + + input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); + input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); + input_6 = vreinterpretq_s16_u16(vmovl_u8(temp_6)); + input_7 = vreinterpretq_s16_u16(vmovl_u8(temp_7)); + input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_8)); + + input_0 = vaddq_s16(input_0, input_offset_vec); + input_1 = vaddq_s16(input_1, input_offset_vec); + input_2 = vaddq_s16(input_2, input_offset_vec); + input_3 = vaddq_s16(input_3, input_offset_vec); + input_4 = vaddq_s16(input_4, input_offset_vec); + input_5 = vaddq_s16(input_5, input_offset_vec); + input_6 = vaddq_s16(input_6, input_offset_vec); + input_7 = vaddq_s16(input_7, input_offset_vec); + input_8 = vaddq_s16(input_8, input_offset_vec); + + DotProductAndStore( + filter, input_0, input_1, input_2, input_3, input_4, input_5, input_6, + input_7, input_8, bias_ptr, output_offset, output_multiplier, + output_shift, output_activation_min, output_activation_max, output_ptr); + + // Second output. 
+ output_ptr += output_row_size; + + ptr += input_row_size; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + temp_2 = vld1_u8(ptr + 2 * input_depth); + ptr += input_row_size; + temp_3 = vld1_u8(ptr); + temp_4 = vld1_u8(ptr + input_depth); + temp_5 = vld1_u8(ptr + 2 * input_depth); + + input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); + input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); + + input_0 = vaddq_s16(input_0, input_offset_vec); + input_1 = vaddq_s16(input_1, input_offset_vec); + input_2 = vaddq_s16(input_2, input_offset_vec); + input_3 = vaddq_s16(input_3, input_offset_vec); + input_4 = vaddq_s16(input_4, input_offset_vec); + input_5 = vaddq_s16(input_5, input_offset_vec); + + DotProductAndStore( + filter, input_6, input_7, input_8, input_0, input_1, input_2, input_3, + input_4, input_5, bias_ptr, output_offset, output_multiplier, + output_shift, output_activation_min, output_activation_max, output_ptr); + + // Third output. 
+ output_ptr += output_row_size; + + ptr += input_row_size; + temp_6 = vld1_u8(ptr); + temp_7 = vld1_u8(ptr + input_depth); + temp_8 = vld1_u8(ptr + 2 * input_depth); + ptr += input_row_size; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + temp_2 = vld1_u8(ptr + 2 * input_depth); + + input_6 = vreinterpretq_s16_u16(vmovl_u8(temp_6)); + input_7 = vreinterpretq_s16_u16(vmovl_u8(temp_7)); + input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_8)); + input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + + input_6 = vaddq_s16(input_6, input_offset_vec); + input_7 = vaddq_s16(input_7, input_offset_vec); + input_8 = vaddq_s16(input_8, input_offset_vec); + input_0 = vaddq_s16(input_0, input_offset_vec); + input_1 = vaddq_s16(input_1, input_offset_vec); + input_2 = vaddq_s16(input_2, input_offset_vec); + + DotProductAndStore( + filter, input_3, input_4, input_5, input_6, input_7, input_8, input_0, + input_1, input_2, bias_ptr, output_offset, output_multiplier, + output_shift, output_activation_min, output_activation_max, output_ptr); + + // Fourth output. 
+ output_ptr += output_row_size; + + ptr += input_row_size; + temp_3 = vld1_u8(ptr); + temp_4 = vld1_u8(ptr + input_depth); + temp_5 = vld1_u8(ptr + 2 * input_depth); + ptr += input_row_size; + temp_6 = vld1_u8(ptr); + temp_7 = vld1_u8(ptr + input_depth); + temp_8 = vld1_u8(ptr + 2 * input_depth); + + input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); + input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); + input_6 = vreinterpretq_s16_u16(vmovl_u8(temp_6)); + input_7 = vreinterpretq_s16_u16(vmovl_u8(temp_7)); + input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_8)); + + input_3 = vaddq_s16(input_3, input_offset_vec); + input_4 = vaddq_s16(input_4, input_offset_vec); + input_5 = vaddq_s16(input_5, input_offset_vec); + input_6 = vaddq_s16(input_6, input_offset_vec); + input_7 = vaddq_s16(input_7, input_offset_vec); + input_8 = vaddq_s16(input_8, input_offset_vec); + + DotProductAndStore( + filter, input_0, input_1, input_2, input_3, input_4, input_5, input_6, + input_7, input_8, bias_ptr, output_offset, output_multiplier, + output_shift, output_activation_min, output_activation_max, output_ptr); + } +}; + +template <> +struct ConvKernel3x3FilterDepth8<2, 2, 2, 2> { + static inline void Run(const uint8* input_ptr, int input_depth, + int32 input_offset, int input_row_size, + const uint8* filter_ptr, int32 filter_offset, + const int32* bias_ptr, int32 output_offset, + int32 output_multiplier, int output_shift, + int32 output_activation_min, + int32 output_activation_max, uint8* output_ptr, + int output_depth, int output_width) { + Filter3x3x8 filter = Load3x3Filter(filter_ptr, filter_offset, output_depth); + + Int32x8 acc_0, acc_1, acc_2, acc_3; + acc_0.low = vld1q_s32(bias_ptr); + acc_1.low = vld1q_s32(bias_ptr); + acc_2.low = vld1q_s32(bias_ptr); + acc_3.low = vld1q_s32(bias_ptr); + + bias_ptr += 4; + acc_0.high = vld1q_s32(bias_ptr); + acc_1.high = vld1q_s32(bias_ptr); + acc_2.high = vld1q_s32(bias_ptr); + acc_3.high = 
vld1q_s32(bias_ptr); + + const int16x8_t input_offset_vec = vdupq_n_s16(input_offset); + + // Add scope for input registers to help the compiler know that it is + // not needed. + { + // To process 2x2 outputs using a 3x3 filter at stride 2, we require + // 5x5 inputs. We load the first 5x2 inputs at a time. + int16x8_t input_0, input_1, input_2, input_3, input_4, input_5, input_6, + input_7, input_8, input_9; + + const uint8* ptr = input_ptr; + + // Load inputs. + { + uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4; + + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + temp_2 = vld1_u8(ptr + 2 * input_depth); + temp_3 = vld1_u8(ptr + 3 * input_depth); + temp_4 = vld1_u8(ptr + 4 * input_depth); + + input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); + + input_0 = vaddq_s16(input_0, input_offset_vec); + input_1 = vaddq_s16(input_1, input_offset_vec); + input_2 = vaddq_s16(input_2, input_offset_vec); + input_3 = vaddq_s16(input_3, input_offset_vec); + input_4 = vaddq_s16(input_4, input_offset_vec); + + ptr += input_row_size; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + temp_2 = vld1_u8(ptr + 2 * input_depth); + temp_3 = vld1_u8(ptr + 3 * input_depth); + temp_4 = vld1_u8(ptr + 4 * input_depth); + + input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_6 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_7 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); + + input_5 = vaddq_s16(input_5, input_offset_vec); + input_6 = vaddq_s16(input_6, input_offset_vec); + input_7 = vaddq_s16(input_7, input_offset_vec); + input_8 = vaddq_s16(input_8, input_offset_vec); + input_9 = vaddq_s16(input_9, input_offset_vec); + } + + acc_0 = 
MultiplyAccumulateRow(acc_0, filter.f0, filter.f1, filter.f2, + input_0, input_1, input_2); + + acc_1 = MultiplyAccumulateRow(acc_1, filter.f0, filter.f1, filter.f2, + input_2, input_3, input_4); + + acc_0 = MultiplyAccumulateRow(acc_0, filter.f3, filter.f4, filter.f5, + input_5, input_6, input_7); + + acc_1 = MultiplyAccumulateRow(acc_1, filter.f3, filter.f4, filter.f5, + input_7, input_8, input_9); + + // Load next inputs. + { + uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4; + + ptr += input_row_size; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + temp_2 = vld1_u8(ptr + 2 * input_depth); + temp_3 = vld1_u8(ptr + 3 * input_depth); + temp_4 = vld1_u8(ptr + 4 * input_depth); + + input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); + + input_0 = vaddq_s16(input_0, input_offset_vec); + input_1 = vaddq_s16(input_1, input_offset_vec); + input_2 = vaddq_s16(input_2, input_offset_vec); + input_3 = vaddq_s16(input_3, input_offset_vec); + input_4 = vaddq_s16(input_4, input_offset_vec); + + ptr += input_row_size; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + temp_2 = vld1_u8(ptr + 2 * input_depth); + temp_3 = vld1_u8(ptr + 3 * input_depth); + temp_4 = vld1_u8(ptr + 4 * input_depth); + + input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_6 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_7 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); + + input_5 = vaddq_s16(input_5, input_offset_vec); + input_6 = vaddq_s16(input_6, input_offset_vec); + input_7 = vaddq_s16(input_7, input_offset_vec); + input_8 = vaddq_s16(input_8, input_offset_vec); + input_9 = vaddq_s16(input_9, input_offset_vec); + } + + acc_0 = 
MultiplyAccumulateRow(acc_0, filter.f6, filter.f7, filter.f8, + input_0, input_1, input_2); + + acc_1 = MultiplyAccumulateRow(acc_1, filter.f6, filter.f7, filter.f8, + input_2, input_3, input_4); + + // Moving onto the two bottom outputs. + acc_2 = MultiplyAccumulateRow(acc_2, filter.f0, filter.f1, filter.f2, + input_0, input_1, input_2); + + acc_3 = MultiplyAccumulateRow(acc_3, filter.f0, filter.f1, filter.f2, + input_2, input_3, input_4); + + acc_2 = MultiplyAccumulateRow(acc_2, filter.f3, filter.f4, filter.f5, + input_5, input_6, input_7); + + acc_3 = MultiplyAccumulateRow(acc_3, filter.f3, filter.f4, filter.f5, + input_7, input_8, input_9); + + // Load last input row. + { + uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4; + + ptr += input_row_size; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + temp_2 = vld1_u8(ptr + 2 * input_depth); + temp_3 = vld1_u8(ptr + 3 * input_depth); + temp_4 = vld1_u8(ptr + 4 * input_depth); + + input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); + + input_0 = vaddq_s16(input_0, input_offset_vec); + input_1 = vaddq_s16(input_1, input_offset_vec); + input_2 = vaddq_s16(input_2, input_offset_vec); + input_3 = vaddq_s16(input_3, input_offset_vec); + input_4 = vaddq_s16(input_4, input_offset_vec); + } + + acc_2 = MultiplyAccumulateRow(acc_2, filter.f6, filter.f7, filter.f8, + input_0, input_1, input_2); + + acc_3 = MultiplyAccumulateRow(acc_3, filter.f6, filter.f7, filter.f8, + input_2, input_3, input_4); + } + + DownquantizeAndStore2x2Output(acc_0, acc_1, acc_2, acc_3, output_offset, + output_multiplier, output_shift, + output_activation_min, output_activation_max, + output_ptr, output_depth, output_width); + } }; -struct Filter3x3x16 { - Int16x16x3 r0, r1, r2; +template <> +struct ConvKernel3x3FilterDepth8<2, 
4, 2, 2> { + static inline void Run(const uint8* input_ptr, int input_depth, + int32 input_offset, int input_row_size, + const uint8* filter_ptr, int32 filter_offset, + const int32* bias_ptr, int32 output_offset, + int32 output_multiplier, int output_shift, + int32 output_activation_min, + int32 output_activation_max, uint8* output_ptr, + int output_depth, int output_width) { + // Reuse 2x2 kernel twice. + ConvKernel3x3FilterDepth8<2, 2, 2, 2>::Run( + input_ptr, input_depth, input_offset, input_row_size, filter_ptr, + filter_offset, bias_ptr, output_offset, output_multiplier, output_shift, + output_activation_min, output_activation_max, output_ptr, output_depth, + output_width); + + ConvKernel3x3FilterDepth8<2, 2, 2, 2>::Run( + input_ptr + 4 * input_depth, input_depth, input_offset, input_row_size, + filter_ptr, filter_offset, bias_ptr, output_offset, output_multiplier, + output_shift, output_activation_min, output_activation_max, + output_ptr + 2 * output_depth, output_depth, output_width); + } }; -// Loads 3x3 filter of depth 16 and adds filter offsets. 
-inline Filter3x3x16 LoadFilterDepth16(const uint8* filter_ptr, - int32 filter_offset, int output_depth) { - Filter3x3x16 filter; +template <> +struct ConvKernel3x3FilterDepth8<2, 1, 2, 2> { + static inline void Run(const uint8* input_ptr, int input_depth, + int32 input_offset, int input_row_size, + const uint8* filter_ptr, int32 filter_offset, + const int32* bias_ptr, int32 output_offset, + int32 output_multiplier, int output_shift, + int32 output_activation_min, + int32 output_activation_max, uint8* output_ptr, + int output_depth, int output_width) { + const int output_row_size = output_depth * output_width; - uint8x8_t temp_u8_0, temp_u8_1, temp_u8_2, temp_u8_3, temp_u8_4, temp_u8_5, - temp_u8_6, temp_u8_7, temp_u8_8, temp_u8_9, temp_u8_10, temp_u8_11, - temp_u8_12, temp_u8_13, temp_u8_14, temp_u8_15, temp_u8_16, temp_u8_17; - int16x8_t filter_offset_vec = vdupq_n_s16(filter_offset); + Filter3x3x8 filter = Load3x3Filter(filter_ptr, filter_offset, output_depth); - temp_u8_0 = vld1_u8(filter_ptr + 0 * output_depth); - temp_u8_1 = vld1_u8(filter_ptr + 0 * output_depth + 8); - temp_u8_2 = vld1_u8(filter_ptr + 1 * output_depth); - temp_u8_3 = vld1_u8(filter_ptr + 1 * output_depth + 8); - temp_u8_4 = vld1_u8(filter_ptr + 2 * output_depth); - temp_u8_5 = vld1_u8(filter_ptr + 2 * output_depth + 8); - - temp_u8_6 = vld1_u8(filter_ptr + 3 * output_depth); - temp_u8_7 = vld1_u8(filter_ptr + 3 * output_depth + 8); - temp_u8_8 = vld1_u8(filter_ptr + 4 * output_depth); - temp_u8_9 = vld1_u8(filter_ptr + 4 * output_depth + 8); - temp_u8_10 = vld1_u8(filter_ptr + 5 * output_depth); - temp_u8_11 = vld1_u8(filter_ptr + 5 * output_depth + 8); - - temp_u8_12 = vld1_u8(filter_ptr + 6 * output_depth); - temp_u8_13 = vld1_u8(filter_ptr + 6 * output_depth + 8); - temp_u8_14 = vld1_u8(filter_ptr + 7 * output_depth); - temp_u8_15 = vld1_u8(filter_ptr + 7 * output_depth + 8); - temp_u8_16 = vld1_u8(filter_ptr + 8 * output_depth); - temp_u8_17 = vld1_u8(filter_ptr + 8 * output_depth + 8); 
- - filter.r0.v0.low = vreinterpretq_s16_u16(vmovl_u8(temp_u8_0)); - filter.r0.v0.high = vreinterpretq_s16_u16(vmovl_u8(temp_u8_1)); - filter.r0.v1.low = vreinterpretq_s16_u16(vmovl_u8(temp_u8_2)); - filter.r0.v1.high = vreinterpretq_s16_u16(vmovl_u8(temp_u8_3)); - filter.r0.v2.low = vreinterpretq_s16_u16(vmovl_u8(temp_u8_4)); - filter.r0.v2.high = vreinterpretq_s16_u16(vmovl_u8(temp_u8_5)); - - filter.r1.v0.low = vreinterpretq_s16_u16(vmovl_u8(temp_u8_6)); - filter.r1.v0.high = vreinterpretq_s16_u16(vmovl_u8(temp_u8_7)); - filter.r1.v1.low = vreinterpretq_s16_u16(vmovl_u8(temp_u8_8)); - filter.r1.v1.high = vreinterpretq_s16_u16(vmovl_u8(temp_u8_9)); - filter.r1.v2.low = vreinterpretq_s16_u16(vmovl_u8(temp_u8_10)); - filter.r1.v2.high = vreinterpretq_s16_u16(vmovl_u8(temp_u8_11)); - - filter.r2.v0.low = vreinterpretq_s16_u16(vmovl_u8(temp_u8_12)); - filter.r2.v0.high = vreinterpretq_s16_u16(vmovl_u8(temp_u8_13)); - filter.r2.v1.low = vreinterpretq_s16_u16(vmovl_u8(temp_u8_14)); - filter.r2.v1.high = vreinterpretq_s16_u16(vmovl_u8(temp_u8_15)); - filter.r2.v2.low = vreinterpretq_s16_u16(vmovl_u8(temp_u8_16)); - filter.r2.v2.high = vreinterpretq_s16_u16(vmovl_u8(temp_u8_17)); - - filter.r0.v0.low = vaddq_s16(filter.r0.v0.low, filter_offset_vec); - filter.r0.v0.high = vaddq_s16(filter.r0.v0.high, filter_offset_vec); - filter.r0.v1.low = vaddq_s16(filter.r0.v1.low, filter_offset_vec); - filter.r0.v1.high = vaddq_s16(filter.r0.v1.high, filter_offset_vec); - filter.r0.v2.low = vaddq_s16(filter.r0.v2.low, filter_offset_vec); - filter.r0.v2.high = vaddq_s16(filter.r0.v2.high, filter_offset_vec); - - filter.r1.v0.low = vaddq_s16(filter.r1.v0.low, filter_offset_vec); - filter.r1.v0.high = vaddq_s16(filter.r1.v0.high, filter_offset_vec); - filter.r1.v1.low = vaddq_s16(filter.r1.v1.low, filter_offset_vec); - filter.r1.v1.high = vaddq_s16(filter.r1.v1.high, filter_offset_vec); - filter.r1.v2.low = vaddq_s16(filter.r1.v2.low, filter_offset_vec); - filter.r1.v2.high = 
vaddq_s16(filter.r1.v2.high, filter_offset_vec); - - filter.r2.v0.low = vaddq_s16(filter.r2.v0.low, filter_offset_vec); - filter.r2.v0.high = vaddq_s16(filter.r2.v0.high, filter_offset_vec); - filter.r2.v1.low = vaddq_s16(filter.r2.v1.low, filter_offset_vec); - filter.r2.v1.high = vaddq_s16(filter.r2.v1.high, filter_offset_vec); - filter.r2.v2.low = vaddq_s16(filter.r2.v2.low, filter_offset_vec); - filter.r2.v2.high = vaddq_s16(filter.r2.v2.high, filter_offset_vec); + const int16x8_t input_offset_vec = vdupq_n_s16(input_offset); + int16x8_t input_0, input_1, input_2, input_3, input_4, input_5, input_6, + input_7, input_8; + uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5, temp_6, temp_7, + temp_8; - return filter; -} + const uint8* ptr = input_ptr; -// Loads 3 input cells of depth 16 and adds input offsets. -inline Int16x16x3 LoadInputRowDepth16(const uint8* ptr, int input_depth, - int32 input_offset, - Int16x16x3 input_row) { - uint8x8_t temp_0, temp_1; - int16x8_t offset_vec = vdupq_n_s16(input_offset); - - temp_0 = vld1_u8(ptr + 0 * input_depth); - temp_1 = vld1_u8(ptr + 0 * input_depth + 8); - input_row.v0.low = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_row.v0.high = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_row.v0.low = vaddq_s16(input_row.v0.low, offset_vec); - input_row.v0.high = vaddq_s16(input_row.v0.high, offset_vec); - - temp_0 = vld1_u8(ptr + 1 * input_depth); - temp_1 = vld1_u8(ptr + 1 * input_depth + 8); - input_row.v1.low = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_row.v1.high = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_row.v1.low = vaddq_s16(input_row.v1.low, offset_vec); - input_row.v1.high = vaddq_s16(input_row.v1.high, offset_vec); - - temp_0 = vld1_u8(ptr + 2 * input_depth); - temp_1 = vld1_u8(ptr + 2 * input_depth + 8); - input_row.v2.low = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_row.v2.high = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_row.v2.low = vaddq_s16(input_row.v2.low, offset_vec); - 
input_row.v2.high = vaddq_s16(input_row.v2.high, offset_vec); - - return input_row; -} + // Load all inputs for top output. + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + temp_2 = vld1_u8(ptr + 2 * input_depth); + ptr += input_row_size; + temp_3 = vld1_u8(ptr); + temp_4 = vld1_u8(ptr + input_depth); + temp_5 = vld1_u8(ptr + 2 * input_depth); + ptr += input_row_size; + temp_6 = vld1_u8(ptr); + temp_7 = vld1_u8(ptr + input_depth); + temp_8 = vld1_u8(ptr + 2 * input_depth); -// Performs multiply accumulate on 3 inputs of depth 16. -inline Int32x16 MultiplyAccumulateRowDepth16(Int32x16 output, - const Int16x16x3& filter_row, - const Int16x16x3& input_row) { - output.v0 = vmlal_s16(output.v0, vget_low_s16(filter_row.v0.low), - vget_low_s16(input_row.v0.low)); - output.v1 = vmlal_s16(output.v1, vget_high_s16(filter_row.v0.low), - vget_high_s16(input_row.v0.low)); - output.v2 = vmlal_s16(output.v2, vget_low_s16(filter_row.v0.high), - vget_low_s16(input_row.v0.high)); - output.v3 = vmlal_s16(output.v3, vget_high_s16(filter_row.v0.high), - vget_high_s16(input_row.v0.high)); - - output.v0 = vmlal_s16(output.v0, vget_low_s16(filter_row.v1.low), - vget_low_s16(input_row.v1.low)); - output.v1 = vmlal_s16(output.v1, vget_high_s16(filter_row.v1.low), - vget_high_s16(input_row.v1.low)); - output.v2 = vmlal_s16(output.v2, vget_low_s16(filter_row.v1.high), - vget_low_s16(input_row.v1.high)); - output.v3 = vmlal_s16(output.v3, vget_high_s16(filter_row.v1.high), - vget_high_s16(input_row.v1.high)); - - output.v0 = vmlal_s16(output.v0, vget_low_s16(filter_row.v2.low), - vget_low_s16(input_row.v2.low)); - output.v1 = vmlal_s16(output.v1, vget_high_s16(filter_row.v2.low), - vget_high_s16(input_row.v2.low)); - output.v2 = vmlal_s16(output.v2, vget_low_s16(filter_row.v2.high), - vget_low_s16(input_row.v2.high)); - output.v3 = vmlal_s16(output.v3, vget_high_s16(filter_row.v2.high), - vget_high_s16(input_row.v2.high)); - - return output; -} + input_0 = 
vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); + input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); + input_6 = vreinterpretq_s16_u16(vmovl_u8(temp_6)); + input_7 = vreinterpretq_s16_u16(vmovl_u8(temp_7)); + input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_8)); -// Applies activation, offset and downquantize on a set of accumulator -// registers of depth 16. Stores results to output. -inline void DownquantizeAndStoreDepth16(Int32x16 acc, int32 output_multiplier, - int output_shift, - int32x4_t output_offset_vec, - int32x4_t output_activation_min_vec, - int32x4_t output_activation_max_vec, - uint8* output_ptr) { - // Fixed-point multiplication. - acc.v0 = vqrdmulhq_n_s32(acc.v0, output_multiplier); - acc.v1 = vqrdmulhq_n_s32(acc.v1, output_multiplier); - acc.v2 = vqrdmulhq_n_s32(acc.v2, output_multiplier); - acc.v3 = vqrdmulhq_n_s32(acc.v3, output_multiplier); + input_0 = vaddq_s16(input_0, input_offset_vec); + input_1 = vaddq_s16(input_1, input_offset_vec); + input_2 = vaddq_s16(input_2, input_offset_vec); + input_3 = vaddq_s16(input_3, input_offset_vec); + input_4 = vaddq_s16(input_4, input_offset_vec); + input_5 = vaddq_s16(input_5, input_offset_vec); + input_6 = vaddq_s16(input_6, input_offset_vec); + input_7 = vaddq_s16(input_7, input_offset_vec); + input_8 = vaddq_s16(input_8, input_offset_vec); - using gemmlowp::RoundingDivideByPOT; - acc.v0 = RoundingDivideByPOT(acc.v0, output_shift); - acc.v1 = RoundingDivideByPOT(acc.v1, output_shift); - acc.v2 = RoundingDivideByPOT(acc.v2, output_shift); - acc.v3 = RoundingDivideByPOT(acc.v3, output_shift); + DotProductAndStore( + filter, input_0, input_1, input_2, input_3, input_4, input_5, input_6, + input_7, input_8, bias_ptr, output_offset, output_multiplier, + output_shift, output_activation_min, 
output_activation_max, output_ptr); - // Add the output offset. - acc.v0 = vaddq_s32(acc.v0, output_offset_vec); - acc.v1 = vaddq_s32(acc.v1, output_offset_vec); - acc.v2 = vaddq_s32(acc.v2, output_offset_vec); - acc.v3 = vaddq_s32(acc.v3, output_offset_vec); + // Second output. + output_ptr += output_row_size; - // Apply the activation function. - acc.v0 = vmaxq_s32(acc.v0, output_activation_min_vec); - acc.v1 = vmaxq_s32(acc.v1, output_activation_min_vec); - acc.v2 = vmaxq_s32(acc.v2, output_activation_min_vec); - acc.v3 = vmaxq_s32(acc.v3, output_activation_min_vec); + ptr += input_row_size; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + temp_2 = vld1_u8(ptr + 2 * input_depth); + ptr += input_row_size; + temp_3 = vld1_u8(ptr); + temp_4 = vld1_u8(ptr + input_depth); + temp_5 = vld1_u8(ptr + 2 * input_depth); - acc.v0 = vminq_s32(acc.v0, output_activation_max_vec); - acc.v1 = vminq_s32(acc.v1, output_activation_max_vec); - acc.v2 = vminq_s32(acc.v2, output_activation_max_vec); - acc.v3 = vminq_s32(acc.v3, output_activation_max_vec); + input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); + input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); - // Saturating cast to uint8 and store to destination. 
- int16x4_t acc_tlla_s16 = vqmovn_s32(acc.v0); - int16x4_t acc_tllb_s16 = vqmovn_s32(acc.v1); - int16x4_t acc_tlha_s16 = vqmovn_s32(acc.v2); - int16x4_t acc_tlhb_s16 = vqmovn_s32(acc.v3); - - int16x8_t res_s16_0 = vcombine_s16(acc_tlla_s16, acc_tllb_s16); - int16x8_t res_s16_1 = vcombine_s16(acc_tlha_s16, acc_tlhb_s16); - uint8x8_t res_u8_0 = vqmovun_s16(res_s16_0); - uint8x8_t res_u8_1 = vqmovun_s16(res_s16_1); - vst1q_u8(output_ptr, vcombine_u8(res_u8_0, res_u8_1)); + input_0 = vaddq_s16(input_0, input_offset_vec); + input_1 = vaddq_s16(input_1, input_offset_vec); + input_2 = vaddq_s16(input_2, input_offset_vec); + input_3 = vaddq_s16(input_3, input_offset_vec); + input_4 = vaddq_s16(input_4, input_offset_vec); + input_5 = vaddq_s16(input_5, input_offset_vec); + + DotProductAndStore( + filter, input_6, input_7, input_8, input_0, input_1, input_2, input_3, + input_4, input_5, bias_ptr, output_offset, output_multiplier, + output_shift, output_activation_min, output_activation_max, output_ptr); + } +}; + +template <> +struct ConvKernel3x3FilterDepth8<1, 2, 2, 2> { + static inline void Run(const uint8* input_ptr, int input_depth, + int32 input_offset, int input_row_size, + const uint8* filter_ptr, int32 filter_offset, + const int32* bias_ptr, int32 output_offset, + int32 output_multiplier, int output_shift, + int32 output_activation_min, + int32 output_activation_max, uint8* output_ptr, + int output_depth, int output_width) { + Filter3x3x8 filter = Load3x3Filter(filter_ptr, filter_offset, output_depth); + + const int16x8_t input_offset_vec = vdupq_n_s16(input_offset); + int16x8_t input_0, input_1, input_2, input_3, input_4, input_5, input_6, + input_7, input_8; + uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5, temp_6, temp_7, + temp_8; + + const uint8* ptr = input_ptr; + + // Load all inputs for top output. 
+ temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + temp_2 = vld1_u8(ptr + 2 * input_depth); + ptr += input_row_size; + temp_3 = vld1_u8(ptr); + temp_4 = vld1_u8(ptr + input_depth); + temp_5 = vld1_u8(ptr + 2 * input_depth); + ptr += input_row_size; + temp_6 = vld1_u8(ptr); + temp_7 = vld1_u8(ptr + input_depth); + temp_8 = vld1_u8(ptr + 2 * input_depth); + + input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); + input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); + input_6 = vreinterpretq_s16_u16(vmovl_u8(temp_6)); + input_7 = vreinterpretq_s16_u16(vmovl_u8(temp_7)); + input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_8)); + + input_0 = vaddq_s16(input_0, input_offset_vec); + input_1 = vaddq_s16(input_1, input_offset_vec); + input_2 = vaddq_s16(input_2, input_offset_vec); + input_3 = vaddq_s16(input_3, input_offset_vec); + input_4 = vaddq_s16(input_4, input_offset_vec); + input_5 = vaddq_s16(input_5, input_offset_vec); + input_6 = vaddq_s16(input_6, input_offset_vec); + input_7 = vaddq_s16(input_7, input_offset_vec); + input_8 = vaddq_s16(input_8, input_offset_vec); + + DotProductAndStore( + filter, input_0, input_1, input_2, input_3, input_4, input_5, input_6, + input_7, input_8, bias_ptr, output_offset, output_multiplier, + output_shift, output_activation_min, output_activation_max, output_ptr); + + // Second output. 
+ output_ptr += output_depth; + + ptr = input_ptr + 3 * input_depth; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + ptr += input_row_size; + temp_3 = vld1_u8(ptr); + temp_4 = vld1_u8(ptr + input_depth); + ptr += input_row_size; + temp_6 = vld1_u8(ptr); + temp_7 = vld1_u8(ptr + input_depth); + + input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); + input_6 = vreinterpretq_s16_u16(vmovl_u8(temp_6)); + input_7 = vreinterpretq_s16_u16(vmovl_u8(temp_7)); + + input_0 = vaddq_s16(input_0, input_offset_vec); + input_1 = vaddq_s16(input_1, input_offset_vec); + input_3 = vaddq_s16(input_3, input_offset_vec); + input_4 = vaddq_s16(input_4, input_offset_vec); + input_6 = vaddq_s16(input_6, input_offset_vec); + input_7 = vaddq_s16(input_7, input_offset_vec); + + DotProductAndStore( + filter, input_2, input_0, input_1, input_5, input_3, input_4, input_8, + input_6, input_7, bias_ptr, output_offset, output_multiplier, + output_shift, output_activation_min, output_activation_max, output_ptr); + } +}; + +template <> +struct ConvKernel3x3FilterDepth8<1, 4, 2, 2> { + static inline void Run(const uint8* input_ptr, int input_depth, + int32 input_offset, int input_row_size, + const uint8* filter_ptr, int32 filter_offset, + const int32* bias_ptr, int32 output_offset, + int32 output_multiplier, int output_shift, + int32 output_activation_min, + int32 output_activation_max, uint8* output_ptr, + int output_depth, int output_width) { + Filter3x3x8 filter = Load3x3Filter(filter_ptr, filter_offset, output_depth); + + const int16x8_t input_offset_vec = vdupq_n_s16(input_offset); + int16x8_t input_0, input_1, input_2, input_3, input_4, input_5, input_6, + input_7, input_8; + uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5, temp_6, temp_7, + temp_8; + + const uint8* ptr = input_ptr; + + // Load all inputs for top 
output. + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + temp_2 = vld1_u8(ptr + 2 * input_depth); + ptr += input_row_size; + temp_3 = vld1_u8(ptr); + temp_4 = vld1_u8(ptr + input_depth); + temp_5 = vld1_u8(ptr + 2 * input_depth); + ptr += input_row_size; + temp_6 = vld1_u8(ptr); + temp_7 = vld1_u8(ptr + input_depth); + temp_8 = vld1_u8(ptr + 2 * input_depth); + + input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); + input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); + input_6 = vreinterpretq_s16_u16(vmovl_u8(temp_6)); + input_7 = vreinterpretq_s16_u16(vmovl_u8(temp_7)); + input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_8)); + + input_0 = vaddq_s16(input_0, input_offset_vec); + input_1 = vaddq_s16(input_1, input_offset_vec); + input_2 = vaddq_s16(input_2, input_offset_vec); + input_3 = vaddq_s16(input_3, input_offset_vec); + input_4 = vaddq_s16(input_4, input_offset_vec); + input_5 = vaddq_s16(input_5, input_offset_vec); + input_6 = vaddq_s16(input_6, input_offset_vec); + input_7 = vaddq_s16(input_7, input_offset_vec); + input_8 = vaddq_s16(input_8, input_offset_vec); + + DotProductAndStore( + filter, input_0, input_1, input_2, input_3, input_4, input_5, input_6, + input_7, input_8, bias_ptr, output_offset, output_multiplier, + output_shift, output_activation_min, output_activation_max, output_ptr); + + // Second output. 
+ output_ptr += output_depth; + + ptr = input_ptr + 3 * input_depth; + temp_0 = vld1_u8(ptr); + temp_1 = vld1_u8(ptr + input_depth); + ptr += input_row_size; + temp_3 = vld1_u8(ptr); + temp_4 = vld1_u8(ptr + input_depth); + ptr += input_row_size; + temp_6 = vld1_u8(ptr); + temp_7 = vld1_u8(ptr + input_depth); + + input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); + input_6 = vreinterpretq_s16_u16(vmovl_u8(temp_6)); + input_7 = vreinterpretq_s16_u16(vmovl_u8(temp_7)); + + input_0 = vaddq_s16(input_0, input_offset_vec); + input_1 = vaddq_s16(input_1, input_offset_vec); + input_3 = vaddq_s16(input_3, input_offset_vec); + input_4 = vaddq_s16(input_4, input_offset_vec); + input_6 = vaddq_s16(input_6, input_offset_vec); + input_7 = vaddq_s16(input_7, input_offset_vec); + + DotProductAndStore( + filter, input_2, input_0, input_1, input_5, input_3, input_4, input_8, + input_6, input_7, bias_ptr, output_offset, output_multiplier, + output_shift, output_activation_min, output_activation_max, output_ptr); + + // Third output. 
+ output_ptr += output_depth; + + ptr = input_ptr + 5 * input_depth; + temp_2 = vld1_u8(ptr); + temp_0 = vld1_u8(ptr + input_depth); + ptr += input_row_size; + temp_5 = vld1_u8(ptr); + temp_3 = vld1_u8(ptr + input_depth); + ptr += input_row_size; + temp_8 = vld1_u8(ptr); + temp_6 = vld1_u8(ptr + input_depth); + + input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); + input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_8)); + input_6 = vreinterpretq_s16_u16(vmovl_u8(temp_6)); + + input_2 = vaddq_s16(input_2, input_offset_vec); + input_0 = vaddq_s16(input_0, input_offset_vec); + input_5 = vaddq_s16(input_5, input_offset_vec); + input_3 = vaddq_s16(input_3, input_offset_vec); + input_8 = vaddq_s16(input_8, input_offset_vec); + input_6 = vaddq_s16(input_6, input_offset_vec); + + DotProductAndStore( + filter, input_1, input_2, input_0, input_4, input_5, input_3, input_7, + input_8, input_6, bias_ptr, output_offset, output_multiplier, + output_shift, output_activation_min, output_activation_max, output_ptr); + + // Fourth output. 
+ output_ptr += output_depth; + + ptr = input_ptr + 7 * input_depth; + temp_1 = vld1_u8(ptr); + temp_2 = vld1_u8(ptr + input_depth); + ptr += input_row_size; + temp_4 = vld1_u8(ptr); + temp_5 = vld1_u8(ptr + input_depth); + ptr += input_row_size; + temp_7 = vld1_u8(ptr); + temp_8 = vld1_u8(ptr + input_depth); + + input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); + input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); + input_7 = vreinterpretq_s16_u16(vmovl_u8(temp_7)); + input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_8)); + + input_1 = vaddq_s16(input_1, input_offset_vec); + input_2 = vaddq_s16(input_2, input_offset_vec); + input_4 = vaddq_s16(input_4, input_offset_vec); + input_5 = vaddq_s16(input_5, input_offset_vec); + input_7 = vaddq_s16(input_7, input_offset_vec); + input_8 = vaddq_s16(input_8, input_offset_vec); + + DotProductAndStore( + filter, input_0, input_1, input_2, input_3, input_4, input_5, input_6, + input_7, input_8, bias_ptr, output_offset, output_multiplier, + output_shift, output_activation_min, output_activation_max, output_ptr); + } +}; + +template +struct ConvKernel3x3FilterDepth8<1, 1, kFixedStrideWidth, kFixedStrideHeight> { + static inline void Run(const uint8* input_ptr, int input_depth, + int32 input_offset, int input_row_size, + const uint8* filter_ptr, int32 filter_offset, + const int32* bias_ptr, int32 output_offset, + int32 output_multiplier, int output_shift, + int32 output_activation_min, + int32 output_activation_max, uint8* output_ptr, + int output_depth, int output_width) { + Filter3x3x8 filter = Load3x3Filter(filter_ptr, filter_offset, output_depth); + + int16x8_t input_0, input_1, input_2, input_3, input_4, input_5, input_6, + input_7, input_8; + + uint8x8_t temp_0 = vld1_u8(input_ptr); + uint8x8_t temp_1 = vld1_u8(input_ptr + input_depth); + uint8x8_t temp_2 = vld1_u8(input_ptr + 2 * input_depth); + + input_ptr += 
input_row_size; + uint8x8_t temp_3 = vld1_u8(input_ptr); + uint8x8_t temp_4 = vld1_u8(input_ptr + input_depth); + uint8x8_t temp_5 = vld1_u8(input_ptr + 2 * input_depth); + + input_ptr += input_row_size; + uint8x8_t temp_6 = vld1_u8(input_ptr); + uint8x8_t temp_7 = vld1_u8(input_ptr + input_depth); + uint8x8_t temp_8 = vld1_u8(input_ptr + 2 * input_depth); + + input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); + input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); + input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); + input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); + input_6 = vreinterpretq_s16_u16(vmovl_u8(temp_6)); + input_7 = vreinterpretq_s16_u16(vmovl_u8(temp_7)); + input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_8)); + + const int16x8_t input_offset_vec = vdupq_n_s16(input_offset); + input_0 = vaddq_s16(input_0, input_offset_vec); + input_1 = vaddq_s16(input_1, input_offset_vec); + input_2 = vaddq_s16(input_2, input_offset_vec); + input_3 = vaddq_s16(input_3, input_offset_vec); + input_4 = vaddq_s16(input_4, input_offset_vec); + input_5 = vaddq_s16(input_5, input_offset_vec); + input_6 = vaddq_s16(input_6, input_offset_vec); + input_7 = vaddq_s16(input_7, input_offset_vec); + input_8 = vaddq_s16(input_8, input_offset_vec); + + DotProductAndStore( + filter, input_0, input_1, input_2, input_3, input_4, input_5, input_6, + input_7, input_8, bias_ptr, output_offset, output_multiplier, + output_shift, output_activation_min, output_activation_max, output_ptr); + } +}; + +inline void ShuffleInput(const uint8* input_ptr, int input_depth, + int input_width, int input_height, int output_depth, + int output_width, int output_height, + uint8* output_ptr) { + const int input_row_size = input_depth * input_width; + + for (int y = 0; y < output_height; y++) { + const uint8* ptr = input_ptr; + for (int x = 0; x < output_width; x++) { + memcpy(output_ptr, ptr, 
output_depth); + output_ptr += output_depth; + ptr += input_depth; + } + input_ptr += input_row_size; + } } -// A kernel that is optimized on the number of output cells in the x and y -// direction, and the stride. Assumes 3x3 filters of 16 depth. -template -struct ConvKernel3x3FilterDepth16 {}; +template +struct ConvRow3x3FilterDepth8 {}; + +template +struct ConvRow3x3FilterDepth8<1, kFixedStrideWidth, kFixedStrideHeight> { + static inline void Run(const uint8* input_data, int start_x, int start_y, + int input_depth, int input_width, int input_height, + int input_row_size, int32 input_offset, + const uint8* filter_data, int32 filter_offset, + const int32* bias_data, int32 output_offset, + int32 output_multiplier, int output_shift, + int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + int output_depth, int output_width, + uint8* shuffle_workspace) { + int out_x = start_x; + + // 1x4 at a time. + for (; out_x <= output_width - 4; out_x += 4) { + const int32* bias_ptr = bias_data; + const uint8* filter_ptr = filter_data; + + const uint8* input_ptr = input_data; + uint8* output_ptr = output_data; + + for (int depth = 0; depth <= output_depth - 8; depth += 8) { + ConvKernel3x3FilterDepth8<1, 4, kFixedStrideWidth, kFixedStrideHeight>:: + Run(input_ptr, input_depth, input_offset, input_row_size, + filter_ptr, filter_offset, bias_ptr, output_offset, + output_multiplier, output_shift, output_activation_min, + output_activation_max, output_ptr, output_depth, output_width); + + input_ptr += 8; + output_ptr += 8; + filter_ptr += 8; + bias_ptr += 8; + } + + input_data += 4 * kFixedStrideWidth * input_depth; + output_data += 4 * output_depth; + } + + // 1x1 at a time. 
+ for (; out_x < output_width; out_x++) { + const int32* bias_ptr = bias_data; + const uint8* filter_ptr = filter_data; + + const uint8* input_ptr = input_data; + uint8* output_ptr = output_data; + + for (int depth = 0; depth <= output_depth - 8; depth += 8) { + ConvKernel3x3FilterDepth8<1, 1, kFixedStrideWidth, kFixedStrideHeight>:: + Run(input_ptr, input_depth, input_offset, input_row_size, + filter_ptr, filter_offset, bias_ptr, output_offset, + output_multiplier, output_shift, output_activation_min, + output_activation_max, output_ptr, output_depth, output_width); + + input_ptr += 8; + output_ptr += 8; + filter_ptr += 8; + bias_ptr += 8; + } + + input_data += kFixedStrideWidth * input_depth; + output_data += output_depth; + } + } +}; + +template +struct ConvRow3x3FilterDepth8<2, kFixedStrideWidth, kFixedStrideHeight> { + static inline void Run(const uint8* input_data, int start_x, int start_y, + int input_depth, int input_width, int input_height, + int input_row_size, int32 input_offset, + const uint8* filter_data, int32 filter_offset, + const int32* bias_data, int32 output_offset, + int32 output_multiplier, int output_shift, + int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + int output_depth, int output_width, + uint8* shuffle_workspace) { + int out_x = start_x; + + // 2x4 at a time. 
+ for (; out_x <= output_width - 4; out_x += 4) { + const int32* bias_ptr = bias_data; + const uint8* filter_ptr = filter_data; + + const uint8* input_ptr = input_data; + uint8* output_ptr = output_data; + + for (int depth = 0; depth <= output_depth - 8; depth += 8) { + ConvKernel3x3FilterDepth8<2, 4, kFixedStrideWidth, kFixedStrideHeight>:: + Run(input_ptr, input_depth, input_offset, input_row_size, + filter_ptr, filter_offset, bias_ptr, output_offset, + output_multiplier, output_shift, output_activation_min, + output_activation_max, output_ptr, output_depth, output_width); + + input_ptr += 8; + output_ptr += 8; + filter_ptr += 8; + bias_ptr += 8; + } + + input_data += 4 * kFixedStrideWidth * input_depth; + output_data += 4 * output_depth; + } + + // 2x2 at a time. + for (; out_x <= output_width - 2; out_x += 2) { + const int32* bias_ptr = bias_data; + const uint8* filter_ptr = filter_data; + + const uint8* input_ptr = input_data; + uint8* output_ptr = output_data; + + for (int depth = 0; depth <= output_depth - 8; depth += 8) { + ConvKernel3x3FilterDepth8<2, 2, kFixedStrideWidth, kFixedStrideHeight>:: + Run(input_ptr, input_depth, input_offset, input_row_size, + filter_ptr, filter_offset, bias_ptr, output_offset, + output_multiplier, output_shift, output_activation_min, + output_activation_max, output_ptr, output_depth, output_width); + + input_ptr += 8; + output_ptr += 8; + filter_ptr += 8; + bias_ptr += 8; + } + + input_data += 2 * kFixedStrideWidth * input_depth; + output_data += 2 * output_depth; + } + + // 2x1 at a time. 
+ for (; out_x < output_width; out_x++) { + const int32* bias_ptr = bias_data; + const uint8* filter_ptr = filter_data; + + const uint8* input_ptr = input_data; + uint8* output_ptr = output_data; + + for (int depth = 0; depth <= output_depth - 8; depth += 8) { + ConvKernel3x3FilterDepth8<2, 1, kFixedStrideWidth, kFixedStrideHeight>:: + Run(input_ptr, input_depth, input_offset, input_row_size, + filter_ptr, filter_offset, bias_ptr, output_offset, + output_multiplier, output_shift, output_activation_min, + output_activation_max, output_ptr, output_depth, output_width); + + input_ptr += 8; + output_ptr += 8; + filter_ptr += 8; + bias_ptr += 8; + } + + input_data += kFixedStrideWidth * input_depth; + output_data += output_depth; + } + } +}; + +template <> +struct ConvRow3x3FilterDepth8<4, 1, 1> { + static inline void Run(const uint8* input_data, int start_x, int start_y, + int input_depth, int input_width, int input_height, + int input_row_size, int32 input_offset, + const uint8* filter_data, int32 filter_offset, + const int32* bias_data, int32 output_offset, + int32 output_multiplier, int output_shift, + int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + int output_depth, int output_width, + uint8* shuffle_workspace) { + int out_x = start_x; + + // 4x4 at a time. 
+ for (; out_x <= output_width - 4; out_x += 4) { + const int32* bias_ptr = bias_data; + const uint8* filter_ptr = filter_data; + + const uint8* input_ptr = input_data; + uint8* output_ptr = output_data; + + for (int depth = 0; depth <= output_depth - 8; depth += 8) { + ConvKernel3x3FilterDepth8<4, 4, 1, 1>::Run( + input_ptr, input_depth, input_offset, input_row_size, filter_ptr, + filter_offset, bias_ptr, output_offset, output_multiplier, + output_shift, output_activation_min, output_activation_max, + output_ptr, output_depth, output_width); + + input_ptr += 8; + output_ptr += 8; + filter_ptr += 8; + bias_ptr += 8; + } + + input_data += 4 * input_depth; + output_data += 4 * output_depth; + } + + // Handle the rest of the right side. + // 4x2 at a time. + for (; out_x <= output_width - 2; out_x += 2) { + const int32* bias_ptr = bias_data; + const uint8* filter_ptr = filter_data; + + const uint8* input_ptr = input_data; + uint8* output_ptr = output_data; + + for (int depth = 0; depth <= output_depth - 8; depth += 8) { + ConvKernel3x3FilterDepth8<4, 2, 1, 1>::Run( + input_ptr, input_depth, input_offset, input_row_size, filter_ptr, + filter_offset, bias_ptr, output_offset, output_multiplier, + output_shift, output_activation_min, output_activation_max, + output_ptr, output_depth, output_width); + + input_ptr += 8; + output_ptr += 8; + filter_ptr += 8; + bias_ptr += 8; + } + + input_data += 2 * input_depth; + output_data += 2 * output_depth; + } + + // 4x1 at a time. 
+ for (; out_x < output_width; out_x++) { + const int32* bias_ptr = bias_data; + const uint8* filter_ptr = filter_data; + + const uint8* input_ptr = input_data; + uint8* output_ptr = output_data; + + for (int depth = 0; depth <= output_depth - 8; depth += 8) { + ConvKernel3x3FilterDepth8<4, 1, 1, 1>::Run( + input_ptr, input_depth, input_offset, input_row_size, filter_ptr, + filter_offset, bias_ptr, output_offset, output_multiplier, + output_shift, output_activation_min, output_activation_max, + output_ptr, output_depth, output_width); + + input_ptr += 8; + output_ptr += 8; + filter_ptr += 8; + bias_ptr += 8; + } + + input_data += input_depth; + output_data += output_depth; + } + } +}; template <> -struct ConvKernel3x3FilterDepth16<1, 2, 1> { - static void Run(const Filter3x3x16& filter, const uint8* input_ptr, - int input_depth, int32 input_offset, int input_row_width, - const int32* bias_ptr, int32 output_offset, - int32 output_multiplier, int output_shift, - int32 output_activation_min, int32 output_activation_max, - uint8* output_ptr, int output_depth, int output_width) { - // 16 depth accumulators for the 2 outputs. - Int32x16 acc0, acc1; - - // Accumulators for top filter. - acc0.v0 = vld1q_s32(bias_ptr); - acc0.v1 = vld1q_s32(bias_ptr + 4); - acc0.v2 = vld1q_s32(bias_ptr + 8); - acc0.v3 = vld1q_s32(bias_ptr + 12); - // Accumulators for bottom filter. - acc1.v0 = vld1q_s32(bias_ptr); - acc1.v1 = vld1q_s32(bias_ptr + 4); - acc1.v2 = vld1q_s32(bias_ptr + 8); - acc1.v3 = vld1q_s32(bias_ptr + 12); - - // Main multiply accumulate work. - { - // Load inputs for one filter row at a time. - Int16x16x3 input; - - // Do first row of top filter. - input = LoadInputRowDepth16(input_ptr, input_depth, input_offset, input); - acc0 = MultiplyAccumulateRowDepth16(acc0, filter.r0, input); - - // Do second row of top filter. 
- input = LoadInputRowDepth16(input_ptr + input_row_width, input_depth, - input_offset, input); - acc0 = MultiplyAccumulateRowDepth16(acc0, filter.r1, input); - - // The inputs to second row of the top filter are also the inputs to the - // first row of the bottom filter. - acc1 = MultiplyAccumulateRowDepth16(acc1, filter.r0, input); - - // Do third row of top filter. - input = LoadInputRowDepth16(input_ptr + 2 * input_row_width, input_depth, - input_offset, input); - acc0 = MultiplyAccumulateRowDepth16(acc0, filter.r2, input); - - // The inputs to third row of the top filter are also the inputs to the - // second row of the bottom filter. - acc1 = MultiplyAccumulateRowDepth16(acc1, filter.r1, input); - - // Do third row of bottom filter. - input = LoadInputRowDepth16(input_ptr + 3 * input_row_width, input_depth, - input_offset, input); - acc1 = MultiplyAccumulateRowDepth16(acc1, filter.r2, input); - } - - // Apply activation, downquantize and store. - int32x4_t output_offset_vec = vdupq_n_s32(output_offset); - int32x4_t output_activation_min_vec = vdupq_n_s32(output_activation_min); - int32x4_t output_activation_max_vec = vdupq_n_s32(output_activation_max); - - DownquantizeAndStoreDepth16(acc0, output_multiplier, output_shift, - output_offset_vec, output_activation_min_vec, - output_activation_max_vec, output_ptr); - - DownquantizeAndStoreDepth16(acc1, output_multiplier, output_shift, - output_offset_vec, output_activation_min_vec, - output_activation_max_vec, - output_ptr + output_depth * output_width); +struct ConvRow3x3FilterDepth8<4, 2, 2> { + // The buffer size of the shuffled input. 
+ static inline constexpr int ShuffleWorkspaceSize() { return 64 * 9 * 9; } + + static inline void Run(const uint8* input_data, int start_x, int start_y, + int input_depth, int input_width, int input_height, + int input_row_size, int32 input_offset, + const uint8* filter_data, int32 filter_offset, + const int32* bias_data, int32 output_offset, + int32 output_multiplier, int output_shift, + int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + int output_depth, int output_width, + uint8* shuffle_workspace) { + // Branch and cache misses increase substantially with stride 2 kernels. + // Adding prefetching reduces latency by as much as 2x. + const int i0 = 0; + const int i1 = input_depth; + const int i2 = 2 * input_depth; + const int i3 = 3 * input_depth; + const int i4 = 4 * input_depth; + const int i5 = 5 * input_depth; + const int i6 = 6 * input_depth; + const int i7 = 7 * input_depth; + const int i8 = 8 * input_depth; + +#define DEPTHWISECONV_PRELOAD_ROW(input_ptr, i) \ + preload_l1_keep(input_ptr + i * input_row_size + i0); \ + preload_l1_keep(input_ptr + i * input_row_size + i1); \ + preload_l1_keep(input_ptr + i * input_row_size + i2); \ + preload_l1_keep(input_ptr + i * input_row_size + i3); \ + preload_l1_keep(input_ptr + i * input_row_size + i4); \ + preload_l1_keep(input_ptr + i * input_row_size + i5); \ + preload_l1_keep(input_ptr + i * input_row_size + i6); \ + preload_l1_keep(input_ptr + i * input_row_size + i7); \ + preload_l1_keep(input_ptr + i * input_row_size + i8); + + int out_x = start_x; + // 4x4 at a time. + for (; out_x <= output_width - 4; out_x += 4) { + const int32* bias_ptr = bias_data; + const uint8* filter_ptr = filter_data; + + const uint8* input_ptr = input_data; + uint8* output_ptr = output_data; + + int depth = 0; + for (; depth <= output_depth - 64; depth += 64) { + // Preload 9x9 input. 
+ DEPTHWISECONV_PRELOAD_ROW(input_ptr, 0); + DEPTHWISECONV_PRELOAD_ROW(input_ptr, 1); + DEPTHWISECONV_PRELOAD_ROW(input_ptr, 2); + DEPTHWISECONV_PRELOAD_ROW(input_ptr, 3); + DEPTHWISECONV_PRELOAD_ROW(input_ptr, 4); + DEPTHWISECONV_PRELOAD_ROW(input_ptr, 5); + DEPTHWISECONV_PRELOAD_ROW(input_ptr, 6); + DEPTHWISECONV_PRELOAD_ROW(input_ptr, 7); + DEPTHWISECONV_PRELOAD_ROW(input_ptr, 8); + + // For a large input window (64x9x9) that is small enough to fit in L1 + // cache, copy the input into a separate buffer and run the kernel on + // this new buffer. This reduces the likelihood of cache misses when + // the kernel is loading input data. If this size is ever changed, + // update the ShuffleWorkspaceSize() function to return the new size. + ShuffleInput(input_ptr, input_depth, input_width, input_height, 64, 9, + 9, shuffle_workspace); + const uint8* shuffled_ptr = &shuffle_workspace[0]; + + for (int micro_depth = 0; micro_depth <= 64 - 8; micro_depth += 8) { + ConvKernel3x3FilterDepth8<4, 4, 2, 2>::Run( + shuffled_ptr, 64, input_offset, 64 * 9, filter_ptr, filter_offset, + bias_ptr, output_offset, output_multiplier, output_shift, + output_activation_min, output_activation_max, output_ptr, + output_depth, output_width); + + shuffled_ptr += 8; + output_ptr += 8; + filter_ptr += 8; + bias_ptr += 8; + } + input_ptr += 64; + } + + // Preload 9x9 input one more time for the rest of the depth. 
+ DEPTHWISECONV_PRELOAD_ROW(input_ptr, 0); + DEPTHWISECONV_PRELOAD_ROW(input_ptr, 1); + DEPTHWISECONV_PRELOAD_ROW(input_ptr, 2); + DEPTHWISECONV_PRELOAD_ROW(input_ptr, 3); + DEPTHWISECONV_PRELOAD_ROW(input_ptr, 4); + DEPTHWISECONV_PRELOAD_ROW(input_ptr, 5); + DEPTHWISECONV_PRELOAD_ROW(input_ptr, 6); + DEPTHWISECONV_PRELOAD_ROW(input_ptr, 7); + DEPTHWISECONV_PRELOAD_ROW(input_ptr, 8); + + for (; depth <= output_depth - 8; depth += 8) { + ConvKernel3x3FilterDepth8<4, 4, 2, 2>::Run( + input_ptr, input_depth, input_offset, input_row_size, filter_ptr, + filter_offset, bias_ptr, output_offset, output_multiplier, + output_shift, output_activation_min, output_activation_max, + output_ptr, output_depth, output_width); + + input_ptr += 8; + output_ptr += 8; + filter_ptr += 8; + bias_ptr += 8; + } + + input_data += 4 * 2 * input_depth; + output_data += 4 * output_depth; + } + +#undef DEPTHWISECONV_PRELOAD_ROW + + // Handle the rest of the right side. + // 4x2 at a time. + for (; out_x <= output_width - 2; out_x += 2) { + const int32* bias_ptr = bias_data; + const uint8* filter_ptr = filter_data; + + const uint8* input_ptr = input_data; + uint8* output_ptr = output_data; + + for (int depth = 0; depth <= output_depth - 8; depth += 8) { + ConvKernel3x3FilterDepth8<4, 2, 2, 2>::Run( + input_ptr, input_depth, input_offset, input_row_size, filter_ptr, + filter_offset, bias_ptr, output_offset, output_multiplier, + output_shift, output_activation_min, output_activation_max, + output_ptr, output_depth, output_width); + + input_ptr += 8; + output_ptr += 8; + filter_ptr += 8; + bias_ptr += 8; + } + + input_data += 2 * 2 * input_depth; + output_data += 2 * output_depth; + } + + // 4x1 at a time. 
+ for (; out_x < output_width; out_x++) { + const int32* bias_ptr = bias_data; + const uint8* filter_ptr = filter_data; + + const uint8* input_ptr = input_data; + uint8* output_ptr = output_data; + + for (int depth = 0; depth <= output_depth - 8; depth += 8) { + ConvKernel3x3FilterDepth8<4, 1, 2, 2>::Run( + input_ptr, input_depth, input_offset, input_row_size, filter_ptr, + filter_offset, bias_ptr, output_offset, output_multiplier, + output_shift, output_activation_min, output_activation_max, + output_ptr, output_depth, output_width); + + input_ptr += 8; + output_ptr += 8; + filter_ptr += 8; + bias_ptr += 8; + } + + input_data += 2 * input_depth; + output_data += output_depth; + } } }; template <> -struct ConvKernel3x3FilterDepth16<1, 2, 2> { - static void Run(const Filter3x3x16& filter, const uint8* input_ptr, - int input_depth, int32 input_offset, int input_row_width, - const int32* bias_ptr, int32 output_offset, - int32 output_multiplier, int output_shift, - int32 output_activation_min, int32 output_activation_max, - uint8* output_ptr, int output_depth, int output_width) { - // 16 depth accumulators for the 2 outputs. - Int32x16 acc0, acc1; - - // Accumulators for top filter. - acc0.v0 = vld1q_s32(bias_ptr); - acc0.v1 = vld1q_s32(bias_ptr + 4); - acc0.v2 = vld1q_s32(bias_ptr + 8); - acc0.v3 = vld1q_s32(bias_ptr + 12); - // Accumulators for bottom filter. - acc1.v0 = vld1q_s32(bias_ptr); - acc1.v1 = vld1q_s32(bias_ptr + 4); - acc1.v2 = vld1q_s32(bias_ptr + 8); - acc1.v3 = vld1q_s32(bias_ptr + 12); - - // Main multiply accumulate work. - { - // Load inputs for one filter row at a time. - Int16x16x3 input; - - // Do first row of top filter. - input = LoadInputRowDepth16(input_ptr, input_depth, input_offset, input); - acc0 = MultiplyAccumulateRowDepth16(acc0, filter.r0, input); - - // Do second row of top filter. 
- input = LoadInputRowDepth16(input_ptr + input_row_width, input_depth, - input_offset, input); - acc0 = MultiplyAccumulateRowDepth16(acc0, filter.r1, input); - - // Do third row of top filter. - input = LoadInputRowDepth16(input_ptr + 2 * input_row_width, input_depth, - input_offset, input); - acc0 = MultiplyAccumulateRowDepth16(acc0, filter.r2, input); - - // The inputs to third row of the top filter are also the inputs - // to first row of the bottom filter. - acc1 = MultiplyAccumulateRowDepth16(acc1, filter.r0, input); - - // Do second row of bottom filter. - input = LoadInputRowDepth16(input_ptr + 3 * input_row_width, input_depth, - input_offset, input); - acc1 = MultiplyAccumulateRowDepth16(acc1, filter.r1, input); - - // Do third row of bottom filter. - input = LoadInputRowDepth16(input_ptr + 4 * input_row_width, input_depth, - input_offset, input); - acc1 = MultiplyAccumulateRowDepth16(acc1, filter.r2, input); - } - - // Apply activation, downquantize and store. - int32x4_t output_offset_vec = vdupq_n_s32(output_offset); - int32x4_t output_activation_min_vec = vdupq_n_s32(output_activation_min); - int32x4_t output_activation_max_vec = vdupq_n_s32(output_activation_max); - - DownquantizeAndStoreDepth16(acc0, output_multiplier, output_shift, - output_offset_vec, output_activation_min_vec, - output_activation_max_vec, output_ptr); - - DownquantizeAndStoreDepth16(acc1, output_multiplier, output_shift, - output_offset_vec, output_activation_min_vec, - output_activation_max_vec, - output_ptr + output_depth * output_width); +struct ConvRow3x3FilterDepth8<8, 2, 2> { + static inline void Run(const uint8* input_data, int start_x, int start_y, + int input_depth, int input_width, int input_height, + int input_row_size, int32 input_offset, + const uint8* filter_data, int32 filter_offset, + const int32* bias_data, int32 output_offset, + int32 output_multiplier, int output_shift, + int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + int 
output_depth, int output_width, + uint8* shuffle_workspace) { + // Reuse 4 row kernels twice. + ConvRow3x3FilterDepth8<4, 2, 2>::Run( + input_data, start_x, start_y, input_depth, input_width, input_height, + input_row_size, input_offset, filter_data, filter_offset, bias_data, + output_offset, output_multiplier, output_shift, output_activation_min, + output_activation_max, output_data, output_depth, output_width, + shuffle_workspace); + + ConvRow3x3FilterDepth8<4, 2, 2>::Run( + input_data + 2 * 4 * input_row_size, start_x, start_y + 4, input_depth, + input_width, input_height, input_row_size, input_offset, filter_data, + filter_offset, bias_data, output_offset, output_multiplier, + output_shift, output_activation_min, output_activation_max, + output_data + 4 * output_depth * output_width, output_depth, + output_width, shuffle_workspace); } }; template <> -struct ConvKernel3x3FilterDepth16<1, 1> { - static void Run(const Filter3x3x16& filter, const uint8* input_ptr, - int input_depth, int32 input_offset, int input_row_width, - const int32* bias_ptr, int32 output_offset, - int32 output_multiplier, int output_shift, - int32 output_activation_min, int32 output_activation_max, - uint8* output_ptr, int output_depth, int output_width) { - Int32x16 acc; - acc.v0 = vld1q_s32(bias_ptr); - acc.v1 = vld1q_s32(bias_ptr + 4); - acc.v2 = vld1q_s32(bias_ptr + 8); - acc.v3 = vld1q_s32(bias_ptr + 12); - - // Main multiply accumulate work. - { - // Load inputs for one filter row at a time. - Int16x16x3 input; - - // Do first row. - input = LoadInputRowDepth16(input_ptr, input_depth, input_offset, input); - acc = MultiplyAccumulateRowDepth16(acc, filter.r0, input); - - // Do second row. - input = LoadInputRowDepth16(input_ptr + input_row_width, input_depth, - input_offset, input); - acc = MultiplyAccumulateRowDepth16(acc, filter.r1, input); - - // Do third row. 
- input = LoadInputRowDepth16(input_ptr + 2 * input_row_width, input_depth, - input_offset, input); - acc = MultiplyAccumulateRowDepth16(acc, filter.r2, input); - } - - // Apply activation, downquantize and store. - int32x4_t output_offset_vec = vdupq_n_s32(output_offset); - int32x4_t output_activation_min_vec = vdupq_n_s32(output_activation_min); - int32x4_t output_activation_max_vec = vdupq_n_s32(output_activation_max); - - DownquantizeAndStoreDepth16(acc, output_multiplier, output_shift, - output_offset_vec, output_activation_min_vec, - output_activation_max_vec, output_ptr); +struct ConvRow3x3FilterDepth8<8, 1, 1> { + // The buffer size of the shuffled input. + static inline constexpr int ShuffleWorkspaceSize() { return 64 * 10 * 10; } + + static inline void Run(const uint8* input_data, int start_x, int start_y, + int input_depth, int input_width, int input_height, + int input_row_size, int32 input_offset, + const uint8* filter_data, int32 filter_offset, + const int32* bias_data, int32 output_offset, + int32 output_multiplier, int output_shift, + int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + int output_depth, int output_width, + uint8* shuffle_workspace) { + int out_x = start_x; + // 8x8 at a time. + for (; out_x <= output_width - 8; out_x += 8) { + const int32* bias_ptr = bias_data; + const uint8* filter_ptr = filter_data; + + const uint8* input_ptr = input_data; + uint8* output_ptr = output_data; + + int depth = 0; + for (; depth <= output_depth - 64; depth += 64) { + // For a large input window (64x10x10) that is small enough to fit in L1 + // cache, copy the input into a separate buffer and run the kernel on + // this new buffer. This reduces the likelihood of cache misses when + // the kernel is loading input data. If the size of the input window + // changes, update the function ShuffleWorkspaceSize() with the new + // size. 
+ ShuffleInput(input_ptr, input_depth, input_width, input_height, 64, 10, + 10, shuffle_workspace); + const uint8* shuffled_ptr = shuffle_workspace; + + for (int micro_depth = 0; micro_depth <= 64 - 8; micro_depth += 8) { + ConvKernel3x3FilterDepth8<8, 8, 1, 1>::Run( + shuffled_ptr, 64, input_offset, 64 * 10, filter_ptr, + filter_offset, bias_ptr, output_offset, output_multiplier, + output_shift, output_activation_min, output_activation_max, + output_ptr, output_depth, output_width); + + shuffled_ptr += 8; + output_ptr += 8; + filter_ptr += 8; + bias_ptr += 8; + } + input_ptr += 64; + } + + for (; depth <= output_depth - 8; depth += 8) { + ConvKernel3x3FilterDepth8<8, 8, 1, 1>::Run( + input_ptr, input_depth, input_offset, input_row_size, filter_ptr, + filter_offset, bias_ptr, output_offset, output_multiplier, + output_shift, output_activation_min, output_activation_max, + output_ptr, output_depth, output_width); + + input_ptr += 8; + output_ptr += 8; + filter_ptr += 8; + bias_ptr += 8; + } + + input_data += 8 * input_depth; + output_data += 8 * output_depth; + } + + // Handle the rest of the right side by re-using 4 row kernels twice. 
+ ConvRow3x3FilterDepth8<4, 1, 1>::Run( + input_data, out_x, start_y, input_depth, input_width, input_height, + input_row_size, input_offset, filter_data, filter_offset, bias_data, + output_offset, output_multiplier, output_shift, output_activation_min, + output_activation_max, output_data, output_depth, output_width, + shuffle_workspace); + + ConvRow3x3FilterDepth8<4, 1, 1>::Run( + input_data + 4 * input_row_size, out_x, start_y + 4, input_depth, + input_width, input_height, input_row_size, input_offset, filter_data, + filter_offset, bias_data, output_offset, output_multiplier, + output_shift, output_activation_min, output_activation_max, + output_data + 4 * output_depth * output_width, output_depth, + output_width, shuffle_workspace); } }; -inline bool Fast3by3FilterKernelSupported(const Dims<4>& input_dims, - const Dims<4>& filter_dims, - int stride_width, int stride_height, - int pad_width, int pad_height, - int depth_multiplier, - const Dims<4>& output_dims) { +inline bool Fast3x3FilterKernelSupported(const Dims<4>& input_dims, + const Dims<4>& filter_dims, + int stride_width, int stride_height, + int pad_width, int pad_height, + int depth_multiplier, + const Dims<4>& output_dims) { const int input_height = ArraySize(input_dims, 2); const int input_width = ArraySize(input_dims, 1); const int input_depth = ArraySize(input_dims, 0); @@ -458,14 +4426,15 @@ inline bool Fast3by3FilterKernelSupported(const Dims<4>& input_dims, depth_multiplier == 1 && (stride_width == 1 || stride_width == 2) && (stride_height == 1 || stride_height == 2) && - pad_width == 0 && pad_height == 0 && (input_depth % 16) == 0; + (stride_width == stride_height) && pad_width == 0 && + pad_height == 0 && (input_depth % 8) == 0; if (!supported) { return false; } - // Handle case where padding is zero but type is not kValid. This would - // require special boundary case handling that is not supported yet. + // Handle case where padding is zero but padding type is not kValid. 
+ // This would require special boundary case handling that is not supported. const int out_x = output_width - 1; const int out_y = output_height - 1; @@ -481,7 +4450,7 @@ inline bool Fast3by3FilterKernelSupported(const Dims<4>& input_dims, return in_x_end <= input_width && in_y_end <= input_height; } -inline void DepthwiseConv3by3FilterDepth16( +inline void DepthwiseConv3x3Filter( const uint8* input_data, const Dims<4>& input_dims, int32 input_offset, const uint8* filter_data, const Dims<4>& filter_dims, int32 filter_offset, const int32* bias_data, const Dims<4>& bias_dims, int stride_width, @@ -500,241 +4469,109 @@ inline void DepthwiseConv3by3FilterDepth16( const int output_width = ArraySize(output_dims, 1); // Algorithm assumes below constraints. It is optimized for depth multiplier - // of 1, 3x3 filter, no padding, strides 1 and 2. + // of 1, 3x3 filter, no padding and strides 1 and 2. TFLITE_DCHECK(output_depth == input_depth * depth_multiplier); TFLITE_DCHECK(depth_multiplier == 1); TFLITE_DCHECK(filter_height == 3); TFLITE_DCHECK(filter_width == 3); TFLITE_DCHECK(pad_height == 0); TFLITE_DCHECK(pad_width == 0); - TFLITE_DCHECK(stride_width == 1 || stride_width == 2); TFLITE_DCHECK(stride_height == 1 || stride_height == 2); + TFLITE_DCHECK(stride_width == 1 || stride_width == 2); + TFLITE_DCHECK(stride_width == stride_height); - // The number of outputs to process in the main loop. 
- const int num_x_outputs = 1; - const int num_y_outputs = 2; - - const int input_row_width = output_depth * (input_width + 2 * pad_width); - const int input_batch_size = - input_row_width * (input_height + 2 * pad_height); + const int input_row_size = input_depth * (input_width + 2 * pad_width); + const int output_row_size = output_depth * output_width; + const int input_batch_size = input_row_size * (input_height + 2 * pad_height); const int output_batch_size = output_depth * output_width * output_height; - const int input_ptr_x_increment = input_depth * stride_width; - // Calculate extents of non-boundary loop. - int out_x_start = 0; - for (; out_x_start < input_width; out_x_start++) { - int in_x = (out_x_start * stride_width) - pad_width; - if (in_x >= 0) { - break; - } - } - int out_x_end = output_width - 1; - for (; out_x_end >= 0; out_x_end--) { - int in_x = (out_x_end * stride_width) - pad_width; - int in_x_end = in_x + filter_width + (num_x_outputs - 1) * stride_width; - if (in_x_end <= input_width) { - out_x_end++; - break; - } - } - int out_y_start = 0; - for (; out_y_start < input_height; out_y_start++) { - int in_y = (out_y_start * stride_height) - pad_height; - if (in_y >= 0) { - break; - } - } - int out_y_end = output_height - 1; - for (; out_y_end >= 0; out_y_end--) { - int in_y = (out_y_end * stride_height) - pad_height; - int in_y_end = in_y + filter_height + (num_y_outputs - 1) * stride_height; - if (in_y_end <= input_height) { - out_y_end++; - break; - } + using conv_row_func_t = decltype(&ConvRow3x3FilterDepth8<1, 1, 1>::Run); + conv_row_func_t conv_1_output_row = ConvRow3x3FilterDepth8<1, 1, 1>::Run; + conv_row_func_t conv_2_output_rows = ConvRow3x3FilterDepth8<2, 1, 1>::Run; + conv_row_func_t conv_4_output_rows = ConvRow3x3FilterDepth8<4, 1, 1>::Run; + conv_row_func_t conv_8_output_rows = ConvRow3x3FilterDepth8<8, 1, 1>::Run; + + if (stride_width == 2) { + conv_1_output_row = ConvRow3x3FilterDepth8<1, 2, 2>::Run; + conv_2_output_rows = 
ConvRow3x3FilterDepth8<2, 2, 2>::Run; + conv_4_output_rows = ConvRow3x3FilterDepth8<4, 2, 2>::Run; + conv_8_output_rows = ConvRow3x3FilterDepth8<8, 2, 2>::Run; } - using dot_product_func_t = - decltype(&ConvKernel3x3FilterDepth16<1, 2, 1>::Run); - dot_product_func_t dot_product_func = nullptr; + // Allocate maximum memory needed for shuffled input. + // TODO(mariewhite): The size of this workspace is small enough to be + // allocated on the stack. Eventually we will want to move it to the heap + // and have it allocated outside of this function, like the im2col_array used + // in gemmlowp. +#define DEPTHWISECONV_SHUFFLE_WORKSPACE_SIZE 10 * 10 * 64 + uint8 shuffle_workspace[DEPTHWISECONV_SHUFFLE_WORKSPACE_SIZE]; - if (stride_width == 1 && stride_height == 1) { - dot_product_func = ConvKernel3x3FilterDepth16<1, 2, 1>::Run; - } else { - dot_product_func = ConvKernel3x3FilterDepth16<1, 2, 2>::Run; - } + // Make sure the kernels using this buffer will not run out of bounds. + static_assert(ConvRow3x3FilterDepth8<8, 1, 1>::ShuffleWorkspaceSize() <= + DEPTHWISECONV_SHUFFLE_WORKSPACE_SIZE, + "Shuffle workspace size is too small."); + static_assert(ConvRow3x3FilterDepth8<4, 2, 2>::ShuffleWorkspaceSize() <= + DEPTHWISECONV_SHUFFLE_WORKSPACE_SIZE, + "Shuffle workspace size is too small."); - // Offsets for preloading inputs. 
- const int i0 = 0; - const int i1 = input_depth; - const int i2 = 2 * input_depth; - const int i3 = input_row_width; - const int i4 = input_row_width + input_depth; - const int i5 = input_row_width + 2 * input_depth; - const int i6 = 2 * input_row_width; - const int i7 = 2 * input_row_width + input_depth; - const int i8 = 2 * input_row_width + 2 * input_depth; - const int i9 = 3 * input_row_width; - const int i10 = 3 * input_row_width + input_depth; - const int i11 = 3 * input_row_width + 2 * input_depth; - const int i12 = 4 * input_row_width; - const int i13 = 4 * input_row_width + input_depth; - const int i14 = 4 * input_row_width + 2 * input_depth; +#undef DEPTHWISECONV_SHUFFLE_WORKSPACE_SIZE for (int b = 0; b < batches; ++b) { - const int32* bias_ptr = bias_data; - const uint8* filter_ptr = filter_data; - - const int in_batch_offset = b * input_batch_size; - const int out_batch_offset = b * output_batch_size; - - int depth = 0; - for (; depth <= output_depth - 16; depth += 16) { - Filter3x3x16 filter = - LoadFilterDepth16(filter_ptr, filter_offset, output_depth); - - // Handle 1x2 outputs. - int out_y = out_y_start; - for (; out_y < out_y_end; out_y += num_y_outputs) { - int out_x = out_x_start; - - int in_y_offset = - stride_height * input_row_width * (out_y + pad_height); - int in_x_offset = stride_width * input_depth * (out_x + pad_width); - - const uint8* input_ptr = - input_data + depth + in_x_offset + in_y_offset + in_batch_offset; - - // Preload inputs. If input depth is large, preload every value of the - // input for this depth range. Otherwise, preload only the first values - // of each row. 
- if (input_depth >= 32) { - preload_l1_keep(input_ptr + i0); - preload_l1_keep(input_ptr + i1); - preload_l1_keep(input_ptr + i2); - preload_l1_keep(input_ptr + i3); - preload_l1_keep(input_ptr + i4); - preload_l1_keep(input_ptr + i5); - preload_l1_keep(input_ptr + i6); - preload_l1_keep(input_ptr + i7); - preload_l1_keep(input_ptr + i8); - preload_l1_keep(input_ptr + i9); - preload_l1_keep(input_ptr + i10); - preload_l1_keep(input_ptr + i11); - - if (stride_height == 2) { - preload_l1_keep(input_ptr + i12); - preload_l1_keep(input_ptr + i13); - preload_l1_keep(input_ptr + i14); - } - } else { - preload_l1_keep(input_ptr + i0); - preload_l1_keep(input_ptr + i3); - preload_l1_keep(input_ptr + i6); - preload_l1_keep(input_ptr + i9); - - if (stride_height == 2) { - preload_l1_keep(input_ptr + i12); - } - } + const uint8* input_ptr = input_data + b * input_batch_size; + uint8* output_ptr = output_data + b * output_batch_size; - uint8* output_ptr = output_data + depth + (out_x * output_depth) + - (output_depth * output_width * out_y) + - out_batch_offset; - - for (; out_x < out_x_end; out_x += num_x_outputs) { - dot_product_func(filter, input_ptr, input_depth, input_offset, - input_row_width, bias_ptr, output_offset, - output_multiplier, output_shift, - output_activation_min, output_activation_max, - output_ptr, output_depth, output_width); - - input_ptr += input_ptr_x_increment * num_x_outputs; - output_ptr += output_depth * num_x_outputs; - - // Preload the next inputs depending on stride. 
- if (stride_width == 1) { - preload_l1_keep(input_ptr + i2); - preload_l1_keep(input_ptr + i5); - preload_l1_keep(input_ptr + i8); - preload_l1_keep(input_ptr + i11); - } else if (stride_width == 2) { - preload_l1_keep(input_ptr + i1); - preload_l1_keep(input_ptr + i2); - preload_l1_keep(input_ptr + i4); - preload_l1_keep(input_ptr + i5); - preload_l1_keep(input_ptr + i7); - preload_l1_keep(input_ptr + i8); - preload_l1_keep(input_ptr + i10); - preload_l1_keep(input_ptr + i11); - preload_l1_keep(input_ptr + i13); - preload_l1_keep(input_ptr + i14); - } - } + int out_y = 0; - // Handle the rest of the right side. - for (; out_x < output_width; out_x++) { - // This code path can only be reached if we're handling >1 x outputs - // at a time or support kSame padding. - } - } + // Handle 8 rows at a time. + for (; out_y <= output_height - 8; out_y += 8) { + conv_8_output_rows(input_ptr, 0, out_y, input_depth, input_width, + input_height, input_row_size, input_offset, + filter_data, filter_offset, bias_data, output_offset, + output_multiplier, output_shift, output_activation_min, + output_activation_max, output_ptr, output_depth, + output_width, shuffle_workspace); - // Handle the rest of the bottom side. 
- for (; out_y < output_height; out_y++) { - int out_x = out_x_start; - - int in_y_offset = - stride_height * input_row_width * (out_y + pad_height); - int in_x_offset = stride_width * input_depth * (out_x + pad_width); - - const uint8* input_ptr = - input_data + depth + in_x_offset + in_y_offset + in_batch_offset; - - if (input_depth >= 32) { - preload_l1_keep(input_ptr + i0); - preload_l1_keep(input_ptr + i1); - preload_l1_keep(input_ptr + i2); - preload_l1_keep(input_ptr + i3); - preload_l1_keep(input_ptr + i4); - preload_l1_keep(input_ptr + i5); - preload_l1_keep(input_ptr + i6); - preload_l1_keep(input_ptr + i7); - } else { - preload_l1_keep(input_ptr + i0); - preload_l1_keep(input_ptr + i3); - preload_l1_keep(input_ptr + i6); - } + input_ptr += 8 * stride_height * input_row_size; + output_ptr += 8 * output_row_size; + } - uint8* output_ptr = output_data + depth + (out_x * output_depth) + - (output_depth * output_width * out_y) + - out_batch_offset; + // Handle 4 rows at a time. + for (; out_y <= output_height - 4; out_y += 4) { + conv_4_output_rows(input_ptr, 0, out_y, input_depth, input_width, + input_height, input_row_size, input_offset, + filter_data, filter_offset, bias_data, output_offset, + output_multiplier, output_shift, output_activation_min, + output_activation_max, output_ptr, output_depth, + output_width, shuffle_workspace); - for (; out_x < output_width; out_x++) { - ConvKernel3x3FilterDepth16<1, 1>::Run( - filter, input_ptr, input_depth, input_offset, input_row_width, - bias_ptr, output_offset, output_multiplier, output_shift, - output_activation_min, output_activation_max, output_ptr, - output_depth, output_width); + input_ptr += 4 * stride_height * input_row_size; + output_ptr += 4 * output_row_size; + } - input_ptr += input_ptr_x_increment; - output_ptr += output_depth; - - if (stride_width == 1) { - preload_l1_keep(input_ptr + i2); - preload_l1_keep(input_ptr + i5); - preload_l1_keep(input_ptr + i8); - } else if (stride_width == 2) { - 
preload_l1_keep(input_ptr + i1); - preload_l1_keep(input_ptr + i2); - preload_l1_keep(input_ptr + i4); - preload_l1_keep(input_ptr + i5); - preload_l1_keep(input_ptr + i7); - preload_l1_keep(input_ptr + i8); - } - } - } - filter_ptr += 16; - bias_ptr += 16; + // Handle 2 rows at a time. + for (; out_y <= output_height - 2; out_y += 2) { + conv_2_output_rows(input_ptr, 0, out_y, input_depth, input_width, + input_height, input_row_size, input_offset, + filter_data, filter_offset, bias_data, output_offset, + output_multiplier, output_shift, output_activation_min, + output_activation_max, output_ptr, output_depth, + output_width, shuffle_workspace); + + input_ptr += 2 * stride_height * input_row_size; + output_ptr += 2 * output_row_size; + } + + // Handle one row at a time. + for (; out_y < output_height; out_y++) { + conv_1_output_row(input_ptr, 0, out_y, input_depth, input_width, + input_height, input_row_size, input_offset, filter_data, + filter_offset, bias_data, output_offset, + output_multiplier, output_shift, output_activation_min, + output_activation_max, output_ptr, output_depth, + output_width, shuffle_workspace); + + input_ptr += stride_height * input_row_size; + output_ptr += output_row_size; } } } diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h index 3642da311cbe938f672d64338a335181764c4175..d27c6ccf3d404cb18460876fc2961d8447d5f819 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h @@ -30,6 +30,7 @@ limitations under the License. 
#include "fixedpoint/fixedpoint.h" #include "public/gemmlowp.h" #include "tensorflow/contrib/lite/kernels/internal/common.h" +#include "tensorflow/contrib/lite/kernels/internal/quantization_util.h" #include "tensorflow/contrib/lite/kernels/internal/round.h" #include "tensorflow/contrib/lite/kernels/internal/types.h" @@ -554,88 +555,261 @@ inline void GEMVForLstmCellWithSymmetricRange( // exercises it). We just guard our assumptions about size evenness with // the following assertions. TFLITE_DCHECK(!(output_size % 4)); - TFLITE_DCHECK(!(input_size % 8)); + TFLITE_DCHECK(!(input_size % 64)); const int32* bias_ptr = bias_data; int16* output_ptr = output_data; const uint8x16_t signbit = vdupq_n_u8(0x80); for (int in = 0; in < input_size; in += 32) { optimized_ops_preload_l1_keep(input_data + in); } + const int left_shift = accum_shift > 0 ? accum_shift : 0; + const int right_shift = accum_shift > 0 ? 0 : -accum_shift; for (int out = 0; out < output_size; out += 4) { - const uint8* weights_ptr_0 = weights_data + out * input_size; - const uint8* weights_ptr_1 = weights_ptr_0 + 1 * input_size; - const uint8* weights_ptr_2 = weights_ptr_0 + 2 * input_size; - const uint8* weights_ptr_3 = weights_ptr_0 + 3 * input_size; + // Load the bias values + int32x4_t bias_vec = vld1q_s32(bias_ptr); + bias_ptr += 4; - int32x4_t acc_0 = vdupq_n_s32(0); - int32x4_t acc_1 = vdupq_n_s32(0); - int32x4_t acc_2 = vdupq_n_s32(0); - int32x4_t acc_3 = vdupq_n_s32(0); - int in = 0; - const int kReadAhead = 256; - // Handle 16 levels of depth at a time. 
- for (; in < input_size; in += 16) { - int8x16_t weights_val_0 = - vreinterpretq_s8_u8(veorq_u8(signbit, vld1q_u8(weights_ptr_0))); - int8x16_t weights_val_1 = - vreinterpretq_s8_u8(veorq_u8(signbit, vld1q_u8(weights_ptr_1))); - int8x16_t weights_val_2 = - vreinterpretq_s8_u8(veorq_u8(signbit, vld1q_u8(weights_ptr_2))); - int8x16_t weights_val_3 = - vreinterpretq_s8_u8(veorq_u8(signbit, vld1q_u8(weights_ptr_3))); - int8x16_t input_val = - vreinterpretq_s8_u8(veorq_u8(signbit, vld1q_u8(input_data + in))); - int16x8_t acc16_0 = - vmull_s8(vget_low_s8(weights_val_0), vget_low_s8(input_val)); - int16x8_t acc16_1 = - vmull_s8(vget_low_s8(weights_val_1), vget_low_s8(input_val)); - int16x8_t acc16_2 = - vmull_s8(vget_low_s8(weights_val_2), vget_low_s8(input_val)); - int16x8_t acc16_3 = - vmull_s8(vget_low_s8(weights_val_3), vget_low_s8(input_val)); - acc16_0 = vmlal_s8(acc16_0, vget_high_s8(weights_val_0), - vget_high_s8(input_val)); - acc16_1 = vmlal_s8(acc16_1, vget_high_s8(weights_val_1), - vget_high_s8(input_val)); - acc16_2 = vmlal_s8(acc16_2, vget_high_s8(weights_val_2), - vget_high_s8(input_val)); - acc16_3 = vmlal_s8(acc16_3, vget_high_s8(weights_val_3), - vget_high_s8(input_val)); - acc_0 = vpadalq_s16(acc_0, acc16_0); - acc_1 = vpadalq_s16(acc_1, acc16_1); - acc_2 = vpadalq_s16(acc_2, acc16_2); - acc_3 = vpadalq_s16(acc_3, acc16_3); - weights_ptr_0 += 16; - weights_ptr_1 += 16; - weights_ptr_2 += 16; - weights_ptr_3 += 16; - optimized_ops_preload_l1_stream(weights_ptr_0 + kReadAhead); - optimized_ops_preload_l1_stream(weights_ptr_1 + kReadAhead); - optimized_ops_preload_l1_stream(weights_ptr_2 + kReadAhead); - optimized_ops_preload_l1_stream(weights_ptr_3 + kReadAhead); + // Clear accumulators. We use 2 accumulator registers per row, + // for 4 rows. row_accumRN is the N-th accumulator for row R. 
+ int32x4_t row_accum00 = vdupq_n_s32(0); + int32x4_t row_accum01 = vdupq_n_s32(0); + int32x4_t row_accum10 = vdupq_n_s32(0); + int32x4_t row_accum11 = vdupq_n_s32(0); + int32x4_t row_accum20 = vdupq_n_s32(0); + int32x4_t row_accum21 = vdupq_n_s32(0); + int32x4_t row_accum30 = vdupq_n_s32(0); + int32x4_t row_accum31 = vdupq_n_s32(0); + + // kReadAhead parametrizes how far ahead we prefetch weights into L1 cache. + const int kReadAhead = 512; + // Prefetch the first weights values. + for (int k = 0; k < kReadAhead; k += 64) { + optimized_ops_preload_l1_stream(weights_data + (out + 0) * input_size + + k); + optimized_ops_preload_l1_stream(weights_data + (out + 1) * input_size + + k); + optimized_ops_preload_l1_stream(weights_data + (out + 2) * input_size + + k); + optimized_ops_preload_l1_stream(weights_data + (out + 3) * input_size + + k); + } + // Loop along the rows, handling 64 bytes per iteration because that's + // cache line size on most current ARM-architecture CPUs. + for (int in = 0; in < input_size; in += 64) { + // Prefetch some future weights values. + optimized_ops_preload_l1_stream(weights_data + (out + 0) * input_size + + in + kReadAhead); + optimized_ops_preload_l1_stream(weights_data + (out + 1) * input_size + + in + kReadAhead); + optimized_ops_preload_l1_stream(weights_data + (out + 2) * input_size + + in + kReadAhead); + optimized_ops_preload_l1_stream(weights_data + (out + 3) * input_size + + in + kReadAhead); + + // We will use 2 local 16-bit accumulators per row, for 2 rows. + // See below (*) for the rationale of processing only 2 rows at a time. + // local_accumRN is the N-th local accumulator for row R. + int16x8_t local_accum00; + int16x8_t local_accum01; + int16x8_t local_accum10; + int16x8_t local_accum11; + + // Load 64 bytes of input activations values. Convert to signed int8 + // by flipping the sign bit (i.e. subtracting 128, the required + // zero_point value). 
+ int8x16_t input0 = vreinterpretq_s8_u8( + veorq_u8(signbit, vld1q_u8(input_data + in + 16 * 0))); + int8x16_t input1 = vreinterpretq_s8_u8( + veorq_u8(signbit, vld1q_u8(input_data + in + 16 * 1))); + int8x16_t input2 = vreinterpretq_s8_u8( + veorq_u8(signbit, vld1q_u8(input_data + in + 16 * 2))); + int8x16_t input3 = vreinterpretq_s8_u8( + veorq_u8(signbit, vld1q_u8(input_data + in + 16 * 3))); + + // Beginning of the core accumulation. Notice how while we have 4 + // rows to process, this code is taking care of only 2 rows at a time, + // thus being divided into two parts looking similar ("Rows 0 and 1" and + // "Rows 2 and 3"). + // + // (*) The rationale for handling only 2 rows at a time is to avoid + // cache aliasing issues on 4-way set-associative L1-cache CPUs, such + // as Cortex-A53. With sufficiently large, power-of-two matrix dimensions, + // we may find ourselves in a situation where rows alias each other in + // the L1 cache, and moreover may also mutually alias with the input + // activations. If we try to load 4 rows at a time, together with the + // input activations, that may be 5 mutually-aliasing vectors, resulting + // in constant mutual eviction from L1 cache. Handling 2 rows at a time + // here largely mitigates these issues, and seems at least to be very + // effective on Cortex-A53: + // Before After + // big (Cortex-A73) 2.85 ms 2.85 ms + // little (Cortex-A53) 11.0 ms 5.16 ms + + // Rows 0 and 1: + // Load 64 bytes of weights values from each row. Convert to signed int8 + // by flipping the sign bit (i.e. subtracting 128, the required + // zero_point value). 
+ int8x16_t weights00 = vreinterpretq_s8_u8(veorq_u8( + signbit, + vld1q_u8(weights_data + (out + 0) * input_size + in + 16 * 0))); + int8x16_t weights01 = vreinterpretq_s8_u8(veorq_u8( + signbit, + vld1q_u8(weights_data + (out + 0) * input_size + in + 16 * 1))); + int8x16_t weights02 = vreinterpretq_s8_u8(veorq_u8( + signbit, + vld1q_u8(weights_data + (out + 0) * input_size + in + 16 * 2))); + int8x16_t weights03 = vreinterpretq_s8_u8(veorq_u8( + signbit, + vld1q_u8(weights_data + (out + 0) * input_size + in + 16 * 3))); + int8x16_t weights10 = vreinterpretq_s8_u8(veorq_u8( + signbit, + vld1q_u8(weights_data + (out + 1) * input_size + in + 16 * 0))); + int8x16_t weights11 = vreinterpretq_s8_u8(veorq_u8( + signbit, + vld1q_u8(weights_data + (out + 1) * input_size + in + 16 * 1))); + int8x16_t weights12 = vreinterpretq_s8_u8(veorq_u8( + signbit, + vld1q_u8(weights_data + (out + 1) * input_size + in + 16 * 2))); + int8x16_t weights13 = vreinterpretq_s8_u8(veorq_u8( + signbit, + vld1q_u8(weights_data + (out + 1) * input_size + in + 16 * 3))); + // Multiply-accumulate into local 16-bit accumulators. + // We can accumulate two products without overflow because weights are + // required to never be -128, so each product is at most 127^2 in absolute + // value. 
+ local_accum00 = vmull_s8(vget_low_s8(weights00), vget_low_s8(input0)); + local_accum01 = vmull_s8(vget_low_s8(weights01), vget_low_s8(input1)); + local_accum10 = vmull_s8(vget_low_s8(weights10), vget_low_s8(input0)); + local_accum11 = vmull_s8(vget_low_s8(weights11), vget_low_s8(input1)); + local_accum00 = vmlal_s8(local_accum00, vget_high_s8(weights00), + vget_high_s8(input0)); + local_accum01 = vmlal_s8(local_accum01, vget_high_s8(weights01), + vget_high_s8(input1)); + local_accum10 = vmlal_s8(local_accum10, vget_high_s8(weights10), + vget_high_s8(input0)); + local_accum11 = vmlal_s8(local_accum11, vget_high_s8(weights11), + vget_high_s8(input1)); + // Pairwise add and accumulate into 32-bit accumulators + row_accum00 = vpadalq_s16(row_accum00, local_accum00); + row_accum01 = vpadalq_s16(row_accum01, local_accum01); + row_accum10 = vpadalq_s16(row_accum10, local_accum10); + row_accum11 = vpadalq_s16(row_accum11, local_accum11); + // Multiply-accumulate into local 16-bit accumulators. + // We can accumulate two products without overflow because weights are + // required to never be -128, so each product is at most 127^2 in absolute + // value. 
+ local_accum00 = vmull_s8(vget_low_s8(weights02), vget_low_s8(input2)); + local_accum01 = vmull_s8(vget_low_s8(weights03), vget_low_s8(input3)); + local_accum10 = vmull_s8(vget_low_s8(weights12), vget_low_s8(input2)); + local_accum11 = vmull_s8(vget_low_s8(weights13), vget_low_s8(input3)); + local_accum00 = vmlal_s8(local_accum00, vget_high_s8(weights02), + vget_high_s8(input2)); + local_accum01 = vmlal_s8(local_accum01, vget_high_s8(weights03), + vget_high_s8(input3)); + local_accum10 = vmlal_s8(local_accum10, vget_high_s8(weights12), + vget_high_s8(input2)); + local_accum11 = vmlal_s8(local_accum11, vget_high_s8(weights13), + vget_high_s8(input3)); + // Pairwise add and accumulate into 32-bit accumulators + row_accum00 = vpadalq_s16(row_accum00, local_accum00); + row_accum01 = vpadalq_s16(row_accum01, local_accum01); + row_accum10 = vpadalq_s16(row_accum10, local_accum10); + row_accum11 = vpadalq_s16(row_accum11, local_accum11); + + // Rows 2 and 3: + // Load 64 bytes of weights values from each row. Convert to signed int8 + // by flipping the sign bit (i.e. subtracting 128, the required + // zero_point value). 
+ weights00 = vreinterpretq_s8_u8(veorq_u8( + signbit, + vld1q_u8(weights_data + (out + 2) * input_size + in + 16 * 0))); + weights01 = vreinterpretq_s8_u8(veorq_u8( + signbit, + vld1q_u8(weights_data + (out + 2) * input_size + in + 16 * 1))); + weights02 = vreinterpretq_s8_u8(veorq_u8( + signbit, + vld1q_u8(weights_data + (out + 2) * input_size + in + 16 * 2))); + weights03 = vreinterpretq_s8_u8(veorq_u8( + signbit, + vld1q_u8(weights_data + (out + 2) * input_size + in + 16 * 3))); + weights10 = vreinterpretq_s8_u8(veorq_u8( + signbit, + vld1q_u8(weights_data + (out + 3) * input_size + in + 16 * 0))); + weights11 = vreinterpretq_s8_u8(veorq_u8( + signbit, + vld1q_u8(weights_data + (out + 3) * input_size + in + 16 * 1))); + weights12 = vreinterpretq_s8_u8(veorq_u8( + signbit, + vld1q_u8(weights_data + (out + 3) * input_size + in + 16 * 2))); + weights13 = vreinterpretq_s8_u8(veorq_u8( + signbit, + vld1q_u8(weights_data + (out + 3) * input_size + in + 16 * 3))); + // Multiply-accumulate into local 16-bit accumulators. + // We can accumulate two products without overflow because weights are + // required to never be -128, so each product is at most 127^2 in absolute + // value. 
+ local_accum00 = vmull_s8(vget_low_s8(weights00), vget_low_s8(input0)); + local_accum01 = vmull_s8(vget_low_s8(weights01), vget_low_s8(input1)); + local_accum10 = vmull_s8(vget_low_s8(weights10), vget_low_s8(input0)); + local_accum11 = vmull_s8(vget_low_s8(weights11), vget_low_s8(input1)); + local_accum00 = vmlal_s8(local_accum00, vget_high_s8(weights00), + vget_high_s8(input0)); + local_accum01 = vmlal_s8(local_accum01, vget_high_s8(weights01), + vget_high_s8(input1)); + local_accum10 = vmlal_s8(local_accum10, vget_high_s8(weights10), + vget_high_s8(input0)); + local_accum11 = vmlal_s8(local_accum11, vget_high_s8(weights11), + vget_high_s8(input1)); + // Pairwise add and accumulate into 32-bit accumulators + row_accum20 = vpadalq_s16(row_accum20, local_accum00); + row_accum21 = vpadalq_s16(row_accum21, local_accum01); + row_accum30 = vpadalq_s16(row_accum30, local_accum10); + row_accum31 = vpadalq_s16(row_accum31, local_accum11); + // Multiply-accumulate into local 16-bit accumulators. + // We can accumulate two products without overflow because weights are + // required to never be -128, so each product is at most 127^2 in absolute + // value. 
+ local_accum00 = vmull_s8(vget_low_s8(weights02), vget_low_s8(input2)); + local_accum01 = vmull_s8(vget_low_s8(weights03), vget_low_s8(input3)); + local_accum10 = vmull_s8(vget_low_s8(weights12), vget_low_s8(input2)); + local_accum11 = vmull_s8(vget_low_s8(weights13), vget_low_s8(input3)); + local_accum00 = vmlal_s8(local_accum00, vget_high_s8(weights02), + vget_high_s8(input2)); + local_accum01 = vmlal_s8(local_accum01, vget_high_s8(weights03), + vget_high_s8(input3)); + local_accum10 = vmlal_s8(local_accum10, vget_high_s8(weights12), + vget_high_s8(input2)); + local_accum11 = vmlal_s8(local_accum11, vget_high_s8(weights13), + vget_high_s8(input3)); + // Pairwise add and accumulate into 32-bit accumulators + row_accum20 = vpadalq_s16(row_accum20, local_accum00); + row_accum21 = vpadalq_s16(row_accum21, local_accum01); + row_accum30 = vpadalq_s16(row_accum30, local_accum10); + row_accum31 = vpadalq_s16(row_accum31, local_accum11); } + + row_accum00 = vaddq_s32(row_accum00, row_accum01); + row_accum10 = vaddq_s32(row_accum10, row_accum11); + row_accum20 = vaddq_s32(row_accum20, row_accum21); + row_accum30 = vaddq_s32(row_accum30, row_accum31); // Horizontally reduce accumulators int32x2_t pairwise_reduced_acc_0, pairwise_reduced_acc_1, pairwise_reduced_acc_2, pairwise_reduced_acc_3; pairwise_reduced_acc_0 = - vpadd_s32(vget_low_s32(acc_0), vget_high_s32(acc_0)); + vpadd_s32(vget_low_s32(row_accum00), vget_high_s32(row_accum00)); pairwise_reduced_acc_1 = - vpadd_s32(vget_low_s32(acc_1), vget_high_s32(acc_1)); + vpadd_s32(vget_low_s32(row_accum10), vget_high_s32(row_accum10)); pairwise_reduced_acc_2 = - vpadd_s32(vget_low_s32(acc_2), vget_high_s32(acc_2)); + vpadd_s32(vget_low_s32(row_accum20), vget_high_s32(row_accum20)); pairwise_reduced_acc_3 = - vpadd_s32(vget_low_s32(acc_3), vget_high_s32(acc_3)); + vpadd_s32(vget_low_s32(row_accum30), vget_high_s32(row_accum30)); const int32x2_t reduced_lo = vpadd_s32(pairwise_reduced_acc_0, pairwise_reduced_acc_1); const 
int32x2_t reduced_hi = vpadd_s32(pairwise_reduced_acc_2, pairwise_reduced_acc_3); int32x4_t reduced = vcombine_s32(reduced_lo, reduced_hi); // Add bias values. - int32x4_t bias_vec = vld1q_s32(bias_ptr); - bias_ptr += 4; reduced = vaddq_s32(reduced, bias_vec); - int left_shift = accum_shift > 0 ? accum_shift : 0; - int right_shift = accum_shift > 0 ? 0 : -accum_shift; reduced = vshlq_s32(reduced, vdupq_n_s32(left_shift)); // Multiply by the fixed-point multiplier. reduced = vqrdmulhq_n_s32(reduced, accum_multiplier); @@ -962,7 +1136,7 @@ inline void FullyConnected( #ifdef GEMMLOWP_NEON if (batches == 1 && input_offset == -128 && output_activation_min == -32768 && output_activation_max == 32767) { - if (filter_offset == -128 && !(output_depth % 4) && !(accum_depth % 16)) { + if (filter_offset == -128 && !(output_depth % 4) && !(accum_depth % 64)) { GEMVForLstmCellWithSymmetricRange(input_data, input_dims, filter_data, filter_dims, bias_data_int32, bias_dims, output_multiplier, -output_shift, @@ -1029,6 +1203,142 @@ void FullyConnected(const uint8* input_data, const Dims<4>& input_dims, output_activation_max, output_data, output_dims, gemm_context); } +inline void ExperimentalShuffledFullyConnected( + const uint8* input_data, const Dims<4>& input_dims, + const uint8* shuffled_weights_data, const Dims<4>& weights_dims, + const int32* bias_data, const Dims<4>& bias_dims, int32 output_multiplier, + int output_shift, int32 output_activation_min, int32 output_activation_max, + int16* output_data, const Dims<4>& output_dims, + gemmlowp::GemmContext* gemm_context) { + gemmlowp::ScopedProfilingLabel label( + "ExperimentalShuffledFullyConnected/8bit"); + (void)gemm_context; // only used in optimized code. 
+ TFLITE_DCHECK_EQ(output_activation_min, -32768); + TFLITE_DCHECK_EQ(output_activation_max, 32767); + // TODO(benoitjacob): This really should be: + // const int batches = ArraySize(output_dims, 1); + // but the current --variable_batch hack consists in overwriting the 3rd + // dimension with the runtime batch size, as we don't keep track for each + // array of which dimension is the batch dimension in it. + const int batches = ArraySize(output_dims, 1) * ArraySize(output_dims, 2) * + ArraySize(output_dims, 3); + const int output_depth = MatchingArraySize(weights_dims, 1, output_dims, 0); + const int accum_depth = ArraySize(weights_dims, 0); + TFLITE_DCHECK(IsPackedWithoutStrides(input_dims)); + TFLITE_DCHECK(IsPackedWithoutStrides(weights_dims)); + // The experimental shuffling is an optimization for matrix*vector product. + // We aren't interested in supporting non-matrix*vector-product cases, i.e. + // batches>1. + TFLITE_DCHECK_EQ(batches, 1); + // Shuffled weights have had their sign bit (0x80) pre-flipped (xor'd) + // so that just reinterpreting them as int8 values is equivalent to + // subtracting 128 from them, thus implementing for free the subtraction of + // the zero_point value 128. + const int8* shuffled_weights_ptr = + reinterpret_cast(shuffled_weights_data); +#if defined USE_NEON + // We'll only need to xor signbit to the input activation values, as + // that xor-ing is pre-built into the shuffled weights values. + const uint8x16_t signbit = vdupq_n_u8(0x80); + const int right_shift = output_shift > 0 ? output_shift : 0; + const int left_shift = output_shift > 0 ? 0 : -output_shift; + for (int c = 0; c < output_depth; c += 4) { + // Accumulation loop. 
+ int32x4_t row_accum0 = vdupq_n_s32(0); + int32x4_t row_accum1 = vdupq_n_s32(0); + int32x4_t row_accum2 = vdupq_n_s32(0); + int32x4_t row_accum3 = vdupq_n_s32(0); + for (int d = 0; d < accum_depth; d += 16) { + int8x16_t weights0 = vld1q_s8(shuffled_weights_ptr + 0); + int8x16_t weights1 = vld1q_s8(shuffled_weights_ptr + 16); + int8x16_t weights2 = vld1q_s8(shuffled_weights_ptr + 32); + int8x16_t weights3 = vld1q_s8(shuffled_weights_ptr + 48); + shuffled_weights_ptr += 64; + int8x16_t input = + vreinterpretq_s8_u8(veorq_u8(signbit, vld1q_u8(input_data + d))); + int16x8_t local_accum0 = + vmull_s8(vget_low_s8(weights0), vget_low_s8(input)); + int16x8_t local_accum1 = + vmull_s8(vget_low_s8(weights1), vget_low_s8(input)); + int16x8_t local_accum2 = + vmull_s8(vget_low_s8(weights2), vget_low_s8(input)); + int16x8_t local_accum3 = + vmull_s8(vget_low_s8(weights3), vget_low_s8(input)); + local_accum0 = + vmlal_s8(local_accum0, vget_high_s8(weights0), vget_high_s8(input)); + local_accum1 = + vmlal_s8(local_accum1, vget_high_s8(weights1), vget_high_s8(input)); + local_accum2 = + vmlal_s8(local_accum2, vget_high_s8(weights2), vget_high_s8(input)); + local_accum3 = + vmlal_s8(local_accum3, vget_high_s8(weights3), vget_high_s8(input)); + row_accum0 = vpadalq_s16(row_accum0, local_accum0); + row_accum1 = vpadalq_s16(row_accum1, local_accum1); + row_accum2 = vpadalq_s16(row_accum2, local_accum2); + row_accum3 = vpadalq_s16(row_accum3, local_accum3); + } + // Horizontally reduce accumulators + int32x2_t pairwise_reduced_acc_0, pairwise_reduced_acc_1, + pairwise_reduced_acc_2, pairwise_reduced_acc_3; + pairwise_reduced_acc_0 = + vpadd_s32(vget_low_s32(row_accum0), vget_high_s32(row_accum0)); + pairwise_reduced_acc_1 = + vpadd_s32(vget_low_s32(row_accum1), vget_high_s32(row_accum1)); + pairwise_reduced_acc_2 = + vpadd_s32(vget_low_s32(row_accum2), vget_high_s32(row_accum2)); + pairwise_reduced_acc_3 = + vpadd_s32(vget_low_s32(row_accum3), vget_high_s32(row_accum3)); + const 
int32x2_t reduced_lo = + vpadd_s32(pairwise_reduced_acc_0, pairwise_reduced_acc_1); + const int32x2_t reduced_hi = + vpadd_s32(pairwise_reduced_acc_2, pairwise_reduced_acc_3); + int32x4_t reduced = vcombine_s32(reduced_lo, reduced_hi); + // Add bias values. + int32x4_t bias_vec = vld1q_s32(bias_data + c); + reduced = vaddq_s32(reduced, bias_vec); + reduced = vshlq_s32(reduced, vdupq_n_s32(left_shift)); + // Multiply by the fixed-point multiplier. + reduced = vqrdmulhq_n_s32(reduced, output_multiplier); + // Rounding-shift-right. + using gemmlowp::RoundingDivideByPOT; + reduced = RoundingDivideByPOT(reduced, right_shift); + // Narrow values down to 16 bit signed. + const int16x4_t res16 = vqmovn_s32(reduced); + vst1_s16(output_data + c, res16); + } +#else + for (int c = 0; c < output_depth; c += 4) { + // Internal accumulation. + // Initialize accumulator with the bias-value. + int32 accum[4] = {0}; + // Accumulation loop. + for (int d = 0; d < accum_depth; d += 16) { + for (int i = 0; i < 4; i++) { + for (int j = 0; j < 16; j++) { + int8 input_val = input_data[d + j] - 128; + int8 weights_val = *shuffled_weights_ptr++; + accum[i] += weights_val * input_val; + } + } + } + for (int i = 0; i < 4; i++) { + // Add bias value + int acc = accum[i] + bias_data[c + i]; + // Down-scale the final int32 accumulator to the scale used by our + // (16-bit, typically 3 integer bits) fixed-point format. The quantized + // multiplier and shift here have been pre-computed offline + // (e.g. by toco). + acc = + MultiplyByQuantizedMultiplier(acc, output_multiplier, -output_shift); + // Saturate, cast to int16, and store to output array. 
+ acc = std::max(acc, output_activation_min); + acc = std::min(acc, output_activation_max); + output_data[c + i] = acc; + } + } +#endif +} + template inline void ExtractPatchIntoBufferColumn( const Dims<4>& input_dims, int w, int h, int b, int kheight, int kwidth, @@ -2732,6 +3042,62 @@ void Concatenation(int concat_dim, const Scalar* const* input_data, } } +// TODO(prabhumk): This is the same as the reference implementation. +// TODO(prabhumk): The quantized implementation of concatentation isn't fully +// quantized as it takes scale as a floating point value. This should be fixed +// when optimizng this routine further. +inline void Concatenation(int concat_dim, const uint8* const* input_data, + const Dims<4>* const* input_dims, + const int32* input_zeropoint, + const float* input_scale, int inputs_count, + uint8* output_data, const Dims<4>& output_dims, + const int32 output_zeropoint, + const float output_scale) { + // The arguments input_zeropoint and input_scale are expected to be an array + // that have the quantization paramaters for all the inputs to the concat + // operator. 
+ gemmlowp::ScopedProfilingLabel label("Concatenation"); + TFLITE_DCHECK_GT(inputs_count, 1); + int concat_size = 0; + for (int i = 0; i < inputs_count; i++) { + for (int j = 0; j < 4; j++) { + if (j != concat_dim) { + MatchingArraySize(*input_dims[i], j, output_dims, j); + } + } + concat_size += ArraySize(*input_dims[i], concat_dim); + } + TFLITE_DCHECK_EQ(concat_size, ArraySize(output_dims, concat_dim)); + int outer_size = 1; + for (int i = concat_dim + 1; i < 4; i++) { + outer_size *= output_dims.sizes[i]; + } + const float inverse_output_scale = 1.f / output_scale; + uint8* output_ptr = output_data; + for (int k = 0; k < outer_size; k++) { + for (int i = 0; i < inputs_count; ++i) { + const int copy_size = + input_dims[i]->sizes[concat_dim] * input_dims[i]->strides[concat_dim]; + const uint8* input_ptr = input_data[i] + k * copy_size; + if (input_zeropoint[i] == output_zeropoint && + input_scale[i] == output_scale) { + memcpy(output_ptr, input_ptr, copy_size); + } else { + const float scale = input_scale[i] * inverse_output_scale; + const float bias = -input_zeropoint[i] * scale; + for (int j = 0; j < copy_size; ++j) { + const int32_t value = + static_cast(round(input_ptr[j] * scale + bias)) + + output_zeropoint; + output_ptr[j] = + static_cast(std::max(std::min(255, value), 0)); + } + } + output_ptr += copy_size; + } + } +} + template void DepthConcatenation(const Scalar* const* input_data, const Dims<4>* const* input_dims, int inputs_count, @@ -3708,7 +4074,7 @@ inline void Softmax(const uint8* input_data, const Dims<4>& input_dims, using FixedPointAccum = gemmlowp::FixedPoint; using FixedPoint0 = gemmlowp::FixedPoint; - gemmlowp::ScopedProfilingLabel label("Softmax/8bit"); +gemmlowp::ScopedProfilingLabel label("Softmax/8bit"); const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); const int height = MatchingArraySize(input_dims, 2, output_dims, 2); const int width = MatchingArraySize(input_dims, 1, output_dims, 1); @@ -3906,6 +4272,7 @@ inline 
void Softmax(const uint8* input_data, const Dims<4>& input_dims, // optimized yet. inline void LogSoftmax(const float* input_data, const Dims<4>& input_dims, float* output_data, const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("LogSoftmax"); const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); const int height = MatchingArraySize(input_dims, 2, output_dims, 2); const int width = MatchingArraySize(input_dims, 1, output_dims, 1); @@ -3939,6 +4306,94 @@ inline void LogSoftmax(const float* input_data, const Dims<4>& input_dims, } } +// Currently just a copy of the reference code. +inline void LogSoftmax(const uint8* input_data, const Dims<4>& input_dims, + int32 input_multiplier, int32 input_left_shift, + int32 reverse_scaling_divisor, + int32 reverse_scaling_right_shift, int diff_min, + uint8* output_data, const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("LogSoftmax/Uint8"); + // The representation chosen for the input to the exp() function is Q5.26. + // We need to leave extra space since values that we skip might be as large as + // -32 before multiplying by input_beta_multiplier, and therefore as large as + // -16 afterwards. Note that exp(-8) is definitely not insignificant to + // accumulation, but exp(-16) definitely is. 
+ static constexpr int kScaledDiffIntegerBits = 5; + static constexpr int kAccumulationIntegerBits = 12; + static constexpr int kOutputIntegerBits = 4; + using FixedPointScaledDiff = + gemmlowp::FixedPoint; + using FixedPointAccum = gemmlowp::FixedPoint; + using FixedPoint0 = gemmlowp::FixedPoint; + + const int outer_size = MatchingFlatSizeSkipDim(input_dims, 0, output_dims); + const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); + + for (int i = 0; i < outer_size; ++i) { + uint8 max_in_row = 0; + for (int c = 0; c < depth; ++c) { + max_in_row = std::max(max_in_row, input_data[i * depth + c]); + } + + FixedPointAccum sum_of_exps = FixedPointAccum::Zero(); + for (int c = 0; c < depth; ++c) { + int32 input_diff = + static_cast(input_data[i * depth + c]) - max_in_row; + if (input_diff >= diff_min) { + const int32 input_diff_rescaled = + MultiplyByQuantizedMultiplierGreaterThanOne( + input_diff, input_multiplier, input_left_shift); + const FixedPointScaledDiff scaled_diff_f8 = + FixedPointScaledDiff::FromRaw(input_diff_rescaled); + sum_of_exps = sum_of_exps + gemmlowp::Rescale( + exp_on_negative_values(scaled_diff_f8)); + } + } + + // TODO(b/77858996): Implement fixed-point log(). + // Not a fully-quantized implementation: floating-point log(). + const float float_log_sum_of_exps = + std::log(static_cast(sum_of_exps.raw()) / + (1 << (31 - kAccumulationIntegerBits))); + const int32 fixed_log_sum_of_exps = static_cast(TfLiteRound( + float_log_sum_of_exps * (1 << (31 - kScaledDiffIntegerBits)))); + + // rescaled_diff_min is smallest representable in + // Q(kScaledDiffIntegerBits).(31-kScaledDiffIntegerBits) plus the + // log-sub-exps that will be subtracted in the loop. + // + // The thresholds diff_min, etc are negative. + const int rescaled_diff_min = + fixed_log_sum_of_exps + std::numeric_limits::lowest(); + const int adjusted_diff_min = + std::max(diff_min - 1, // Note use of > below instead of >= above. 
+ MultiplyByQuantizedMultiplierSmallerThanOne( + rescaled_diff_min, reverse_scaling_divisor, + reverse_scaling_right_shift)); + + for (int c = 0; c < depth; ++c) { + int32 input_diff = + static_cast(input_data[i * depth + c]) - max_in_row; + if (input_diff > adjusted_diff_min) { + const int32 input_diff_rescaled = + MultiplyByQuantizedMultiplierGreaterThanOne( + input_diff, input_multiplier, input_left_shift); + int32 unsat_output = + gemmlowp::RoundingDivideByPOT( + (input_diff_rescaled - fixed_log_sum_of_exps), + 31 - kScaledDiffIntegerBits - kOutputIntegerBits) + + 255; + + output_data[i * depth + c] = static_cast( + std::max(std::min(unsat_output, static_cast(255)), 0)); + } else { + // Set output to smallest value. + output_data[i * depth + c] = 0; + } + } + } +} + inline void Logistic(const float* input_data, const Dims<4>& input_dims, float* output_data, const Dims<4>& output_dims) { gemmlowp::ScopedProfilingLabel label("Logistic"); @@ -3952,7 +4407,7 @@ inline void Logistic(const uint8* input_data, const Dims<4>& input_dims, int32 input_zero_point, int32 input_range_radius, int32 input_multiplier, int input_left_shift, uint8* output_data, const Dims<4>& output_dims) { - gemmlowp::ScopedProfilingLabel label("Logistic"); + gemmlowp::ScopedProfilingLabel label("Logistic/Uint8"); /* batches */ MatchingArraySize(input_dims, 3, output_dims, 3); /* height */ MatchingArraySize(input_dims, 2, output_dims, 2); /* width */ MatchingArraySize(input_dims, 1, output_dims, 1); @@ -4432,66 +4887,23 @@ inline void Dequantize(const uint8* input_data, const Dims<4>& input_dims, } inline void FakeQuant(const float* input_data, const Dims<4>& input_dims, - float rmin, float rmax, float* output_data, + float rmin, float rmax, int num_bits, float* output_data, const Dims<4>& output_dims) { gemmlowp::ScopedProfilingLabel label("FakeQuant"); // 0 should always be a representable value. Let's assume that the initial // min,max range contains 0. 
- TFLITE_DCHECK_LE(rmin, 0.); - TFLITE_DCHECK_GE(rmax, 0.); - - // Determine quantization parameters: zero_point, scale. - using Integer = uint8; - const Integer qmin = std::numeric_limits::min(); - const Integer qmax = std::numeric_limits::max(); - const float qmin_float = qmin; - const float qmax_float = qmax; - int32 zero_point = 0; - float scale = 0.f; - // If rmin==rmax, both must be zero per the above assertion, - // so we are done. - if (rmin != rmax) { - // First determine the scale. - scale = (rmax - rmin) / (qmax_float - qmin_float); - - // Zero-point computation. - // First the initial floating-point computation. The zero-point can be - // determined from solving an affine equation for any known pair - // (real value, corresponding quantized value). - // We know two such pairs: (rmin, qmin) and (rmax, qmax). - // The arithmetic error on the zero point computed from either pair - // will be roughly machine_epsilon * (sum of absolute values of terms) - // so we want to use the variant that adds the smaller terms. - const float zero_point_from_min = qmin_float - rmin / scale; - const float zero_point_from_max = qmax_float - rmax / scale; - const float zero_point_from_min_error = - std::abs(qmin_float) + std::abs(rmin / scale); - const float zero_point_from_max_error = - std::abs(qmax_float) + std::abs(rmax / scale); - - const float zero_point_float = - zero_point_from_min_error < zero_point_from_max_error - ? zero_point_from_min - : zero_point_from_max; - - // Now we need to nudge the zero point to be an integer - // (our zero points are integer, and this is motivated by the requirement - // to be able to represent the real value "0" exactly as a quantized value, - // which is required in multiple places, for example in Im2col with SAME - // padding). 
- if (zero_point_float < qmin_float) { - zero_point = qmin; - } else if (zero_point_float > qmax_float) { - zero_point = qmax; - } else { - zero_point = static_cast(TfLiteRound(zero_point_float)); - } - // The zero point should always be in the range of quantized value, - // [qmin, qmax]. - TFLITE_DCHECK_GE(zero_point, qmin); - TFLITE_DCHECK_LE(zero_point, qmax); - } + TFLITE_DCHECK_LE(rmin, 0.0f); + TFLITE_DCHECK_GE(rmax, 0.0f); + TFLITE_DCHECK_LT(rmin, rmax); + + // Code matches tensorflow's FakeQuantWithMinMaxArgsFunctor. + int quant_min = 0; + int quant_max = (1 << num_bits) - 1; + float nudged_min, nudged_max, nudged_scale; + NudgeQuantizationRange(rmin, rmax, quant_min, quant_max, &nudged_min, + &nudged_max, &nudged_scale); + const float inv_nudged_scale = 1.0f / nudged_scale; const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); const int height = MatchingArraySize(input_dims, 2, output_dims, 2); @@ -4502,11 +4914,12 @@ inline void FakeQuant(const float* input_data, const Dims<4>& input_dims, for (int x = 0; x < width; ++x) { for (int c = 0; c < depth; ++c) { const float src_val = input_data[Offset(input_dims, c, x, y, b)]; - const float unclamped_quantized_val = - TfLiteRound(zero_point + src_val / scale); - const float quantized_val = std::min( - qmax_float, std::max(qmin_float, unclamped_quantized_val)); - const float dst_val = scale * (quantized_val - zero_point); + const float clamped = + std::min(nudged_max, std::max(nudged_min, src_val)); + const float clamped_shifted = clamped - nudged_min; + const float dst_val = + TfLiteRound(clamped_shifted * inv_nudged_scale) * nudged_scale + + nudged_min; output_data[Offset(output_dims, c, x, y, b)] = dst_val; } } @@ -4935,6 +5348,7 @@ inline void SpaceToBatchND(const T* input_data, const Dims<4>& input_dims, const int32* paddings_data, const Dims<4>& paddings_dims, T* output_data, const Dims<4>& output_dims) { + // Unoptimized - Straight copy from reference ops. 
gemmlowp::ScopedProfilingLabel label("SpaceToBatchND"); const int output_batch_size = ArraySize(output_dims, 3); @@ -4976,29 +5390,76 @@ inline void SpaceToBatchND(const T* input_data, const Dims<4>& input_dims, } } +// Helper methods for BatchToSpaceND. +// `spatial_index_dim` specifies post-crop offset index in this spatial +// dimension, i.e. spatial offset introduced by flattening batch to spatial +// dimension minus the crop size at beginning. `block_shape_dim` is the block +// size in current dimension. `input_dim` and `output_dim` are input and output +// size of BatchToSpaceND operation in current dimension. +// Output start index is inclusive and end index is exclusive. +inline void GetIndexRange(int spatial_index_dim, int block_shape_dim, + int input_dim, int output_dim, int* start_index, + int* end_index) { + // (*start_index) * block_shape_dim is effectively rounded up to the next + // multiple of block_shape_dim by the integer division. + *start_index = + std::max(0, (-spatial_index_dim + block_shape_dim - 1) / block_shape_dim); + // Similarly, (*end_index) * block_shape_dim is rounded up too (note that + // end_index is exclusive). 
+ *end_index = std::min( + input_dim, + (output_dim - spatial_index_dim + block_shape_dim - 1) / block_shape_dim); +} + template inline void BatchToSpaceND(const T* input_data, const Dims<4>& input_dims, const int32* block_shape_data, - const Dims<4>& block_shape_dims, T* output_data, - const Dims<4>& output_dims) { + const Dims<4>& block_shape_dims, + const int32* crops_data, const Dims<4>& crops_dims, + T* output_data, const Dims<4>& output_dims) { gemmlowp::ScopedProfilingLabel label("BatchToSpaceND"); const int output_batch_size = ArraySize(output_dims, 3); + const int output_height = ArraySize(output_dims, 2); + const int output_width = ArraySize(output_dims, 1); const int input_batch_size = ArraySize(input_dims, 3); const int input_height = ArraySize(input_dims, 2); const int input_width = ArraySize(input_dims, 1); const int depth = ArraySize(input_dims, 0); const int block_shape_width = block_shape_data[1]; const int block_shape_height = block_shape_data[0]; + const int crops_top = crops_data[0]; + const int crops_left = crops_data[2]; for (int in_batch = 0; in_batch < input_batch_size; ++in_batch) { - for (int in_h = 0; in_h < input_height; ++in_h) { - for (int in_w = 0; in_w < input_width; ++in_w) { - int out_batch = in_batch % output_batch_size; - int out_w = in_w * block_shape_width + - (in_batch / output_batch_size) % block_shape_width; - int out_h = in_h * block_shape_height + - (in_batch / output_batch_size) / block_shape_width; + const int out_batch = in_batch % output_batch_size; + const int spatial_offset = in_batch / output_batch_size; + + int in_h_start = 0; + int in_h_end = 0; + // GetIndexRange ensures start and end indices are in [0, output_height). 
+ GetIndexRange(spatial_offset / block_shape_width - crops_top, + block_shape_height, input_height, output_height, &in_h_start, + &in_h_end); + + for (int in_h = in_h_start; in_h < in_h_end; ++in_h) { + const int out_h = in_h * block_shape_height + + spatial_offset / block_shape_width - crops_top; + TFLITE_DCHECK_GE(out_h, 0); + TFLITE_DCHECK_LT(out_h, output_height); + + int in_w_start = 0; + int in_w_end = 0; + // GetIndexRange ensures start and end indices are in [0, output_width). + GetIndexRange(spatial_offset % block_shape_width - crops_left, + block_shape_width, input_width, output_width, &in_w_start, + &in_w_end); + + for (int in_w = in_w_start; in_w < in_w_end; ++in_w) { + const int out_w = in_w * block_shape_width + + spatial_offset % block_shape_width - crops_left; + TFLITE_DCHECK_GE(out_w, 0); + TFLITE_DCHECK_LT(out_w, output_width); T* out = output_data + Offset(output_dims, 0, out_w, out_h, out_batch); const T* in = input_data + Offset(input_dims, 0, in_w, in_h, in_batch); memcpy(out, in, depth * sizeof(T)); @@ -5011,7 +5472,7 @@ template inline void Pad(const T* input_data, const Dims<4>& input_dims, const std::vector& left_paddings, const std::vector& right_paddings, T* output_data, - const Dims<4>& output_dims) { + const Dims<4>& output_dims, const int32_t pad_value) { gemmlowp::ScopedProfilingLabel label("Pad"); const int output_batch = ArraySize(output_dims, 3); const int output_height = ArraySize(output_dims, 2); @@ -5031,27 +5492,27 @@ inline void Pad(const T* input_data, const Dims<4>& input_dims, const int input_depth = ArraySize(input_dims, 0); if (left_b_padding != 0) { - memset(output_data, 0, + memset(output_data, pad_value, left_b_padding * output_height * output_width * output_depth * sizeof(T)); } for (int out_b = left_b_padding; out_b < output_batch - right_b_padding; ++out_b) { if (left_h_padding != 0) { - memset(output_data + Offset(output_dims, 0, 0, 0, out_b), 0, + memset(output_data + Offset(output_dims, 0, 0, 0, out_b), 
pad_value, left_h_padding * output_width * output_depth * sizeof(T)); } for (int out_h = left_h_padding; out_h < output_height - right_h_padding; ++out_h) { if (left_w_padding != 0) { - memset(output_data + Offset(output_dims, 0, 0, out_h, out_b), 0, + memset(output_data + Offset(output_dims, 0, 0, out_h, out_b), pad_value, left_w_padding * output_depth * sizeof(T)); } for (int out_w = left_w_padding; out_w < output_width - right_w_padding; ++out_w) { if (left_d_padding != 0) { - memset(output_data + Offset(output_dims, 0, out_w, out_h, out_b), 0, - left_d_padding * sizeof(T)); + memset(output_data + Offset(output_dims, 0, out_w, out_h, out_b), + pad_value, left_d_padding * sizeof(T)); } T* out = output_data + @@ -5065,20 +5526,21 @@ inline void Pad(const T* input_data, const Dims<4>& input_dims, memset( output_data + Offset(output_dims, output_depth - right_d_padding, out_w, out_h, out_b), - 0, right_d_padding * sizeof(T)); + pad_value, right_d_padding * sizeof(T)); } } if (right_w_padding != 0) { memset( output_data + Offset(output_dims, 0, output_width - right_w_padding, out_h, out_b), - 0, right_w_padding * output_depth * sizeof(T)); + pad_value, right_w_padding * output_depth * sizeof(T)); } } if (right_h_padding != 0) { memset(output_data + Offset(output_dims, 0, 0, output_height - right_h_padding, out_b), - 0, right_h_padding * output_width * output_depth * sizeof(T)); + pad_value, + right_h_padding * output_width * output_depth * sizeof(T)); } } if (right_b_padding != 0) { @@ -5090,43 +5552,136 @@ inline void Pad(const T* input_data, const Dims<4>& input_dims, } } +template +inline void Pad(const T* input_data, const Dims<4>& input_dims, + const std::vector& left_paddings, + const std::vector& right_paddings, T* output_data, + const Dims<4>& output_dims) { + Pad(input_data, input_dims, left_paddings, right_paddings, output_data, + output_dims, 0); +} + +// UNOPTIMIZED COPY of StridedSlice from reference_ops.h (see comments there). 
+ +// Use until std::clamp() is available from C++17. +inline int Clamp(const int v, const int lo, const int hi) { + TFLITE_DCHECK(!(hi < lo)); + if (hi < v) return hi; + if (v < lo) return lo; + return v; +} + +inline int StartForAxis(int begin_mask, const std::vector& start_indices, + const std::vector& strides, + const Dims<4>& input_shape, int axis) { + // Begin with the specified index + int start = start_indices[axis]; + + // begin_mask override + if (begin_mask & 1 << axis) { + if (strides[axis] > 0) { + // Forward iteration - use the first element. These values will get + // clamped below (Note: We could have set them to 0 and axis_size-1, but + // use lowest() and max() to maintain symmetry with StopForAxis()) + start = std::numeric_limits::lowest(); + } else { + // Backward iteration - use the last element. + start = std::numeric_limits::max(); + } + } + + // Handle negative indices + int axis_size = input_shape.sizes[axis]; + if (start < 0) { + start += axis_size; + } + + // Clamping + start = Clamp(start, 0, axis_size - 1); + + return start; +} + +inline int StopForAxis(int end_mask, const std::vector& stop_indices, + const std::vector& strides, + const Dims<4>& input_shape, int axis) { + // Begin with the specified index + int stop = stop_indices[axis]; + + // end_mask override + if (end_mask & (1 << axis)) { + if (strides[axis] > 0) { + // Forward iteration - use the last element. These values will get + // clamped below + stop = std::numeric_limits::max(); + } else { + // Backward iteration - use the first element. + stop = std::numeric_limits::lowest(); + } + } + + // Handle negative indices + int axis_size = input_shape.sizes[axis]; + if (stop < 0) { + stop += axis_size; + } + + // Clamping + // Because the end index points one past the last element, we need slightly + // different clamping ranges depending on the direction. 
+ if (strides[axis] > 0) { + // Forward iteration + stop = Clamp(stop, 0, axis_size); + } else { + // Backward iteration + stop = Clamp(stop, -1, axis_size - 1); + } + + return stop; +} + +inline bool LoopCondition(int index, int stop, int stride) { + // True when we have reached the end of an axis and should loop. + return stride > 0 ? index >= stop : index <= stop; +} + template inline void StridedSlice(const T* input_data, const Dims<4>& input_dims, int begin_mask, int end_mask, - const std::vector& starts, - const std::vector& stops, + const std::vector& start_indices, + const std::vector& stop_indices, const std::vector& strides, T* output_data, const Dims<4>& output_dims) { - gemmlowp::ScopedProfilingLabel label("StridedSlice"); - const int start_b = (begin_mask & 8) ? 0 : starts[3]; - const int stop_b = (end_mask & 8) ? input_dims.sizes[3] : stops[3]; - const int start_h = (begin_mask & 4) ? 0 : starts[2]; - const int stop_h = (end_mask & 4) ? input_dims.sizes[2] : stops[2]; - const int start_w = (begin_mask & 2) ? 0 : starts[1]; - const int stop_w = (end_mask & 2) ? input_dims.sizes[1] : stops[1]; - const int start_d = (begin_mask & 1) ? 0 : starts[0]; - const int stop_d = (end_mask & 1) ? 
input_dims.sizes[0] : stops[0]; + TFLITE_DCHECK_EQ(start_indices.size(), 4); + TFLITE_DCHECK_EQ(stop_indices.size(), 4); + TFLITE_DCHECK_EQ(strides.size(), 4); + const int start_b = + StartForAxis(begin_mask, start_indices, strides, input_dims, 3); + const int stop_b = + StopForAxis(end_mask, stop_indices, strides, input_dims, 3); + const int start_h = + StartForAxis(begin_mask, start_indices, strides, input_dims, 2); + const int stop_h = + StopForAxis(end_mask, stop_indices, strides, input_dims, 2); + const int start_w = + StartForAxis(begin_mask, start_indices, strides, input_dims, 1); + const int stop_w = + StopForAxis(end_mask, stop_indices, strides, input_dims, 1); + const int start_d = + StartForAxis(begin_mask, start_indices, strides, input_dims, 0); + const int stop_d = + StopForAxis(end_mask, stop_indices, strides, input_dims, 0); T* out_ptr = output_data; - if (strides[0] == 0) { - for (int in_b = start_b; in_b < stop_b; in_b += strides[3]) { - for (int in_h = start_h; in_h < stop_h; in_h += strides[2]) { - for (int in_w = start_w; in_w < stop_w; in_w += strides[1]) { - const int len = stop_d - start_d; - memcpy(out_ptr, - input_data + Offset(input_dims, start_d, in_w, in_h, in_b), - len * sizeof(T)); - out_ptr += len; - } - } - } - } else { - for (int in_b = start_b; in_b < stop_b; in_b += strides[3]) { - for (int in_h = start_h; in_h < stop_h; in_h += strides[2]) { - for (int in_w = start_w; in_w < stop_w; in_w += strides[1]) { - for (int in_d = start_d; in_d < stop_d; in_d += strides[0]) { - *out_ptr++ = input_data[Offset(input_dims, in_d, in_w, in_h, in_b)]; - } + for (int in_b = start_b; !LoopCondition(in_b, stop_b, strides[3]); + in_b += strides[3]) { + for (int in_h = start_h; !LoopCondition(in_h, stop_h, strides[2]); + in_h += strides[2]) { + for (int in_w = start_w; !LoopCondition(in_w, stop_w, strides[1]); + in_w += strides[1]) { + for (int in_d = start_d; !LoopCondition(in_d, stop_d, strides[0]); + in_d += strides[0]) { + *out_ptr++ = 
input_data[Offset(input_dims, in_d, in_w, in_h, in_b)]; } } } diff --git a/tensorflow/contrib/lite/kernels/internal/quantization_util.cc b/tensorflow/contrib/lite/kernels/internal/quantization_util.cc index 18be6777a5caeb45a4ffabd8b7f1793de7b053f8..b0951aac8cbb98a181d9dcaef88770fadfc74f62 100644 --- a/tensorflow/contrib/lite/kernels/internal/quantization_util.cc +++ b/tensorflow/contrib/lite/kernels/internal/quantization_util.cc @@ -78,6 +78,22 @@ void PreprocessSoftmaxScaling(double beta, double input_scale, quantized_multiplier, left_shift); } +void PreprocessLogSoftmaxScaling(double beta, double input_scale, + int input_integer_bits, + int32_t* quantized_multiplier, int* left_shift, + int32_t* reverse_scaling_divisor, + int* reverse_scaling_right_shift) { + PreprocessSoftmaxScaling(beta, input_scale, input_integer_bits, + quantized_multiplier, left_shift); + + // Also calculate what amounts to the inverse scaling factor for the input. + const double real_reverse_scaling_divisor = + (1 << (31 - *left_shift)) / static_cast(*quantized_multiplier); + tflite::QuantizeMultiplierSmallerThanOne(real_reverse_scaling_divisor, + reverse_scaling_divisor, + reverse_scaling_right_shift); +} + int CalculateInputRadius(int input_integer_bits, int input_left_shift) { const double max_input_rescaled = 1.0 * ((1 << input_integer_bits) - 1) * (1ll << (31 - input_integer_bits)) / @@ -88,4 +104,25 @@ int CalculateInputRadius(int input_integer_bits, int input_left_shift) { return static_cast(std::floor(max_input_rescaled)); } +void NudgeQuantizationRange(const float min, const float max, + const int quant_min, const int quant_max, + float* nudged_min, float* nudged_max, + float* scale) { + // This code originates from tensorflow/core/kernels/fake_quant_ops_functor.h. 
+ const float quant_min_float = static_cast(quant_min); + const float quant_max_float = static_cast(quant_max); + *scale = (max - min) / (quant_max_float - quant_min_float); + const float zero_point_from_min = quant_min_float - min / *scale; + uint16 nudged_zero_point; + if (zero_point_from_min < quant_min_float) { + nudged_zero_point = static_cast(quant_min); + } else if (zero_point_from_min > quant_max_float) { + nudged_zero_point = static_cast(quant_max); + } else { + nudged_zero_point = static_cast(TfLiteRound(zero_point_from_min)); + } + *nudged_min = (quant_min_float - nudged_zero_point) * (*scale); + *nudged_max = (quant_max_float - nudged_zero_point) * (*scale); +} + } // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/internal/quantization_util.h b/tensorflow/contrib/lite/kernels/internal/quantization_util.h index 9a04b76e56b2527b06f5b0ec1e75e991fd1cbdea..4a217515f142b2451ebd61e423871b95cdc09748 100644 --- a/tensorflow/contrib/lite/kernels/internal/quantization_util.h +++ b/tensorflow/contrib/lite/kernels/internal/quantization_util.h @@ -196,7 +196,12 @@ void QuantizeMultiplier(double double_multiplier, int32_t* quantized_multiplier, void PreprocessSoftmaxScaling(double beta, double input_scale, int input_integer_bits, int32_t* quantized_multiplier, int* left_shift); - +// Like PreprocessSoftmaxScaling, but inverse scaling factors also calculated. +void PreprocessLogSoftmaxScaling(double beta, double input_scale, + int input_integer_bits, + int32_t* quantized_multiplier, int* left_shift, + int32_t* reverse_scaling_divisor, + int* reverse_scaling_right_shift); // Calculate the largest input that will result in a within-bounds intermediate // result within MultiplyByQuantizedMultiplierGreaterThanOne. In other words, // it must not overflow before we reduce the value by multiplication by the @@ -204,6 +209,14 @@ void PreprocessSoftmaxScaling(double beta, double input_scale, // Softmax. 
int CalculateInputRadius(int input_integer_bits, int input_left_shift); +// Nudges a min/max quantization range to ensure zero is zero. +// Gymnastics with nudged zero point is to ensure that real zero maps to +// an integer, which is required for e.g. zero-padding in convolutional layers. +// Outputs nudged_min, nudged_max, nudged_scale. +void NudgeQuantizationRange(const float min, const float max, + const int quant_min, const int quant_max, + float* nudged_min, float* nudged_max, float* scale); + } // namespace tflite #endif // TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_QUANTIZATION_UTIL_H_ diff --git a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h index 3575974ae9362e53592c50c8d4a0675bee8f8034..9ad125b8eba7f6457d582cd3ee512b87234a0e95 100644 --- a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h @@ -27,6 +27,7 @@ limitations under the License. #include "fixedpoint/fixedpoint.h" #include "public/gemmlowp.h" #include "tensorflow/contrib/lite/kernels/internal/common.h" +#include "tensorflow/contrib/lite/kernels/internal/quantization_util.h" #include "tensorflow/contrib/lite/kernels/internal/round.h" #include "tensorflow/contrib/lite/kernels/internal/types.h" @@ -601,6 +602,67 @@ inline void FullyConnected(const uint8* input_data, const Dims<4>& input_dims, } } +inline void ExperimentalShuffledFullyConnected( + const uint8* input_data, const Dims<4>& input_dims, + const uint8* shuffled_weights_data, const Dims<4>& weights_dims, + const int32* bias_data, const Dims<4>& bias_dims, int32 output_multiplier, + int output_shift, int32 output_activation_min, int32 output_activation_max, + int16* output_data, const Dims<4>& output_dims, + gemmlowp::GemmContext* gemm_context) { + (void)gemm_context; // only used in optimized code. 
+ TFLITE_DCHECK_LE(output_activation_min, output_activation_max); + // TODO(benoitjacob): This really should be: + // const int batches = ArraySize(output_dims, 1); + // but the current --variable_batch hack consists in overwriting the 3rd + // dimension with the runtime batch size, as we don't keep track for each + // array of which dimension is the batch dimension in it. + const int batches = ArraySize(output_dims, 1) * ArraySize(output_dims, 2) * + ArraySize(output_dims, 3); + const int output_depth = MatchingArraySize(weights_dims, 1, output_dims, 0); + const int accum_depth = ArraySize(weights_dims, 0); + TFLITE_DCHECK(IsPackedWithoutStrides(input_dims)); + TFLITE_DCHECK(IsPackedWithoutStrides(weights_dims)); + // The experimental shuffling is an optimization for matrix*vector product. + // We aren't interested in supporting non-matrix*vector-product cases, i.e. + // batches>1. + TFLITE_DCHECK_EQ(batches, 1); + // Shuffled weights have had their sign bit (0x80) pre-flipped (xor'd) + // so that just reinterpreting them as int8 values is equivalent to + // subtracting 128 from them, thus implementing for free the subtraction of + // the zero_point value 128. + const int8* shuffled_weights_ptr = + reinterpret_cast(shuffled_weights_data); + for (int c = 0; c < output_depth; c += 4) { + // Internal accumulation. + // Initialize accumulator with the bias-value. + int32 accum[4] = {0}; + // Accumulation loop. + for (int d = 0; d < accum_depth; d += 16) { + for (int i = 0; i < 4; i++) { + for (int j = 0; j < 16; j++) { + int8 input_val = input_data[d + j] - 128; + int8 weights_val = *shuffled_weights_ptr++; + accum[i] += weights_val * input_val; + } + } + } + for (int i = 0; i < 4; i++) { + // Add bias value + int acc = accum[i] + bias_data[c + i]; + // Down-scale the final int32 accumulator to the scale used by our + // (16-bit, typically 3 integer bits) fixed-point format. The quantized + // multiplier and shift here have been pre-computed offline + // (e.g. 
by toco). + acc = + MultiplyByQuantizedMultiplier(acc, output_multiplier, -output_shift); + // Saturate, cast to int16, and store to output array. + acc = std::max(acc, output_activation_min); + acc = std::min(acc, output_activation_max); + output_data[c + i] = acc; + } + } +} + // legacy, for compatibility with old checked-in code template void FullyConnected(const uint8* input_data, const Dims<4>& input_dims, @@ -635,27 +697,14 @@ void NonGlobalBatchNormalization( const Dims<4>& offset_dims, float* output_data, const Dims<4>& output_dims) { const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); - const int height = - MatchingArraySize(input_dims, 2, mean_dims, 2, multiplier_dims, 2, - offset_dims, 2, output_dims, 2); - const int width = - MatchingArraySize(input_dims, 1, mean_dims, 1, multiplier_dims, 1, - offset_dims, 1, output_dims, 1); - const int depth = - MatchingArraySize(input_dims, 0, mean_dims, 0, multiplier_dims, 0, - offset_dims, 0, output_dims, 0); + const int inner_size = MatchingFlatSizeSkipDim( + input_dims, 3, mean_dims, multiplier_dims, offset_dims, output_dims); for (int b = 0; b < batches; ++b) { - for (int y = 0; y < height; ++y) { - for (int x = 0; x < width; ++x) { - for (int c = 0; c < depth; ++c) { - output_data[Offset(output_dims, c, x, y, b)] = ActivationFunction( - (input_data[Offset(input_dims, c, x, y, b)] - - mean_data[Offset(mean_dims, c, x, y, 0)]) * - multiplier_data[Offset(multiplier_dims, c, x, y, 0)] + - offset_data[Offset(offset_dims, c, x, y, 0)]); - } - } + for (int i = 0; i < inner_size; ++i) { + output_data[b * inner_size + i] = ActivationFunction( + (input_data[b * inner_size + i] - mean_data[i]) * multiplier_data[i] + + offset_data[i]); } } } @@ -669,87 +718,52 @@ void GlobalBatchNormalization(const float* input_data, const float* offset_data, const Dims<4>& offset_dims, float* output_data, const Dims<4>& output_dims) { - const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); - const int 
height = MatchingArraySize(input_dims, 2, output_dims, 2); - const int width = MatchingArraySize(input_dims, 1, output_dims, 1); + const int outer_size = MatchingFlatSizeSkipDim(input_dims, 0, output_dims); const int depth = MatchingArraySize(input_dims, 0, mean_dims, 0, multiplier_dims, 0, offset_dims, 0, output_dims, 0); - for (int b = 0; b < batches; ++b) { - for (int y = 0; y < height; ++y) { - for (int x = 0; x < width; ++x) { - for (int c = 0; c < depth; ++c) { - output_data[Offset(output_dims, c, x, y, b)] = ActivationFunction( - (input_data[Offset(input_dims, c, x, y, b)] - - mean_data[Offset(mean_dims, c, 0, 0, 0)]) * - multiplier_data[Offset(multiplier_dims, c, 0, 0, 0)] + - offset_data[Offset(offset_dims, c, 0, 0, 0)]); - } - } + for (int i = 0; i < outer_size; ++i) { + for (int c = 0; c < depth; ++c) { + output_data[depth * i + c] = ActivationFunction( + (input_data[depth * i + c] - mean_data[c]) * multiplier_data[c] + + offset_data[c]); } } } inline void Relu(const float* input_data, const Dims<4>& input_dims, float* output_data, const Dims<4>& output_dims) { - const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); - const int height = MatchingArraySize(input_dims, 2, output_dims, 2); - const int width = MatchingArraySize(input_dims, 1, output_dims, 1); - const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); - for (int b = 0; b < batches; ++b) { - for (int y = 0; y < height; ++y) { - for (int x = 0; x < width; ++x) { - for (int c = 0; c < depth; ++c) { - float val = input_data[Offset(input_dims, c, x, y, b)]; - const float lower = 0; - float clamped = val < lower ? lower : val; - output_data[Offset(output_dims, c, x, y, b)] = clamped; - } - } - } + const int flat_size = MatchingFlatSize(input_dims, output_dims); + for (int i = 0; i < flat_size; ++i) { + const float val = input_data[i]; + const float lower = 0; + const float clamped = val < lower ? 
lower : val; + output_data[i] = clamped; } } inline void Relu1(const float* input_data, const Dims<4>& input_dims, float* output_data, const Dims<4>& output_dims) { - const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); - const int height = MatchingArraySize(input_dims, 2, output_dims, 2); - const int width = MatchingArraySize(input_dims, 1, output_dims, 1); - const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); - for (int b = 0; b < batches; ++b) { - for (int y = 0; y < height; ++y) { - for (int x = 0; x < width; ++x) { - for (int c = 0; c < depth; ++c) { - float val = input_data[Offset(input_dims, c, x, y, b)]; - const float upper = 1; - const float lower = -1; - float clamped = val > upper ? upper : val < lower ? lower : val; - output_data[Offset(output_dims, c, x, y, b)] = clamped; - } - } - } + const int flat_size = MatchingFlatSize(input_dims, output_dims); + for (int i = 0; i < flat_size; ++i) { + const float val = input_data[i]; + const float upper = 1; + const float lower = -1; + const float clamped = val > upper ? upper : val < lower ? lower : val; + output_data[i] = clamped; } } inline void Relu6(const float* input_data, const Dims<4>& input_dims, float* output_data, const Dims<4>& output_dims) { - const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); - const int height = MatchingArraySize(input_dims, 2, output_dims, 2); - const int width = MatchingArraySize(input_dims, 1, output_dims, 1); - const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); - for (int b = 0; b < batches; ++b) { - for (int y = 0; y < height; ++y) { - for (int x = 0; x < width; ++x) { - for (int c = 0; c < depth; ++c) { - float val = input_data[Offset(input_dims, c, x, y, b)]; - const float upper = 6; - const float lower = 0; - float clamped = val > upper ? upper : val < lower ? 
lower : val; - output_data[Offset(output_dims, c, x, y, b)] = clamped; - } - } - } + const int flat_size = MatchingFlatSize(input_dims, output_dims); + for (int i = 0; i < flat_size; ++i) { + const float val = input_data[i]; + const float upper = 6; + const float lower = 0; + const float clamped = val > upper ? upper : val < lower ? lower : val; + output_data[i] = clamped; } } @@ -757,24 +771,17 @@ template void L2Normalization(const float* input_data, const Dims<4>& input_dims, float* output_data, const Dims<4>& output_dims) { static_assert(Ac == FusedActivationFunctionType::kNone, ""); - const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); - const int height = MatchingArraySize(input_dims, 2, output_dims, 2); - const int width = MatchingArraySize(input_dims, 1, output_dims, 1); + const int outer_size = MatchingFlatSizeSkipDim(input_dims, 0, output_dims); const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); - for (int b = 0; b < batches; ++b) { - for (int y = 0; y < height; ++y) { - for (int x = 0; x < width; ++x) { - float squared_l2_norm = 0; - for (int c = 0; c < depth; ++c) { - float val = input_data[Offset(input_dims, c, x, y, b)]; - squared_l2_norm += val * val; - } - float l2_norm = std::sqrt(squared_l2_norm); - for (int c = 0; c < depth; ++c) { - output_data[Offset(output_dims, c, x, y, b)] = - input_data[Offset(input_dims, c, x, y, b)] / l2_norm; - } - } + for (int i = 0; i < outer_size; ++i) { + float squared_l2_norm = 0; + for (int c = 0; c < depth; ++c) { + const float val = input_data[depth * i + c]; + squared_l2_norm += val * val; + } + const float l2_norm = std::sqrt(squared_l2_norm); + for (int c = 0; c < depth; ++c) { + output_data[depth * i + c] = input_data[depth * i + c] / l2_norm; } } } @@ -859,26 +866,11 @@ inline void Add(const float* input1_data, const Dims<4>& input1_dims, const float* input2_data, const Dims<4>& input2_dims, float output_activation_min, float output_activation_max, float* output_data, const 
Dims<4>& output_dims) { - const int batches = - MatchingArraySize(input1_dims, 3, input2_dims, 3, output_dims, 3); - const int height = - MatchingArraySize(input1_dims, 2, input2_dims, 2, output_dims, 2); - const int width = - MatchingArraySize(input1_dims, 1, input2_dims, 1, output_dims, 1); - const int depth = - MatchingArraySize(input1_dims, 0, input2_dims, 0, output_dims, 0); - for (int b = 0; b < batches; ++b) { - for (int y = 0; y < height; ++y) { - for (int x = 0; x < width; ++x) { - for (int c = 0; c < depth; ++c) { - output_data[Offset(output_dims, c, x, y, b)] = - ActivationFunctionWithMinMax( - input1_data[Offset(input1_dims, c, x, y, b)] + - input2_data[Offset(input2_dims, c, x, y, b)], - output_activation_min, output_activation_max); - } - } - } + const int flat_size = MatchingFlatSize(input1_dims, input2_dims, output_dims); + for (int i = 0; i < flat_size; ++i) { + output_data[i] = ActivationFunctionWithMinMax( + input1_data[i] + input2_data[i], output_activation_min, + output_activation_max); } } @@ -1141,26 +1133,11 @@ inline void Mul(const float* input1_data, const Dims<4>& input1_dims, const float* input2_data, const Dims<4>& input2_dims, float output_activation_min, float output_activation_max, float* output_data, const Dims<4>& output_dims) { - const int batches = - MatchingArraySize(input1_dims, 3, input2_dims, 3, output_dims, 3); - const int height = - MatchingArraySize(input1_dims, 2, input2_dims, 2, output_dims, 2); - const int width = - MatchingArraySize(input1_dims, 1, input2_dims, 1, output_dims, 1); - const int depth = - MatchingArraySize(input1_dims, 0, input2_dims, 0, output_dims, 0); - for (int b = 0; b < batches; ++b) { - for (int y = 0; y < height; ++y) { - for (int x = 0; x < width; ++x) { - for (int c = 0; c < depth; ++c) { - output_data[Offset(output_dims, c, x, y, b)] = - ActivationFunctionWithMinMax( - input1_data[Offset(input1_dims, c, x, y, b)] * - input2_data[Offset(input2_dims, c, x, y, b)], - output_activation_min, 
output_activation_max); - } - } - } + const int flat_size = MatchingFlatSize(input1_dims, input2_dims, output_dims); + for (int i = 0; i < flat_size; ++i) { + output_data[i] = ActivationFunctionWithMinMax( + input1_data[i] * input2_data[i], output_activation_min, + output_activation_max); } } @@ -1339,6 +1316,33 @@ inline void BroadcastMul(const uint8* input1_data, const Dims<4>& input1_dims, output_data, output_dims); } +inline void Div(const float* input1_data, const Dims<4>& input1_dims, + const float* input2_data, const Dims<4>& input2_dims, + float output_activation_min, float output_activation_max, + float* output_data, const Dims<4>& output_dims) { + const int batches = + MatchingArraySize(input1_dims, 3, input2_dims, 3, output_dims, 3); + const int height = + MatchingArraySize(input1_dims, 2, input2_dims, 2, output_dims, 2); + const int width = + MatchingArraySize(input1_dims, 1, input2_dims, 1, output_dims, 1); + const int depth = + MatchingArraySize(input1_dims, 0, input2_dims, 0, output_dims, 0); + for (int b = 0; b < batches; ++b) { + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + for (int c = 0; c < depth; ++c) { + output_data[Offset(output_dims, c, x, y, b)] = + ActivationFunctionWithMinMax( + input1_data[Offset(input1_dims, c, x, y, b)] / + input2_data[Offset(input2_dims, c, x, y, b)], + output_activation_min, output_activation_max); + } + } + } + } +} + // TODO(jiawen): We can implement BroadcastDiv on buffers of arbitrary // dimensionality if the runtime code does a single loop over one dimension // that handles broadcasting as the base case. 
The code generator would then @@ -1380,57 +1384,15 @@ void BroadcastDiv(const T* input1_data, const Dims<4>& input1_dims, } } -inline void Div(const float* input1_data, const Dims<4>& input1_dims, - const float* input2_data, const Dims<4>& input2_dims, - float output_activation_min, float output_activation_max, - float* output_data, const Dims<4>& output_dims) { - const int batches = - MatchingArraySize(input1_dims, 3, input2_dims, 3, output_dims, 3); - const int height = - MatchingArraySize(input1_dims, 2, input2_dims, 2, output_dims, 2); - const int width = - MatchingArraySize(input1_dims, 1, input2_dims, 1, output_dims, 1); - const int depth = - MatchingArraySize(input1_dims, 0, input2_dims, 0, output_dims, 0); - for (int b = 0; b < batches; ++b) { - for (int y = 0; y < height; ++y) { - for (int x = 0; x < width; ++x) { - for (int c = 0; c < depth; ++c) { - output_data[Offset(output_dims, c, x, y, b)] = - ActivationFunctionWithMinMax( - input1_data[Offset(input1_dims, c, x, y, b)] / - input2_data[Offset(input2_dims, c, x, y, b)], - output_activation_min, output_activation_max); - } - } - } - } -} - inline void Sub(const float* input1_data, const Dims<4>& input1_dims, const float* input2_data, const Dims<4>& input2_dims, float output_activation_min, float output_activation_max, float* output_data, const Dims<4>& output_dims) { - const int batches = - MatchingArraySize(input1_dims, 3, input2_dims, 3, output_dims, 3); - const int height = - MatchingArraySize(input1_dims, 2, input2_dims, 2, output_dims, 2); - const int width = - MatchingArraySize(input1_dims, 1, input2_dims, 1, output_dims, 1); - const int depth = - MatchingArraySize(input1_dims, 0, input2_dims, 0, output_dims, 0); - for (int b = 0; b < batches; ++b) { - for (int y = 0; y < height; ++y) { - for (int x = 0; x < width; ++x) { - for (int c = 0; c < depth; ++c) { - output_data[Offset(output_dims, c, x, y, b)] = - ActivationFunctionWithMinMax( - input1_data[Offset(input1_dims, c, x, y, b)] - - 
input2_data[Offset(input2_dims, c, x, y, b)], - output_activation_min, output_activation_max); - } - } - } + const int flat_size = MatchingFlatSize(input1_dims, input2_dims, output_dims); + for (int i = 0; i < flat_size; ++i) { + output_data[i] = ActivationFunctionWithMinMax( + input1_data[i] - input2_data[i], output_activation_min, + output_activation_max); } } @@ -1566,6 +1528,61 @@ void Concatenation(int concat_dim, const Scalar* const* input_data, } } +// TODO(prabhumk): This is the same as the optimized implementation. +// TODO(prabhumk): The quantized implementation of concatenation isn't fully +// quantized as it takes scale as a floating point value. This should be fixed +// when optimizing this routine further. +inline void Concatenation(int concat_dim, const uint8* const* input_data, + const Dims<4>* const* input_dims, + const int32* input_zeropoint, + const float* input_scale, int inputs_count, + uint8* output_data, const Dims<4>& output_dims, + const int32 output_zeropoint, + const float output_scale) { + // The arguments input_zeropoint and input_scale are expected to be arrays + // that have the quantization parameters for all the inputs to the concat + // operator.
+ TFLITE_DCHECK_GT(inputs_count, 1); + int64_t concat_size = 0; + for (int i = 0; i < inputs_count; i++) { + for (int j = 0; j < 4; j++) { + if (j != concat_dim) { + MatchingArraySize(*input_dims[i], j, output_dims, j); + } + } + concat_size += ArraySize(*input_dims[i], concat_dim); + } + TFLITE_DCHECK_EQ(concat_size, ArraySize(output_dims, concat_dim)); + int64_t outer_size = 1; + for (int i = concat_dim + 1; i < 4; i++) { + outer_size *= output_dims.sizes[i]; + } + const float inverse_output_scale = 1.f / output_scale; + uint8* output_ptr = output_data; + for (int k = 0; k < outer_size; k++) { + for (int i = 0; i < inputs_count; ++i) { + const int copy_size = + input_dims[i]->sizes[concat_dim] * input_dims[i]->strides[concat_dim]; + const uint8* input_ptr = input_data[i] + k * copy_size; + if (input_zeropoint[i] == output_zeropoint && + input_scale[i] == output_scale) { + memcpy(output_ptr, input_ptr, copy_size); + } else { + const float scale = input_scale[i] * inverse_output_scale; + const float bias = -input_zeropoint[i] * scale; + for (int j = 0; j < copy_size; ++j) { + const int32_t value = + static_cast(round(input_ptr[j] * scale + bias)) + + output_zeropoint; + output_ptr[j] = + static_cast(std::max(std::min(255, value), 0)); + } + } + output_ptr += copy_size; + } + } +} + template void DepthConcatenation(const Scalar* const* input_data, const Dims<4>* const* input_dims, int inputs_count, @@ -1757,15 +1774,9 @@ void LstmCell(const uint8* input_data_uint8, const Dims<4>& input_dims, (void)gemm_context; // only used in optimized code. // Gather dimensions information, and perform consistency checks. 
- const int batches = - MatchingArraySize(input_dims, 3, prev_activ_dims, 3, prev_state_dims, 3, - output_state_dims, 3, output_activ_dims, 3); - const int height = - MatchingArraySize(input_dims, 2, prev_activ_dims, 2, prev_state_dims, 2, - output_state_dims, 2, output_activ_dims, 2); - const int width = - MatchingArraySize(input_dims, 1, prev_activ_dims, 1, prev_state_dims, 1, - output_state_dims, 1, output_activ_dims, 1); + const int outer_size = + MatchingFlatSizeSkipDim(input_dims, 0, prev_activ_dims, prev_state_dims, + output_state_dims, output_activ_dims); TFLITE_CHECK_EQ(ArraySize(weights_dims, 2), 1); TFLITE_CHECK_EQ(ArraySize(weights_dims, 3), 1); const int input_depth = ArraySize(input_dims, 0); @@ -1781,9 +1792,7 @@ void LstmCell(const uint8* input_data_uint8, const Dims<4>& input_dims, MatchingArraySize(prev_state_dims, 0, prev_activ_dims, 0, output_state_dims, 0, output_activ_dims, 0); TFLITE_CHECK_EQ(output_depth, intern_activ_depth / 4); - const int fc_batches = ArraySize(activ_temp_dims, 1) * - ArraySize(activ_temp_dims, 2) * - ArraySize(activ_temp_dims, 3); + const int fc_batches = FlatSizeSkipDim(activ_temp_dims, 0); const int fc_output_depth = MatchingArraySize(weights_dims, 1, activ_temp_dims, 0); const int fc_accum_depth = ArraySize(weights_dims, 0); @@ -1828,7 +1837,6 @@ void LstmCell(const uint8* input_data_uint8, const Dims<4>& input_dims, // Rest of the LSTM cell: tanh and logistic math functions, and some adds // and muls, all done in 16-bit fixed-point. - const int outer_size = batches * width * height; for (int b = 0; b < outer_size; ++b) { for (int c = 0; c < output_depth; ++c) { // Define the fixed-point data types that we will use here. 
All use @@ -2363,28 +2371,20 @@ inline void LocalResponseNormalization(const float* input_data, float bias, float alpha, float beta, float* output_data, const Dims<4>& output_dims) { - const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); - const int height = MatchingArraySize(input_dims, 2, output_dims, 2); - const int width = MatchingArraySize(input_dims, 1, output_dims, 1); + const int outer_size = MatchingFlatSizeSkipDim(input_dims, 0, output_dims); const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); - for (int b = 0; b < batches; ++b) { - for (int y = 0; y < height; ++y) { - for (int x = 0; x < width; ++x) { - for (int c = 0; c < depth; ++c) { - const int begin_input_c = std::max(0, c - range); - const int end_input_c = std::min(depth, c + range); - float accum = 0.f; - for (int input_c = begin_input_c; input_c < end_input_c; ++input_c) { - const float input_val = - input_data[Offset(input_dims, input_c, x, y, b)]; - accum += input_val * input_val; - } - const float multiplier = std::pow(bias + alpha * accum, -beta); - output_data[Offset(output_dims, c, x, y, b)] = - input_data[Offset(input_dims, c, x, y, b)] * multiplier; - } + for (int i = 0; i < outer_size; ++i) { + for (int c = 0; c < depth; ++c) { + const int begin_input_c = std::max(0, c - range); + const int end_input_c = std::min(depth, c + range); + float accum = 0.f; + for (int input_c = begin_input_c; input_c < end_input_c; ++input_c) { + const float input_val = input_data[i * depth + input_c]; + accum += input_val * input_val; } + const float multiplier = std::pow(bias + alpha * accum, -beta); + output_data[i * depth + c] = input_data[i * depth + c] * multiplier; } } } @@ -2392,37 +2392,28 @@ inline void LocalResponseNormalization(const float* input_data, inline void Softmax(const float* input_data, const Dims<4>& input_dims, float beta, float* output_data, const Dims<4>& output_dims) { - const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); - const 
int height = MatchingArraySize(input_dims, 2, output_dims, 2); - const int width = MatchingArraySize(input_dims, 1, output_dims, 1); + const int outer_size = MatchingFlatSizeSkipDim(input_dims, 0, output_dims); const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); - for (int b = 0; b < batches; ++b) { - for (int y = 0; y < height; ++y) { - for (int x = 0; x < width; ++x) { - // Find max element value which we'll use to ensure numerical stability - // taking advantage of the following equality: - // exp(x[i])/sum(exp(x[i])) == exp(x[i]+C)/sum(exp(x[i]+C)) - float max = std::numeric_limits::lowest(); - for (int c = 0; c < depth; ++c) { - max = std::max(max, input_data[Offset(input_dims, c, x, y, b)]); - } + for (int i = 0; i < outer_size; ++i) { + // Find max element value which we'll use to ensure numerical stability + // taking advantage of the following equality: + // exp(x[i])/sum(exp(x[i])) == exp(x[i]+C)/sum(exp(x[i]+C)) + float max = std::numeric_limits::lowest(); + for (int c = 0; c < depth; ++c) { + max = std::max(max, input_data[i * depth + c]); + } - // Compute sum. - float sum = 0.f; - for (int c = 0; c < depth; ++c) { - sum += std::exp((input_data[Offset(input_dims, c, x, y, b)] - max) * - beta); - } + // Compute sum. + float sum = 0.f; + for (int c = 0; c < depth; ++c) { + sum += std::exp((input_data[i * depth + c] - max) * beta); + } - // Compute result. - for (int c = 0; c < depth; ++c) { - output_data[Offset(output_dims, c, x, y, b)] = - std::exp((input_data[Offset(input_dims, c, x, y, b)] - max) * - beta) / - sum; - } - } + // Compute result. 
+ for (int c = 0; c < depth; ++c) { + output_data[i * depth + c] = + std::exp((input_data[i * depth + c] - max) * beta) / sum; } } } @@ -2443,73 +2434,63 @@ inline void Softmax(const uint8* input_data, const Dims<4>& input_dims, using FixedPointAccum = gemmlowp::FixedPoint; using FixedPoint0 = gemmlowp::FixedPoint; - const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); - const int height = MatchingArraySize(input_dims, 2, output_dims, 2); - const int width = MatchingArraySize(input_dims, 1, output_dims, 1); + const int outer_size = MatchingFlatSizeSkipDim(input_dims, 0, output_dims); const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); - for (int b = 0; b < batches; ++b) { - for (int x = 0; x < width; ++x) { - for (int y = 0; y < height; ++y) { - uint8 max_in_row = 0; - for (int c = 0; c < depth; ++c) { - max_in_row = - std::max(max_in_row, input_data[Offset(input_dims, c, x, y, b)]); - } - - FixedPointAccum sum_of_exps = FixedPointAccum::Zero(); - for (int c = 0; c < depth; ++c) { - int32 input_diff = - static_cast(input_data[Offset(input_dims, c, x, y, b)]) - - max_in_row; - if (input_diff >= diff_min) { - const int32 input_diff_rescaled = - MultiplyByQuantizedMultiplierGreaterThanOne( - input_diff, input_beta_multiplier, input_beta_left_shift); - const FixedPointScaledDiff scaled_diff_f8 = - FixedPointScaledDiff::FromRaw(input_diff_rescaled); - sum_of_exps = - sum_of_exps + gemmlowp::Rescale( - exp_on_negative_values(scaled_diff_f8)); - } - } - - int32 fixed_sum_of_exps = sum_of_exps.raw(); - int headroom_plus_one = - CountLeadingZeros(static_cast(fixed_sum_of_exps)); - // This is the number of bits to the left of the binary point above 1.0. - // Consider fixed_sum_of_exps=1.25. In that case shifted_scale=0.8 and - // no later adjustment will be needed. 
- int num_bits_over_unit = kAccumulationIntegerBits - headroom_plus_one; - int32 shifted_sum_minus_one = static_cast( - (static_cast(fixed_sum_of_exps) << headroom_plus_one) - - (static_cast(1) << 31)); + for (int i = 0; i < outer_size; ++i) { + uint8 max_in_row = 0; + for (int c = 0; c < depth; ++c) { + max_in_row = std::max(max_in_row, input_data[i * depth + c]); + } + + FixedPointAccum sum_of_exps = FixedPointAccum::Zero(); + for (int c = 0; c < depth; ++c) { + int32 input_diff = + static_cast(input_data[i * depth + c]) - max_in_row; + if (input_diff >= diff_min) { + const int32 input_diff_rescaled = + MultiplyByQuantizedMultiplierGreaterThanOne( + input_diff, input_beta_multiplier, input_beta_left_shift); + const FixedPointScaledDiff scaled_diff_f8 = + FixedPointScaledDiff::FromRaw(input_diff_rescaled); + sum_of_exps = sum_of_exps + gemmlowp::Rescale( + exp_on_negative_values(scaled_diff_f8)); + } + } + + int32 fixed_sum_of_exps = sum_of_exps.raw(); + int headroom_plus_one = + CountLeadingZeros(static_cast(fixed_sum_of_exps)); + // This is the number of bits to the left of the binary point above 1.0. + // Consider fixed_sum_of_exps=1.25. In that case shifted_scale=0.8 and + // no later adjustment will be needed. 
+ int num_bits_over_unit = kAccumulationIntegerBits - headroom_plus_one; + int32 shifted_sum_minus_one = static_cast( + (static_cast(fixed_sum_of_exps) << headroom_plus_one) - + (static_cast(1) << 31)); + + FixedPoint0 shifted_scale = gemmlowp::one_over_one_plus_x_for_x_in_0_1( + FixedPoint0::FromRaw(shifted_sum_minus_one)); + + for (int c = 0; c < depth; ++c) { + int32 input_diff = + static_cast(input_data[i * depth + c]) - max_in_row; + if (input_diff >= diff_min) { + const int32 input_diff_rescaled = + MultiplyByQuantizedMultiplierGreaterThanOne( + input_diff, input_beta_multiplier, input_beta_left_shift); + const FixedPointScaledDiff scaled_diff_f8 = + FixedPointScaledDiff::FromRaw(input_diff_rescaled); + + FixedPoint0 exp_in_0 = exp_on_negative_values(scaled_diff_f8); + int32 unsat_output = gemmlowp::RoundingDivideByPOT( + (shifted_scale * exp_in_0).raw(), num_bits_over_unit + 31 - 8); + + output_data[i * depth + c] = static_cast( + std::max(std::min(unsat_output, static_cast(255)), 0)); - FixedPoint0 shifted_scale = gemmlowp::one_over_one_plus_x_for_x_in_0_1( - FixedPoint0::FromRaw(shifted_sum_minus_one)); - - for (int c = 0; c < depth; ++c) { - int32 input_diff = - static_cast(input_data[Offset(input_dims, c, x, y, b)]) - - max_in_row; - if (input_diff >= diff_min) { - const int32 input_diff_rescaled = - MultiplyByQuantizedMultiplierGreaterThanOne( - input_diff, input_beta_multiplier, input_beta_left_shift); - const FixedPointScaledDiff scaled_diff_f8 = - FixedPointScaledDiff::FromRaw(input_diff_rescaled); - - FixedPoint0 exp_in_0 = exp_on_negative_values(scaled_diff_f8); - int32 unsat_output = gemmlowp::RoundingDivideByPOT( - (shifted_scale * exp_in_0).raw(), num_bits_over_unit + 31 - 8); - - output_data[Offset(output_dims, c, x, y, b)] = static_cast( - std::max(std::min(unsat_output, static_cast(255)), 0)); - - } else { - output_data[Offset(output_dims, c, x, y, b)] = 0; - } - } + } else { + output_data[i * depth + c] = 0; } } } @@ -2517,109 +2498,170 @@ 
inline void Softmax(const uint8* input_data, const Dims<4>& input_dims, inline void LogSoftmax(const float* input_data, const Dims<4>& input_dims, float* output_data, const Dims<4>& output_dims) { - const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); - const int height = MatchingArraySize(input_dims, 2, output_dims, 2); - const int width = MatchingArraySize(input_dims, 1, output_dims, 1); + const int outer_size = MatchingFlatSizeSkipDim(input_dims, 0, output_dims); const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); - for (int b = 0; b < batches; ++b) { - for (int y = 0; y < height; ++y) { - for (int x = 0; x < width; ++x) { - // Find max element value which we'll use to ensure numerical stability - // taking advantage of the following equality: - // log(exp(x[i])/sum(exp(x[i]))) == log(exp(x[i]+C)/sum(exp(x[i]+C))) - float max = std::numeric_limits::lowest(); - for (int c = 0; c < depth; ++c) { - max = std::max(max, input_data[Offset(input_dims, c, x, y, b)]); - } + for (int i = 0; i < outer_size; ++i) { + // Find max element value which we'll use to ensure numerical stability + // taking advantage of the following equality: + // log(exp(x[i])/sum(exp(x[i]))) == log(exp(x[i]+C)/sum(exp(x[i]+C))) + float max = std::numeric_limits::lowest(); + for (int c = 0; c < depth; ++c) { + max = std::max(max, input_data[i * depth + c]); + } - // Compute sum. - float sum = 0.f; - for (int c = 0; c < depth; ++c) { - sum += std::exp(input_data[Offset(input_dims, c, x, y, b)] - max); - } + // Compute sum. + float sum = 0.f; + for (int c = 0; c < depth; ++c) { + sum += std::exp(input_data[i * depth + c] - max); + } - // Compute result. - const float log_sum = std::log(sum); - for (int c = 0; c < depth; ++c) { - output_data[Offset(output_dims, c, x, y, b)] = - input_data[Offset(input_dims, c, x, y, b)] - max - log_sum; - } - } + // Compute result. 
+ const float log_sum = std::log(sum); + for (int c = 0; c < depth; ++c) { + output_data[i * depth + c] = input_data[i * depth + c] - max - log_sum; } } } -inline void Logistic(const float* input_data, const Dims<4>& input_dims, - float* output_data, const Dims<4>& output_dims) { - const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); - const int height = MatchingArraySize(input_dims, 2, output_dims, 2); - const int width = MatchingArraySize(input_dims, 1, output_dims, 1); +inline void LogSoftmax(const uint8* input_data, const Dims<4>& input_dims, + int32 input_multiplier, int32 input_left_shift, + int32 reverse_scaling_divisor, + int32 reverse_scaling_right_shift, int diff_min, + uint8* output_data, const Dims<4>& output_dims) { + // The representation chosen for the input to the exp() function is Q5.26. + // We need to leave extra space since values that we skip might be as large as + // -32 before multiplying by input_beta_multiplier, and therefore as large as + // -16 afterwards. Note that exp(-8) is definitely not insignificant to + // accumulation, but exp(-16) definitely is. 
+ static constexpr int kScaledDiffIntegerBits = 5; + static constexpr int kAccumulationIntegerBits = 12; + static constexpr int kOutputIntegerBits = 4; + using FixedPointScaledDiff = + gemmlowp::FixedPoint; + using FixedPointAccum = gemmlowp::FixedPoint; + using FixedPoint0 = gemmlowp::FixedPoint; + + const int outer_size = MatchingFlatSizeSkipDim(input_dims, 0, output_dims); const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); - for (int b = 0; b < batches; ++b) { - for (int y = 0; y < height; ++y) { - for (int x = 0; x < width; ++x) { - for (int c = 0; c < depth; ++c) { - float val = input_data[Offset(input_dims, c, x, y, b)]; - float result = 1.f / (1.f + std::exp(-val)); - output_data[Offset(output_dims, c, x, y, b)] = result; - } + + for (int i = 0; i < outer_size; ++i) { + uint8 max_in_row = 0; + for (int c = 0; c < depth; ++c) { + max_in_row = std::max(max_in_row, input_data[i * depth + c]); + } + + FixedPointAccum sum_of_exps = FixedPointAccum::Zero(); + for (int c = 0; c < depth; ++c) { + int32 input_diff = + static_cast(input_data[i * depth + c]) - max_in_row; + if (input_diff >= diff_min) { + const int32 input_diff_rescaled = + MultiplyByQuantizedMultiplierGreaterThanOne( + input_diff, input_multiplier, input_left_shift); + const FixedPointScaledDiff scaled_diff_f8 = + FixedPointScaledDiff::FromRaw(input_diff_rescaled); + sum_of_exps = sum_of_exps + gemmlowp::Rescale( + exp_on_negative_values(scaled_diff_f8)); + } + } + + // TODO(b/77858996): Implement fixed-point log(). + // Not a fully-quantized implementation: floating-point log(). 
+ const float float_log_sum_of_exps = + std::log(static_cast(sum_of_exps.raw()) / + (1 << (31 - kAccumulationIntegerBits))); + const int32 fixed_log_sum_of_exps = static_cast(TfLiteRound( + float_log_sum_of_exps * (1 << (31 - kScaledDiffIntegerBits)))); + + // rescaled_diff_min is smallest representable in + // Q(kScaledDiffIntegerBits).(31-kScaledDiffIntegerBits) plus the + // log-sub-exps that will be subtracted in the loop. + // + // The thresholds diff_min, etc are negative. + const int rescaled_diff_min = + fixed_log_sum_of_exps + std::numeric_limits::lowest(); + const int adjusted_diff_min = + std::max(diff_min - 1, // Note use of > below instead of >= above. + MultiplyByQuantizedMultiplierSmallerThanOne( + rescaled_diff_min, reverse_scaling_divisor, + reverse_scaling_right_shift)); + + for (int c = 0; c < depth; ++c) { + int32 input_diff = + static_cast(input_data[i * depth + c]) - max_in_row; + if (input_diff > adjusted_diff_min) { + const int32 input_diff_rescaled = + MultiplyByQuantizedMultiplierGreaterThanOne( + input_diff, input_multiplier, input_left_shift); + int32 unsat_output = + gemmlowp::RoundingDivideByPOT( + (input_diff_rescaled - fixed_log_sum_of_exps), + 31 - kScaledDiffIntegerBits - kOutputIntegerBits) + + 255; + + output_data[i * depth + c] = static_cast( + std::max(std::min(unsat_output, static_cast(255)), 0)); + } else { + // Set output to smallest value. 
+ output_data[i * depth + c] = 0; } } } } +inline void Logistic(const float* input_data, const Dims<4>& input_dims, + float* output_data, const Dims<4>& output_dims) { + const int flat_size = MatchingFlatSize(output_dims, input_dims); + + for (int i = 0; i < flat_size; i++) { + float val = input_data[i]; + float result = 1.f / (1.f + std::exp(-val)); + output_data[i] = result; + } +} + inline void Logistic(const uint8* input_data, const Dims<4>& input_dims, int32 input_zero_point, int32 input_range_radius, int32 input_multiplier, int input_left_shift, uint8* output_data, const Dims<4>& output_dims) { - const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); - const int height = MatchingArraySize(input_dims, 2, output_dims, 2); - const int width = MatchingArraySize(input_dims, 1, output_dims, 1); - const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); - for (int b = 0; b < batches; ++b) { - for (int y = 0; y < height; ++y) { - for (int x = 0; x < width; ++x) { - for (int c = 0; c < depth; ++c) { - const uint8 input_val_u8 = input_data[Offset(input_dims, c, x, y, b)]; - const int32 input_val_centered = - static_cast(input_val_u8) - input_zero_point; - uint8 output_val; - if (input_val_centered <= -input_range_radius) { - output_val = 0; - } else if (input_val_centered >= input_range_radius) { - output_val = 255; - } else { - const int32 input_val_rescaled = - MultiplyByQuantizedMultiplierGreaterThanOne( - input_val_centered, input_multiplier, input_left_shift); - using FixedPoint4 = gemmlowp::FixedPoint; - using FixedPoint0 = gemmlowp::FixedPoint; - const FixedPoint4 input_val_f4 = - FixedPoint4::FromRaw(input_val_rescaled); - const FixedPoint0 output_val_f0 = gemmlowp::logistic(input_val_f4); - // Convert from Q0.31 to Q23.8. - using gemmlowp::RoundingDivideByPOT; - int32 output_val_s32 = RoundingDivideByPOT(output_val_f0.raw(), 23); - if (output_val_s32 == 256) { - output_val_s32 = 255; - } - // Reinterpret as U0.8. 
- TFLITE_DCHECK_GE(output_val_s32, 0); - TFLITE_DCHECK_LE(output_val_s32, 255); - output_val = static_cast(output_val_s32); - } - output_data[Offset(output_dims, c, x, y, b)] = output_val; - } + const int flat_size = MatchingFlatSize(output_dims, input_dims); + + for (int i = 0; i < flat_size; i++) { + const uint8 input_val_u8 = input_data[i]; + const int32 input_val_centered = + static_cast(input_val_u8) - input_zero_point; + uint8 output_val; + if (input_val_centered <= -input_range_radius) { + output_val = 0; + } else if (input_val_centered >= input_range_radius) { + output_val = 255; + } else { + const int32 input_val_rescaled = + MultiplyByQuantizedMultiplierGreaterThanOne( + input_val_centered, input_multiplier, input_left_shift); + using FixedPoint4 = gemmlowp::FixedPoint; + using FixedPoint0 = gemmlowp::FixedPoint; + const FixedPoint4 input_val_f4 = FixedPoint4::FromRaw(input_val_rescaled); + const FixedPoint0 output_val_f0 = gemmlowp::logistic(input_val_f4); + // Convert from Q0.31 to Q23.8. + using gemmlowp::RoundingDivideByPOT; + int32 output_val_s32 = RoundingDivideByPOT(output_val_f0.raw(), 23); + if (output_val_s32 == 256) { + output_val_s32 = 255; } + // Reinterpret as U0.8. + TFLITE_DCHECK_GE(output_val_s32, 0); + TFLITE_DCHECK_LE(output_val_s32, 255); + output_val = static_cast(output_val_s32); } + output_data[i] = output_val; } } inline void Logistic(const int16* input_data, const Dims<4>& input_dims, int16* output_data, const Dims<4>& output_dims) { - const int flat_size = RequiredBufferSizeForDims(output_dims); - TFLITE_DCHECK_EQ(RequiredBufferSizeForDims(input_dims), flat_size); + const int flat_size = MatchingFlatSize(output_dims, input_dims); for (int i = 0; i < flat_size; i++) { // F0 uses 0 integer bits, range [-1, 1]. 
@@ -2637,20 +2679,12 @@ inline void Logistic(const int16* input_data, const Dims<4>& input_dims, inline void Tanh(const float* input_data, const Dims<4>& input_dims, float* output_data, const Dims<4>& output_dims) { - const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); - const int height = MatchingArraySize(input_dims, 2, output_dims, 2); - const int width = MatchingArraySize(input_dims, 1, output_dims, 1); - const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); - for (int b = 0; b < batches; ++b) { - for (int y = 0; y < height; ++y) { - for (int x = 0; x < width; ++x) { - for (int c = 0; c < depth; ++c) { - float val = input_data[Offset(input_dims, c, x, y, b)]; - float result = std::tanh(val); - output_data[Offset(output_dims, c, x, y, b)] = result; - } - } - } + const int flat_size = MatchingFlatSize(output_dims, input_dims); + + for (int i = 0; i < flat_size; i++) { + float val = input_data[i]; + float result = std::tanh(val); + output_data[i] = result; } } @@ -2659,47 +2693,38 @@ inline void Tanh(const uint8* input_data, const Dims<4>& input_dims, int32 input_multiplier, int input_left_shift, uint8* output_data, const Dims<4>& output_dims) { const int32 output_zero_point = 128; - const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); - const int height = MatchingArraySize(input_dims, 2, output_dims, 2); - const int width = MatchingArraySize(input_dims, 1, output_dims, 1); - const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); - for (int b = 0; b < batches; ++b) { - for (int y = 0; y < height; ++y) { - for (int x = 0; x < width; ++x) { - for (int c = 0; c < depth; ++c) { - const uint8 input_val_u8 = input_data[Offset(input_dims, c, x, y, b)]; - const int32 input_val_centered = - static_cast(input_val_u8) - input_zero_point; - uint8 output_val; - if (input_val_centered <= -input_range_radius) { - output_val = 0; - } else if (input_val_centered >= input_range_radius) { - output_val = 255; - } else { - 
const int32 input_val_rescaled = - MultiplyByQuantizedMultiplierGreaterThanOne( - input_val_centered, input_multiplier, input_left_shift); - using FixedPoint4 = gemmlowp::FixedPoint; - using FixedPoint0 = gemmlowp::FixedPoint; - const FixedPoint4 input_val_f4 = - FixedPoint4::FromRaw(input_val_rescaled); - const FixedPoint0 output_val_f0 = gemmlowp::tanh(input_val_f4); - // Convert from Q0.31 to Q24.7. - using gemmlowp::RoundingDivideByPOT; - int32 output_val_s32 = RoundingDivideByPOT(output_val_f0.raw(), 24); - output_val_s32 += output_zero_point; - if (output_val_s32 == 256) { - output_val_s32 = 255; - } - // Reinterpret as Q0.7, encoded in uint8. - TFLITE_DCHECK_GE(output_val_s32, 0); - TFLITE_DCHECK_LE(output_val_s32, 255); - output_val = static_cast(output_val_s32); - } - output_data[Offset(output_dims, c, x, y, b)] = output_val; - } + const int flat_size = MatchingFlatSize(output_dims, input_dims); + + for (int i = 0; i < flat_size; i++) { + const uint8 input_val_u8 = input_data[i]; + const int32 input_val_centered = + static_cast(input_val_u8) - input_zero_point; + uint8 output_val; + if (input_val_centered <= -input_range_radius) { + output_val = 0; + } else if (input_val_centered >= input_range_radius) { + output_val = 255; + } else { + const int32 input_val_rescaled = + MultiplyByQuantizedMultiplierGreaterThanOne( + input_val_centered, input_multiplier, input_left_shift); + using FixedPoint4 = gemmlowp::FixedPoint; + using FixedPoint0 = gemmlowp::FixedPoint; + const FixedPoint4 input_val_f4 = FixedPoint4::FromRaw(input_val_rescaled); + const FixedPoint0 output_val_f0 = gemmlowp::tanh(input_val_f4); + // Convert from Q0.31 to Q24.7. + using gemmlowp::RoundingDivideByPOT; + int32 output_val_s32 = RoundingDivideByPOT(output_val_f0.raw(), 24); + output_val_s32 += output_zero_point; + if (output_val_s32 == 256) { + output_val_s32 = 255; } + // Reinterpret as Q0.7, encoded in uint8. 
+ TFLITE_DCHECK_GE(output_val_s32, 0); + TFLITE_DCHECK_LE(output_val_s32, 255); + output_val = static_cast(output_val_s32); } + output_data[i] = output_val; } } @@ -2711,8 +2736,7 @@ inline void Tanh(const int16* input_data, const Dims<4>& input_dims, TFLITE_DCHECK_GE(input_left_shift, 0); TFLITE_DCHECK_LE(input_left_shift, 1); - const int flat_size = RequiredBufferSizeForDims(output_dims); - TFLITE_DCHECK_EQ(RequiredBufferSizeForDims(input_dims), flat_size); + const int flat_size = MatchingFlatSize(output_dims, input_dims); // F0 uses 0 integer bits, range [-1, 1]. // This is the return type of math functions such as tanh, logistic, @@ -2740,138 +2764,62 @@ inline void Tanh(const int16* input_data, const Dims<4>& input_dims, inline void Dequantize(const uint8* input_data, const Dims<4>& input_dims, int32 zero_point, double scale, float* output_data, const Dims<4>& output_dims) { - const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); - const int height = MatchingArraySize(input_dims, 2, output_dims, 2); - const int width = MatchingArraySize(input_dims, 1, output_dims, 1); - const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); - for (int b = 0; b < batches; ++b) { - for (int y = 0; y < height; ++y) { - for (int x = 0; x < width; ++x) { - for (int c = 0; c < depth; ++c) { - int32 val = input_data[Offset(input_dims, c, x, y, b)]; - float result = static_cast(scale * (val - zero_point)); - output_data[Offset(output_dims, c, x, y, b)] = result; - } - } - } + const int flat_size = MatchingFlatSize(output_dims, input_dims); + + for (int i = 0; i < flat_size; i++) { + int32 val = input_data[i]; + float result = static_cast(scale * (val - zero_point)); + output_data[i] = result; } } inline void FakeQuant(const float* input_data, const Dims<4>& input_dims, - float rmin, float rmax, float* output_data, + float rmin, float rmax, int num_bits, float* output_data, const Dims<4>& output_dims) { // 0 should always be a representable value. 
Let's assume that the initial // min,max range contains 0. - TFLITE_DCHECK_LE(rmin, 0.); - TFLITE_DCHECK_GE(rmax, 0.); - - // Determine quantization parameters: zero_point, scale. - using Integer = uint8; - const Integer qmin = std::numeric_limits::min(); - const Integer qmax = std::numeric_limits::max(); - const float qmin_float = qmin; - const float qmax_float = qmax; - int32 zero_point = 0; - float scale = 0.f; - // If rmin==rmax, both must be zero per the above assertion, - // so we are done. - if (rmin != rmax) { - // First determine the scale. - scale = (rmax - rmin) / (qmax_float - qmin_float); - - // Zero-point computation. - // First the initial floating-point computation. The zero-point can be - // determined from solving an affine equation for any known pair - // (real value, corresponding quantized value). - // We know two such pairs: (rmin, qmin) and (rmax, qmax). - // The arithmetic error on the zero point computed from either pair - // will be roughly machine_epsilon * (sum of absolute values of terms) - // so we want to use the variant that adds the smaller terms. - const float zero_point_from_min = qmin_float - rmin / scale; - const float zero_point_from_max = qmax_float - rmax / scale; - const float zero_point_from_min_error = - std::abs(qmin_float) + std::abs(rmin / scale); - const float zero_point_from_max_error = - std::abs(qmax_float) + std::abs(rmax / scale); - - const float zero_point_float = - zero_point_from_min_error < zero_point_from_max_error - ? zero_point_from_min - : zero_point_from_max; - - // Now we need to nudge the zero point to be an integer - // (our zero points are integer, and this is motivated by the requirement - // to be able to represent the real value "0" exactly as a quantized value, - // which is required in multiple places, for example in Im2col with SAME - // padding). 
- if (zero_point_float < qmin_float) { - zero_point = qmin; - } else if (zero_point_float > qmax_float) { - zero_point = qmax; - } else { - zero_point = static_cast(TfLiteRound(zero_point_float)); - } - // The zero point should always be in the range of quantized value, - // [qmin, qmax]. - TFLITE_DCHECK_GE(zero_point, qmin); - TFLITE_DCHECK_LE(zero_point, qmax); - } - - const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); - const int height = MatchingArraySize(input_dims, 2, output_dims, 2); - const int width = MatchingArraySize(input_dims, 1, output_dims, 1); - const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); - for (int b = 0; b < batches; ++b) { - for (int y = 0; y < height; ++y) { - for (int x = 0; x < width; ++x) { - for (int c = 0; c < depth; ++c) { - const float src_val = input_data[Offset(input_dims, c, x, y, b)]; - const float unclamped_quantized_val = - TfLiteRound(zero_point + src_val / scale); - const float quantized_val = std::min( - qmax_float, std::max(qmin_float, unclamped_quantized_val)); - const float dst_val = scale * (quantized_val - zero_point); - output_data[Offset(output_dims, c, x, y, b)] = dst_val; - } - } - } + TFLITE_DCHECK_LE(rmin, 0.0f); + TFLITE_DCHECK_GE(rmax, 0.0f); + TFLITE_DCHECK_LT(rmin, rmax); + + // Code matches tensorflow's FakeQuantWithMinMaxArgsFunctor. 
+ int quant_min = 0; + int quant_max = (1 << num_bits) - 1; + float nudged_min, nudged_max, nudged_scale; + NudgeQuantizationRange(rmin, rmax, quant_min, quant_max, &nudged_min, + &nudged_max, &nudged_scale); + const float inv_nudged_scale = 1.0f / nudged_scale; + + const int flat_size = MatchingFlatSize(output_dims, input_dims); + for (int i = 0; i < flat_size; i++) { + const float src_val = input_data[i]; + const float clamped = std::min(nudged_max, std::max(nudged_min, src_val)); + const float clamped_shifted = clamped - nudged_min; + const float dst_val = + TfLiteRound(clamped_shifted * inv_nudged_scale) * nudged_scale + + nudged_min; + output_data[i] = dst_val; } } template inline void Cast(const SrcT* input_data, const Dims<4>& input_dims, DstT* output_data, const Dims<4>& output_dims) { - const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); - const int height = MatchingArraySize(input_dims, 2, output_dims, 2); - const int width = MatchingArraySize(input_dims, 1, output_dims, 1); - const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); - for (int b = 0; b < batches; ++b) { - for (int y = 0; y < height; ++y) { - for (int x = 0; x < width; ++x) { - for (int c = 0; c < depth; ++c) { - int offset = Offset(input_dims, c, x, y, b); - output_data[offset] = static_cast(input_data[offset]); - } - } - } + const int flat_size = MatchingFlatSize(output_dims, input_dims); + + for (int i = 0; i < flat_size; i++) { + int offset = i; + output_data[offset] = static_cast(input_data[offset]); } } inline void Floor(const float* input_data, const Dims<4>& input_dims, float* output_data, const Dims<4>& output_dims) { - const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); - const int height = MatchingArraySize(input_dims, 2, output_dims, 2); - const int width = MatchingArraySize(input_dims, 1, output_dims, 1); - const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); - for (int b = 0; b < batches; ++b) { - for (int y = 0; y 
< height; ++y) { - for (int x = 0; x < width; ++x) { - for (int c = 0; c < depth; ++c) { - int offset = Offset(input_dims, c, x, y, b); - output_data[offset] = std::floor(input_data[offset]); - } - } - } + const int flat_size = MatchingFlatSize(output_dims, input_dims); + + for (int i = 0; i < flat_size; i++) { + int offset = i; + output_data[offset] = std::floor(input_data[offset]); } } @@ -3001,24 +2949,37 @@ inline void SpaceToBatchND(const T* input_data, const Dims<4>& input_dims, template inline void BatchToSpaceND(const T* input_data, const Dims<4>& input_dims, const int32* block_shape_data, - const Dims<4>& block_shape_dims, T* output_data, - const Dims<4>& output_dims) { + const Dims<4>& block_shape_dims, + const int32* crops_data, const Dims<4>& crops_dims, + T* output_data, const Dims<4>& output_dims) { const int output_batch_size = ArraySize(output_dims, 3); + const int output_height = ArraySize(output_dims, 2); + const int output_width = ArraySize(output_dims, 1); const int input_batch_size = ArraySize(input_dims, 3); const int input_height = ArraySize(input_dims, 2); const int input_width = ArraySize(input_dims, 1); const int depth = ArraySize(input_dims, 0); const int block_shape_width = block_shape_data[1]; const int block_shape_height = block_shape_data[0]; + const int crops_top = crops_data[0]; + const int crops_left = crops_data[2]; for (int in_batch = 0; in_batch < input_batch_size; ++in_batch) { + const int out_batch = in_batch % output_batch_size; + const int spatial_offset = in_batch / output_batch_size; for (int in_h = 0; in_h < input_height; ++in_h) { + const int out_h = in_h * block_shape_height + + spatial_offset / block_shape_width - crops_top; + if (out_h < 0 || out_h >= output_height) { + continue; + } for (int in_w = 0; in_w < input_width; ++in_w) { - int out_batch = in_batch % output_batch_size; - int out_w = in_w * block_shape_width + - (in_batch / output_batch_size) % block_shape_width; - int out_h = in_h * block_shape_height + - 
(in_batch / output_batch_size) / block_shape_width; + const int out_w = in_w * block_shape_width + + spatial_offset % block_shape_width - crops_left; + + if (out_w < 0 || out_w >= output_width) { + continue; + } T* out = output_data + Offset(output_dims, 0, out_w, out_h, out_batch); const T* in = input_data + Offset(input_dims, 0, in_w, in_h, in_batch); memcpy(out, in, depth * sizeof(T)); @@ -3031,7 +2992,7 @@ template inline void Pad(const T* input_data, const Dims<4>& input_dims, const std::vector& left_paddings, const std::vector& right_paddings, T* output_data, - const Dims<4>& output_dims) { + const Dims<4>& output_dims, const int32_t pad_value) { const int output_batch = ArraySize(output_dims, 3); const int output_height = ArraySize(output_dims, 2); const int output_width = ArraySize(output_dims, 1); @@ -3061,7 +3022,7 @@ inline void Pad(const T* input_data, const Dims<4>& input_dims, out_w >= output_width - right_w_padding || out_d < left_d_padding || out_d >= output_depth - right_d_padding) { - *out_ptr++ = 0; + *out_ptr++ = static_cast(pad_value); } else { *out_ptr++ = *in_ptr++; } @@ -3071,59 +3032,148 @@ inline void Pad(const T* input_data, const Dims<4>& input_dims, } } -inline bool LoopCondition(int index, int stop, int stride) { - return stride > 0 ? index < stop : index > stop; +template +inline void Pad(const T* input_data, const Dims<4>& input_dims, + const std::vector& left_paddings, + const std::vector& right_paddings, T* output_data, + const Dims<4>& output_dims) { + Pad(input_data, input_dims, left_paddings, right_paddings, output_data, + output_dims, 0); } -inline int StartIndex(int start, int stride, int dim, bool masked) { - return masked ? (stride > 0 ? 
0 : dim - 1) : start; +// STRIDED SLICE +// The functions below for StridedSlice are mirrored in a number of places: +// +// propagate_fixed_sizes.cc +// propagate_shapes.cc +// resolve_constant_strided_slice.cc +// optimized_ops.h +// +// It is designed for an arbitrary number of dimensions, even though dimensions +// here are fixed at 4. This is because we expect to eventually support +// arbitrary dimensionality. Also note that the axis orders are reversed for +// runtime ops, and so the indices and masks must be as well too. +// +// Be warned this code involves some rather subtle logic of python slicing. The +// best "ground truth" is to compare results to actual python execution. + +// Use until std::clamp() is available from C++17. +inline int Clamp(const int v, const int lo, const int hi) { + TFLITE_DCHECK(!(hi < lo)); + if (hi < v) return hi; + if (v < lo) return lo; + return v; +} + +inline int StartForAxis(int begin_mask, const std::vector& start_indices, + const std::vector& strides, + const Dims<4>& input_shape, int axis) { + // Begin with the specified index + int start = start_indices[axis]; + + // begin_mask override + if (begin_mask & 1 << axis) { + if (strides[axis] > 0) { + // Forward iteration - use the first element. These values will get + // clamped below (Note: We could have set them to 0 and axis_size-1, but + // use lowest() and max() to maintain symmetry with StopForAxis()) + start = std::numeric_limits::lowest(); + } else { + // Backward iteration - use the last element. + start = std::numeric_limits::max(); + } + } + + // Handle negative indices + int axis_size = input_shape.sizes[axis]; + if (start < 0) { + start += axis_size; + } + + // Clamping + start = Clamp(start, 0, axis_size - 1); + + return start; } -inline int StopIndex(int start, int stop, int stride, int dim, bool masked, - bool shrink_axis_masked) { - return shrink_axis_masked ? stride > 0 ? start + 1 : start - 1 - : masked ? (stride > 0 ? 
dim : -1) : stop; +inline int StopForAxis(int end_mask, const std::vector& stop_indices, + const std::vector& strides, + const Dims<4>& input_shape, int axis) { + // Begin with the specified index + int stop = stop_indices[axis]; + + // end_mask override + if (end_mask & (1 << axis)) { + if (strides[axis] > 0) { + // Forward iteration - use the last element. These values will get + // clamped below + stop = std::numeric_limits::max(); + } else { + // Backward iteration - use the first element. + stop = std::numeric_limits::lowest(); + } + } + + // Handle negative indices + int axis_size = input_shape.sizes[axis]; + if (stop < 0) { + stop += axis_size; + } + + // Clamping + // Because the end index points one past the last element, we need slightly + // different clamping ranges depending on the direction. + if (strides[axis] > 0) { + // Forward iteration + stop = Clamp(stop, 0, axis_size); + } else { + // Backward iteration + stop = Clamp(stop, -1, axis_size - 1); + } + + return stop; +} + +inline bool LoopCondition(int index, int stop, int stride) { + // True when we have reached the end of an axis and should loop. + return stride > 0 ? 
index >= stop : index <= stop; } template inline void StridedSlice(const T* input_data, const Dims<4>& input_dims, - int begin_mask, int end_mask, int shrink_axis_mask, - const std::vector& starts, - const std::vector& stops, + int begin_mask, int end_mask, + const std::vector& start_indices, + const std::vector& stop_indices, const std::vector& strides, T* output_data, const Dims<4>& output_dims) { - TFLITE_DCHECK_EQ(starts.size(), 4); - TFLITE_DCHECK_EQ(stops.size(), 4); + TFLITE_DCHECK_EQ(start_indices.size(), 4); + TFLITE_DCHECK_EQ(stop_indices.size(), 4); TFLITE_DCHECK_EQ(strides.size(), 4); const int start_b = - StartIndex(starts[3], strides[3], input_dims.sizes[3], begin_mask & 8); + StartForAxis(begin_mask, start_indices, strides, input_dims, 3); const int stop_b = - StopIndex(start_b, stops[3], strides[3], input_dims.sizes[3], - end_mask & 8, shrink_axis_mask & 8); + StopForAxis(end_mask, stop_indices, strides, input_dims, 3); const int start_h = - StartIndex(starts[2], strides[2], input_dims.sizes[2], begin_mask & 4); + StartForAxis(begin_mask, start_indices, strides, input_dims, 2); const int stop_h = - StopIndex(start_h, stops[2], strides[2], input_dims.sizes[2], - end_mask & 4, shrink_axis_mask & 4); + StopForAxis(end_mask, stop_indices, strides, input_dims, 2); const int start_w = - StartIndex(starts[1], strides[1], input_dims.sizes[1], begin_mask & 2); + StartForAxis(begin_mask, start_indices, strides, input_dims, 1); const int stop_w = - StopIndex(start_w, stops[1], strides[1], input_dims.sizes[1], - end_mask & 2, shrink_axis_mask & 2); + StopForAxis(end_mask, stop_indices, strides, input_dims, 1); const int start_d = - StartIndex(starts[0], strides[0], input_dims.sizes[0], begin_mask & 1); + StartForAxis(begin_mask, start_indices, strides, input_dims, 0); const int stop_d = - StopIndex(start_d, stops[0], strides[0], input_dims.sizes[0], - end_mask & 1, shrink_axis_mask & 1); + StopForAxis(end_mask, stop_indices, strides, input_dims, 0); T* out_ptr 
= output_data; - for (int in_b = start_b; LoopCondition(in_b, stop_b, strides[3]); + for (int in_b = start_b; !LoopCondition(in_b, stop_b, strides[3]); in_b += strides[3]) { - for (int in_h = start_h; LoopCondition(in_h, stop_h, strides[2]); + for (int in_h = start_h; !LoopCondition(in_h, stop_h, strides[2]); in_h += strides[2]) { - for (int in_w = start_w; LoopCondition(in_w, stop_w, strides[1]); + for (int in_w = start_w; !LoopCondition(in_w, stop_w, strides[1]); in_w += strides[1]) { - for (int in_d = start_d; LoopCondition(in_d, stop_d, strides[0]); + for (int in_d = start_d; !LoopCondition(in_d, stop_d, strides[0]); in_d += strides[0]) { *out_ptr++ = input_data[Offset(input_dims, in_d, in_w, in_h, in_b)]; } @@ -3132,18 +3182,6 @@ inline void StridedSlice(const T* input_data, const Dims<4>& input_dims, } } -template -inline void StridedSlice(const T* input_data, const Dims<4>& input_dims, - int begin_mask, int end_mask, - const std::vector& starts, - const std::vector& stops, - const std::vector& strides, T* output_data, - const Dims<4>& output_dims) { - StridedSlice(input_data, input_dims, begin_mask, end_mask, - /*shrink_axis_mask=*/0, starts, stops, strides, output_data, - output_dims); -} - template inline void Slice(const T* input_data, const Dims<4>& input_dims, const std::vector& begin, const std::vector& size, @@ -3320,23 +3358,11 @@ template void TensorFlowMinimum(const T* input1_data, const Dims<4>& input1_dims, const T* input2_data, T* output_data, const Dims<4>& output_dims) { - int batches = MatchingArraySize(input1_dims, 3, output_dims, 3); - int input_height = MatchingArraySize(input1_dims, 2, output_dims, 2); - int input_width = MatchingArraySize(input1_dims, 1, output_dims, 1); - int depth = MatchingArraySize(input1_dims, 0, output_dims, 0); + const int flat_size = MatchingFlatSize(output_dims, input1_dims); auto min_value = input2_data[0]; - - for (int b = 0; b < batches; b++) { - for (int y = 0; y < input_height; y++) { - for (int x = 0; x < 
input_width; x++) { - for (int c = 0; c < depth; c++) { - int offset = Offset(input1_dims, c, x, y, b); - output_data[offset] = - input1_data[offset] > min_value ? min_value : input1_data[offset]; - } - } - } + for (int i = 0; i < flat_size; i++) { + output_data[i] = input1_data[i] > min_value ? min_value : input1_data[i]; } } @@ -3344,30 +3370,19 @@ template void TensorFlowMaximum(const T* input1_data, const Dims<4>& input1_dims, const T* input2_data, T* output_data, const Dims<4>& output_dims) { - int batches = MatchingArraySize(input1_dims, 3, output_dims, 3); - int input_height = MatchingArraySize(input1_dims, 2, output_dims, 2); - int input_width = MatchingArraySize(input1_dims, 1, output_dims, 1); - int depth = MatchingArraySize(input1_dims, 0, output_dims, 0); + const int flat_size = MatchingFlatSize(output_dims, input1_dims); auto max_value = input2_data[0]; - - for (int b = 0; b < batches; b++) { - for (int y = 0; y < input_height; y++) { - for (int x = 0; x < input_width; x++) { - for (int c = 0; c < depth; c++) { - int offset = Offset(input1_dims, c, x, y, b); - output_data[offset] = - input1_data[offset] < max_value ? max_value : input1_data[offset]; - } - } - } + for (int i = 0; i < flat_size; i++) { + output_data[i] = input1_data[i] < max_value ? 
max_value : input1_data[i]; } } -template -void TensorFlowMaximum(const T* input1_data, const Dims<4>& input1_dims, - const T* input2_data, const Dims<4>& input2_dims, - T* output_data, const Dims<4>& output_dims) { +template +void TensorFlowMaximumMinimum(const T* input1_data, const Dims<4>& input1_dims, + const T* input2_data, const Dims<4>& input2_dims, + T* output_data, const Dims<4>& output_dims, + Op op) { NdArrayDesc<4> desc1; NdArrayDesc<4> desc2; NdArrayDescsForElementwiseBroadcast(input1_dims, input2_dims, &desc1, &desc2); @@ -3381,7 +3396,7 @@ void TensorFlowMaximum(const T* input1_data, const Dims<4>& input1_dims, auto in2_idx = SubscriptToIndex(desc2, c, x, y, b); auto in1_val = input1_data[in1_idx]; auto in2_val = input2_data[in2_idx]; - output_data[out_idx] = in1_val > in2_val ? in1_val : in2_val; + output_data[out_idx] = op(in1_val, in2_val); } } } @@ -3400,25 +3415,20 @@ void ArgMax(const T3* axis, const T1* input_data, const Dims<4>& input_dims, // input dimensions here. We enforce the constraint that the last dimension // must always be 1. 
TFLITE_DCHECK_EQ(ArraySize(output_dims, 0), 1); - const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); - const int height = MatchingArraySize(input_dims, 2, output_dims, 2); - const int width = MatchingArraySize(input_dims, 1, output_dims, 1); + const int outer_size = MatchingFlatSizeSkipDim(input_dims, 0, output_dims); const int depth = ArraySize(input_dims, 0); - for (int b = 0; b < batches; ++b) { - for (int y = 0; y < height; ++y) { - for (int x = 0; x < width; ++x) { - auto max_value = input_data[Offset(input_dims, 0, x, y, b)]; - int max_index = 0; - for (int d = 1; d < depth; ++d) { - const auto& curr_value = input_data[Offset(input_dims, d, x, y, b)]; - if (curr_value > max_value) { - max_value = curr_value; - max_index = d; - } - } - output_data[Offset(output_dims, 0, x, y, b)] = max_index; + + for (int i = 0; i < outer_size; ++i) { + auto max_value = input_data[i * depth]; + int max_index = 0; + for (int d = 1; d < depth; ++d) { + const auto& curr_value = input_data[i * depth + d]; + if (curr_value > max_value) { + max_value = curr_value; + max_index = d; } } + output_data[i] = max_index; } } @@ -3468,11 +3478,11 @@ inline void TransposeConv(const float* input_data, const Dims<4>& input_dims, // Although transpose convolution simplifies to convolution with transposed // weights for strides of 1, non-unitary striding complicates matters. To - // keep this reference implementation as clear as possible, we use a "scatter" - // access pattern, where we loop through all the input elements, computing - // their influence on the output, rather than looping through the output - // elements in the typical "gather" access pattern of a conv. We therefore - // must initialize the output array to zero. 
+ // keep this reference implementation as clear as possible, we use a + // "scatter" access pattern, where we loop through all the input elements, + // computing their influence on the output, rather than looping through the + // output elements in the typical "gather" access pattern of a conv. We + // therefore must initialize the output array to zero. for (int i = 0; i < RequiredBufferSizeForDims(output_dims); i++) { output_data[i] = 0.0f; } @@ -3512,6 +3522,51 @@ inline void TransposeConv(const float* input_data, const Dims<4>& input_dims, } } +template +inline void Less(int64_t num_elements, const T* input1, const T* input2, + bool* output) { + for (int64_t i = 0; i < num_elements; ++i) { + output[i] = input1[i] < input2[i]; + } +} + +template +inline void Less(const T* input1_data, const Dims<4>& input1_dims, + const T* input2_data, const Dims<4>& input2_dims, + bool* output_data, const Dims<4>& output_dims) { + const int64_t batches = + MatchingArraySize(input1_dims, 3, input2_dims, 3, output_dims, 3); + const int64_t height = + MatchingArraySize(input1_dims, 2, input2_dims, 2, output_dims, 2); + const int64_t width = + MatchingArraySize(input1_dims, 1, input2_dims, 1, output_dims, 1); + const int64_t depth = + MatchingArraySize(input1_dims, 0, input2_dims, 0, output_dims, 0); + Less(batches * height * width * depth, input1_data, input2_data, output_data); +} + +template +inline void BroadcastLess(T1* input1_data, const Dims<4>& input1_dims, + T2* input2_data, const Dims<4>& input2_dims, + bool* output_data, const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("BroadcastLess"); + NdArrayDesc<4> desc1; + NdArrayDesc<4> desc2; + NdArrayDescsForElementwiseBroadcast(input1_dims, input2_dims, &desc1, &desc2); + + for (int b = 0; b < ArraySize(output_dims, 3); ++b) { + for (int y = 0; y < ArraySize(output_dims, 2); ++y) { + for (int x = 0; x < ArraySize(output_dims, 1); ++x) { + for (int c = 0; c < ArraySize(output_dims, 0); ++c) { + 
output_data[Offset(output_dims, c, x, y, b)] = + input1_data[SubscriptToIndex(desc1, c, x, y, b)] < + input2_data[SubscriptToIndex(desc2, c, x, y, b)]; + } + } + } + } +} + } // namespace reference_ops } // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/internal/tensor.h b/tensorflow/contrib/lite/kernels/internal/tensor.h index 62e38e0d4c3e023d0ed2242fc9438b096b86dc59..62cea143e6afc0631493012be26808a89eb03138 100644 --- a/tensorflow/contrib/lite/kernels/internal/tensor.h +++ b/tensorflow/contrib/lite/kernels/internal/tensor.h @@ -44,6 +44,11 @@ inline int64_t* GetTensorData(TfLiteTensor* tensor) { return tensor != nullptr ? tensor->data.i64 : nullptr; } +template <> +inline bool* GetTensorData(TfLiteTensor* tensor) { + return tensor != nullptr ? tensor->data.b : nullptr; +} + inline int RemapDim(int max_dimensions, int d) { return max_dimensions - d - 1; } @@ -126,6 +131,29 @@ class VectorOfTensors { std::vector*> all_dims_ptr_; }; +// A list of quantized tensors in a format that can be used by kernels like +// split and concatenation. +class VectorOfQuantizedTensors : public VectorOfTensors { + public: + // Build with the tensors in 'tensor_list'. 
+ VectorOfQuantizedTensors(const TfLiteContext& context, + const TfLiteIntArray& tensor_list) + : VectorOfTensors(context, tensor_list) { + for (int i = 0; i < tensor_list.size; ++i) { + TfLiteTensor* t = &context.tensors[tensor_list.data[i]]; + zero_point_.push_back(t->params.zero_point); + scale_.push_back(t->params.scale); + } + } + + const float* scale() const { return scale_.data(); } + const int32* zero_point() const { return zero_point_.data(); } + + private: + std::vector zero_point_; + std::vector scale_; +}; + } // namespace tflite #endif // TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_TENSOR_H_ diff --git a/tensorflow/contrib/lite/kernels/internal/types.h b/tensorflow/contrib/lite/kernels/internal/types.h index 293538fcbb6406d6065d8efd25adb3b163638c92..3290c364c18224edb733c177ad72bf86b6892434 100644 --- a/tensorflow/contrib/lite/kernels/internal/types.h +++ b/tensorflow/contrib/lite/kernels/internal/types.h @@ -130,14 +130,125 @@ int MatchingArraySize(const ArrayType1& array1, int index1, return MatchingArraySize(array1, index1, args...); } -inline int RequiredBufferSizeForDims(const Dims<4>& dims) { +template +inline int FlatSize(const Dims& dims) { int max_offset = 0; - for (int i = 0; i < 4; i++) { + for (int i = 0; i < N; i++) { max_offset += (dims.sizes[i] - 1) * dims.strides[i]; } return max_offset + 1; } +// Deprecated. Prefer FlatSize. +inline int RequiredBufferSizeForDims(const Dims<4>& dims) { + return FlatSize(dims); +} + +// Flat size calculation, checking that dimensions match with one or more other +// arrays. 
+template +inline int MatchingFlatSize(const Dims& dims, const Dims& check_dims_0) { + for (int i = 0; i < N; i++) { + TFLITE_DCHECK_EQ(ArraySize(dims, i), ArraySize(check_dims_0, i)); + } + return FlatSize(dims); +} + +template +inline int MatchingFlatSize(const Dims& dims, const Dims& check_dims_0, + const Dims& check_dims_1) { + for (int i = 0; i < N; i++) { + TFLITE_DCHECK_EQ(ArraySize(dims, i), ArraySize(check_dims_0, i)); + } + return MatchingFlatSize(dims, check_dims_1); +} + +template +inline int MatchingFlatSize(const Dims& dims, const Dims& check_dims_0, + const Dims& check_dims_1, + const Dims& check_dims_2) { + for (int i = 0; i < N; i++) { + TFLITE_DCHECK_EQ(ArraySize(dims, i), ArraySize(check_dims_0, i)); + } + return FlatSize(dims, check_dims_1, check_dims_2); +} + +template +inline int MatchingFlatSize(const Dims& dims, const Dims& check_dims_0, + const Dims& check_dims_1, + const Dims& check_dims_2, + const Dims& check_dims_3) { + for (int i = 0; i < N; i++) { + TFLITE_DCHECK_EQ(ArraySize(dims, i), ArraySize(check_dims_0, i)); + } + return FlatSize(dims, check_dims_1, check_dims_2, check_dims_3); +} + +// Data is required to be contiguous, and so many operators can use either the +// full array flat size or the flat size with one dimension skipped (commonly +// the depth). +template +inline int FlatSizeSkipDim(const Dims& dims, int skip_dim) { + TFLITE_DCHECK(skip_dim >= 0 && skip_dim < N); + int flat_size = 1; + for (int i = 0; i < N; i++) { + flat_size *= (i == skip_dim) ? 1 : dims.sizes[i]; + } + return flat_size; +} + +// A combination of MatchingFlatSize() and FlatSizeSkipDim(). 
+template +inline int MatchingFlatSizeSkipDim(const Dims& dims, int skip_dim, + const Dims& check_dims_0) { + for (int i = 0; i < N; i++) { + if (i != skip_dim) { + TFLITE_DCHECK_EQ(ArraySize(dims, i), ArraySize(check_dims_0, i)); + } + } + return FlatSizeSkipDim(dims, skip_dim); +} + +template +inline int MatchingFlatSizeSkipDim(const Dims& dims, int skip_dim, + const Dims& check_dims_0, + const Dims& check_dims_1) { + for (int i = 0; i < N; i++) { + if (i != skip_dim) { + TFLITE_DCHECK_EQ(ArraySize(dims, i), ArraySize(check_dims_0, i)); + } + } + return MatchingFlatSizeSkipDim(dims, skip_dim, check_dims_1); +} + +template +inline int MatchingFlatSizeSkipDim(const Dims& dims, int skip_dim, + const Dims& check_dims_0, + const Dims& check_dims_1, + const Dims& check_dims_2) { + for (int i = 0; i < N; i++) { + if (i != skip_dim) { + TFLITE_DCHECK_EQ(ArraySize(dims, i), ArraySize(check_dims_0, i)); + } + } + return MatchingFlatSizeSkipDim(dims, skip_dim, check_dims_1, check_dims_2); +} + +template +inline int MatchingFlatSizeSkipDim(const Dims& dims, int skip_dim, + const Dims& check_dims_0, + const Dims& check_dims_1, + const Dims& check_dims_2, + const Dims& check_dims_3) { + for (int i = 0; i < N; i++) { + if (i != skip_dim) { + TFLITE_DCHECK_EQ(ArraySize(dims, i), ArraySize(check_dims_0, i)); + } + } + return MatchingFlatSizeSkipDim(dims, skip_dim, check_dims_1, check_dims_2, + check_dims_3); +} + template bool IsPackedWithoutStrides(const Dims& dims) { int expected_stride = 1; diff --git a/tensorflow/contrib/lite/kernels/l2norm.cc b/tensorflow/contrib/lite/kernels/l2norm.cc index ee8bfe56d95e9f383ef49b40b8f58b63d61da3e1..e67f4e06f3680f8c9447a9e831b63415994ea176 100644 --- a/tensorflow/contrib/lite/kernels/l2norm.cc +++ b/tensorflow/contrib/lite/kernels/l2norm.cc @@ -45,10 +45,15 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE(context, NumDimensions(input) <= 4); - // TODO(ahentz): Our current implementations only support 
float32. - TF_LITE_ENSURE_EQ(context, output->type, kTfLiteFloat32); + TF_LITE_ENSURE( + context, output->type == kTfLiteFloat32 || output->type == kTfLiteUInt8); TF_LITE_ENSURE_EQ(context, input->type, output->type); + if (output->type == kTfLiteUInt8) { + TF_LITE_ENSURE_EQ(context, output->params.scale, (1. / 128.)); + TF_LITE_ENSURE_EQ(context, output->params.zero_point, 128); + } + // TODO(ahentz): For some reason our implementations don't support // activations. TF_LITE_ENSURE_EQ(context, params->activation, kTfLiteActNone); @@ -75,6 +80,19 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { TF_LITE_L2NORM(optimized_ops); } #undef TF_LITE_L2NORM + } else if (output->type == kTfLiteUInt8) { +#define TF_LITE_L2NORM(type) \ + type::L2Normalization(GetTensorData(input), GetTensorDims(input), \ + input->params.zero_point, \ + GetTensorData(output), GetTensorDims(output)) + + if (kernel_type == kReference) { + TF_LITE_L2NORM(reference_ops); + } + if (kernel_type == kGenericOptimized) { + TF_LITE_L2NORM(optimized_ops); + } +#undef TF_LITE_L2NORM } else { context->ReportError(context, "Inputs and outputs not all float types."); return kTfLiteError; diff --git a/tensorflow/contrib/lite/kernels/l2norm_test.cc b/tensorflow/contrib/lite/kernels/l2norm_test.cc index 30e103f3303484c339ef98e6a68e0438291c102f..042314ccf55cb6de12c743448fbe040f35e7baab 100644 --- a/tensorflow/contrib/lite/kernels/l2norm_test.cc +++ b/tensorflow/contrib/lite/kernels/l2norm_test.cc @@ -25,10 +25,22 @@ using ::testing::ElementsAreArray; class L2NormOpModel : public SingleOpModel { public: - L2NormOpModel(std::initializer_list input_shape, - ActivationFunctionType activation_type) { - input_ = AddInput(TensorType_FLOAT32); - output_ = AddOutput(TensorType_FLOAT32); + L2NormOpModel(const std::initializer_list input_shape, + const TensorType tensor_type, + const ActivationFunctionType activation_type) { + TensorData data = TensorData{tensor_type}; + if (tensor_type != TensorType_FLOAT32) 
{ + data.min = -2.0; + data.max = 2.0; + data.scale = 2.0; + data.zero_point = 128; + } + input_ = AddInput(data); + if (tensor_type != TensorType_FLOAT32) { + data.min = -1.0; + data.max = 127.0 / 128.0; + } + output_ = AddOutput(data); SetBuiltinOp(BuiltinOperator_L2_NORMALIZATION, BuiltinOptions_L2NormOptions, CreateL2NormOptions(builder_, activation_type).Union()); BuildInterpreter({input_shape}); @@ -38,7 +50,17 @@ class L2NormOpModel : public SingleOpModel { PopulateTensor(input_, data); } - std::vector GetOutput() { return ExtractVector(output_); } + template + std::vector GetOutput() { + return ExtractVector(output_); + } + + std::vector GetDequantizedOutput() { + return Dequantize(ExtractVector(output_), + GetScale(output_), GetZeroPoint(output_)); + } + + int input() const { return input_; } private: int input_; @@ -46,13 +68,26 @@ class L2NormOpModel : public SingleOpModel { }; TEST(L2NormOpTest, SimpleTest) { - L2NormOpModel m({1, 1, 1, 6}, ActivationFunctionType_NONE); + L2NormOpModel m({1, 1, 1, 6}, TensorType_FLOAT32, + ActivationFunctionType_NONE); m.SetInput({-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}); m.Invoke(); - EXPECT_THAT(m.GetOutput(), + EXPECT_THAT(m.GetOutput(), ElementsAreArray({-0.55, 0.3, 0.35, 0.6, -0.35, 0.05})); } +TEST(L2NormOpTest, SimpleUint8Test) { + L2NormOpModel m({1, 1, 1, 6}, TensorType_UINT8, ActivationFunctionType_NONE); + + m.QuantizeAndPopulate(m.input(), {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}); + m.Invoke(); + EXPECT_THAT(m.GetOutput(), + ElementsAreArray({58, 166, 173, 205, 83, 134})); + EXPECT_THAT(m.GetDequantizedOutput(), + ElementsAreArray( + ArrayFloatNear({-0.55, 0.3, 0.35, 0.6, -0.35, 0.05}, 0.1))); +} + } // namespace } // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/maximum.cc b/tensorflow/contrib/lite/kernels/maximum_minimum.cc similarity index 50% rename from tensorflow/contrib/lite/kernels/maximum.cc rename to tensorflow/contrib/lite/kernels/maximum_minimum.cc index 
9fdf2b47eaf421bda11e7474ad819692106a90ac..5a28d663c9e756040746f0a98b356afba76cceab 100644 --- a/tensorflow/contrib/lite/kernels/maximum.cc +++ b/tensorflow/contrib/lite/kernels/maximum_minimum.cc @@ -24,9 +24,9 @@ limitations under the License. namespace tflite { namespace ops { namespace builtin { -namespace maximum { +namespace maximum_minimum { -// This file has a reference implemenation of TFMaximum. +// This file has a reference implemenation of TFMaximum/TFMinimum. enum KernelType { kReference, }; @@ -35,8 +35,8 @@ constexpr int kInputTensor1 = 0; constexpr int kInputTensor2 = 1; constexpr int kOutputTensor = 0; -struct MaximumContext { - MaximumContext(TfLiteContext* context, TfLiteNode* node) { +struct OpContext { + OpContext(TfLiteContext* context, TfLiteNode* node) { input1 = GetInput(context, node, kInputTensor1); input2 = GetInput(context, node, kInputTensor2); output = GetOutput(context, node, kOutputTensor); @@ -50,30 +50,68 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE_EQ(context, NumInputs(node), 2); TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); - MaximumContext op_context(context, node); + OpContext op_context(context, node); TF_LITE_ENSURE_EQ(context, op_context.input1->type, op_context.input2->type); - TfLiteIntArray* output_dims = TfLiteIntArrayCopy(op_context.input2->dims); - op_context.output->type = op_context.input2->type; - return context->ResizeTensor(context, op_context.output, output_dims); + op_context.output->type = op_context.input1->type; + + bool requires_broadcast = + !HaveSameShapes(op_context.input1, op_context.input2); + + TfLiteIntArray* output_size = nullptr; + if (requires_broadcast) { + TF_LITE_ENSURE_OK( + context, CalculateShapeForBroadcast(context, op_context.input1, + op_context.input2, &output_size)); + } else { + output_size = TfLiteIntArrayCopy(op_context.input1->dims); + } + + return context->ResizeTensor(context, op_context.output, output_size); } -template -TfLiteStatus 
Eval(TfLiteContext* context, TfLiteNode* node) { - MaximumContext op_context(context, node); +struct MaximumOp { + template + static data_type op(data_type el1, data_type el2) { + return el1 > el2 ? el1 : el2; + } +}; -#define TF_LITE_MAXIMUM(kernel_type, data_type) \ - kernel_type::TensorFlowMaximum( \ - GetTensorData(op_context.input1), \ - GetTensorDims(op_context.input1), \ - GetTensorData(op_context.input2), \ - GetTensorDims(op_context.input2), \ - GetTensorData(op_context.output), \ - GetTensorDims(op_context.output)) +struct MinimumOp { + template + static data_type op(data_type el1, data_type el2) { + return el1 < el2 ? el1 : el2; + } +}; + +template +void TFLiteOperation(TfLiteContext* context, TfLiteNode* node, + const OpContext& op_context) { + reference_ops::TensorFlowMaximumMinimum( + GetTensorData(op_context.input1), + GetTensorDims(op_context.input1), + GetTensorData(op_context.input2), + GetTensorDims(op_context.input2), + GetTensorData(op_context.output), + GetTensorDims(op_context.output), op_type::template op); +} + +template +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + OpContext op_context(context, node); if (kernel_type == kReference) { switch (op_context.output->type) { case kTfLiteFloat32: - TF_LITE_MAXIMUM(reference_ops, float); + TFLiteOperation(context, node, op_context); + break; + case kTfLiteUInt8: + TFLiteOperation(context, node, op_context); + break; + case kTfLiteInt32: + TFLiteOperation(context, node, op_context); + break; + case kTfLiteInt64: + TFLiteOperation(context, node, op_context); break; default: context->ReportError(context, @@ -87,19 +125,28 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { op_context.output->type); return kTfLiteError; } -#undef TF_LITE_MAXIMUM return kTfLiteOk; } -} // namespace maximum +} // namespace maximum_minimum TfLiteRegistration* Register_MAXIMUM_REF() { - static TfLiteRegistration r = {nullptr, nullptr, maximum::Prepare, - maximum::Eval}; + static 
TfLiteRegistration r = { + nullptr, nullptr, maximum_minimum::Prepare, + maximum_minimum::Eval}; return &r; } +TfLiteRegistration* Register_MINIMUM_REF() { + static TfLiteRegistration r = { + nullptr, nullptr, maximum_minimum::Prepare, + maximum_minimum::Eval}; + return &r; +} TfLiteRegistration* Register_MAXIMUM() { return Register_MAXIMUM_REF(); } +TfLiteRegistration* Register_MINIMUM() { return Register_MINIMUM_REF(); } } // namespace builtin } // namespace ops diff --git a/tensorflow/contrib/lite/kernels/maximum_minimum_test.cc b/tensorflow/contrib/lite/kernels/maximum_minimum_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..0752aa1804722accb1f88910fe013ffd632a4503 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/maximum_minimum_test.cc @@ -0,0 +1,143 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include +#include "tensorflow/contrib/lite/interpreter.h" +#include "tensorflow/contrib/lite/kernels/register.h" +#include "tensorflow/contrib/lite/kernels/test_util.h" +#include "tensorflow/contrib/lite/model.h" + +namespace tflite { +namespace { + +using ::testing::ElementsAreArray; + +class MaxMinOpModel : public SingleOpModel { + public: + MaxMinOpModel(tflite::BuiltinOperator op, const TensorData& input1, + const TensorData& input2, const TensorType& output) { + input1_ = AddInput(input1); + input2_ = AddInput(input2); + output_ = AddOutput(output); + SetBuiltinOp(op, BuiltinOptions_MaximumMinimumOptions, + CreateMaximumMinimumOptions(builder_).Union()); + BuildInterpreter({GetShape(input1_), GetShape(input2_)}); + } + + template + void SetInput1(std::initializer_list data) { + PopulateTensor(input1_, data); + } + + template + void SetInput2(std::initializer_list data) { + PopulateTensor(input2_, data); + } + + template + std::vector GetOutput() { + return ExtractVector(output_); + } + std::vector GetOutputShape() { return GetTensorShape(output_); } + + protected: + int input1_; + int input2_; + int output_; +}; + +template +void TestModel(tflite::BuiltinOperator op, const TensorData& input1, + const TensorData& input2, const TensorData& output, + std::initializer_list input1_values, + std::initializer_list input2_values, + std::initializer_list output_values) { + MaxMinOpModel m(op, input1, input2, output.type); + m.SetInput1(input1_values); + m.SetInput2(input2_values); + m.Invoke(); + EXPECT_THAT(m.GetOutputShape(), ElementsAreArray(output.shape)); + EXPECT_THAT(m.GetOutput(), ElementsAreArray(output_values)); +} + +template <> +void TestModel(tflite::BuiltinOperator op, const TensorData& input1, + const TensorData& input2, const TensorData& output, + std::initializer_list input1_values, + std::initializer_list input2_values, + std::initializer_list output_values) { + 
MaxMinOpModel m(op, input1, input2, output.type); + m.SetInput1(input1_values); + m.SetInput2(input2_values); + m.Invoke(); + EXPECT_THAT(m.GetOutputShape(), ElementsAreArray(output.shape)); + EXPECT_THAT(m.GetOutput(), + ElementsAreArray(ArrayFloatNear(output_values))); +} + +TEST(MaximumOpTest, FloatTest) { + std::initializer_list data1 = {1.0, 0.0, -1.0, 11.0, -2.0, -1.44}; + std::initializer_list data2 = {-1.0, 0.0, 1.0, 12.0, -3.0, -1.43}; + TestModel(BuiltinOperator_MAXIMUM, {TensorType_FLOAT32, {3, 1, 2}}, + {TensorType_FLOAT32, {3, 1, 2}}, + {TensorType_FLOAT32, {3, 1, 2}}, data1, data2, + {1.0, 0.0, 1.0, 12.0, -2.0, -1.43}); + TestModel(BuiltinOperator_MINIMUM, {TensorType_FLOAT32, {3, 1, 2}}, + {TensorType_FLOAT32, {3, 1, 2}}, + {TensorType_FLOAT32, {3, 1, 2}}, data1, data2, + {-1.0, 0.0, -1.0, 11.0, -3.0, -1.44}); +} + +TEST(MaxMinOpTest, Uint8Test) { + std::initializer_list data1 = {1, 0, 2, 11, 2, 23}; + std::initializer_list data2 = {0, 0, 1, 12, 255, 1}; + TestModel(BuiltinOperator_MAXIMUM, {TensorType_UINT8, {3, 1, 2}}, + {TensorType_UINT8, {3, 1, 2}}, + {TensorType_UINT8, {3, 1, 2}}, data1, data2, + {1, 0, 2, 12, 255, 23}); + TestModel(BuiltinOperator_MINIMUM, {TensorType_UINT8, {3, 1, 2}}, + {TensorType_UINT8, {3, 1, 2}}, + {TensorType_UINT8, {3, 1, 2}}, data1, data2, + {0, 0, 1, 11, 2, 1}); +} + +TEST(MaximumOpTest, FloatWithBroadcastTest) { + std::initializer_list data1 = {1.0, 0.0, -1.0, -2.0, -1.44, 11.0}; + std::initializer_list data2 = {0.5, 2.0}; + TestModel(BuiltinOperator_MAXIMUM, {TensorType_FLOAT32, {3, 1, 2}}, + {TensorType_FLOAT32, {2}}, {TensorType_FLOAT32, {3, 1, 2}}, + data1, data2, {1.0, 2.0, 0.5, 2.0, 0.5, 11.0}); + TestModel(BuiltinOperator_MINIMUM, {TensorType_FLOAT32, {3, 1, 2}}, + {TensorType_FLOAT32, {2}}, {TensorType_FLOAT32, {3, 1, 2}}, + data1, data2, {0.5, 0.0, -1.0, -2.0, -1.44, 2.0}); +} + +TEST(MaximumOpTest, Int32WithBroadcastTest) { + std::initializer_list data1 = {1, 0, -1, -2, 3, 11}; + std::initializer_list data2 
= {2}; + TestModel(BuiltinOperator_MAXIMUM, {TensorType_INT32, {3, 1, 2}}, + {TensorType_INT32, {1}}, {TensorType_INT32, {3, 1, 2}}, + data1, data2, {2, 2, 2, 2, 3, 11}); + TestModel(BuiltinOperator_MINIMUM, {TensorType_INT32, {3, 1, 2}}, + {TensorType_INT32, {1}}, {TensorType_INT32, {3, 1, 2}}, + data1, data2, {1, 0, -1, -2, 2, 2}); +} +} // namespace +} // namespace tflite + +int main(int argc, char** argv) { + ::tflite::LogToStderr(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/contrib/lite/kernels/maximum_test.cc b/tensorflow/contrib/lite/kernels/maximum_test.cc deleted file mode 100644 index b3fd7d4e6f40e53db51edf2e7594662629302add..0000000000000000000000000000000000000000 --- a/tensorflow/contrib/lite/kernels/maximum_test.cc +++ /dev/null @@ -1,81 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#include -#include "tensorflow/contrib/lite/interpreter.h" -#include "tensorflow/contrib/lite/kernels/register.h" -#include "tensorflow/contrib/lite/kernels/test_util.h" -#include "tensorflow/contrib/lite/model.h" - -namespace tflite { -namespace { - -using ::testing::ElementsAreArray; - -class MaximumOpModel : public SingleOpModel { - public: - MaximumOpModel(const TensorData& input1, const TensorData& input2, - const TensorType& output) { - input1_ = AddInput(input1); - input2_ = AddInput(input2); - output_ = AddOutput(output); - SetBuiltinOp(BuiltinOperator_MAXIMUM, BuiltinOptions_MaximumOptions, - CreateMaximumOptions(builder_).Union()); - BuildInterpreter({GetShape(input1_), GetShape(input2_)}); - } - - template - void SetInput1(std::initializer_list data) { - PopulateTensor(input1_, data); - } - - template - void SetInput2(std::initializer_list data) { - PopulateTensor(input2_, data); - } - - template - std::vector GetOutput() { - return ExtractVector(output_); - } - std::vector GetOutputShape() { return GetTensorShape(output_); } - - protected: - int input1_; - int input2_; - int output_; -}; - -TEST(MaximumOpTest, FloatTest) { - std::initializer_list data1 = {1.0, 0.0, -1.0, 11.0, -2.0, -1.44}; - std::initializer_list data2 = {-1.0, 0.0, 1.0, 12.0, -3.0, -1.43}; - MaximumOpModel m({TensorType_FLOAT32, {3, 1, 2}}, - {TensorType_FLOAT32, {3, 1, 2}}, TensorType_FLOAT32); - m.SetInput1(data1); - m.SetInput2(data2); - m.Invoke(); - EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({3, 1, 2})); - EXPECT_THAT( - m.GetOutput(), - ElementsAreArray(ArrayFloatNear({1.0, 0.0, 1.0, 12.0, -2.0, -1.43}))); -} - -} // namespace -} // namespace tflite - -int main(int argc, char** argv) { - ::tflite::LogToStderr(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/tensorflow/contrib/lite/kernels/pad.cc b/tensorflow/contrib/lite/kernels/pad.cc index 
c29da3862e84d6756bf5ef34b2ca06307b0a065d..4f9449a225c66a0fb2a9285e6aff3a1f7147f5dd 100644 --- a/tensorflow/contrib/lite/kernels/pad.cc +++ b/tensorflow/contrib/lite/kernels/pad.cc @@ -119,39 +119,46 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { after_padding.push_back(paddings_data[idx * 2 + 1]); } -#define TF_LITE_PAD(type, scalar) \ +#define TF_LITE_PAD(type, scalar, pad_value) \ type::Pad(GetTensorData(op_context.input), \ GetTensorDims(op_context.input), before_padding, after_padding, \ GetTensorData(op_context.output), \ - GetTensorDims(op_context.output)) + GetTensorDims(op_context.output), pad_value) switch (op_context.input->type) { case kTfLiteFloat32: if (kernel_type == kReference) { - TF_LITE_PAD(reference_ops, float); + TF_LITE_PAD(reference_ops, float, 0); } else if (kernel_type == kGenericOptimized) { - TF_LITE_PAD(optimized_ops, float); + TF_LITE_PAD(optimized_ops, float, 0); } break; case kTfLiteUInt8: + // Quantized Pad requires that 0 is represented in the quantized range. 
+ TF_LITE_ENSURE(context, op_context.output->params.zero_point >= + std::numeric_limits::min()); + TF_LITE_ENSURE(context, op_context.output->params.zero_point <= + std::numeric_limits::max()); if (kernel_type == kReference) { - TF_LITE_PAD(reference_ops, uint8_t); + TF_LITE_PAD(reference_ops, uint8_t, + op_context.output->params.zero_point); } else if (kernel_type == kGenericOptimized) { - TF_LITE_PAD(optimized_ops, uint8_t); + TF_LITE_PAD(optimized_ops, uint8_t, + op_context.output->params.zero_point); } break; case kTfLiteInt32: if (kernel_type == kReference) { - TF_LITE_PAD(reference_ops, int32_t); + TF_LITE_PAD(reference_ops, int32_t, 0); } else if (kernel_type == kGenericOptimized) { - TF_LITE_PAD(optimized_ops, int32_t); + TF_LITE_PAD(optimized_ops, int32_t, 0); } break; case kTfLiteInt64: if (kernel_type == kReference) { - TF_LITE_PAD(reference_ops, int64_t); + TF_LITE_PAD(reference_ops, int64_t, 0); } else if (kernel_type == kGenericOptimized) { - TF_LITE_PAD(optimized_ops, int64_t); + TF_LITE_PAD(optimized_ops, int64_t, 0); } break; default: diff --git a/tensorflow/contrib/lite/kernels/pad_test.cc b/tensorflow/contrib/lite/kernels/pad_test.cc index 28834ad0719291b2e868bca2d86a6685e6eb9962..c06237e5720874e66c5953edab2d3749cc88af28 100644 --- a/tensorflow/contrib/lite/kernels/pad_test.cc +++ b/tensorflow/contrib/lite/kernels/pad_test.cc @@ -22,6 +22,7 @@ namespace tflite { namespace { using ::testing::ElementsAreArray; +using ::testing::Matcher; class PadOpModel : public SingleOpModel { public: @@ -29,6 +30,10 @@ class PadOpModel : public SingleOpModel { PopulateTensor(input_, data); } + void SetQuantizedInput(std::initializer_list data) { + QuantizeAndPopulate(input_, data); + } + void SetPaddings(std::initializer_list paddings) { PopulateTensor(paddings_, paddings); } @@ -36,6 +41,11 @@ class PadOpModel : public SingleOpModel { std::vector GetOutput() { return ExtractVector(output_); } std::vector GetOutputShape() { return GetTensorShape(output_); } + 
std::vector GetDequantizedOutput() { + return Dequantize(ExtractVector(output_), + GetScale(output_), GetZeroPoint(output_)); + } + protected: int input_; int output_; @@ -50,16 +60,17 @@ class PadOpModel : public SingleOpModel { // m.Invoke(); class PadOpConstModel : public PadOpModel { public: - PadOpConstModel(std::initializer_list input_shape, + PadOpConstModel(const TensorData& input, std::initializer_list paddings_shape, - std::initializer_list paddings) { - input_ = AddInput(TensorType_FLOAT32); + std::initializer_list paddings, + const TensorData& output) { + input_ = AddInput(input); paddings_ = AddConstInput(TensorType_INT32, paddings, paddings_shape); - output_ = AddOutput(TensorType_FLOAT32); + output_ = AddOutput(output); SetBuiltinOp(BuiltinOperator_PAD, BuiltinOptions_PadOptions, CreatePadOptions(builder_).Union()); - BuildInterpreter({input_shape}); + BuildInterpreter({input.shape}); } }; @@ -72,40 +83,45 @@ class PadOpConstModel : public PadOpModel { // m.Invoke(); class PadOpDynamicModel : public PadOpModel { public: - PadOpDynamicModel(std::initializer_list input_shape, - std::initializer_list paddings_shape) { - input_ = AddInput(TensorType_FLOAT32); + PadOpDynamicModel(const TensorData& input, + std::initializer_list paddings_shape, + const TensorData& output) { + input_ = AddInput(input); paddings_ = AddInput(TensorType_INT32); - output_ = AddOutput(TensorType_FLOAT32); + output_ = AddOutput(output); SetBuiltinOp(BuiltinOperator_PAD, BuiltinOptions_PadOptions, CreatePadOptions(builder_).Union()); - BuildInterpreter({input_shape, paddings_shape}); + BuildInterpreter({input.shape, paddings_shape}); } }; TEST(PadOpTest, TooManyDimensions) { EXPECT_DEATH( - PadOpConstModel({1, 2, 3, 4, 5, 6, 7, 8, 9}, {9, 2}, - {1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9}), + PadOpConstModel({TensorType_FLOAT32, {1, 2, 3, 4, 5, 6, 7, 8, 9}}, {9, 2}, + {1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9}, + {TensorType_FLOAT32}), "dims != 4"); } 
TEST(PadOpTest, UnequalDimensions) { - EXPECT_DEATH(PadOpConstModel({1, 1, 2, 1}, {3, 2}, {1, 1, 2, 2, 3, 3}), + EXPECT_DEATH(PadOpConstModel({TensorType_FLOAT32, {1, 1, 2, 1}}, {3, 2}, + {1, 1, 2, 2, 3, 3}, {TensorType_FLOAT32}), "3 != 4"); } TEST(PadOpTest, InvalidPadValue) { EXPECT_DEATH( - PadOpConstModel({1, 1, 2, 1}, {4, 2}, {0, 0, 1, -1, 2, -1, 0, 0}), + PadOpConstModel({TensorType_FLOAT32, {1, 1, 2, 1}}, {4, 2}, + {0, 0, 1, -1, 2, -1, 0, 0}, {TensorType_FLOAT32}), "Pad value has to be greater than equal to 0."); } TEST(PadOpTest, SimpleConstTest) { // Padding is represented as four 2-D lists representing above padding and // below padding (i.e. {{0, 0}, {1, 1}, {1, 1}, {0, 0}}). - PadOpConstModel m({1, 2, 2, 1}, {4, 2}, {0, 0, 1, 1, 1, 1, 0, 0}); + PadOpConstModel m({TensorType_FLOAT32, {1, 2, 2, 1}}, {4, 2}, + {0, 0, 1, 1, 1, 1, 0, 0}, {TensorType_FLOAT32}); m.SetInput({1, 2, 3, 4}); m.Invoke(); EXPECT_THAT(m.GetOutput(), ElementsAreArray({0, 0, 0, 0, 0, 1, 2, 0, 0, 3, 4, @@ -114,7 +130,8 @@ TEST(PadOpTest, SimpleConstTest) { } TEST(PadOpTest, SimpleDynamicTest) { - PadOpDynamicModel m({1, 2, 2, 1}, {4, 2}); + PadOpDynamicModel m({TensorType_FLOAT32, {1, 2, 2, 1}}, {4, 2}, + {TensorType_FLOAT32}); m.SetInput({1, 2, 3, 4}); m.SetPaddings({0, 0, 1, 1, 1, 1, 0, 0}); m.Invoke(); @@ -124,7 +141,8 @@ TEST(PadOpTest, SimpleDynamicTest) { } TEST(PadOpTest, AdvancedConstTest) { - PadOpConstModel m({1, 2, 3, 1}, {4, 2}, {0, 0, 0, 2, 1, 3, 0, 0}); + PadOpConstModel m({TensorType_FLOAT32, {1, 2, 3, 1}}, {4, 2}, + {0, 0, 0, 2, 1, 3, 0, 0}, {TensorType_FLOAT32}); m.SetInput({1, 2, 3, 4, 5, 6}); m.Invoke(); EXPECT_THAT(m.GetOutput(), @@ -134,7 +152,8 @@ TEST(PadOpTest, AdvancedConstTest) { } TEST(PadOpTest, AdvancedDynamicTest) { - PadOpDynamicModel m({1, 2, 3, 1}, {4, 2}); + PadOpDynamicModel m({TensorType_FLOAT32, {1, 2, 3, 1}}, {4, 2}, + {TensorType_FLOAT32}); m.SetInput({1, 2, 3, 4, 5, 6}); m.SetPaddings({0, 0, 0, 2, 1, 3, 0, 0}); m.Invoke(); @@ -144,6 +163,80 @@ 
TEST(PadOpTest, AdvancedDynamicTest) { EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 4, 7, 1})); } +class QuantizedPadOpTest : public ::testing::Test { + protected: + std::vector> DequantizedArrayNear( + const std::vector& values, const float min, const float max) { + const float quantization_tolerance = (max - min) / 255.0; + return ArrayFloatNear(values, quantization_tolerance); + } +}; + +TEST_F(QuantizedPadOpTest, ZeroNotInQuantizationRange) { + // The test_util and actual quantization code currently ensure that the range + // must include zero, but if that ever changes, this test will catch it. + EXPECT_DEATH(PadOpConstModel m({TensorType_UINT8, {1, 2, 2, 1}, 1.0, 2.0}, + {4, 2}, {0, 0, 1, 1, 1, 1, 0, 0}, + {TensorType_UINT8, {}, 1.0, 2.0}), + ".*Check failed: f_min <= 0.*"); +} + +TEST_F(QuantizedPadOpTest, SimpleConstTest) { + // Padding is represented as four 2-D lists representing above padding and + // below padding (i.e. {{0, 0}, {1, 1}, {1, 1}, {0, 0}}). + PadOpConstModel m({TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0}, {4, 2}, + {0, 0, 1, 1, 1, 1, 0, 0}, + {TensorType_UINT8, {}, -1.0, 1.0}); + m.SetQuantizedInput({-0.8, 0.2, 0.9, 0.7}); + m.Invoke(); + EXPECT_THAT(m.GetDequantizedOutput(), + ElementsAreArray(DequantizedArrayNear( + {0, 0, 0, 0, 0, -0.8, 0.2, 0, 0, 0.9, 0.7, 0, 0, 0, 0, 0}, + -1.0, 1.0))); + EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 4, 4, 1})); +} + +TEST_F(QuantizedPadOpTest, SimpleDynamicTest) { + PadOpDynamicModel m({TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0}, {4, 2}, + {TensorType_UINT8, {}, -1.0, 1.0}); + m.SetQuantizedInput({-0.8, 0.2, 0.9, 0.7}); + m.SetPaddings({0, 0, 1, 1, 1, 1, 0, 0}); + m.Invoke(); + EXPECT_THAT(m.GetDequantizedOutput(), + ElementsAreArray(DequantizedArrayNear( + {0, 0, 0, 0, 0, -0.8, 0.2, 0, 0, 0.9, 0.7, 0, 0, 0, 0, 0}, + -1.0, 1.0))); + EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 4, 4, 1})); +} + +TEST_F(QuantizedPadOpTest, AdvancedConstTest) { + PadOpConstModel m({TensorType_UINT8, 
{1, 2, 3, 1}, -1.0, 1.0}, {4, 2}, + {0, 0, 0, 2, 1, 3, 0, 0}, + {TensorType_UINT8, {}, -1.0, 1.0}); + m.SetQuantizedInput({-0.8, 0.2, 0.9, 0.7, 0.1, -0.3}); + m.Invoke(); + EXPECT_THAT(m.GetDequantizedOutput(), + ElementsAreArray(DequantizedArrayNear( + {0, -0.8, 0.2, 0.9, 0, 0, 0, 0, 0.7, 0.1, -0.3, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + -1.0, 1.0))); + EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 4, 7, 1})); +} + +TEST_F(QuantizedPadOpTest, AdvancedDynamicTest) { + PadOpDynamicModel m({TensorType_UINT8, {1, 2, 3, 1}, -1.0, 1.0}, {4, 2}, + {TensorType_UINT8, {}, -1.0, 1.0}); + m.SetQuantizedInput({-0.8, 0.2, 0.9, 0.7, 0.1, -0.3}); + m.SetPaddings({0, 0, 0, 2, 1, 3, 0, 0}); + m.Invoke(); + EXPECT_THAT(m.GetDequantizedOutput(), + ElementsAreArray(DequantizedArrayNear( + {0, -0.8, 0.2, 0.9, 0, 0, 0, 0, 0.7, 0.1, -0.3, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + -1.0, 1.0))); + EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 4, 7, 1})); +} + } // namespace } // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/padding.h b/tensorflow/contrib/lite/kernels/padding.h index 40b8476b3779c66e31a04856bce8aebd378f1e5f..e81b970e0fb149e8c5d95ed12622917fdc336f7a 100644 --- a/tensorflow/contrib/lite/kernels/padding.h +++ b/tensorflow/contrib/lite/kernels/padding.h @@ -17,9 +17,10 @@ limitations under the License. namespace tflite { -inline int ComputePadding(int stride, int in_size, int filter_size, - int out_size) { - int padding = ((out_size - 1) * stride + filter_size - in_size) / 2; +inline int ComputePadding(int stride, int dilation_rate, int in_size, + int filter_size, int out_size) { + int effective_filter_size = (filter_size - 1) * dilation_rate + 1; + int padding = ((out_size - 1) * stride + effective_filter_size - in_size) / 2; return padding > 0 ? 
padding : 0; } diff --git a/tensorflow/contrib/lite/kernels/pooling.cc b/tensorflow/contrib/lite/kernels/pooling.cc index b79880110897a1438a589d97363fd861c61667e7..0bf27c34c1337b4ae4b8b73ee2dafcc931c7ce3c 100644 --- a/tensorflow/contrib/lite/kernels/pooling.cc +++ b/tensorflow/contrib/lite/kernels/pooling.cc @@ -94,9 +94,9 @@ TfLiteStatus GenericPrepare(TfLiteContext* context, TfLiteNode* node) { int outHeight = computeOutSize(height, params->filter_height, params->stride_height); - data->padding.height = ComputePadding(params->stride_height, height, + data->padding.height = ComputePadding(params->stride_height, 1, height, params->filter_height, outHeight); - data->padding.width = ComputePadding(params->stride_width, width, + data->padding.width = ComputePadding(params->stride_width, 1, width, params->filter_width, outWidth); if (input->type == kTfLiteUInt8) { diff --git a/tensorflow/contrib/lite/kernels/register.cc b/tensorflow/contrib/lite/kernels/register.cc index 0f98154b904b1f776016e6bbee3263027f815244..b07e7b6ff32e9ea513e60619078e09b7e6d1db72 100644 --- a/tensorflow/contrib/lite/kernels/register.cc +++ b/tensorflow/contrib/lite/kernels/register.cc @@ -77,6 +77,9 @@ TfLiteRegistration* Register_CAST(); TfLiteRegistration* Register_DEQUANTIZE(); TfLiteRegistration* Register_PRELU(); TfLiteRegistration* Register_MAXIMUM(); +TfLiteRegistration* Register_MINIMUM(); +TfLiteRegistration* Register_ARG_MAX(); +TfLiteRegistration* Register_LESS(); BuiltinOpResolver::BuiltinOpResolver() { AddBuiltin(BuiltinOperator_RELU, Register_RELU()); @@ -135,6 +138,9 @@ BuiltinOpResolver::BuiltinOpResolver() { AddBuiltin(BuiltinOperator_DEQUANTIZE, Register_DEQUANTIZE()); AddBuiltin(BuiltinOperator_PRELU, Register_PRELU()); AddBuiltin(BuiltinOperator_MAXIMUM, Register_MAXIMUM()); + AddBuiltin(BuiltinOperator_MINIMUM, Register_MINIMUM()); + AddBuiltin(BuiltinOperator_ARG_MAX, Register_ARG_MAX()); + AddBuiltin(BuiltinOperator_LESS, Register_LESS()); // TODO(andrewharp, ahentz): Move 
these somewhere more appropriate so that // custom ops aren't always included by default. diff --git a/tensorflow/contrib/lite/kernels/strided_slice.cc b/tensorflow/contrib/lite/kernels/strided_slice.cc index eb374d903182f46b40f5c80bfd769a19a5594742..40ac436b7dcabe7a12166e5381f0381941a204d3 100644 --- a/tensorflow/contrib/lite/kernels/strided_slice.cc +++ b/tensorflow/contrib/lite/kernels/strided_slice.cc @@ -87,6 +87,8 @@ inline int32_t ClampedIndex(int32_t index, int dim, bool pos_stride) { std::min(std::max(index, -dim), dim - 1), dim)); } +// TODO(b/77971377) this logic should be removed, as it's a duplication of +// StartForAxis() & StopForAxis() in kernels/internal/reference/reference_ops.h inline int32_t GetBeginValueAtIndex(StridedSliceContext* op_context, int idx) { const int dim = op_context->input->dims->data[idx]; const bool pos_stride = GetTensorData(op_context->strides)[idx] > 0; @@ -188,8 +190,8 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { std::vector strides; for (int idx = op_context.dims - 1; idx >= 0; --idx) { - starts.emplace_back(GetBeginValueAtIndex(&op_context, idx)); - stops.emplace_back(GetEndValueAtIndex(&op_context, idx)); + starts.emplace_back(GetTensorData(op_context.begin)[idx]); + stops.emplace_back(GetTensorData(op_context.end)[idx]); strides.emplace_back(GetTensorData(op_context.strides)[idx]); } @@ -202,15 +204,13 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { int begin_mask = ReverseMaskBits(op_context.params->begin_mask, op_context.dims); int end_mask = ReverseMaskBits(op_context.params->end_mask, op_context.dims); - int shrink_axis_mask = - ReverseMaskBits(op_context.params->shrink_axis_mask, op_context.dims); -#define TF_LITE_STRIDED_SLICE(kernel_type, data_type) \ - kernel_type::StridedSlice( \ - GetTensorData(op_context.input), \ - GetTensorDims(op_context.input), begin_mask, end_mask, shrink_axis_mask, \ - starts, stops, strides, GetTensorData(op_context.output), \ - 
GetTensorDims(op_context.output)) +#define TF_LITE_STRIDED_SLICE(kernel_type, data_type) \ + kernel_type::StridedSlice(GetTensorData(op_context.input), \ + GetTensorDims(op_context.input), begin_mask, \ + end_mask, starts, stops, strides, \ + GetTensorData(op_context.output), \ + GetTensorDims(op_context.output)) switch (op_context.input->type) { case kTfLiteFloat32: @@ -228,6 +228,11 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { TF_LITE_STRIDED_SLICE(reference_ops, int64_t); } break; + case kTfLiteUInt8: + if (kernel_type == kReference) { + TF_LITE_STRIDED_SLICE(reference_ops, uint8_t); + } + break; default: context->ReportError(context, "Type is currently not supported " diff --git a/tensorflow/contrib/lite/kernels/strided_slice_test.cc b/tensorflow/contrib/lite/kernels/strided_slice_test.cc index 5c98c5f43181fe75f35716dae5682113bde883ec..cc39179bc705aa1083e74b06f8f7f3fb45e9f616 100644 --- a/tensorflow/contrib/lite/kernels/strided_slice_test.cc +++ b/tensorflow/contrib/lite/kernels/strided_slice_test.cc @@ -24,6 +24,8 @@ namespace { using ::int32; using ::testing::ElementsAreArray; +template class StridedSliceOpModel : public SingleOpModel { public: StridedSliceOpModel(std::initializer_list input_shape, @@ -32,11 +34,11 @@ class StridedSliceOpModel : public SingleOpModel { std::initializer_list strides_shape, int begin_mask, int end_mask, int ellipsis_mask, int new_axis_mask, int shrink_axis_mask) { - input_ = AddInput(TensorType_FLOAT32); + input_ = AddInput(tensor_input_type); begin_ = AddInput(TensorType_INT32); end_ = AddInput(TensorType_INT32); strides_ = AddInput(TensorType_INT32); - output_ = AddOutput(TensorType_FLOAT32); + output_ = AddOutput(tensor_input_type); SetBuiltinOp( BuiltinOperator_STRIDED_SLICE, BuiltinOptions_StridedSliceOptions, CreateStridedSliceOptions(builder_, begin_mask, end_mask, ellipsis_mask, @@ -45,8 +47,8 @@ class StridedSliceOpModel : public SingleOpModel { BuildInterpreter({input_shape, begin_shape, end_shape, 
strides_shape}); } - void SetInput(std::initializer_list data) { - PopulateTensor(input_, data); + void SetInput(std::initializer_list data) { + PopulateTensor(input_, data); } void SetBegin(std::initializer_list data) { PopulateTensor(begin_, data); @@ -58,7 +60,9 @@ class StridedSliceOpModel : public SingleOpModel { PopulateTensor(strides_, data); } - std::vector GetOutput() { return ExtractVector(output_); } + std::vector GetOutput() { + return ExtractVector(output_); + } std::vector GetOutputShape() { return GetTensorShape(output_); } private: @@ -71,19 +75,19 @@ class StridedSliceOpModel : public SingleOpModel { TEST(StridedSliceOpTest, UnsupportedInputSize) { EXPECT_DEATH( - StridedSliceOpModel({2, 2, 2, 2, 2}, {5}, {5}, {5}, 0, 0, 0, 0, 0), + StridedSliceOpModel<>({2, 2, 2, 2, 2}, {5}, {5}, {5}, 0, 0, 0, 0, 0), "StridedSlice op only supports 1D-4D input arrays."); } TEST(StridedSliceOpTest, UnssupportedArgs) { - EXPECT_DEATH(StridedSliceOpModel({3, 2}, {2}, {2}, {2}, 0, 0, 1, 0, 0), + EXPECT_DEATH(StridedSliceOpModel<>({3, 2}, {2}, {2}, {2}, 0, 0, 1, 0, 0), "ellipsis_mask is not implemented yet."); - EXPECT_DEATH(StridedSliceOpModel({3, 2}, {2}, {2}, {2}, 0, 0, 0, 1, 0), + EXPECT_DEATH(StridedSliceOpModel<>({3, 2}, {2}, {2}, {2}, 0, 0, 0, 1, 0), "new_axis_mask is not implemented yet."); } TEST(StridedSliceOpTest, In1D) { - StridedSliceOpModel m({4}, {1}, {1}, {1}, 0, 0, 0, 0, 0); + StridedSliceOpModel<> m({4}, {1}, {1}, {1}, 0, 0, 0, 0, 0); m.SetInput({1, 2, 3, 4}); m.SetBegin({1}); m.SetEnd({3}); @@ -94,7 +98,7 @@ TEST(StridedSliceOpTest, In1D) { } TEST(StridedSliceOpTest, In1D_EmptyOutput) { - StridedSliceOpModel m({4}, {1}, {1}, {1}, 0, 0, 0, 0, 0); + StridedSliceOpModel<> m({4}, {1}, {1}, {1}, 0, 0, 0, 0, 0); m.SetInput({1, 2, 3, 4}); m.SetBegin({10}); m.SetEnd({3}); @@ -104,7 +108,7 @@ TEST(StridedSliceOpTest, In1D_EmptyOutput) { } TEST(StridedSliceOpTest, In1D_NegativeBegin) { - StridedSliceOpModel m({4}, {1}, {1}, {1}, 0, 0, 0, 0, 0); + 
StridedSliceOpModel<> m({4}, {1}, {1}, {1}, 0, 0, 0, 0, 0); m.SetInput({1, 2, 3, 4}); m.SetBegin({-3}); m.SetEnd({3}); @@ -115,7 +119,7 @@ TEST(StridedSliceOpTest, In1D_NegativeBegin) { } TEST(StridedSliceOpTest, In1D_OutOfRangeBegin) { - StridedSliceOpModel m({4}, {1}, {1}, {1}, 0, 0, 0, 0, 0); + StridedSliceOpModel<> m({4}, {1}, {1}, {1}, 0, 0, 0, 0, 0); m.SetInput({1, 2, 3, 4}); m.SetBegin({-5}); m.SetEnd({3}); @@ -126,7 +130,7 @@ TEST(StridedSliceOpTest, In1D_OutOfRangeBegin) { } TEST(StridedSliceOpTest, In1D_NegativeEnd) { - StridedSliceOpModel m({4}, {1}, {1}, {1}, 0, 0, 0, 0, 0); + StridedSliceOpModel<> m({4}, {1}, {1}, {1}, 0, 0, 0, 0, 0); m.SetInput({1, 2, 3, 4}); m.SetBegin({1}); m.SetEnd({-2}); @@ -137,7 +141,7 @@ TEST(StridedSliceOpTest, In1D_NegativeEnd) { } TEST(StridedSliceOpTest, In1D_OutOfRangeEnd) { - StridedSliceOpModel m({4}, {1}, {1}, {1}, 0, 0, 0, 0, 0); + StridedSliceOpModel<> m({4}, {1}, {1}, {1}, 0, 0, 0, 0, 0); m.SetInput({1, 2, 3, 4}); m.SetBegin({-3}); m.SetEnd({5}); @@ -148,7 +152,7 @@ TEST(StridedSliceOpTest, In1D_OutOfRangeEnd) { } TEST(StridedSliceOpTest, In1D_BeginMask) { - StridedSliceOpModel m({4}, {1}, {1}, {1}, 1, 0, 0, 0, 0); + StridedSliceOpModel<> m({4}, {1}, {1}, {1}, 1, 0, 0, 0, 0); m.SetInput({1, 2, 3, 4}); m.SetBegin({1}); m.SetEnd({3}); @@ -159,7 +163,7 @@ TEST(StridedSliceOpTest, In1D_BeginMask) { } TEST(StridedSliceOpTest, In1D_NegativeBeginNegativeStride) { - StridedSliceOpModel m({4}, {1}, {1}, {1}, 0, 0, 0, 0, 0); + StridedSliceOpModel<> m({4}, {1}, {1}, {1}, 0, 0, 0, 0, 0); m.SetInput({1, 2, 3, 4}); m.SetBegin({-2}); m.SetEnd({-3}); @@ -170,7 +174,7 @@ TEST(StridedSliceOpTest, In1D_NegativeBeginNegativeStride) { } TEST(StridedSliceOpTest, In1D_OutOfRangeBeginNegativeStride) { - StridedSliceOpModel m({4}, {1}, {1}, {1}, 0, 0, 0, 0, 0); + StridedSliceOpModel<> m({4}, {1}, {1}, {1}, 0, 0, 0, 0, 0); m.SetInput({1, 2, 3, 4}); m.SetBegin({5}); m.SetEnd({2}); @@ -181,7 +185,7 @@ TEST(StridedSliceOpTest, 
In1D_OutOfRangeBeginNegativeStride) { } TEST(StridedSliceOpTest, In1D_NegativeEndNegativeStride) { - StridedSliceOpModel m({4}, {1}, {1}, {1}, 0, 0, 0, 0, 0); + StridedSliceOpModel<> m({4}, {1}, {1}, {1}, 0, 0, 0, 0, 0); m.SetInput({1, 2, 3, 4}); m.SetBegin({2}); m.SetEnd({-4}); @@ -192,7 +196,7 @@ TEST(StridedSliceOpTest, In1D_NegativeEndNegativeStride) { } TEST(StridedSliceOpTest, In1D_OutOfRangeEndNegativeStride) { - StridedSliceOpModel m({4}, {1}, {1}, {1}, 0, 0, 0, 0, 0); + StridedSliceOpModel<> m({4}, {1}, {1}, {1}, 0, 0, 0, 0, 0); m.SetInput({1, 2, 3, 4}); m.SetBegin({-3}); m.SetEnd({-5}); @@ -203,7 +207,7 @@ TEST(StridedSliceOpTest, In1D_OutOfRangeEndNegativeStride) { } TEST(StridedSliceOpTest, In1D_EndMask) { - StridedSliceOpModel m({4}, {1}, {1}, {1}, 0, 1, 0, 0, 0); + StridedSliceOpModel<> m({4}, {1}, {1}, {1}, 0, 1, 0, 0, 0); m.SetInput({1, 2, 3, 4}); m.SetBegin({1}); m.SetEnd({3}); @@ -214,7 +218,7 @@ TEST(StridedSliceOpTest, In1D_EndMask) { } TEST(StridedSliceOpTest, In1D_NegStride) { - StridedSliceOpModel m({3}, {1}, {1}, {1}, 0, 0, 0, 0, 0); + StridedSliceOpModel<> m({3}, {1}, {1}, {1}, 0, 0, 0, 0, 0); m.SetInput({1, 2, 3}); m.SetBegin({-1}); m.SetEnd({-4}); @@ -225,7 +229,7 @@ TEST(StridedSliceOpTest, In1D_NegStride) { } TEST(StridedSliceOpTest, In1D_EvenLenStride2) { - StridedSliceOpModel m({2}, {1}, {1}, {1}, 0, 0, 0, 0, 0); + StridedSliceOpModel<> m({2}, {1}, {1}, {1}, 0, 0, 0, 0, 0); m.SetInput({1, 2}); m.SetBegin({0}); m.SetEnd({2}); @@ -236,7 +240,7 @@ TEST(StridedSliceOpTest, In1D_EvenLenStride2) { } TEST(StridedSliceOpTest, In1D_OddLenStride2) { - StridedSliceOpModel m({3}, {1}, {1}, {1}, 0, 0, 0, 0, 0); + StridedSliceOpModel<> m({3}, {1}, {1}, {1}, 0, 0, 0, 0, 0); m.SetInput({1, 2, 3}); m.SetBegin({0}); m.SetEnd({3}); @@ -247,7 +251,7 @@ TEST(StridedSliceOpTest, In1D_OddLenStride2) { } TEST(StridedSliceOpTest, In2D_Identity) { - StridedSliceOpModel m({2, 3}, {2}, {2}, {2}, 0, 0, 0, 0, 0); + StridedSliceOpModel<> m({2, 3}, {2}, {2}, {2}, 0, 
0, 0, 0, 0); m.SetInput({1, 2, 3, 4, 5, 6}); m.SetBegin({0, 0}); m.SetEnd({2, 3}); @@ -258,7 +262,7 @@ TEST(StridedSliceOpTest, In2D_Identity) { } TEST(StridedSliceOpTest, In2D) { - StridedSliceOpModel m({2, 3}, {2}, {2}, {2}, 0, 0, 0, 0, 0); + StridedSliceOpModel<> m({2, 3}, {2}, {2}, {2}, 0, 0, 0, 0, 0); m.SetInput({1, 2, 3, 4, 5, 6}); m.SetBegin({1, 0}); m.SetEnd({2, 2}); @@ -269,7 +273,7 @@ TEST(StridedSliceOpTest, In2D) { } TEST(StridedSliceOpTest, In2D_Stride2) { - StridedSliceOpModel m({2, 3}, {2}, {2}, {2}, 0, 0, 0, 0, 0); + StridedSliceOpModel<> m({2, 3}, {2}, {2}, {2}, 0, 0, 0, 0, 0); m.SetInput({1, 2, 3, 4, 5, 6}); m.SetBegin({0, 0}); m.SetEnd({2, 3}); @@ -280,7 +284,7 @@ TEST(StridedSliceOpTest, In2D_Stride2) { } TEST(StridedSliceOpTest, In2D_NegStride) { - StridedSliceOpModel m({2, 3}, {2}, {2}, {2}, 0, 0, 0, 0, 0); + StridedSliceOpModel<> m({2, 3}, {2}, {2}, {2}, 0, 0, 0, 0, 0); m.SetInput({1, 2, 3, 4, 5, 6}); m.SetBegin({1, -1}); m.SetEnd({2, -4}); @@ -291,7 +295,7 @@ TEST(StridedSliceOpTest, In2D_NegStride) { } TEST(StridedSliceOpTest, In2D_BeginMask) { - StridedSliceOpModel m({2, 3}, {2}, {2}, {2}, 1, 0, 0, 0, 0); + StridedSliceOpModel<> m({2, 3}, {2}, {2}, {2}, 1, 0, 0, 0, 0); m.SetInput({1, 2, 3, 4, 5, 6}); m.SetBegin({1, 0}); m.SetEnd({2, 2}); @@ -302,7 +306,7 @@ TEST(StridedSliceOpTest, In2D_BeginMask) { } TEST(StridedSliceOpTest, In2D_EndMask) { - StridedSliceOpModel m({2, 3}, {2}, {2}, {2}, 0, 2, 0, 0, 0); + StridedSliceOpModel<> m({2, 3}, {2}, {2}, {2}, 0, 2, 0, 0, 0); m.SetInput({1, 2, 3, 4, 5, 6}); m.SetBegin({1, 0}); m.SetEnd({2, 2}); @@ -313,7 +317,7 @@ TEST(StridedSliceOpTest, In2D_EndMask) { } TEST(StridedSliceOpTest, In2D_NegStrideBeginMask) { - StridedSliceOpModel m({2, 3}, {2}, {2}, {2}, 2, 0, 0, 0, 0); + StridedSliceOpModel<> m({2, 3}, {2}, {2}, {2}, 2, 0, 0, 0, 0); m.SetInput({1, 2, 3, 4, 5, 6}); m.SetBegin({1, -2}); m.SetEnd({2, -4}); @@ -324,7 +328,7 @@ TEST(StridedSliceOpTest, In2D_NegStrideBeginMask) { } 
TEST(StridedSliceOpTest, In2D_NegStrideEndMask) { - StridedSliceOpModel m({2, 3}, {2}, {2}, {2}, 0, 2, 0, 0, 0); + StridedSliceOpModel<> m({2, 3}, {2}, {2}, {2}, 0, 2, 0, 0, 0); m.SetInput({1, 2, 3, 4, 5, 6}); m.SetBegin({1, -2}); m.SetEnd({2, -3}); @@ -335,7 +339,7 @@ TEST(StridedSliceOpTest, In2D_NegStrideEndMask) { } TEST(StridedSliceOpTest, In3D_Identity) { - StridedSliceOpModel m({2, 3, 2}, {3}, {3}, {3}, 0, 0, 0, 0, 0); + StridedSliceOpModel<> m({2, 3, 2}, {3}, {3}, {3}, 0, 0, 0, 0, 0); m.SetInput({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); m.SetBegin({0, 0, 0}); m.SetEnd({2, 3, 2}); @@ -347,7 +351,7 @@ TEST(StridedSliceOpTest, In3D_Identity) { } TEST(StridedSliceOpTest, In3D_NegStride) { - StridedSliceOpModel m({2, 3, 2}, {3}, {3}, {3}, 0, 0, 0, 0, 0); + StridedSliceOpModel<> m({2, 3, 2}, {3}, {3}, {3}, 0, 0, 0, 0, 0); m.SetInput({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); m.SetBegin({-1, -1, -1}); m.SetEnd({-3, -4, -3}); @@ -359,7 +363,7 @@ TEST(StridedSliceOpTest, In3D_NegStride) { } TEST(StridedSliceOpTest, In3D_Strided2) { - StridedSliceOpModel m({2, 3, 2}, {3}, {3}, {3}, 0, 0, 0, 0, 0); + StridedSliceOpModel<> m({2, 3, 2}, {3}, {3}, {3}, 0, 0, 0, 0, 0); m.SetInput({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); m.SetBegin({0, 0, 0}); m.SetEnd({2, 3, 2}); @@ -370,32 +374,21 @@ TEST(StridedSliceOpTest, In3D_Strided2) { } TEST(StridedSliceOpTest, In1D_ShrinkAxisMask1) { - StridedSliceOpModel m({4}, {1}, {1}, {1}, 0, 0, 0, 0, 1); + StridedSliceOpModel<> m({4}, {1}, {1}, {1}, 0, 0, 0, 0, 1); m.SetInput({1, 2, 3, 4}); m.SetBegin({1}); - m.SetEnd({3}); + m.SetEnd({2}); m.SetStrides({1}); m.Invoke(); EXPECT_TRUE(m.GetOutputShape().empty()); EXPECT_THAT(m.GetOutput(), ElementsAreArray({2})); } -TEST(StridedSliceOpTest, In1D_EmptyOutputShrinkAxisMask1) { - StridedSliceOpModel m({4}, {1}, {1}, {1}, 0, 0, 0, 0, 1); - m.SetInput({1, 2, 3, 4}); - m.SetBegin({2}); - m.SetEnd({1}); - m.SetStrides({1}); - m.Invoke(); - EXPECT_TRUE(m.GetOutputShape().empty()); - 
EXPECT_THAT(m.GetOutput(), ElementsAreArray({3})); -} - TEST(StridedSliceOpTest, In1D_BeginMaskShrinkAxisMask1) { - StridedSliceOpModel m({4}, {1}, {1}, {1}, 1, 0, 0, 0, 1); + StridedSliceOpModel<> m({4}, {1}, {1}, {1}, 1, 0, 0, 0, 1); m.SetInput({1, 2, 3, 4}); m.SetBegin({1}); - m.SetEnd({3}); + m.SetEnd({1}); m.SetStrides({1}); m.Invoke(); EXPECT_TRUE(m.GetOutputShape().empty()); @@ -403,7 +396,7 @@ TEST(StridedSliceOpTest, In1D_BeginMaskShrinkAxisMask1) { } TEST(StridedSliceOpTest, In1D_NegativeBeginNegativeStrideShrinkAxisMask1) { - StridedSliceOpModel m({4}, {1}, {1}, {1}, 0, 0, 0, 0, 1); + StridedSliceOpModel<> m({4}, {1}, {1}, {1}, 0, 0, 0, 0, 1); m.SetInput({1, 2, 3, 4}); m.SetBegin({-2}); m.SetEnd({-3}); @@ -414,10 +407,10 @@ TEST(StridedSliceOpTest, In1D_NegativeBeginNegativeStrideShrinkAxisMask1) { } TEST(StridedSliceOpTest, In2D_ShrinkAxisMask1) { - StridedSliceOpModel m({2, 3}, {2}, {2}, {2}, 0, 0, 0, 0, 1); + StridedSliceOpModel<> m({2, 3}, {2}, {2}, {2}, 0, 0, 0, 0, 1); m.SetInput({1, 2, 3, 4, 5, 6}); m.SetBegin({0, 0}); - m.SetEnd({2, 3}); + m.SetEnd({1, 3}); m.SetStrides({1, 1}); m.Invoke(); EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({3})); @@ -425,10 +418,10 @@ TEST(StridedSliceOpTest, In2D_ShrinkAxisMask1) { } TEST(StridedSliceOpTest, In2D_ShrinkAxisMask2) { - StridedSliceOpModel m({2, 3}, {2}, {2}, {2}, 0, 0, 0, 0, 2); + StridedSliceOpModel<> m({2, 3}, {2}, {2}, {2}, 0, 0, 0, 0, 2); m.SetInput({1, 2, 3, 4, 5, 6}); m.SetBegin({0, 0}); - m.SetEnd({2, 3}); + m.SetEnd({2, 1}); m.SetStrides({1, 1}); m.Invoke(); EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({2})); @@ -436,10 +429,10 @@ TEST(StridedSliceOpTest, In2D_ShrinkAxisMask2) { } TEST(StridedSliceOpTest, In2D_ShrinkAxisMask3) { - StridedSliceOpModel m({2, 3}, {2}, {2}, {2}, 0, 0, 0, 0, 3); + StridedSliceOpModel<> m({2, 3}, {2}, {2}, {2}, 0, 0, 0, 0, 3); m.SetInput({1, 2, 3, 4, 5, 6}); m.SetBegin({0, 0}); - m.SetEnd({2, 3}); + m.SetEnd({1, 1}); m.SetStrides({1, 1}); m.Invoke(); 
EXPECT_TRUE(m.GetOutputShape().empty()); @@ -447,10 +440,10 @@ TEST(StridedSliceOpTest, In2D_ShrinkAxisMask3) { } TEST(StridedSliceOpTest, In3D_IdentityShrinkAxis1) { - StridedSliceOpModel m({2, 3, 2}, {3}, {3}, {3}, 0, 0, 0, 0, 1); + StridedSliceOpModel<> m({2, 3, 2}, {3}, {3}, {3}, 0, 0, 0, 0, 1); m.SetInput({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); m.SetBegin({0, 0, 0}); - m.SetEnd({2, 3, 2}); + m.SetEnd({1, 3, 2}); m.SetStrides({1, 1, 1}); m.Invoke(); EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({3, 2})); @@ -458,10 +451,10 @@ TEST(StridedSliceOpTest, In3D_IdentityShrinkAxis1) { } TEST(StridedSliceOpTest, In3D_IdentityShrinkAxis2) { - StridedSliceOpModel m({2, 3, 2}, {3}, {3}, {3}, 0, 0, 0, 0, 2); + StridedSliceOpModel<> m({2, 3, 2}, {3}, {3}, {3}, 0, 0, 0, 0, 2); m.SetInput({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); m.SetBegin({0, 0, 0}); - m.SetEnd({2, 3, 2}); + m.SetEnd({2, 1, 2}); m.SetStrides({1, 1, 1}); m.Invoke(); EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({2, 2})); @@ -469,10 +462,10 @@ TEST(StridedSliceOpTest, In3D_IdentityShrinkAxis2) { } TEST(StridedSliceOpTest, In3D_IdentityShrinkAxis3) { - StridedSliceOpModel m({2, 3, 2}, {3}, {3}, {3}, 0, 0, 0, 0, 3); + StridedSliceOpModel<> m({2, 3, 2}, {3}, {3}, {3}, 0, 0, 0, 0, 3); m.SetInput({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); m.SetBegin({0, 0, 0}); - m.SetEnd({2, 3, 2}); + m.SetEnd({1, 1, 2}); m.SetStrides({1, 1, 1}); m.Invoke(); EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({2})); @@ -480,10 +473,10 @@ TEST(StridedSliceOpTest, In3D_IdentityShrinkAxis3) { } TEST(StridedSliceOpTest, In3D_IdentityShrinkAxis4) { - StridedSliceOpModel m({2, 3, 2}, {3}, {3}, {3}, 0, 0, 0, 0, 4); + StridedSliceOpModel<> m({2, 3, 2}, {3}, {3}, {3}, 0, 0, 0, 0, 4); m.SetInput({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); m.SetBegin({0, 0, 0}); - m.SetEnd({2, 3, 2}); + m.SetEnd({2, 3, 1}); m.SetStrides({1, 1, 1}); m.Invoke(); EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({2, 3})); @@ -491,10 +484,10 @@ 
TEST(StridedSliceOpTest, In3D_IdentityShrinkAxis4) { } TEST(StridedSliceOpTest, In3D_IdentityShrinkAxis5) { - StridedSliceOpModel m({2, 3, 2}, {3}, {3}, {3}, 0, 0, 0, 0, 5); + StridedSliceOpModel<> m({2, 3, 2}, {3}, {3}, {3}, 0, 0, 0, 0, 5); m.SetInput({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); m.SetBegin({0, 0, 0}); - m.SetEnd({2, 3, 2}); + m.SetEnd({1, 3, 1}); m.SetStrides({1, 1, 1}); m.Invoke(); EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({3})); @@ -502,10 +495,10 @@ TEST(StridedSliceOpTest, In3D_IdentityShrinkAxis5) { } TEST(StridedSliceOpTest, In3D_IdentityShrinkAxis6) { - StridedSliceOpModel m({2, 3, 2}, {3}, {3}, {3}, 0, 0, 0, 0, 6); + StridedSliceOpModel<> m({2, 3, 2}, {3}, {3}, {3}, 0, 0, 0, 0, 6); m.SetInput({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); m.SetBegin({0, 0, 0}); - m.SetEnd({2, 3, 2}); + m.SetEnd({2, 1, 1}); m.SetStrides({1, 1, 1}); m.Invoke(); EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({2})); @@ -513,10 +506,10 @@ TEST(StridedSliceOpTest, In3D_IdentityShrinkAxis6) { } TEST(StridedSliceOpTest, In3D_IdentityShrinkAxis7) { - StridedSliceOpModel m({2, 3, 2}, {3}, {3}, {3}, 0, 0, 0, 0, 7); + StridedSliceOpModel<> m({2, 3, 2}, {3}, {3}, {3}, 0, 0, 0, 0, 7); m.SetInput({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); m.SetBegin({0, 0, 0}); - m.SetEnd({2, 3, 2}); + m.SetEnd({1, 1, 1}); m.SetStrides({1, 1, 1}); m.Invoke(); EXPECT_TRUE(m.GetOutputShape().empty()); @@ -525,7 +518,7 @@ TEST(StridedSliceOpTest, In3D_IdentityShrinkAxis7) { // This tests catches a very subtle bug that was fixed by cl/188403234. 
TEST(StridedSliceOpTest, RunTwice) { - StridedSliceOpModel m({2, 3}, {2}, {2}, {2}, 1, 0, 0, 0, 0); + StridedSliceOpModel<> m({2, 3}, {2}, {2}, {2}, 1, 0, 0, 0, 0); auto setup_inputs = [&m]() { m.SetInput({1, 2, 3, 4, 5, 6}); @@ -544,6 +537,17 @@ TEST(StridedSliceOpTest, RunTwice) { EXPECT_THAT(m.GetOutput(), ElementsAreArray({1, 2, 4, 5})); } +TEST(StridedSliceOpTest, In3D_IdentityShrinkAxis1Uint8) { + StridedSliceOpModel m({2, 3, 2}, {3}, {3}, {3}, 0, 0, + 0, 0, 1); + m.SetInput({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); + m.SetBegin({0, 0, 0}); + m.SetEnd({1, 3, 2}); + m.SetStrides({1, 1, 1}); + m.Invoke(); + EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({3, 2})); + EXPECT_THAT(m.GetOutput(), ElementsAreArray({1, 2, 3, 4, 5, 6})); +} } // namespace } // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/sub.cc b/tensorflow/contrib/lite/kernels/sub.cc index 66b06aeaec52dd3d2d98acfec8218ffdd0ae6bf3..7c60a4fdbffdc96b8967f52f8dbab3e18ecbcc0a 100644 --- a/tensorflow/contrib/lite/kernels/sub.cc +++ b/tensorflow/contrib/lite/kernels/sub.cc @@ -174,7 +174,8 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { EvalQuantized(context, node, params, data, input1, input2, output); } else { - context->ReportError(context, "Inputs and outputs not all float types."); + context->ReportError(context, + "Inputs and outputs not all float|uint8 types."); return kTfLiteError; } diff --git a/tensorflow/contrib/lite/model.cc b/tensorflow/contrib/lite/model.cc index 791d1378f393594ceb6f1fcec7cc5aadaa81dab3..2dd6d67e078619df41524e8242a0475320c02013 100644 --- a/tensorflow/contrib/lite/model.cc +++ b/tensorflow/contrib/lite/model.cc @@ -30,8 +30,44 @@ limitations under the License. namespace tflite { +namespace { +// Ensure that ErrorReporter is non-null. +ErrorReporter* ValidateErrorReporter(ErrorReporter* e) { + return e ? 
e : DefaultErrorReporter(); +} +} // namespace + const char* kEmptyTensorName = ""; +TfLiteStatus ConvertTensorType(TensorType tensor_type, TfLiteType* type, + ErrorReporter* error_reporter) { + switch (tensor_type) { + case TensorType_FLOAT32: + *type = kTfLiteFloat32; + break; + case TensorType_INT32: + *type = kTfLiteInt32; + break; + case TensorType_UINT8: + *type = kTfLiteUInt8; + break; + case TensorType_INT64: + *type = kTfLiteInt64; + break; + case TensorType_STRING: + *type = kTfLiteString; + break; + case TensorType_BOOL: + *type = kTfLiteBool; + break; + default: + error_reporter->Report("Unimplemented data type %s (%d) in tensor\n", + EnumNameTensorType(tensor_type), tensor_type); + return kTfLiteError; + } + return kTfLiteOk; +} + // Loads a model from `filename`. If `mmap_file` is true then use mmap, // otherwise make a copy of the model in a buffer. std::unique_ptr GetAllocationFromFile(const char* filename, @@ -52,6 +88,8 @@ std::unique_ptr GetAllocationFromFile(const char* filename, std::unique_ptr FlatBufferModel::BuildFromFile( const char* filename, ErrorReporter* error_reporter) { + error_reporter = ValidateErrorReporter(error_reporter); + std::unique_ptr model; auto allocation = GetAllocationFromFile(filename, /*mmap_file=*/true, error_reporter, /*use_nnapi=*/true); @@ -63,6 +101,8 @@ std::unique_ptr FlatBufferModel::BuildFromFile( std::unique_ptr FlatBufferModel::VerifyAndBuildFromFile( const char* filename, TfLiteVerifier* verifier, ErrorReporter* error_reporter) { + error_reporter = ValidateErrorReporter(error_reporter); + std::unique_ptr model; auto allocation = GetAllocationFromFile(filename, /*mmap_file=*/true, error_reporter, /*use_nnapi=*/true); @@ -78,6 +118,8 @@ std::unique_ptr FlatBufferModel::VerifyAndBuildFromFile( std::unique_ptr FlatBufferModel::BuildFromBuffer( const char* buffer, size_t buffer_size, ErrorReporter* error_reporter) { + error_reporter = ValidateErrorReporter(error_reporter); + std::unique_ptr model; Allocation* 
allocation = new MemoryAllocation(buffer, buffer_size, error_reporter); @@ -88,6 +130,8 @@ std::unique_ptr FlatBufferModel::BuildFromBuffer( std::unique_ptr FlatBufferModel::BuildFromModel( const tflite::Model* model_spec, ErrorReporter* error_reporter) { + error_reporter = ValidateErrorReporter(error_reporter); + std::unique_ptr model; model.reset(new FlatBufferModel(model_spec, error_reporter)); if (!model->initialized()) model.reset(); @@ -107,15 +151,13 @@ bool FlatBufferModel::CheckModelIdentifier() const { FlatBufferModel::FlatBufferModel(const Model* model, ErrorReporter* error_reporter) - : error_reporter_(error_reporter ? error_reporter - : DefaultErrorReporter()) { + : error_reporter_(ValidateErrorReporter(error_reporter)) { model_ = model; } FlatBufferModel::FlatBufferModel(Allocation* allocation, ErrorReporter* error_reporter) - : error_reporter_(error_reporter ? error_reporter - : DefaultErrorReporter()) { + : error_reporter_(ValidateErrorReporter(error_reporter)) { allocation_ = allocation; if (!allocation_->valid() || !CheckModelIdentifier()) return; @@ -128,7 +170,7 @@ InterpreterBuilder::InterpreterBuilder(const FlatBufferModel& model, const OpResolver& op_resolver) : model_(model.GetModel()), op_resolver_(op_resolver), - error_reporter_(model.error_reporter()), + error_reporter_(ValidateErrorReporter(model.error_reporter())), allocation_(model.allocation()) {} InterpreterBuilder::InterpreterBuilder(const ::tflite::Model* model, @@ -136,8 +178,7 @@ InterpreterBuilder::InterpreterBuilder(const ::tflite::Model* model, ErrorReporter* error_reporter) : model_(model), op_resolver_(op_resolver), - error_reporter_(error_reporter ? error_reporter - : DefaultErrorReporter()) {} + error_reporter_(ValidateErrorReporter(error_reporter)) {} TfLiteStatus InterpreterBuilder::BuildLocalIndexToRegistrationMapping() { TfLiteStatus status = kTfLiteOk; @@ -223,13 +264,11 @@ T* MallocPOD() { // Parse the appropriate data out of the op. 
// // This handles builtin data explicitly as there are flatbuffer schemas. -// -// Returns memory that must be feed. -// -// TODO(nupurgarg): Pass in void ** and return TfLiteStatus to ensure program -// crashes if error reporter is called. -void* ParseOpData(const Operator* op, BuiltinOperator op_type, - ErrorReporter* error_reporter) { +// If it returns kTfLiteOk, it passes the data out with `builtin_data`, which +// needs to be released by calling `free`. +// If it returns kTfLiteError, `builtin_data` will be `nullptr`. +TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type, + ErrorReporter* error_reporter, void** builtin_data) { auto parse_padding = [](Padding padding) { switch (padding) { case Padding_SAME: @@ -278,7 +317,7 @@ void* ParseOpData(const Operator* op, BuiltinOperator op_type, } }; - void* builtin_data = nullptr; + *builtin_data = nullptr; switch (op_type) { case BuiltinOperator_CALL: // TODO(aselle): Implement call in BuiltinOptions, but nullptrs are @@ -294,8 +333,10 @@ void* ParseOpData(const Operator* op, BuiltinOperator op_type, params->stride_height = conv_params->stride_h(); params->activation = parse_activation(conv_params->fused_activation_function()); + params->dilation_width_factor = conv_params->dilation_w_factor(); + params->dilation_height_factor = conv_params->dilation_h_factor(); } - builtin_data = reinterpret_cast(params); + *builtin_data = reinterpret_cast(params); break; } case BuiltinOperator_TANH: @@ -307,17 +348,33 @@ void* ParseOpData(const Operator* op, BuiltinOperator op_type, case BuiltinOperator_EXP: case BuiltinOperator_TOPK_V2: case BuiltinOperator_LOG_SOFTMAX: - case BuiltinOperator_CAST: case BuiltinOperator_DEQUANTIZE: case BuiltinOperator_PRELU: break; + case BuiltinOperator_CAST: { + TfLiteCastParams* params = MallocPOD(); + if (auto* schema_params = op->builtin_options_as_CastOptions()) { + auto in_status = + ConvertTensorType(schema_params->in_data_type(), + ¶ms->in_data_type, error_reporter); + 
auto out_status = + ConvertTensorType(schema_params->out_data_type(), + ¶ms->out_data_type, error_reporter); + if (in_status != kTfLiteOk || out_status != kTfLiteOk) { + free(params); + return kTfLiteError; + } + } + *builtin_data = reinterpret_cast(params); + break; + } case BuiltinOperator_LSH_PROJECTION: { TfLiteLSHProjectionParams* params = MallocPOD(); if (auto* lshParams = op->builtin_options_as_LSHProjectionOptions()) { params->type = parseLSHProjectionType(lshParams->type()); } - builtin_data = reinterpret_cast(params); + *builtin_data = reinterpret_cast(params); break; } case BuiltinOperator_AVERAGE_POOL_2D: @@ -333,7 +390,7 @@ void* ParseOpData(const Operator* op, BuiltinOperator op_type, params->activation = parse_activation(pool_params->fused_activation_function()); } - builtin_data = reinterpret_cast(params); + *builtin_data = reinterpret_cast(params); break; } case BuiltinOperator_DEPTHWISE_CONV_2D: { @@ -347,7 +404,7 @@ void* ParseOpData(const Operator* op, BuiltinOperator op_type, params->activation = parse_activation(conv_params->fused_activation_function()); } - builtin_data = reinterpret_cast(params); + *builtin_data = reinterpret_cast(params); break; } case BuiltinOperator_SVDF: { @@ -357,7 +414,7 @@ void* ParseOpData(const Operator* op, BuiltinOperator op_type, params->activation = parse_activation(svdf_params->fused_activation_function()); } - builtin_data = reinterpret_cast(params); + *builtin_data = reinterpret_cast(params); break; } case BuiltinOperator_BIDIRECTIONAL_SEQUENCE_RNN: @@ -369,7 +426,7 @@ void* ParseOpData(const Operator* op, BuiltinOperator op_type, parse_activation(sequence_rnn_params->fused_activation_function()); params->time_major = sequence_rnn_params->time_major(); } - builtin_data = reinterpret_cast(params); + *builtin_data = reinterpret_cast(params); break; } case BuiltinOperator_RNN: { @@ -378,7 +435,7 @@ void* ParseOpData(const Operator* op, BuiltinOperator op_type, params->activation = 
parse_activation(rnn_params->fused_activation_function()); } - builtin_data = reinterpret_cast(params); + *builtin_data = reinterpret_cast(params); break; } case BuiltinOperator_EMBEDDING_LOOKUP: @@ -391,7 +448,7 @@ void* ParseOpData(const Operator* op, BuiltinOperator op_type, op->builtin_options_as_EmbeddingLookupSparseOptions()) { params->combiner = parseCombinerType(embedding_params->combiner()); } - builtin_data = reinterpret_cast(params); + *builtin_data = reinterpret_cast(params); break; } case BuiltinOperator_FULLY_CONNECTED: { @@ -402,7 +459,7 @@ void* ParseOpData(const Operator* op, BuiltinOperator op_type, params->activation = parse_activation( fully_connected_params->fused_activation_function()); } - builtin_data = reinterpret_cast(params); + *builtin_data = reinterpret_cast(params); break; } case BuiltinOperator_HASHTABLE_LOOKUP: @@ -413,7 +470,7 @@ void* ParseOpData(const Operator* op, BuiltinOperator op_type, if (auto* softmax_params = op->builtin_options_as_SoftmaxOptions()) { params->beta = softmax_params->beta(); } - builtin_data = reinterpret_cast(params); + *builtin_data = reinterpret_cast(params); break; } case BuiltinOperator_CONCATENATION: { @@ -425,7 +482,7 @@ void* ParseOpData(const Operator* op, BuiltinOperator op_type, parse_activation(concatenation_params->fused_activation_function()); params->axis = concatenation_params->axis(); } - builtin_data = reinterpret_cast(params); + *builtin_data = reinterpret_cast(params); break; } case BuiltinOperator_MUL: { @@ -434,7 +491,7 @@ void* ParseOpData(const Operator* op, BuiltinOperator op_type, params->activation = parse_activation(schema_params->fused_activation_function()); } - builtin_data = reinterpret_cast(params); + *builtin_data = reinterpret_cast(params); break; } case BuiltinOperator_ADD: { @@ -443,7 +500,7 @@ void* ParseOpData(const Operator* op, BuiltinOperator op_type, params->activation = parse_activation(schema_params->fused_activation_function()); } - builtin_data = 
reinterpret_cast(params); + *builtin_data = reinterpret_cast(params); break; } case BuiltinOperator_DIV: { @@ -452,7 +509,7 @@ void* ParseOpData(const Operator* op, BuiltinOperator op_type, params->activation = parse_activation(schema_params->fused_activation_function()); } - builtin_data = reinterpret_cast(params); + *builtin_data = reinterpret_cast(params); break; } case BuiltinOperator_SUB: { @@ -461,7 +518,7 @@ void* ParseOpData(const Operator* op, BuiltinOperator op_type, params->activation = parse_activation(schema_params->fused_activation_function()); } - builtin_data = reinterpret_cast(params); + *builtin_data = reinterpret_cast(params); break; } case BuiltinOperator_L2_NORMALIZATION: { @@ -470,7 +527,7 @@ void* ParseOpData(const Operator* op, BuiltinOperator op_type, params->activation = parse_activation(schema_params->fused_activation_function()); } - builtin_data = reinterpret_cast(params); + *builtin_data = reinterpret_cast(params); break; } case BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION: { @@ -482,7 +539,7 @@ void* ParseOpData(const Operator* op, BuiltinOperator op_type, params->alpha = schema_params->alpha(); params->beta = schema_params->beta(); } - builtin_data = reinterpret_cast(params); + *builtin_data = reinterpret_cast(params); break; } case BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM: @@ -495,7 +552,7 @@ void* ParseOpData(const Operator* op, BuiltinOperator op_type, params->cell_clip = lstm_params->cell_clip(); params->proj_clip = lstm_params->proj_clip(); } - builtin_data = reinterpret_cast(params); + *builtin_data = reinterpret_cast(params); break; } case BuiltinOperator_RESIZE_BILINEAR: { @@ -504,7 +561,7 @@ void* ParseOpData(const Operator* op, BuiltinOperator op_type, op->builtin_options_as_ResizeBilinearOptions()) { params->align_corners = schema_params->align_corners(); } - builtin_data = reinterpret_cast(params); + *builtin_data = reinterpret_cast(params); break; } case BuiltinOperator_PAD: { @@ -518,7 +575,7 @@ void* 
ParseOpData(const Operator* op, BuiltinOperator op_type, params->shape, error_reporter); params->num_dimensions = new_shape->Length(); } - builtin_data = reinterpret_cast(params); + *builtin_data = reinterpret_cast(params); break; } case BuiltinOperator_SKIP_GRAM: { @@ -528,7 +585,7 @@ void* ParseOpData(const Operator* op, BuiltinOperator op_type, params->max_skip_size = skip_gram_params->max_skip_size(); params->include_all_ngrams = skip_gram_params->include_all_ngrams(); } - builtin_data = reinterpret_cast(params); + *builtin_data = reinterpret_cast(params); break; } case BuiltinOperator_SPACE_TO_DEPTH: { @@ -536,7 +593,7 @@ void* ParseOpData(const Operator* op, BuiltinOperator op_type, if (auto* schema_params = op->builtin_options_as_SpaceToDepthOptions()) { params->block_size = schema_params->block_size(); } - builtin_data = reinterpret_cast(params); + *builtin_data = reinterpret_cast(params); break; } case BuiltinOperator_GATHER: { @@ -546,7 +603,7 @@ void* ParseOpData(const Operator* op, BuiltinOperator op_type, params->axis = gather_params->axis(); } - builtin_data = reinterpret_cast(params); + *builtin_data = reinterpret_cast(params); break; } case BuiltinOperator_SPACE_TO_BATCH_ND: { @@ -563,7 +620,7 @@ void* ParseOpData(const Operator* op, BuiltinOperator op_type, if (auto* schema_params = op->builtin_options_as_MeanOptions()) { params->keep_dims = schema_params->keep_dims(); } - builtin_data = reinterpret_cast(params); + *builtin_data = reinterpret_cast(params); break; } case BuiltinOperator_SPLIT: { @@ -571,7 +628,7 @@ void* ParseOpData(const Operator* op, BuiltinOperator op_type, if (auto* schema_params = op->builtin_options_as_SplitOptions()) { params->num_splits = schema_params->num_splits(); } - builtin_data = reinterpret_cast(params); + *builtin_data = reinterpret_cast(params); break; } case BuiltinOperator_SQUEEZE: { @@ -582,7 +639,7 @@ void* ParseOpData(const Operator* op, BuiltinOperator op_type, params->squeeze_dims, error_reporter); 
params->num_squeeze_dims = squeeze_dims->Length(); } - builtin_data = reinterpret_cast(params); + *builtin_data = reinterpret_cast(params); break; } case BuiltinOperator_STRIDED_SLICE: { @@ -594,19 +651,32 @@ void* ParseOpData(const Operator* op, BuiltinOperator op_type, params->new_axis_mask = schema_params->new_axis_mask(); params->shrink_axis_mask = schema_params->shrink_axis_mask(); } - builtin_data = reinterpret_cast(params); + *builtin_data = reinterpret_cast(params); + break; + } + case BuiltinOperator_MAXIMUM: + case BuiltinOperator_MINIMUM: { + break; + } + case BuiltinOperator_ARG_MAX: { + auto* params = MallocPOD(); + if (auto* schema_params = op->builtin_options_as_ArgMaxOptions()) { + ConvertTensorType(schema_params->output_type(), ¶ms->output_type, + error_reporter); + } + *builtin_data = reinterpret_cast(params); break; } - case BuiltinOperator_MAXIMUM: { + case BuiltinOperator_LESS: { break; } case BuiltinOperator_DELEGATE: { // TODO(ycling): Revisit when supporting saving delegated models. 
error_reporter->Report("DELEGATE op shouldn't exist in model."); - break; + return kTfLiteError; } } - return builtin_data; + return kTfLiteOk; } } // namespace @@ -646,10 +716,13 @@ TfLiteStatus InterpreterBuilder::ParseNodes( reinterpret_cast(op->custom_options()->data()), op->custom_options()->size(), nullptr, reg); } else { + void* builtin_data = nullptr; + TF_LITE_ENSURE_STATUS( + ParseOpData(op, op_type, error_reporter_, &builtin_data)); interpreter->AddNodeWithParameters( FlatBufferIntArrayToVector(op->inputs()), - FlatBufferIntArrayToVector(op->outputs()), nullptr, 0, - ParseOpData(op, op_type, error_reporter_), reg); + FlatBufferIntArrayToVector(op->outputs()), nullptr, 0, builtin_data, + reg); } } @@ -707,29 +780,10 @@ TfLiteStatus InterpreterBuilder::ParseTensors( } TfLiteType type; - switch (tensor->type()) { - case TensorType_FLOAT32: - type = kTfLiteFloat32; - break; - case TensorType_INT32: - type = kTfLiteInt32; - break; - case TensorType_UINT8: - type = kTfLiteUInt8; - break; - case TensorType_INT64: - type = kTfLiteInt64; - break; - case TensorType_STRING: - type = kTfLiteString; - break; - default: - // tensorType = ArrayType::NONE; - error_reporter_->Report("Unimplemented data type %s (%d) in tensor\n", - EnumNameTensorType(tensor->type()), - tensor->type()); - status = kTfLiteError; - continue; + if (ConvertTensorType(tensor->type(), &type, error_reporter_) != + kTfLiteOk) { + status = kTfLiteError; + continue; } auto get_readonly_data = [&](const char** buffer_data, size_t* buffer_size) { diff --git a/tensorflow/contrib/lite/model.h b/tensorflow/contrib/lite/model.h index 036dc46e03f565c40791aee55d4158cef5c832e0..5a55b031a8c28085e02782608eb820a3cfe78dde 100644 --- a/tensorflow/contrib/lite/model.h +++ b/tensorflow/contrib/lite/model.h @@ -56,27 +56,37 @@ class TfLiteVerifier { // or mmapped. This uses flatbuffers as the serialization format. class FlatBufferModel { public: - // Builds a model based on a file. 
Returns a nullptr in case of failure. + // Builds a model based on a file. + // Caller retains ownership of `error_reporter` and must ensure its lifetime + // is longer than the FlatBufferModel instance. + // Returns a nullptr in case of failure. static std::unique_ptr BuildFromFile( const char* filename, ErrorReporter* error_reporter = DefaultErrorReporter()); // Verifies whether the content of the file is legit, then builds a model - // based on the file. Returns a nullptr in case of failure. + // based on the file. + // Caller retains ownership of `error_reporter` and must ensure its lifetime + // is longer than the FlatBufferModel instance. + // Returns a nullptr in case of failure. static std::unique_ptr VerifyAndBuildFromFile( const char* filename, TfLiteVerifier* verifier = nullptr, ErrorReporter* error_reporter = DefaultErrorReporter()); // Builds a model based on a pre-loaded flatbuffer. The caller retains // ownership of the buffer and should keep it alive until the returned object - // is destroyed. Returns a nullptr in case of failure. + // is destroyed. Caller retains ownership of `error_reporter` and must ensure + // its lifetime is longer than the FlatBufferModel instance. + // Returns a nullptr in case of failure. static std::unique_ptr BuildFromBuffer( const char* buffer, size_t buffer_size, ErrorReporter* error_reporter = DefaultErrorReporter()); // Builds a model directly from a flatbuffer pointer. The caller retains // ownership of the buffer and should keep it alive until the returned object - // is destroyed. Returns a nullptr in case of failure. + // is destroyed. Caller retains ownership of `error_reporter` and must ensure + // its lifetime is longer than the FlatBufferModel instance. + // Returns a nullptr in case of failure. 
static std::unique_ptr BuildFromModel( const tflite::Model* model_spec, ErrorReporter* error_reporter = DefaultErrorReporter()); @@ -100,7 +110,10 @@ class FlatBufferModel { private: // Loads a model from a given allocation. FlatBufferModel will take over the - // ownership of `allocation`, and delete it in desctructor. + // ownership of `allocation`, and delete it in destructor. The ownership of + // `error_reporter` remains with the caller, which must keep it alive at least + // as long as the FlatBufferModel. This is to allow multiple models to use the + // same ErrorReporter instance. FlatBufferModel(Allocation* allocation, ErrorReporter* error_reporter = DefaultErrorReporter()); @@ -111,7 +124,10 @@ class FlatBufferModel { // Flatbuffer traverser pointer. (Model* is a pointer that is within the // allocated memory of the data allocated by allocation's internals. const tflite::Model* model_ = nullptr; + // The error reporter to use for model errors and subsequent errors when + // the interpreter is created. ErrorReporter* error_reporter_; + // The allocator used for holding memory of the model. Allocation* allocation_ = nullptr; }; diff --git a/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h b/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h index 85aca3687402a89b557d76ab5ace80dea8f8b23d..ace4827d8ce2150cde69a0b0c2d5ca39203193bd 100644 --- a/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h +++ b/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h @@ -34,10 +34,13 @@ limitations under the License. inline void* loadLibrary(const char* name) { // TODO: change RTLD_LOCAL? 
Assumes there can be multiple instances of nn // api RT - void* handle = dlopen(name, RTLD_LAZY | RTLD_LOCAL); + void* handle = nullptr; +#ifdef __ANDROID__ + handle = dlopen(name, RTLD_LAZY | RTLD_LOCAL); if (handle == nullptr) { NNAPI_LOG("nnapi error: unable to open library %s", name); } +#endif return handle; } diff --git a/tensorflow/contrib/lite/nnapi_delegate.cc b/tensorflow/contrib/lite/nnapi_delegate.cc index decaf9f160ad35b66f0ed56d0840634c610e4246..eab82ea8ef23542cb3fc490d913313d4c757e466 100644 --- a/tensorflow/contrib/lite/nnapi_delegate.cc +++ b/tensorflow/contrib/lite/nnapi_delegate.cc @@ -162,7 +162,7 @@ void AddOpsAndParams(tflite::Interpreter* interpreter, }; auto duplicate_state_tensor_float32 = - [interpreter, &nn_model, &augmented_inputs, &next_id](int tensor_id) { + [interpreter, &nn_model, &augmented_inputs](int tensor_id) { const TfLiteTensor* tensor = interpreter->tensor(tensor_id); CHECK_NN(ANeuralNetworksModel_setOperandValue( nn_model, tensor_id, tensor->data.raw, tensor->bytes)); @@ -351,6 +351,9 @@ void AddOpsAndParams(tflite::Interpreter* interpreter, case tflite::BuiltinOperator_CAST: case tflite::BuiltinOperator_PRELU: case tflite::BuiltinOperator_MAXIMUM: + case tflite::BuiltinOperator_MINIMUM: + case tflite::BuiltinOperator_ARG_MAX: + case tflite::BuiltinOperator_LESS: FATAL("Op code %d is currently not delegated to NNAPI", builtin); nn_op_type = -1; // set to invalid break; diff --git a/tensorflow/contrib/lite/optional_debug_tools.cc b/tensorflow/contrib/lite/optional_debug_tools.cc index 1f762e6688d0cc2a91417b9d82201446e3060a6f..e1366639c78a4e90740aaf42a9ba5770ec65cb78 100644 --- a/tensorflow/contrib/lite/optional_debug_tools.cc +++ b/tensorflow/contrib/lite/optional_debug_tools.cc @@ -48,6 +48,8 @@ const char* TensorTypeName(TfLiteType type) { return "kTfLiteInt64"; case kTfLiteString: return "kTfLiteString"; + case kTfLiteBool: + return "kTfLiteBool"; } return "(invalid)"; } diff --git a/tensorflow/contrib/lite/profiling/BUILD 
b/tensorflow/contrib/lite/profiling/BUILD new file mode 100644 index 0000000000000000000000000000000000000000..15999e5d4188db1e191936ae6d84faf8cce5ca6e --- /dev/null +++ b/tensorflow/contrib/lite/profiling/BUILD @@ -0,0 +1,44 @@ +package(default_visibility = ["//visibility:public"]) + +licenses(["notice"]) # Apache 2.0 + +common_copts = [ + "-Wall", +] + +cc_library( + name = "profiler", + hdrs = ["profiler.h"], + copts = common_copts, + deps = [":profile_buffer"], +) + +cc_test( + name = "profiler_test", + srcs = ["profiler_test.cc"], + copts = ["-DTFLITE_PROFILING_ENABLED"], + defines = ["TFLITE_PROFILING_ENABLED"], + deps = [ + ":profiler", + "//tensorflow/contrib/lite/testing:util", + "@com_google_googletest//:gtest", + ], +) + +cc_library( + name = "profile_buffer", + hdrs = ["profile_buffer.h"], + copts = common_copts, +) + +cc_test( + name = "profile_buffer_test", + srcs = ["profile_buffer_test.cc"], + copts = ["-DTFLITE_PROFILING_ENABLED"], + defines = ["TFLITE_PROFILING_ENABLED"], + deps = [ + ":profile_buffer", + "//tensorflow/contrib/lite/testing:util", + "@com_google_googletest//:gtest", + ], +) diff --git a/tensorflow/contrib/lite/profiling/profile_buffer.h b/tensorflow/contrib/lite/profiling/profile_buffer.h new file mode 100644 index 0000000000000000000000000000000000000000..3bfe02571ba59f05ff316d327d2a964f6d5b4e1e --- /dev/null +++ b/tensorflow/contrib/lite/profiling/profile_buffer.h @@ -0,0 +1,150 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_CONTRIB_LITE_PROFILING_PROFILE_BUFFER_H_ +#define TENSORFLOW_CONTRIB_LITE_PROFILING_PROFILE_BUFFER_H_ + +#include +#include + +namespace tflite { +namespace profiling { + +// A profiling event. +struct ProfileEvent { + // Describes the type of event. + // The event_metadata field may contain additional data for interpreting + // the event. + enum class EventType { + // Default event type, the metadata field has no special significance. + DEFAULT = 0, + // The event is an operator invocation and the event_metadata field is the + // index of operator node. + OPERATOR_INVOKE_EVENT = 1 + }; + + // Label of the event. This usually describes the event. + const char* tag; + // Timestamp in microseconds when the event began. + int64_t begin_timestamp_ms; + // Timestamp in microseconds when the event ended. + int64_t end_timestamp_ms; + // The field containing the type of event. This must be one of the event types + // in EventType. + EventType event_type; + // Extra data describing the details of the event. + uint32_t event_metadata; +}; +} // namespace profiling +} // namespace tflite + +#ifdef TFLITE_PROFILING_ENABLED + +#include +#include + +namespace tflite { +namespace profiling { +constexpr uint32_t kInvalidEventHandle = static_cast(~0) - 1; + +// A ring buffer of profile events. +// This class is not thread safe. +class ProfileBuffer { + public: + ProfileBuffer(uint32_t max_num_entries, bool enabled) + : enabled_(enabled), current_index_(0), event_buffer_(max_num_entries) {} + + // Adds an event to the buffer with begin timestamp set to the current + // timestamp. Returns a handle to event that can be used to call EndEvent. If + // buffer is disabled this has no effect. + // The tag of the event must remain valid for as long as the buffer is valid. 
+ uint32_t BeginEvent(const char* tag, ProfileEvent::EventType event_type, + uint32_t event_metadata) { + if (!enabled_) { + return kInvalidEventHandle; + } + int64_t timestamp = NowMicros(); + int index = current_index_ % event_buffer_.size(); + event_buffer_[index].tag = tag; + event_buffer_[index].event_type = event_type; + event_buffer_[index].event_metadata = event_metadata; + event_buffer_[index].begin_timestamp_ms = timestamp; + event_buffer_[index].end_timestamp_ms = 0; + current_index_++; + return index; + } + + // Sets the enabled state of buffer to |enabled| + void SetEnabled(bool enabled) { enabled_ = enabled; } + + // Sets the end timestamp for event for the handle to current time. + // If the buffer is disabled or previous event has been overwritten this + // operation has no effect. + void EndEvent(uint32_t event_handle) { + if (!enabled_ || event_handle == kInvalidEventHandle || + event_handle > current_index_) { + return; + } + const uint32_t max_size = event_buffer_.size(); + if (current_index_ > (max_size + event_handle)) { + // Ignore, buffer has already overflowed. + return; + } + + int event_index = event_handle % max_size; + event_buffer_[event_index].end_timestamp_ms = NowMicros(); + } + + // Returns the size of the buffer. + size_t Size() const { + return (current_index_ >= event_buffer_.size()) ? event_buffer_.size() + : current_index_; + } + + // Resets the buffer. + void Reset() { + enabled_ = false; + current_index_ = 0; + } + + // Returns the profile event at the given index. If the index is invalid a + // nullptr is returned. The returned event may get overwritten if more events + // are added to buffer. + const struct ProfileEvent* const At(int index) const { + size_t size = Size(); + if (index >= size) { + return nullptr; + } + const uint32_t max_size = event_buffer_.size(); + uint32_t start = + (current_index_ > max_size) ? 
current_index_ % max_size : max_size; + index = (index + start) % max_size; + return &event_buffer_[index]; + } + + private: + static int64_t NowMicros() { + // TODO(shashishekhar): Refactor this to a separate file. + struct timeval tv; + gettimeofday(&tv, nullptr); + return static_cast(tv.tv_sec) * 1000000 + tv.tv_usec; + } + bool enabled_; + uint32_t current_index_; + std::vector event_buffer_; +}; +} // namespace profiling +} // namespace tflite +#endif // TFLITE_PROFILING_ENABLED +#endif // TENSORFLOW_CONTRIB_LITE_PROFILING_PROFILE_BUFFER_H_ diff --git a/tensorflow/contrib/lite/profiling/profile_buffer_test.cc b/tensorflow/contrib/lite/profiling/profile_buffer_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..0c5f0cd31495d67beb867beb835cfc897d7d6265 --- /dev/null +++ b/tensorflow/contrib/lite/profiling/profile_buffer_test.cc @@ -0,0 +1,102 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include +#include + +#include +#include +#include "tensorflow/contrib/lite/profiling/profile_buffer.h" +#include "tensorflow/contrib/lite/testing/util.h" + +namespace tflite { +namespace profiling { + +namespace { + +std::vector GetProfileEvents(const ProfileBuffer& buffer) { + std::vector events; + for (auto i = 0; i < buffer.Size(); i++) { + events.push_back(buffer.At(i)); + } + return events; +} + +TEST(ProfileBufferTest, Empty) { + ProfileBuffer buffer(/*max_size*/ 0, /*enabled*/ true); + EXPECT_EQ(0, buffer.Size()); +} + +TEST(ProfileBufferTest, AddEvent) { + ProfileBuffer buffer(/*max_size*/ 10, /*enabled*/ true); + EXPECT_EQ(0, buffer.Size()); + auto event_handle = buffer.BeginEvent( + "hello", ProfileEvent::EventType::DEFAULT, /* event_metadata */ 42); + + EXPECT_GE(event_handle, 0); + EXPECT_EQ(1, buffer.Size()); + + auto event = GetProfileEvents(buffer)[0]; + EXPECT_STREQ(event->tag, "hello"); + EXPECT_GT(event->begin_timestamp_ms, 0); + EXPECT_EQ(event->event_type, ProfileEvent::EventType::DEFAULT); + EXPECT_EQ(event->event_metadata, 42); + + buffer.EndEvent(event_handle); + EXPECT_EQ(1, buffer.Size()); + EXPECT_GE(event->end_timestamp_ms, event->begin_timestamp_ms); +} + +TEST(ProfileBufferTest, OverFlow) { + const int max_size = 4; + ProfileBuffer buffer{max_size, true}; + std::vector eventNames = {"first", "second", "third", "fourth"}; + for (int i = 0; i < 2 * max_size; i++) { + buffer.BeginEvent(eventNames[i % 4].c_str(), + ProfileEvent::EventType::DEFAULT, i); + size_t expected_size = std::min(i + 1, max_size); + EXPECT_EQ(expected_size, buffer.Size()); + } + EXPECT_EQ(max_size, buffer.Size()); + for (int j = 0; j < buffer.Size(); ++j) { + auto event = buffer.At(j); + EXPECT_EQ(eventNames[j % 4], event->tag); + EXPECT_EQ(ProfileEvent::EventType::DEFAULT, event->event_type); + EXPECT_EQ(4 + j, event->event_metadata); + } +} + +TEST(ProfileBufferTest, Enable) { + 
ProfileBuffer buffer(/*max_size*/ 10, /*enabled*/ false); + EXPECT_EQ(0, buffer.Size()); + auto event_handle = buffer.BeginEvent( + "hello", ProfileEvent::EventType::DEFAULT, /* event_metadata */ 42); + EXPECT_EQ(kInvalidEventHandle, event_handle); + EXPECT_EQ(0, buffer.Size()); + buffer.SetEnabled(true); + event_handle = buffer.BeginEvent("hello", ProfileEvent::EventType::DEFAULT, + /* event_metadata */ 42); + EXPECT_GE(event_handle, 0); + EXPECT_EQ(1, buffer.Size()); +} + +} // namespace +} // namespace profiling +} // namespace tflite + +int main(int argc, char** argv) { + ::tflite::LogToStderr(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/contrib/lite/profiling/profiler.h b/tensorflow/contrib/lite/profiling/profiler.h new file mode 100644 index 0000000000000000000000000000000000000000..dfa98a6708edc874708537dc324d8c340664dc63 --- /dev/null +++ b/tensorflow/contrib/lite/profiling/profiler.h @@ -0,0 +1,174 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#ifndef TENSORFLOW_CONTRIB_LITE_PROFILING_PROFILER_H_ +#define TENSORFLOW_CONTRIB_LITE_PROFILING_PROFILER_H_ + +#include + +#include "tensorflow/contrib/lite/profiling/profile_buffer.h" + +#ifdef TFLITE_PROFILING_ENABLED + +namespace tflite { +namespace profiling { +class ScopedProfile; +class ScopedOperatorProfile; + +// Controls whether profiling is enabled or disabled and collects profiles. +// TFLite is used on platforms that don't have posix threads, so the profiler is +// kept as simple as possible. It is designed to be used only on a single +// thread. +// +// Profiles are collected using Scoped*Profile objects that begin and end a +// profile event. +// Example usage is shown below: +// +// Say Worker class has a DoWork method and we are interested in profiling +// the overall execution time for DoWork and time spent in Task1 and Task2 +// functions. +// +// class Worker { +// public: +// void DoWork() { +// ScopedProfile profile(&profiler, "DoWork"); +// Task1(); +// Task2(); +// ..... +// } +// +// void Task1() { +// ScopedProfile profile(&profiler, "Task1"); +// .... +// } +// +// void Task2() { +// ScopedProfile profile(&profiler, "Task2"); +// } +// +// Profiler profiler; +// } +// +// We instrument the functions that need to be profiled. +// +// Profiles can be collected by starting profiling and then getting profile +// events. +// +// void ProfileWorker() { +// Worker worker; +// worker.profiler.StartProfiling(); +// worker.DoWork(); +// worker.profiler.StopProfiling(); +// // Profiling is complete, extract profiles. 
+// auto profile_events = worker.profiler.GetProfileEvents(); +// } +// +// +class Profiler { + public: + Profiler() : buffer_(1024, false) {} + + void StartProfiling() { buffer_.SetEnabled(true); } + void StopProfiling() { buffer_.SetEnabled(false); } + void Reset() { buffer_.Reset(); } + std::vector GetProfileEvents() { + std::vector profile_events; + profile_events.reserve(buffer_.Size()); + for (int i = 0; i < buffer_.Size(); i++) { + profile_events.push_back(buffer_.At(i)); + } + return profile_events; + } + + private: + friend class ScopedProfile; + friend class ScopedOperatorProfile; + ProfileBuffer* GetProfileBuffer() { return &buffer_; } + ProfileBuffer buffer_; +}; + +class ScopedProfile { + public: + // Adds a profile event to profile that begins with the construction + // of object and ends when the object goes out of scope. + // The lifetime of tag should be at least the lifetime of profiler. + // A null profiler is allowed; the object then records nothing. + ScopedProfile(Profiler* profiler, const char* tag) { + if (profiler) { + buffer_ = profiler->GetProfileBuffer(); + event_handle_ = + buffer_->BeginEvent(tag, ProfileEvent::EventType::DEFAULT, 0); + } + } + ~ScopedProfile() { + if (buffer_) { + buffer_->EndEvent(event_handle_); + } + } + + private: + ProfileBuffer* buffer_ = nullptr; // Stays null when no profiler is given. + int32_t event_handle_ = 0; +}; + +class ScopedOperatorProfile { + public: + // Adds a profile event to profile that begins with the construction + // of object and ends when the object goes out of scope. + // The lifetime of tag should be at least the lifetime of profiler. 
+ ScopedOperatorProfile(Profiler* profiler, const char* tag, int node_index) { + if (profiler) { + buffer_ = profiler->GetProfileBuffer(); + event_handle_ = buffer_->BeginEvent( + tag, ProfileEvent::EventType::OPERATOR_INVOKE_EVENT, node_index); + } + } + + ~ScopedOperatorProfile() { + if (buffer_) { + buffer_->EndEvent(event_handle_); + } + } + + private: + ProfileBuffer* buffer_ = nullptr; // Stays null when no profiler is given. + int32_t event_handle_ = 0; +}; + +} // namespace profiling +} // namespace tflite + +#define SCOPED_OPERATOR_PROFILE(profiler, node_index) \ + tflite::profiling::ScopedOperatorProfile _profile((profiler), "OpInvoke", \ + (node_index)) +#else + +namespace tflite { +namespace profiling { +// A noop version of profiler when profiling is disabled. +class Profiler { + public: + Profiler() {} + void StartProfiling() {} + void StopProfiling() {} + void Reset() {} + std::vector GetProfileEvents() { return {}; } +}; +} // namespace profiling +} // namespace tflite + +#define SCOPED_OPERATOR_PROFILE(profiler, node_index) + +#endif // TFLITE_PROFILING_ENABLED + +#endif // TENSORFLOW_CONTRIB_LITE_PROFILING_PROFILER_H_ diff --git a/tensorflow/contrib/lite/profiling/profiler_test.cc b/tensorflow/contrib/lite/profiling/profiler_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..994523a8fb74b511a18f2ec53147ce81323bec1b --- /dev/null +++ b/tensorflow/contrib/lite/profiling/profiler_test.cc @@ -0,0 +1,105 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include + +#include // NOLINT(build/c++11) +#include +#include // NOLINT(build/c++11) + +#include +#include +#include "tensorflow/contrib/lite/profiling/profiler.h" +#include "tensorflow/contrib/lite/testing/util.h" + +namespace tflite { +namespace profiling { +namespace { + +void AssertDurationOfEventAroundMs(const ProfileEvent* event, + double expected_ms, double eps_ms) { + double duration_ms = + (event->end_timestamp_ms - event->begin_timestamp_ms) / 1e3; + EXPECT_NEAR(expected_ms, duration_ms, eps_ms); +} + +void SleepForQuarterSecond(Profiler* profiler) { + ScopedProfile profile(profiler, "SleepForQuarter"); + std::this_thread::sleep_for(std::chrono::milliseconds(250)); +} + +void ChildFunction(Profiler* profiler) { + ScopedProfile profile(profiler, "Child"); + SleepForQuarterSecond(profiler); +} + +void ParentFunction(Profiler* profiler) { + ScopedProfile profile(profiler, "Parent"); + for (int i = 0; i < 2; i++) { + ChildFunction(profiler); + } +} + +TEST(ProfilerTest, NoProfilesAreCollectedWhenDisabled) { + Profiler profiler; + ParentFunction(&profiler); + auto profile_events = profiler.GetProfileEvents(); + EXPECT_EQ(0, profile_events.size()); +} + +TEST(ProfilingTest, ProfilesAreCollected) { + Profiler profiler; + profiler.StartProfiling(); + ParentFunction(&profiler); + profiler.StopProfiling(); + auto profile_events = profiler.GetProfileEvents(); + // ParentFunction calls the ChildFunction 2 times. + // Each ChildFunction calls SleepForQuarterSecond once. 
+ // We expect 1 entry for ParentFunction, 2 for ChildFunction and 2 for + // SleepForQuarterSecond: Total: 1 + 2 + 2 = 5 + // Profiles should look like: + // Parent ~ 500 ms (due to 2 Child calls) + // - Child ~ 250 ms (due to SleepForQuarter calls) + // - SleepForQuarter ~ 250ms + // - Child ~ 250 ms (due to SleepForQuarter calls) + // - SleepForQuarter ~ 250ms + // + ASSERT_EQ(5, profile_events.size()); + EXPECT_STREQ("Parent", profile_events[0]->tag); + EXPECT_STREQ("Child", profile_events[1]->tag); + EXPECT_STREQ("SleepForQuarter", profile_events[2]->tag); + EXPECT_STREQ("Child", profile_events[3]->tag); + EXPECT_STREQ("SleepForQuarter", profile_events[4]->tag); + + AssertDurationOfEventAroundMs(profile_events[0], /*expected_ms*/ 500, + /*eps_ms*/ 2); + AssertDurationOfEventAroundMs(profile_events[1], /*expected_ms*/ 250, + /*eps_ms*/ 2); + AssertDurationOfEventAroundMs(profile_events[2], /*expected_ms*/ 250, + /*eps_ms*/ 2); + AssertDurationOfEventAroundMs(profile_events[3], /*expected_ms*/ 250, + /*eps_ms*/ 2); + AssertDurationOfEventAroundMs(profile_events[4], /*expected_ms*/ 250, + /*eps_ms*/ 2); +} + +} // namespace +} // namespace profiling +} // namespace tflite + +int main(int argc, char** argv) { + ::tflite::LogToStderr(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/contrib/lite/python/BUILD b/tensorflow/contrib/lite/python/BUILD index e70aa51298e8fbea78e68bd9a32395fcca7822a6..926896d609d83aac3b875d33dfe3c4dc7ae89ccd 100644 --- a/tensorflow/contrib/lite/python/BUILD +++ b/tensorflow/contrib/lite/python/BUILD @@ -97,16 +97,35 @@ py_binary( ], ) +py_binary( + name = "create_custom_op", + srcs = ["create_custom_op.py"], + srcs_version = "PY2AND3", + visibility = ["//visibility:public"], + deps = [ + "//tensorflow/contrib/framework:framework_py", + "//tensorflow/core:protos_all_py", + "//tensorflow/python:platform", + "@absl_py//absl/flags", + ], +) + py_test( name = "convert_saved_model_test", srcs = 
["convert_saved_model_test.py"], srcs_version = "PY2AND3", + tags = ["no_windows"], visibility = ["//visibility:public"], deps = [ ":convert_saved_model", "//tensorflow/python:client_testlib", + "//tensorflow/python:layers", + "//tensorflow/python:nn", "//tensorflow/python:platform_test", "//tensorflow/python:session", + "//tensorflow/python/estimator:estimator_py", + "//tensorflow/python/keras", + "//tensorflow/python/ops/losses", "//tensorflow/python/saved_model", ], ) diff --git a/tensorflow/contrib/lite/python/convert_saved_model_test.py b/tensorflow/contrib/lite/python/convert_saved_model_test.py index d87fbeb91cc3d2779c0ae01aff488f88bd340c1c..734e42d619bdb79de0306a94e304ce46065d14d4 100644 --- a/tensorflow/contrib/lite/python/convert_saved_model_test.py +++ b/tensorflow/contrib/lite/python/convert_saved_model_test.py @@ -25,21 +25,21 @@ from __future__ import print_function import os from tensorflow.contrib.lite.python import convert_saved_model -from tensorflow.python import estimator from tensorflow.python import keras -from tensorflow.python import layers -from tensorflow.python import losses -from tensorflow.python import nn -from tensorflow.python import saved_model -from tensorflow.python import train from tensorflow.python.client import session +from tensorflow.python.estimator import estimator_lib as estimator from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import test_util +from tensorflow.python.layers import layers from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops +from tensorflow.python.ops import nn from tensorflow.python.ops import random_ops +from tensorflow.python.ops.losses import losses from tensorflow.python.platform import test +from tensorflow.python.saved_model import saved_model +from tensorflow.python.training import training as train class ConvertSavedModelTestBasicGraph(test_util.TensorFlowTestCase): diff --git 
a/tensorflow/contrib/lite/python/create_custom_op.py b/tensorflow/contrib/lite/python/create_custom_op.py new file mode 100644 index 0000000000000000000000000000000000000000..830f95358c455047db2cbad15cfed8c221e95dca --- /dev/null +++ b/tensorflow/contrib/lite/python/create_custom_op.py @@ -0,0 +1,111 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +r"""Replaces a subgraph of a TensorFlow GraphDef with a single node. + +In conjunction with TOCO's --allow_custom_op this script allows selected +portions of a TensorFlow GraphDef to be executed by custom code. + +Example: + +bazel run tensorflow/contrib/lite/python:create_custom_op -- \ + --input_graph=/tmp/input.pb \ + --output_graph=/tmp/output.pb \ + --inputs=concat,concat_1 \ + --outputs=detection_classes \ + --op_definition='op:"PostProcessing" attr{key:"num" value:{i:10}}' + +The above will identify a subgraph starting at nodes 'concat' and 'concat_1', +and ending at 'detection_classes'. All nodes in between will be removed and +replaced by a new op called 'PostProcessing'. 
+ +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +import uuid as _uuid +from absl import app +from absl import flags +from google.protobuf import text_format +from tensorflow.contrib.framework.python.framework.graph_util import fuse_op +from tensorflow.core.framework import graph_pb2 +from tensorflow.core.framework import node_def_pb2 +from tensorflow.core.framework import types_pb2 +from tensorflow.python.platform import gfile + +FLAGS = flags.FLAGS + +flags.DEFINE_string("input_graph", "", "Binary graphdef to load.") +flags.DEFINE_string("output_graph", "", "Resulting binary graphdef.") + +flags.DEFINE_string("inputs", "", + "Comma-separated list of inputs to the subgraph.") +flags.DEFINE_string("outputs", "", + "Comma-separated list of outputs of the subgraph.") +flags.DEFINE_string("op_definition", "", + "A text NodeDef defining the contents of the custom op.") + + +def _read_graph_def(filename): + if not gfile.Exists(filename): + raise ValueError("Input graph file '" + filename + "' does not exist!") + + graph_def = graph_pb2.GraphDef() + with gfile.FastGFile(filename, "rb") as f: + graph_def.ParseFromString(f.read()) + return graph_def + + +def _write_graph_def(graph_def, filename): + if not filename: + raise ValueError("Output graph file not specified") + + with gfile.Open(filename, "wb") as f: + f.write(graph_def.SerializeToString()) + + +def _collapse_subgraph(graph_def, inputs, outputs, op_definition): + """Substitute a custom op for the subgraph delimited by inputs and outputs.""" + name = _uuid.uuid1().hex + # We need a default type, but it can be changed using 'op_definition'. 
+ default_type = types_pb2.DT_FLOAT + new_graph = fuse_op( + graph_def=graph_def, + input_nodes=inputs, + output_nodes=outputs, + output_dtypes=[default_type for _ in outputs], + output_quantized=False, + op_name=name, + op_type="CustomTfLiteOp") + node_def = node_def_pb2.NodeDef() + text_format.Parse(op_definition, node_def) + for node in new_graph.node: + if node.name == name: + node.MergeFrom(node_def) + return new_graph + + +def main(argv): + del argv # unused + graph = _read_graph_def(filename=flags.FLAGS.input_graph) + graph = _collapse_subgraph( + graph_def=graph, + inputs=flags.FLAGS.inputs.split(","), + outputs=flags.FLAGS.outputs.split(","), + op_definition=flags.FLAGS.op_definition) + _write_graph_def(graph_def=graph, filename=flags.FLAGS.output_graph) + + +if __name__ == "__main__": + app.run(main) diff --git a/tensorflow/contrib/lite/python/interpreter.py b/tensorflow/contrib/lite/python/interpreter.py index b8638007f7e49737726d9939a00e8cb1d6a41281..cb9c0d3121895595ffce91e254bea3f527714809 100644 --- a/tensorflow/contrib/lite/python/interpreter.py +++ b/tensorflow/contrib/lite/python/interpreter.py @@ -121,8 +121,8 @@ class Interpreter(object): Raises: ValueError: If the interpreter could not resize the input tensor. """ - if not self.ResizeInputTensor.SetTensor(input_index, tensor_size): - raise ValueError('Failed to set input') + if not self._interpreter.ResizeInputTensor(input_index, tensor_size): + raise ValueError('Failed to resize input') def get_output_details(self): """Gets model output details. 
diff --git a/tensorflow/contrib/lite/python/interpreter_test.py b/tensorflow/contrib/lite/python/interpreter_test.py index cd2386f5263f24e1e034015ec6880e71f0608c7c..f802edf020db8a9d4e7bb890aadaae7e34e983a8 100644 --- a/tensorflow/contrib/lite/python/interpreter_test.py +++ b/tensorflow/contrib/lite/python/interpreter_test.py @@ -81,6 +81,9 @@ class InterpreterTest(test_util.TensorFlowTestCase): test_input = np.array([[1, 2, 3, 4]], dtype=np.uint8) expected_output = np.array([[4, 3, 2, 1]], dtype=np.uint8) + interpreter.resize_tensor_input(input_details[0]['index'], + np.array(test_input.shape, dtype=np.int32)) + interpreter.allocate_tensors() interpreter.set_tensor(input_details[0]['index'], test_input) interpreter.invoke() diff --git a/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.cc b/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.cc index 35ad226b78c906f0819afd5b029a1a0d438d69af..04fc098129854e168d68de3b308eabbcaa968ea8 100644 --- a/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.cc +++ b/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.cc @@ -72,6 +72,8 @@ int TfLiteTypeToPyArrayType(TfLiteType tf_lite_type) { return NPY_INT64; case kTfLiteString: return NPY_OBJECT; + case kTfLiteBool: + return NPY_BOOL; case kTfLiteNoType: return -1; } @@ -90,6 +92,8 @@ TfLiteType TfLiteTypeFromPyArray(PyArrayObject* array) { return kTfLiteUInt8; case NPY_INT64: return kTfLiteInt64; + case NPY_BOOL: + return kTfLiteBool; case NPY_OBJECT: case NPY_STRING: case NPY_UNICODE: @@ -186,7 +190,7 @@ bool InterpreterWrapper::ResizeInputTensor(int i, PyObject* value) { std::vector dims(PyArray_SHAPE(array)[0]); memcpy(dims.data(), PyArray_BYTES(array), dims.size() * sizeof(int)); - return interpreter_->ResizeInputTensor(i, dims); + return (interpreter_->ResizeInputTensor(i, dims) == kTfLiteOk); } std::string InterpreterWrapper::TensorName(int i) const { diff --git 
a/tensorflow/contrib/lite/python/lite.py b/tensorflow/contrib/lite/python/lite.py index ed6dd036f9fd9f39b74e902498d815793943924b..cf50f9d4d65cb7a36af8f82e2d29babbc9884d23 100644 --- a/tensorflow/contrib/lite/python/lite.py +++ b/tensorflow/contrib/lite/python/lite.py @@ -145,7 +145,8 @@ def toco_convert(input_data, input_format=TENSORFLOW_GRAPHDEF, output_format=TFLITE, quantized_input_stats=None, - drop_control_dependency=True): + drop_control_dependency=True, + allow_custom_ops=None): """Convert a model using TOCO from `input_format` to `output_format`. Typically this is to convert from TensorFlow GraphDef to TFLite, in which @@ -178,9 +179,12 @@ def toco_convert(input_data, toco = _toco_flags_pb2.TocoFlags() toco.input_format = input_format toco.output_format = output_format + toco.inference_type = inference_type toco.drop_control_dependency = drop_control_dependency + if allow_custom_ops is not None: + toco.allow_custom_ops = allow_custom_ops + model = _model_flags_pb2.ModelFlags() - toco.inference_type = inference_type for idx, input_tensor in enumerate(input_tensors): if input_tensor.dtype == _dtypes.float32: tflite_input_type = FLOAT diff --git a/tensorflow/contrib/lite/schema/BUILD b/tensorflow/contrib/lite/schema/BUILD index 246ec85fe47e496e157a91ab4ff84f6b1eeab4a4..9717a4a1a496b888348514584888e62c4e3703b4 100644 --- a/tensorflow/contrib/lite/schema/BUILD +++ b/tensorflow/contrib/lite/schema/BUILD @@ -63,6 +63,9 @@ cc_test( "schema.fbs", "schema_v3.fbs", ], + tags = [ + "tflite_not_portable_android", + ], deps = [ "//tensorflow/core:lib_platform", "@com_google_googletest//:gtest", diff --git a/tensorflow/contrib/lite/schema/schema.fbs b/tensorflow/contrib/lite/schema/schema.fbs index 7d2e00fe329a5da77af7bf091eaa99badbd1022a..2b62c257d8410f9af1b250c9d108eba6737a9efe 100644 --- a/tensorflow/contrib/lite/schema/schema.fbs +++ b/tensorflow/contrib/lite/schema/schema.fbs @@ -33,6 +33,7 @@ enum TensorType : byte { UINT8 = 3, INT64 = 4, STRING = 5, + BOOL = 6, } 
// Parameters for converting a quantized tensor back to float. Given a @@ -132,6 +133,9 @@ enum BuiltinOperator : byte { CAST = 53, PRELU = 54, MAXIMUM = 55, + ARG_MAX = 56, + MINIMUM = 57, + LESS = 58, } // Options for the builtin operators. @@ -174,7 +178,9 @@ union BuiltinOptions { LogSoftmaxOptions, CastOptions, DequantizeOptions, - MaximumOptions, + MaximumMinimumOptions, + ArgMaxOptions, + LessOptions, } enum Padding : byte { SAME, VALID } @@ -193,6 +199,8 @@ table Conv2DOptions { stride_w:int; stride_h:int; fused_activation_function:ActivationFunctionType; + dilation_w_factor:int = 1; + dilation_h_factor:int = 1; } table Pool2DOptions { @@ -381,12 +389,21 @@ table LogSoftmaxOptions { } table CastOptions { + in_data_type: TensorType; + out_data_type: TensorType; } table DequantizeOptions { } -table MaximumOptions { +table MaximumMinimumOptions { +} + +table ArgMaxOptions { + output_type : TensorType; +} + +table LessOptions { } // An OperatorCode can be an enum value (BuiltinOperator) if the operator is a diff --git a/tensorflow/contrib/lite/schema/schema_generated.h b/tensorflow/contrib/lite/schema/schema_generated.h index 66a97a1460d12b48102f53f975cb1e25e7735111..0b9961d606d6095b0dc693df631e4efffcb0e35e 100755 --- a/tensorflow/contrib/lite/schema/schema_generated.h +++ b/tensorflow/contrib/lite/schema/schema_generated.h @@ -145,8 +145,14 @@ struct CastOptionsT; struct DequantizeOptions; struct DequantizeOptionsT; -struct MaximumOptions; -struct MaximumOptionsT; +struct MaximumMinimumOptions; +struct MaximumMinimumOptionsT; + +struct ArgMaxOptions; +struct ArgMaxOptionsT; + +struct LessOptions; +struct LessOptionsT; struct OperatorCode; struct OperatorCodeT; @@ -170,18 +176,20 @@ enum TensorType { TensorType_UINT8 = 3, TensorType_INT64 = 4, TensorType_STRING = 5, + TensorType_BOOL = 6, TensorType_MIN = TensorType_FLOAT32, - TensorType_MAX = TensorType_STRING + TensorType_MAX = TensorType_BOOL }; -inline TensorType (&EnumValuesTensorType())[6] { +inline 
TensorType (&EnumValuesTensorType())[7] { static TensorType values[] = { TensorType_FLOAT32, TensorType_FLOAT16, TensorType_INT32, TensorType_UINT8, TensorType_INT64, - TensorType_STRING + TensorType_STRING, + TensorType_BOOL }; return values; } @@ -194,6 +202,7 @@ inline const char **EnumNamesTensorType() { "UINT8", "INT64", "STRING", + "BOOL", nullptr }; return names; @@ -259,11 +268,14 @@ enum BuiltinOperator { BuiltinOperator_CAST = 53, BuiltinOperator_PRELU = 54, BuiltinOperator_MAXIMUM = 55, + BuiltinOperator_ARG_MAX = 56, + BuiltinOperator_MINIMUM = 57, + BuiltinOperator_LESS = 58, BuiltinOperator_MIN = BuiltinOperator_ADD, - BuiltinOperator_MAX = BuiltinOperator_MAXIMUM + BuiltinOperator_MAX = BuiltinOperator_LESS }; -inline BuiltinOperator (&EnumValuesBuiltinOperator())[54] { +inline BuiltinOperator (&EnumValuesBuiltinOperator())[57] { static BuiltinOperator values[] = { BuiltinOperator_ADD, BuiltinOperator_AVERAGE_POOL_2D, @@ -318,7 +330,10 @@ inline BuiltinOperator (&EnumValuesBuiltinOperator())[54] { BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM, BuiltinOperator_CAST, BuiltinOperator_PRELU, - BuiltinOperator_MAXIMUM + BuiltinOperator_MAXIMUM, + BuiltinOperator_ARG_MAX, + BuiltinOperator_MINIMUM, + BuiltinOperator_LESS }; return values; } @@ -381,6 +396,9 @@ inline const char **EnumNamesBuiltinOperator() { "CAST", "PRELU", "MAXIMUM", + "ARG_MAX", + "MINIMUM", + "LESS", nullptr }; return names; @@ -431,12 +449,14 @@ enum BuiltinOptions { BuiltinOptions_LogSoftmaxOptions = 36, BuiltinOptions_CastOptions = 37, BuiltinOptions_DequantizeOptions = 38, - BuiltinOptions_MaximumOptions = 39, + BuiltinOptions_MaximumMinimumOptions = 39, + BuiltinOptions_ArgMaxOptions = 40, + BuiltinOptions_LessOptions = 41, BuiltinOptions_MIN = BuiltinOptions_NONE, - BuiltinOptions_MAX = BuiltinOptions_MaximumOptions + BuiltinOptions_MAX = BuiltinOptions_LessOptions }; -inline BuiltinOptions (&EnumValuesBuiltinOptions())[40] { +inline BuiltinOptions (&EnumValuesBuiltinOptions())[42] 
{ static BuiltinOptions values[] = { BuiltinOptions_NONE, BuiltinOptions_Conv2DOptions, @@ -477,7 +497,9 @@ inline BuiltinOptions (&EnumValuesBuiltinOptions())[40] { BuiltinOptions_LogSoftmaxOptions, BuiltinOptions_CastOptions, BuiltinOptions_DequantizeOptions, - BuiltinOptions_MaximumOptions + BuiltinOptions_MaximumMinimumOptions, + BuiltinOptions_ArgMaxOptions, + BuiltinOptions_LessOptions }; return values; } @@ -523,7 +545,9 @@ inline const char **EnumNamesBuiltinOptions() { "LogSoftmaxOptions", "CastOptions", "DequantizeOptions", - "MaximumOptions", + "MaximumMinimumOptions", + "ArgMaxOptions", + "LessOptions", nullptr }; return names; @@ -690,8 +714,16 @@ template<> struct BuiltinOptionsTraits { static const BuiltinOptions enum_value = BuiltinOptions_DequantizeOptions; }; -template<> struct BuiltinOptionsTraits { - static const BuiltinOptions enum_value = BuiltinOptions_MaximumOptions; +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_MaximumMinimumOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_ArgMaxOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_LessOptions; }; struct BuiltinOptionsUnion { @@ -1029,13 +1061,29 @@ struct BuiltinOptionsUnion { return type == BuiltinOptions_DequantizeOptions ? reinterpret_cast(value) : nullptr; } - MaximumOptionsT *AsMaximumOptions() { - return type == BuiltinOptions_MaximumOptions ? - reinterpret_cast(value) : nullptr; + MaximumMinimumOptionsT *AsMaximumMinimumOptions() { + return type == BuiltinOptions_MaximumMinimumOptions ? + reinterpret_cast(value) : nullptr; + } + const MaximumMinimumOptionsT *AsMaximumMinimumOptions() const { + return type == BuiltinOptions_MaximumMinimumOptions ? + reinterpret_cast(value) : nullptr; + } + ArgMaxOptionsT *AsArgMaxOptions() { + return type == BuiltinOptions_ArgMaxOptions ? 
+ reinterpret_cast(value) : nullptr; + } + const ArgMaxOptionsT *AsArgMaxOptions() const { + return type == BuiltinOptions_ArgMaxOptions ? + reinterpret_cast(value) : nullptr; } - const MaximumOptionsT *AsMaximumOptions() const { - return type == BuiltinOptions_MaximumOptions ? - reinterpret_cast(value) : nullptr; + LessOptionsT *AsLessOptions() { + return type == BuiltinOptions_LessOptions ? + reinterpret_cast(value) : nullptr; + } + const LessOptionsT *AsLessOptions() const { + return type == BuiltinOptions_LessOptions ? + reinterpret_cast(value) : nullptr; } }; @@ -1430,11 +1478,15 @@ struct Conv2DOptionsT : public flatbuffers::NativeTable { int32_t stride_w; int32_t stride_h; ActivationFunctionType fused_activation_function; + int32_t dilation_w_factor; + int32_t dilation_h_factor; Conv2DOptionsT() : padding(Padding_SAME), stride_w(0), stride_h(0), - fused_activation_function(ActivationFunctionType_NONE) { + fused_activation_function(ActivationFunctionType_NONE), + dilation_w_factor(1), + dilation_h_factor(1) { } }; @@ -1444,7 +1496,9 @@ struct Conv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { VT_PADDING = 4, VT_STRIDE_W = 6, VT_STRIDE_H = 8, - VT_FUSED_ACTIVATION_FUNCTION = 10 + VT_FUSED_ACTIVATION_FUNCTION = 10, + VT_DILATION_W_FACTOR = 12, + VT_DILATION_H_FACTOR = 14 }; Padding padding() const { return static_cast(GetField(VT_PADDING, 0)); @@ -1458,12 +1512,20 @@ struct Conv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { ActivationFunctionType fused_activation_function() const { return static_cast(GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); } + int32_t dilation_w_factor() const { + return GetField(VT_DILATION_W_FACTOR, 1); + } + int32_t dilation_h_factor() const { + return GetField(VT_DILATION_H_FACTOR, 1); + } bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && VerifyField(verifier, VT_PADDING) && VerifyField(verifier, VT_STRIDE_W) && VerifyField(verifier, VT_STRIDE_H) && 
VerifyField(verifier, VT_FUSED_ACTIVATION_FUNCTION) && + VerifyField(verifier, VT_DILATION_W_FACTOR) && + VerifyField(verifier, VT_DILATION_H_FACTOR) && verifier.EndTable(); } Conv2DOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; @@ -1486,6 +1548,12 @@ struct Conv2DOptionsBuilder { void add_fused_activation_function(ActivationFunctionType fused_activation_function) { fbb_.AddElement(Conv2DOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast(fused_activation_function), 0); } + void add_dilation_w_factor(int32_t dilation_w_factor) { + fbb_.AddElement(Conv2DOptions::VT_DILATION_W_FACTOR, dilation_w_factor, 1); + } + void add_dilation_h_factor(int32_t dilation_h_factor) { + fbb_.AddElement(Conv2DOptions::VT_DILATION_H_FACTOR, dilation_h_factor, 1); + } explicit Conv2DOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); @@ -1503,8 +1571,12 @@ inline flatbuffers::Offset CreateConv2DOptions( Padding padding = Padding_SAME, int32_t stride_w = 0, int32_t stride_h = 0, - ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE) { + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE, + int32_t dilation_w_factor = 1, + int32_t dilation_h_factor = 1) { Conv2DOptionsBuilder builder_(_fbb); + builder_.add_dilation_h_factor(dilation_h_factor); + builder_.add_dilation_w_factor(dilation_w_factor); builder_.add_stride_h(stride_h); builder_.add_stride_w(stride_w); builder_.add_fused_activation_function(fused_activation_function); @@ -3702,14 +3774,30 @@ flatbuffers::Offset CreateLogSoftmaxOptions(flatbuffers::Flat struct CastOptionsT : public flatbuffers::NativeTable { typedef CastOptions TableType; - CastOptionsT() { + TensorType in_data_type; + TensorType out_data_type; + CastOptionsT() + : in_data_type(TensorType_FLOAT32), + out_data_type(TensorType_FLOAT32) { } }; struct CastOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { typedef 
CastOptionsT NativeTableType; + enum { + VT_IN_DATA_TYPE = 4, + VT_OUT_DATA_TYPE = 6 + }; + TensorType in_data_type() const { + return static_cast(GetField(VT_IN_DATA_TYPE, 0)); + } + TensorType out_data_type() const { + return static_cast(GetField(VT_OUT_DATA_TYPE, 0)); + } bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && + VerifyField(verifier, VT_IN_DATA_TYPE) && + VerifyField(verifier, VT_OUT_DATA_TYPE) && verifier.EndTable(); } CastOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; @@ -3720,6 +3808,12 @@ struct CastOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { struct CastOptionsBuilder { flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; + void add_in_data_type(TensorType in_data_type) { + fbb_.AddElement(CastOptions::VT_IN_DATA_TYPE, static_cast(in_data_type), 0); + } + void add_out_data_type(TensorType out_data_type) { + fbb_.AddElement(CastOptions::VT_OUT_DATA_TYPE, static_cast(out_data_type), 0); + } explicit CastOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); @@ -3733,8 +3827,12 @@ struct CastOptionsBuilder { }; inline flatbuffers::Offset CreateCastOptions( - flatbuffers::FlatBufferBuilder &_fbb) { + flatbuffers::FlatBufferBuilder &_fbb, + TensorType in_data_type = TensorType_FLOAT32, + TensorType out_data_type = TensorType_FLOAT32) { CastOptionsBuilder builder_(_fbb); + builder_.add_out_data_type(out_data_type); + builder_.add_in_data_type(in_data_type); return builder_.Finish(); } @@ -3780,45 +3878,139 @@ inline flatbuffers::Offset CreateDequantizeOptions( flatbuffers::Offset CreateDequantizeOptions(flatbuffers::FlatBufferBuilder &_fbb, const DequantizeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); -struct MaximumOptionsT : public flatbuffers::NativeTable { - typedef MaximumOptions TableType; - MaximumOptionsT() { +struct MaximumMinimumOptionsT : public 
flatbuffers::NativeTable { + typedef MaximumMinimumOptions TableType; + MaximumMinimumOptionsT() { } }; -struct MaximumOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef MaximumOptionsT NativeTableType; +struct MaximumMinimumOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef MaximumMinimumOptionsT NativeTableType; bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && verifier.EndTable(); } - MaximumOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; - void UnPackTo(MaximumOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; - static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const MaximumOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + MaximumMinimumOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(MaximumMinimumOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const MaximumMinimumOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); }; -struct MaximumOptionsBuilder { +struct MaximumMinimumOptionsBuilder { flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; - explicit MaximumOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + explicit MaximumMinimumOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); } - MaximumOptionsBuilder &operator=(const MaximumOptionsBuilder &); - flatbuffers::Offset Finish() { + MaximumMinimumOptionsBuilder &operator=(const MaximumMinimumOptionsBuilder &); + flatbuffers::Offset Finish() { const auto end = fbb_.EndTable(start_); - auto o = flatbuffers::Offset(end); + auto o = flatbuffers::Offset(end); return o; } }; -inline flatbuffers::Offset CreateMaximumOptions( +inline flatbuffers::Offset 
CreateMaximumMinimumOptions( flatbuffers::FlatBufferBuilder &_fbb) { - MaximumOptionsBuilder builder_(_fbb); + MaximumMinimumOptionsBuilder builder_(_fbb); return builder_.Finish(); } -flatbuffers::Offset CreateMaximumOptions(flatbuffers::FlatBufferBuilder &_fbb, const MaximumOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +flatbuffers::Offset CreateMaximumMinimumOptions(flatbuffers::FlatBufferBuilder &_fbb, const MaximumMinimumOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct ArgMaxOptionsT : public flatbuffers::NativeTable { + typedef ArgMaxOptions TableType; + TensorType output_type; + ArgMaxOptionsT() + : output_type(TensorType_FLOAT32) { + } +}; + +struct ArgMaxOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef ArgMaxOptionsT NativeTableType; + enum { + VT_OUTPUT_TYPE = 4 + }; + TensorType output_type() const { + return static_cast(GetField(VT_OUTPUT_TYPE, 0)); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_OUTPUT_TYPE) && + verifier.EndTable(); + } + ArgMaxOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(ArgMaxOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const ArgMaxOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct ArgMaxOptionsBuilder { + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_output_type(TensorType output_type) { + fbb_.AddElement(ArgMaxOptions::VT_OUTPUT_TYPE, static_cast(output_type), 0); + } + explicit ArgMaxOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ArgMaxOptionsBuilder &operator=(const ArgMaxOptionsBuilder &); + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto 
o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateArgMaxOptions( + flatbuffers::FlatBufferBuilder &_fbb, + TensorType output_type = TensorType_FLOAT32) { + ArgMaxOptionsBuilder builder_(_fbb); + builder_.add_output_type(output_type); + return builder_.Finish(); +} + +flatbuffers::Offset CreateArgMaxOptions(flatbuffers::FlatBufferBuilder &_fbb, const ArgMaxOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct LessOptionsT : public flatbuffers::NativeTable { + typedef LessOptions TableType; + LessOptionsT() { + } +}; + +struct LessOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef LessOptionsT NativeTableType; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + LessOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(LessOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const LessOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct LessOptionsBuilder { + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit LessOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + LessOptionsBuilder &operator=(const LessOptionsBuilder &); + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateLessOptions( + flatbuffers::FlatBufferBuilder &_fbb) { + LessOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreateLessOptions(flatbuffers::FlatBufferBuilder &_fbb, const LessOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); struct OperatorCodeT : public flatbuffers::NativeTable { typedef OperatorCode 
TableType; @@ -4051,8 +4243,14 @@ struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { const DequantizeOptions *builtin_options_as_DequantizeOptions() const { return builtin_options_type() == BuiltinOptions_DequantizeOptions ? static_cast(builtin_options()) : nullptr; } - const MaximumOptions *builtin_options_as_MaximumOptions() const { - return builtin_options_type() == BuiltinOptions_MaximumOptions ? static_cast(builtin_options()) : nullptr; + const MaximumMinimumOptions *builtin_options_as_MaximumMinimumOptions() const { + return builtin_options_type() == BuiltinOptions_MaximumMinimumOptions ? static_cast(builtin_options()) : nullptr; + } + const ArgMaxOptions *builtin_options_as_ArgMaxOptions() const { + return builtin_options_type() == BuiltinOptions_ArgMaxOptions ? static_cast(builtin_options()) : nullptr; + } + const LessOptions *builtin_options_as_LessOptions() const { + return builtin_options_type() == BuiltinOptions_LessOptions ? static_cast(builtin_options()) : nullptr; } const flatbuffers::Vector *custom_options() const { return GetPointer *>(VT_CUSTOM_OPTIONS); @@ -4232,8 +4430,16 @@ template<> inline const DequantizeOptions *Operator::builtin_options_as inline const MaximumOptions *Operator::builtin_options_as() const { - return builtin_options_as_MaximumOptions(); +template<> inline const MaximumMinimumOptions *Operator::builtin_options_as() const { + return builtin_options_as_MaximumMinimumOptions(); +} + +template<> inline const ArgMaxOptions *Operator::builtin_options_as() const { + return builtin_options_as_ArgMaxOptions(); +} + +template<> inline const LessOptions *Operator::builtin_options_as() const { + return builtin_options_as_LessOptions(); } struct OperatorBuilder { @@ -4703,6 +4909,8 @@ inline void Conv2DOptions::UnPackTo(Conv2DOptionsT *_o, const flatbuffers::resol { auto _e = stride_w(); _o->stride_w = _e; }; { auto _e = stride_h(); _o->stride_h = _e; }; { auto _e = fused_activation_function(); 
_o->fused_activation_function = _e; }; + { auto _e = dilation_w_factor(); _o->dilation_w_factor = _e; }; + { auto _e = dilation_h_factor(); _o->dilation_h_factor = _e; }; } inline flatbuffers::Offset Conv2DOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const Conv2DOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { @@ -4717,12 +4925,16 @@ inline flatbuffers::Offset CreateConv2DOptions(flatbuffers::FlatB auto _stride_w = _o->stride_w; auto _stride_h = _o->stride_h; auto _fused_activation_function = _o->fused_activation_function; + auto _dilation_w_factor = _o->dilation_w_factor; + auto _dilation_h_factor = _o->dilation_h_factor; return tflite::CreateConv2DOptions( _fbb, _padding, _stride_w, _stride_h, - _fused_activation_function); + _fused_activation_function, + _dilation_w_factor, + _dilation_h_factor); } inline Pool2DOptionsT *Pool2DOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { @@ -5727,6 +5939,8 @@ inline CastOptionsT *CastOptions::UnPack(const flatbuffers::resolver_function_t inline void CastOptions::UnPackTo(CastOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { (void)_o; (void)_resolver; + { auto _e = in_data_type(); _o->in_data_type = _e; }; + { auto _e = out_data_type(); _o->out_data_type = _e; }; } inline flatbuffers::Offset CastOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const CastOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { @@ -5737,8 +5951,12 @@ inline flatbuffers::Offset CreateCastOptions(flatbuffers::FlatBuffe (void)_rehasher; (void)_o; struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const CastOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _in_data_type = _o->in_data_type; + auto _out_data_type = _o->out_data_type; return tflite::CreateCastOptions( - _fbb); + _fbb, + _in_data_type, + _out_data_type); } inline DequantizeOptionsT *DequantizeOptions::UnPack(const 
flatbuffers::resolver_function_t *_resolver) const { @@ -5764,26 +5982,75 @@ inline flatbuffers::Offset CreateDequantizeOptions(flatbuffer _fbb); } -inline MaximumOptionsT *MaximumOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { - auto _o = new MaximumOptionsT(); +inline MaximumMinimumOptionsT *MaximumMinimumOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = new MaximumMinimumOptionsT(); + UnPackTo(_o, _resolver); + return _o; +} + +inline void MaximumMinimumOptions::UnPackTo(MaximumMinimumOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset MaximumMinimumOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const MaximumMinimumOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateMaximumMinimumOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateMaximumMinimumOptions(flatbuffers::FlatBufferBuilder &_fbb, const MaximumMinimumOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const MaximumMinimumOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateMaximumMinimumOptions( + _fbb); +} + +inline ArgMaxOptionsT *ArgMaxOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = new ArgMaxOptionsT(); + UnPackTo(_o, _resolver); + return _o; +} + +inline void ArgMaxOptions::UnPackTo(ArgMaxOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = output_type(); _o->output_type = _e; }; +} + +inline flatbuffers::Offset ArgMaxOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ArgMaxOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateArgMaxOptions(_fbb, _o, _rehasher); +} + +inline 
flatbuffers::Offset CreateArgMaxOptions(flatbuffers::FlatBufferBuilder &_fbb, const ArgMaxOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const ArgMaxOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _output_type = _o->output_type; + return tflite::CreateArgMaxOptions( + _fbb, + _output_type); +} + +inline LessOptionsT *LessOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = new LessOptionsT(); UnPackTo(_o, _resolver); return _o; } -inline void MaximumOptions::UnPackTo(MaximumOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { +inline void LessOptions::UnPackTo(LessOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { (void)_o; (void)_resolver; } -inline flatbuffers::Offset MaximumOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const MaximumOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { - return CreateMaximumOptions(_fbb, _o, _rehasher); +inline flatbuffers::Offset LessOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const LessOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateLessOptions(_fbb, _o, _rehasher); } -inline flatbuffers::Offset CreateMaximumOptions(flatbuffers::FlatBufferBuilder &_fbb, const MaximumOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset CreateLessOptions(flatbuffers::FlatBufferBuilder &_fbb, const LessOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { (void)_rehasher; (void)_o; - struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const MaximumOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; - return tflite::CreateMaximumOptions( + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const 
LessOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateLessOptions( _fbb); } @@ -6119,8 +6386,16 @@ inline bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *ob auto ptr = reinterpret_cast(obj); return verifier.VerifyTable(ptr); } - case BuiltinOptions_MaximumOptions: { - auto ptr = reinterpret_cast(obj); + case BuiltinOptions_MaximumMinimumOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_ArgMaxOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_LessOptions: { + auto ptr = reinterpret_cast(obj); return verifier.VerifyTable(ptr); } default: return false; @@ -6293,8 +6568,16 @@ inline void *BuiltinOptionsUnion::UnPack(const void *obj, BuiltinOptions type, c auto ptr = reinterpret_cast(obj); return ptr->UnPack(resolver); } - case BuiltinOptions_MaximumOptions: { - auto ptr = reinterpret_cast(obj); + case BuiltinOptions_MaximumMinimumOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_ArgMaxOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_LessOptions: { + auto ptr = reinterpret_cast(obj); return ptr->UnPack(resolver); } default: return nullptr; @@ -6455,9 +6738,17 @@ inline flatbuffers::Offset BuiltinOptionsUnion::Pack(flatbuffers::FlatBuff auto ptr = reinterpret_cast(value); return CreateDequantizeOptions(_fbb, ptr, _rehasher).Union(); } - case BuiltinOptions_MaximumOptions: { - auto ptr = reinterpret_cast(value); - return CreateMaximumOptions(_fbb, ptr, _rehasher).Union(); + case BuiltinOptions_MaximumMinimumOptions: { + auto ptr = reinterpret_cast(value); + return CreateMaximumMinimumOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_ArgMaxOptions: { + auto ptr = reinterpret_cast(value); + return CreateArgMaxOptions(_fbb, ptr, 
_rehasher).Union(); + } + case BuiltinOptions_LessOptions: { + auto ptr = reinterpret_cast(value); + return CreateLessOptions(_fbb, ptr, _rehasher).Union(); } default: return 0; } @@ -6617,8 +6908,16 @@ inline BuiltinOptionsUnion::BuiltinOptionsUnion(const BuiltinOptionsUnion &u) FL value = new DequantizeOptionsT(*reinterpret_cast(u.value)); break; } - case BuiltinOptions_MaximumOptions: { - value = new MaximumOptionsT(*reinterpret_cast(u.value)); + case BuiltinOptions_MaximumMinimumOptions: { + value = new MaximumMinimumOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_ArgMaxOptions: { + value = new ArgMaxOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_LessOptions: { + value = new LessOptionsT(*reinterpret_cast(u.value)); break; } default: @@ -6818,8 +7117,18 @@ inline void BuiltinOptionsUnion::Reset() { delete ptr; break; } - case BuiltinOptions_MaximumOptions: { - auto ptr = reinterpret_cast(value); + case BuiltinOptions_MaximumMinimumOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_ArgMaxOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_LessOptions: { + auto ptr = reinterpret_cast(value); delete ptr; break; } diff --git a/tensorflow/contrib/lite/testing/BUILD b/tensorflow/contrib/lite/testing/BUILD index 62f20638bac943e9f674087e46c18233e8b09d63..bd888a415b035917e64932103740890876eb493e 100644 --- a/tensorflow/contrib/lite/testing/BUILD +++ b/tensorflow/contrib/lite/testing/BUILD @@ -18,6 +18,7 @@ gen_zipped_test_files( name = "optest", files = [ "add.zip", + "arg_max.zip", "avg_pool.zip", "batch_to_space_nd.zip", "concat.zip", @@ -33,14 +34,15 @@ gen_zipped_test_files( "global_batch_norm.zip", "l2_pool.zip", "l2norm.zip", + "less.zip", "local_response_norm.zip", "log_softmax.zip", "max_pool.zip", "maximum.zip", "mean.zip", + "minimum.zip", "mul.zip", "pad.zip", - "prelu.zip", "relu.zip", "relu1.zip", "relu6.zip", @@ 
-160,6 +162,9 @@ cc_test( size = "small", srcs = ["tflite_driver_test.cc"], data = ["//tensorflow/contrib/lite:testdata/multi_add.bin"], + tags = [ + "tflite_not_portable_android", + ], deps = [ ":tflite_driver", "@com_google_googletest//:gtest_main", @@ -194,7 +199,6 @@ cc_library( cc_library( name = "util", - testonly = 1, hdrs = ["util.h"], ) @@ -249,7 +253,6 @@ cc_test( cc_library( name = "generate_testspec", - testonly = 1, srcs = ["generate_testspec.cc"], hdrs = ["generate_testspec.h"], deps = [ @@ -275,7 +278,6 @@ cc_test( cc_library( name = "tflite_diff_util", - testonly = 1, srcs = ["tflite_diff_util.cc"], hdrs = ["tflite_diff_util.h"], deps = [ @@ -293,7 +295,6 @@ cc_library( cc_library( name = "tflite_diff_flags", - testonly = 1, hdrs = ["tflite_diff_flags.h"], deps = [ ":split", @@ -336,6 +337,15 @@ tf_cc_test( ], ) +cc_binary( + name = "tflite_diff", + srcs = ["tflite_diff_example_test.cc"], + deps = [ + ":tflite_diff_flags", + ":tflite_diff_util", + ], +) + tf_cc_test( name = "generated_examples_zip_test", size = "large", diff --git a/tensorflow/contrib/lite/testing/generate_examples.py b/tensorflow/contrib/lite/testing/generate_examples.py index 80450524520892bb6f9d41f0d2c79f355ca3af15..f72a4e0d8cbc89c9de5ee0df61f78d7d32bde73e 100644 --- a/tensorflow/contrib/lite/testing/generate_examples.py +++ b/tensorflow/contrib/lite/testing/generate_examples.py @@ -17,10 +17,9 @@ Usage: -generate_examples zipped +generate_examples bazel run //tensorflow/contrib/lite/testing:generate_examples - third_party/tensorflow/contrib/lite/testing/generated_examples zipped """ from __future__ import absolute_import from __future__ import division @@ -52,8 +51,6 @@ from tensorflow.python.ops import rnn parser = argparse.ArgumentParser(description="Script to generate TFLite tests.") parser.add_argument("output_path", help="Directory where the outputs will be go.") -# TODO(ahentz): remove this flag -parser.add_argument("type", help="zipped") 
parser.add_argument("--zip_to_output", type=str, help="Particular zip to output.", @@ -96,9 +93,6 @@ KNOWN_BUGS = { r"softmax.*input_shape=\[1,3,4,3\]": "67749831", # SpaceToDepth only supports float32. r"space_to_depth.*(float16|int32|uint8|int64)": "68018134", - # BatchToSpaceND doesn't support cropping. This catches test cases with - # const tensors as crops. - r"batch_to_space_nd.*crops=\[\[1,1\],\[1,1\]\]": "70594634", # BatchToSpaceND only supports 4D tensors. r"batch_to_space_nd.*input_shape=\[8,2,2,2,1,1\]": "70594733", # Div will use floordiv. @@ -107,6 +101,10 @@ KNOWN_BUGS = { r"strided_slice.*begin=\[0\].*end=\[1\].*": "73170889", # No support for SplitV r"split.*num_or_size_splits=\[2,2\]": "73377559", + # Needs support for dimensions other than the last one in argmax. + r"arg_max.*axis=0.*": "77546240", + r"arg_max.*axis=1.*": "77546240", + r"arg_max.*axis=2.*": "77546240", } @@ -543,6 +541,18 @@ def make_pool_tests(pool_op_in): return f +def make_l2_pool_tests(zip_path): + make_pool_tests(make_l2_pool)(zip_path) + + +def make_avg_pool_tests(zip_path): + make_pool_tests(tf.nn.avg_pool)(zip_path) + + +def make_max_pool_tests(zip_path): + make_pool_tests(tf.nn.max_pool)(zip_path) + + def make_relu_tests(zip_path): """Make a set of tests to do relu.""" @@ -617,54 +627,6 @@ def make_relu6_tests(zip_path): make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs) -def make_prelu_tests(zip_path): - """Make a set of tests to do PReLU.""" - - test_parameters = [{ - # The canonical case for image processing is having a 4D `input` (NHWC) - # and `shared_axes`=[1, 2], so the alpha parameter is per channel. 
- "input_shape": [[1, 10, 10, 3], [3, 3, 3, 3]], - "shared_axes": [[1, 2], [1]], - }] - - def build_graph(parameters): - """Build the graph for the test case.""" - - input_tensor = tf.placeholder( - dtype=tf.float32, name="input", shape=parameters["input_shape"]) - prelu = tf.keras.layers.PReLU(shared_axes=parameters["shared_axes"]) - out = prelu(input_tensor) - return [input_tensor], [out] - - def build_inputs(parameters, sess, inputs, outputs): - """Build the inputs for the test case.""" - - input_shape = parameters["input_shape"] - input_values = create_tensor_data( - np.float32, input_shape, min_value=-10, max_value=10) - shared_axes = parameters["shared_axes"] - - alpha_shape = [] - for dim in range(1, len(input_shape)): - alpha_shape.append(1 if dim in shared_axes else input_shape[dim]) - - alpha_values = create_tensor_data(np.float32, alpha_shape) - - with tf.variable_scope("", reuse=True): - alpha = tf.get_variable("p_re_lu/alpha") - sess.run(alpha.assign(alpha_values)) - - return [input_values], sess.run( - outputs, feed_dict=dict(zip(inputs, [input_values]))) - - make_zip_of_tests( - zip_path, - test_parameters, - build_graph, - build_inputs, - use_frozen_graph=True) - - # This function tests various TensorFLow functions that generates Const op, # including `tf.ones`, `tf.zeros` and random functions. 
def make_constant_tests(zip_path): @@ -897,11 +859,62 @@ def make_maximum_tests(zip_path): make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs) +def make_minimum_tests(zip_path): + """Make a set of tests to do minimum.""" + + test_parameters = [{ + "input_dtype": [tf.float32], + "input_shape_1": [[3], [1, 100], [4, 2, 3], [5, 224, 224, 3]], + "input_shape_2": [[3], [1, 100], [4, 2, 3], [5, 224, 224, 3]], + }] + + def build_graph(parameters): + """Build the minimum op testing graph.""" + input_tensor_1 = tf.placeholder( + dtype=parameters["input_dtype"], + name="input_1", + shape=parameters["input_shape_1"]) + input_tensor_2 = tf.placeholder( + dtype=parameters["input_dtype"], + name="input_2", + shape=parameters["input_shape_2"]) + + out = tf.minimum(input_tensor_1, input_tensor_2) + return [input_tensor_1, input_tensor_2], [out] + + def build_inputs(parameters, sess, inputs, outputs): + values = [ + create_tensor_data(parameters["input_dtype"], + parameters["input_shape_1"]), + create_tensor_data(parameters["input_dtype"], + parameters["input_shape_2"]) + ] + return values, sess.run(outputs, feed_dict=dict(zip(inputs, values))) + + make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs) + + def make_binary_op_tests_func(binary_operator): """Return a function that does a test on a binary operator.""" return lambda zip_path: make_binary_op_tests(zip_path, binary_operator) +def make_add_tests(zip_path): + make_binary_op_tests(zip_path, tf.add) + + +def make_div_tests(zip_path): + make_binary_op_tests(zip_path, tf.div) + + +def make_sub_tests(zip_path): + make_binary_op_tests(zip_path, tf.subtract) + + +def make_mul_tests(zip_path): + make_binary_op_tests(zip_path, tf.multiply) + + def make_gather_tests(zip_path): """Make a set of tests to do gather.""" @@ -1026,6 +1039,7 @@ def make_conv_tests(zip_path): "input_shape": [[1, 3, 4, 3]], "filter_shape": [[1, 1, 3, 2]], "strides": [[1, 1, 1, 1], [1, 2, 3, 1]], + "dilations": [[1, 1, 1, 
1], [1, 3, 2, 1], [1, 2, 2, 1]], "padding": ["SAME", "VALID"], "data_format": ["NHWC"], # TODO(aselle): NCHW would be good "constant_filter": [True, False], @@ -1034,6 +1048,7 @@ def make_conv_tests(zip_path): "input_shape": [[2, 14, 14, 2]], "filter_shape": [[6, 6, 2, 2]], "strides": [[1, 1, 1, 1], [1, 2, 3, 1]], + "dilations": [[1, 1, 1, 1], [1, 2, 2, 1]], "padding": ["SAME", "VALID"], "data_format": ["NHWC"], # TODO(aselle): NCHW would be good "constant_filter": [True, False], @@ -1059,6 +1074,7 @@ def make_conv_tests(zip_path): input_tensor, filter_input, strides=parameters["strides"], + dilations=parameters["dilations"], padding=parameters["padding"], data_format=parameters["data_format"]) return input_tensors, [out] @@ -1169,7 +1185,7 @@ def make_split_tests(zip_path): make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs) -def make_concatenation_tests(zip_path): +def make_concat_tests(zip_path): """Make a set of tests to do concatenation.""" test_parameters = [{ @@ -1579,7 +1595,7 @@ def make_batch_to_space_nd_tests(zip_path): test_parameters = [ { "dtype": [tf.float32, tf.int64, tf.int32], - "input_shape": [[12, 2, 2, 1]], + "input_shape": [[12, 3, 3, 1]], "block_shape": [[1, 4], [2, 2], [3, 4]], "crops": [[[0, 0], [0, 0]], [[1, 1], [1, 1]]], "constant_block_shape": [True, False], @@ -1742,19 +1758,7 @@ def make_strided_slice_tests(zip_path): "shrink_axis_mask": [None, 1, 8, 11, 15, -1], "constant_indices": [False, True], }, - # - { - "dtype": [tf.float32], - "index_type": [tf.int32], - "input_shape": [[12, 2, 2, 5]], - "begin": [[0]], - "end": [[1]], - "strides": [[1]], - "begin_mask": [0], - "end_mask": [0], - "shrink_axis_mask": [1], - "constant_indices": [True], - }, + # TODO(b/73170889) Restore test paramaters removed in cl/191608113. 
# 2-D { "dtype": [tf.float32, tf.int32, tf.int64], @@ -1929,7 +1933,7 @@ def make_l2_pool(input_tensor, ksize, strides, padding, data_format): def make_topk_tests(zip_path): - """Make a set of tests to do gather.""" + """Make a set of tests to do topk.""" test_parameters = [{ "input_dtype": [tf.float32, tf.int32], @@ -1937,7 +1941,7 @@ def make_topk_tests(zip_path): }] def build_graph(parameters): - """Build the gather op testing graph.""" + """Build the topk op testing graph.""" input_value = tf.placeholder( dtype=parameters["input_dtype"], name="input", @@ -1954,6 +1958,69 @@ def make_topk_tests(zip_path): make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs) + +def make_arg_max_tests(zip_path): + """Make a set of tests to do arg_max.""" + + test_parameters = [{ + "input_dtype": [tf.float32, tf.int32], + "input_shape": [[1, 1, 1, 3], [2, 3, 4, 5], [2, 3, 3], [5, 5], [10]], + "axis": [0, 1, 2, 3], + "output_type": [tf.int32, tf.int64], + }] + + def build_graph(parameters): + """Build the topk op testing graph.""" + input_value = tf.placeholder( + dtype=parameters["input_dtype"], + name="input", + shape=parameters["input_shape"]) + axis = tf.constant(parameters["axis"], name="axis") + out = tf.arg_max(input_value, axis, output_type=parameters["output_type"]) + return [input_value], [out] + + def build_inputs(parameters, sess, inputs, outputs): + input_value = create_tensor_data(parameters["input_dtype"], + parameters["input_shape"]) + return [input_value], sess.run( + outputs, feed_dict=dict(zip(inputs, [input_value]))) + + make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs) + + +def make_less_tests(zip_path): + """Make a set of tests to do less.""" + + test_parameters = [{ + "input_dtype": [tf.float32, tf.int32, tf.int64], + "input_shape_pair": [([1, 1, 1, 3], [1, 1, 1, 3]), + ([2, 3, 4, 5], [2, 3, 4, 5]), ([2, 3, 3], [2, 3]), + ([5, 5], [1]), ([10], [2, 4, 10])], + }] + + def build_graph(parameters): + """Build the less op 
testing graph.""" + input_value1 = tf.placeholder( + dtype=parameters["input_dtype"], + name="input1", + shape=parameters["input_shape_pair"][0]) + input_value2 = tf.placeholder( + dtype=parameters["input_dtype"], + name="input2", + shape=parameters["input_shape_pair"][1]) + out = tf.less(input_value1, input_value2) + return [input_value1, input_value2], [out] + + def build_inputs(parameters, sess, inputs, outputs): + input_value1 = create_tensor_data(parameters["input_dtype"], + parameters["input_shape_pair"][0]) + input_value2 = create_tensor_data(parameters["input_dtype"], + parameters["input_shape_pair"][1]) + return [input_value1, input_value2], sess.run( + outputs, feed_dict=dict(zip(inputs, [input_value1, input_value2]))) + + make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs) + # Toco binary path provided by the generate rule. bin_path = None @@ -1966,69 +2033,26 @@ def main(unused_args): if not os.path.isdir(x): raise RuntimeError("Failed to create dir %r" % x) - if FLAGS.type == "zipped": - opstest_path = os.path.join(FLAGS.output_path) - mkdir_if_not_exist(opstest_path) - def _path(filename): - return os.path.join(opstest_path, filename) - - dispatch = { - "control_dep.zip": make_control_dep_tests, - "add.zip": make_binary_op_tests_func(tf.add), - "space_to_batch_nd.zip": make_space_to_batch_nd_tests, - "div.zip": make_binary_op_tests_func(tf.div), - "sub.zip": make_binary_op_tests_func(tf.subtract), - "batch_to_space_nd.zip": make_batch_to_space_nd_tests, - "conv.zip": make_conv_tests, - "constant.zip": make_constant_tests, - "depthwiseconv.zip": make_depthwiseconv_tests, - "concat.zip": make_concatenation_tests, - "fully_connected.zip": make_fully_connected_tests, - "global_batch_norm.zip": make_global_batch_norm_tests, - "gather.zip": make_gather_tests, - "fused_batch_norm.zip": make_fused_batch_norm_tests, - "l2norm.zip": make_l2norm_tests, - "local_response_norm.zip": make_local_response_norm_tests, - "mul.zip": 
make_binary_op_tests_func(tf.multiply), - "relu.zip": make_relu_tests, - "relu1.zip": make_relu1_tests, - "relu6.zip": make_relu6_tests, - "prelu.zip": make_prelu_tests, - "l2_pool.zip": make_pool_tests(make_l2_pool), - "avg_pool.zip": make_pool_tests(tf.nn.avg_pool), - "max_pool.zip": make_pool_tests(tf.nn.max_pool), - "pad.zip": make_pad_tests, - "reshape.zip": make_reshape_tests, - "resize_bilinear.zip": make_resize_bilinear_tests, - "sigmoid.zip": make_sigmoid_tests, - "softmax.zip": make_softmax_tests, - "space_to_depth.zip": make_space_to_depth_tests, - "topk.zip": make_topk_tests, - "split.zip": make_split_tests, - "transpose.zip": make_transpose_tests, - "mean.zip": make_mean_tests, - "squeeze.zip": make_squeeze_tests, - "strided_slice.zip": make_strided_slice_tests, - "exp.zip": make_exp_tests, - "log_softmax.zip": make_log_softmax_tests, - "lstm.zip": make_lstm_tests, - "maximum.zip": make_maximum_tests, - } - out = FLAGS.zip_to_output - bin_path = FLAGS.toco - if out in dispatch: - dispatch[out](_path(out)) - else: - raise RuntimeError("Invalid zip to output %r" % out) + opstest_path = os.path.join(FLAGS.output_path) + mkdir_if_not_exist(opstest_path) - else: - raise RuntimeError("Invalid argument for type of generation.") + out = FLAGS.zip_to_output + bin_path = FLAGS.toco + test_function = ("make_%s_tests" % out.replace(".zip", "")) + if test_function not in globals(): + raise RuntimeError("Can't find a test function to create %r. Tried %r" % + (out, test_function)) + + # TODO(ahentz): accessing globals() is not very elegant. We should either + # break this file into multiple tests or use decorator-based registration to + # avoid using globals(). 
+ globals()[test_function](os.path.join(opstest_path, out)) if __name__ == "__main__": FLAGS, unparsed = parser.parse_known_args() if unparsed: - print("Usage: %s zipped ") + print("Usage: %s ") else: tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) diff --git a/tensorflow/contrib/lite/testing/generate_testspec.cc b/tensorflow/contrib/lite/testing/generate_testspec.cc index eb3deafb6986e877f0a553a8b6f712102af4caca..6580845af42b3cdded19b578b41c682089aaf9ef 100644 --- a/tensorflow/contrib/lite/testing/generate_testspec.cc +++ b/tensorflow/contrib/lite/testing/generate_testspec.cc @@ -22,7 +22,22 @@ limitations under the License. namespace tflite { namespace testing { -void GenerateTestSpecFromTensorflowModel( +template +void GenerateCsv(const std::vector& shape, float min, float max, + string* out) { + auto random_float = [](int min, int max) { + static unsigned int seed; + return min + (max - min) * static_cast(rand_r(&seed)) / RAND_MAX; + }; + + std::function random_t = [&](int) { + return static_cast(random_float(min, max)); + }; + std::vector data = GenerateRandomTensor(shape, random_t); + *out = Join(data.data(), data.size(), ","); +} + +bool GenerateTestSpecFromTensorflowModel( std::iostream& stream, const string& tensorflow_model_path, const string& tflite_model_path, const std::vector& input_layer, const std::vector& input_layer_type, @@ -31,12 +46,6 @@ void GenerateTestSpecFromTensorflowModel( CHECK_EQ(input_layer.size(), input_layer_type.size()); CHECK_EQ(input_layer.size(), input_layer_shape.size()); - // Initialize random functions. - static unsigned int seed = 0; - std::function float_rand = [](int idx) { - return static_cast(rand_r(&seed)) / RAND_MAX - 0.5f; - }; - // Generate inputs. 
std::vector input_values; input_values.resize(input_layer.size()); @@ -46,15 +55,25 @@ void GenerateTestSpecFromTensorflowModel( auto shape = Split(input_layer_shape[i], ","); switch (type) { - case tensorflow::DT_FLOAT: { - const auto& data = GenerateRandomTensor(shape, float_rand); - input_values[i] = Join(data.data(), data.size(), ","); + case tensorflow::DT_FLOAT: + GenerateCsv(shape, -0.5, 0.5, &input_values[i]); + break; + case tensorflow::DT_UINT8: + GenerateCsv(shape, 0, 255, &input_values[i]); + break; + case tensorflow::DT_INT32: + GenerateCsv(shape, -100, 100, &input_values[i]); + break; + case tensorflow::DT_INT64: + GenerateCsv(shape, -100, 100, &input_values[i]); + break; + case tensorflow::DT_BOOL: + GenerateCsv(shape, 0.01, 1.99, &input_values[i]); break; - } default: - - fprintf(stderr, "Unsupported type %d when generating testspec\n", type); - return; + fprintf(stderr, "Unsupported type %d (%s) when generating testspec.\n", + type, input_layer_type[i].c_str()); + return false; } } @@ -82,6 +101,8 @@ void GenerateTestSpecFromTensorflowModel( stream << " output: \"" << runner.ReadOutput(i) << "\"\n"; } stream << "}\n"; + + return true; } } // namespace testing diff --git a/tensorflow/contrib/lite/testing/generate_testspec.h b/tensorflow/contrib/lite/testing/generate_testspec.h index 3529ee709b66625fff6e2a35b78e47f3778f0fe7..6e31a853c3f7f82a89126ff83af784ffd418741a 100644 --- a/tensorflow/contrib/lite/testing/generate_testspec.h +++ b/tensorflow/contrib/lite/testing/generate_testspec.h @@ -34,7 +34,7 @@ namespace testing { // input_layer_type: datatypes of input tensors. Example: float // input_layer_shape: shapes of input tensors, separated by comma. example: // 1,3,4 output_layer: names of output tensors. 
Example: output -void GenerateTestSpecFromTensorflowModel( +bool GenerateTestSpecFromTensorflowModel( std::iostream& stream, const string& tensorflow_model_path, const string& tflite_model_path, const std::vector& input_layer, const std::vector& input_layer_type, diff --git a/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc b/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc index 6697b86e798756bf3273e36dc105eee17d146aa6..9da8bd7a28891fc4a534710fd0fd8344cde1b197 100644 --- a/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc +++ b/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc @@ -94,6 +94,11 @@ std::map kBrokenTests = { // No support for axis!=0 in GatherV2. {R"(^\/gather.*axis=1)", "76910444"}, + + // No support for arbitrary dimensions in ArgMax. + {R"(^\/arg_max.*axis=0)", "77546240"}, + {R"(^\/arg_max.*axis=1)", "77546240"}, + {R"(^\/arg_max.*axis=2)", "77546240"}, }; // Allows test data to be unzipped into a temporary directory and makes @@ -236,6 +241,7 @@ TEST_P(OpsTest, RunStuff) { ::testing::ValuesIn(UnarchiveZipAndFindTestNames(#zip_base ".zip"))); INSTANTIATE_TESTS(add) +INSTANTIATE_TESTS(arg_max) INSTANTIATE_TESTS(avg_pool) INSTANTIATE_TESTS(batch_to_space_nd) INSTANTIATE_TESTS(concat) @@ -256,11 +262,12 @@ INSTANTIATE_TESTS(log_softmax) INSTANTIATE_TESTS(maximum) INSTANTIATE_TESTS(max_pool) INSTANTIATE_TESTS(mean) +INSTANTIATE_TESTS(minimum) INSTANTIATE_TESTS(mul) INSTANTIATE_TESTS(pad) INSTANTIATE_TESTS(relu) INSTANTIATE_TESTS(relu1) -INSTANTIATE_TESTS(prelu) +// INSTANTIATE_TESTS(prelu) INSTANTIATE_TESTS(relu6) INSTANTIATE_TESTS(reshape) INSTANTIATE_TESTS(resize_bilinear) @@ -273,6 +280,7 @@ INSTANTIATE_TESTS(squeeze) INSTANTIATE_TESTS(strided_slice) INSTANTIATE_TESTS(sub) INSTANTIATE_TESTS(transpose) +INSTANTIATE_TESTS(less) } // namespace testing } // namespace tflite diff --git a/tensorflow/contrib/lite/testing/split.h b/tensorflow/contrib/lite/testing/split.h index 
428cfda4f216f0ee6409a32c43a4cf91ecc11922..896f2949efa6aeda76940bae18a11dccf3c2f01b 100644 --- a/tensorflow/contrib/lite/testing/split.h +++ b/tensorflow/contrib/lite/testing/split.h @@ -80,6 +80,16 @@ inline std::vector Split(const string& s, const string& delimiter) { return fields; } +template <> +inline std::vector Split(const string& s, const string& delimiter) { + std::vector fields; + for (const auto& p : SplitToPos(s, delimiter)) { + fields.push_back( + static_cast(strtol(s.data() + p.first, nullptr, 10))); + } + return fields; +} + } // namespace testing } // namespace tflite diff --git a/tensorflow/contrib/lite/testing/split_test.cc b/tensorflow/contrib/lite/testing/split_test.cc index 3d1e25d9c7dab50984928adfe0d7392675578662..76b918cbcd83ef43c52057b84bcc2a8f4ff6b8f7 100644 --- a/tensorflow/contrib/lite/testing/split_test.cc +++ b/tensorflow/contrib/lite/testing/split_test.cc @@ -52,6 +52,11 @@ TEST(SplitTest, SplitUint8) { EXPECT_THAT(Split("1,-1,258", ","), ElementsAre(1, 255, 2)); } +TEST(SplitTest, SplitBool) { + EXPECT_THAT(Split("1, 0, 0, 1", ","), + ElementsAre(true, false, false, true)); +} + } // namespace } // namespace testing } // namespace tflite diff --git a/tensorflow/contrib/lite/testing/tf_driver.cc b/tensorflow/contrib/lite/testing/tf_driver.cc index 2c253bb1983e5ddc5bc12858c929585d1bcee710..7b295875aab12bf48da2341ce05dd53442464cf0 100644 --- a/tensorflow/contrib/lite/testing/tf_driver.cc +++ b/tensorflow/contrib/lite/testing/tf_driver.cc @@ -87,10 +87,9 @@ TfDriver::TfDriver(const std::vector& input_layer, void TfDriver::LoadModel(const string& bin_file_path) { if (!IsValid()) return; - std::cout << std::endl << "Loading model: " << bin_file_path << std::endl; std::ifstream model(bin_file_path); if (model.fail()) { - Invalidate("Failed to find the model"); + Invalidate("Failed to find the model " + bin_file_path); return; } @@ -121,6 +120,10 @@ void TfDriver::SetInput(int id, const string& csv_values) { FillTensorWithData(&tensor, 
csv_values); break; } + case tensorflow::DT_UINT8: { + FillTensorWithData(&tensor, csv_values); + break; + } default: fprintf(stderr, "Unsupported type %d in SetInput\n", input_types_[id]); Invalidate("Unsupported tensor data type"); @@ -162,6 +165,8 @@ string TfDriver::ReadOutput(int id) { return TensorDataToCsvString(output_tensors_[id]); case tensorflow::DT_INT32: return TensorDataToCsvString(output_tensors_[id]); + case tensorflow::DT_UINT8: + return TensorDataToCsvString(output_tensors_[id]); default: fprintf(stderr, "Unsupported type %d in ResetTensor\n", input_types_[id]); Invalidate("Unsupported tensor data type"); diff --git a/tensorflow/contrib/lite/testing/tflite_diff_example_test.cc b/tensorflow/contrib/lite/testing/tflite_diff_example_test.cc index 3817e68111dbaaf2a38ceff9fbc38f30f303cb5f..5afa0f800cdaa8bf70a11cb6e2ac64ace8138e79 100644 --- a/tensorflow/contrib/lite/testing/tflite_diff_example_test.cc +++ b/tensorflow/contrib/lite/testing/tflite_diff_example_test.cc @@ -19,10 +19,13 @@ limitations under the License. int main(int argc, char** argv) { ::tflite::testing::DiffOptions options = ::tflite::testing::ParseTfliteDiffFlags(&argc, argv); + if (options.tensorflow_model.empty()) return 1; + int failure_count = 0; for (int i = 0; i < 100; i++) { if (!tflite::testing::RunDiffTest(options)) { - return 1; + ++failure_count; } } - return 0; + fprintf(stderr, "Num errors: %d\n", failure_count); + return failure_count != 0 ? 
1 : 0; } diff --git a/tensorflow/contrib/lite/testing/tflite_diff_flags.h b/tensorflow/contrib/lite/testing/tflite_diff_flags.h index 5f1129d501b7235f1202b704cf36904e07b8720e..706108ed73bb3fd9bd784cffffe322d6981433e6 100644 --- a/tensorflow/contrib/lite/testing/tflite_diff_flags.h +++ b/tensorflow/contrib/lite/testing/tflite_diff_flags.h @@ -51,9 +51,11 @@ DiffOptions ParseTfliteDiffFlags(int* argc, char** argv) { "output_1,output_2"), }; + bool no_inputs = *argc == 1; bool success = tensorflow::Flags::Parse(argc, argv, flags); - if (!success || (*argc == 2 && !strcmp(argv[1], "--helpfull"))) { + if (!success || no_inputs || (*argc == 2 && !strcmp(argv[1], "--helpfull"))) { fprintf(stderr, "%s", tensorflow::Flags::Usage(argv[0], flags).c_str()); + return {}; } return {values.tensorflow_model, diff --git a/tensorflow/contrib/lite/testing/tflite_diff_util.cc b/tensorflow/contrib/lite/testing/tflite_diff_util.cc index 9ef4e1f66c7d31c746c18d63495e760585d4af9e..f601d3752ddb5df9f2b5ac73d9bc303efaade4a5 100644 --- a/tensorflow/contrib/lite/testing/tflite_diff_util.cc +++ b/tensorflow/contrib/lite/testing/tflite_diff_util.cc @@ -27,13 +27,13 @@ namespace testing { bool RunDiffTest(const DiffOptions& options) { std::stringstream tflite_stream; - GenerateTestSpecFromTensorflowModel( - tflite_stream, options.tensorflow_model, options.tflite_model, - options.input_layer, options.input_layer_type, options.input_layer_shape, - options.output_layer); + if (!GenerateTestSpecFromTensorflowModel( + tflite_stream, options.tensorflow_model, options.tflite_model, + options.input_layer, options.input_layer_type, + options.input_layer_shape, options.output_layer)) + return false; TfLiteDriver tflite_driver(/*use_nnapi=*/true); tflite_driver.LoadModel(options.tflite_model); - std::cout << tflite_stream.str(); return tflite::testing::ParseAndRunTests(&tflite_stream, &tflite_driver); } } // namespace testing diff --git a/tensorflow/contrib/lite/testing/tflite_driver.cc 
b/tensorflow/contrib/lite/testing/tflite_driver.cc index c399f4f2b78d7420ac6ea7098ed44b2122216279..58fe5bd6e40b3d5979d64fae659eb39bfe87c265 100644 --- a/tensorflow/contrib/lite/testing/tflite_driver.cc +++ b/tensorflow/contrib/lite/testing/tflite_driver.cc @@ -42,6 +42,10 @@ template <> uint8_t Value(const TfLitePtrUnion& data, int index) { return data.uint8[index]; } +template <> +bool Value(const TfLitePtrUnion& data, int index) { + return data.b[index]; +} template void SetTensorData(const std::vector& values, TfLitePtrUnion* data) { @@ -79,6 +83,8 @@ class TfLiteDriver::Expectation { return TypedCheck(verbose, tensor); case kTfLiteUInt8: return TypedCheck(verbose, tensor); + case kTfLiteBool: + return TypedCheck(verbose, tensor); default: fprintf(stderr, "Unsupported type %d in Check\n", tensor.type); return false; @@ -143,7 +149,6 @@ void TfLiteDriver::AllocateTensors() { void TfLiteDriver::LoadModel(const string& bin_file_path) { if (!IsValid()) return; - std::cout << std::endl << "Loading model: " << bin_file_path << std::endl; model_ = FlatBufferModel::BuildFromFile(GetFullPath(bin_file_path).c_str()); if (!model_) { @@ -204,6 +209,12 @@ void TfLiteDriver::SetInput(int id, const string& csv_values) { SetTensorData(values, &tensor->data); break; } + case kTfLiteBool: { + const auto& values = testing::Split(csv_values, ","); + if (!CheckSizes(tensor->bytes, values.size())) return; + SetTensorData(values, &tensor->data); + break; + } default: fprintf(stderr, "Unsupported type %d in SetInput\n", tensor->type); Invalidate("Unsupported tensor data type"); @@ -232,6 +243,9 @@ void TfLiteDriver::SetExpectation(int id, const string& csv_values) { case kTfLiteUInt8: expected_output_[id]->SetData(csv_values); break; + case kTfLiteBool: + expected_output_[id]->SetData(csv_values); + break; default: fprintf(stderr, "Unsupported type %d in SetExpectation\n", tensor->type); Invalidate("Unsupported tensor data type"); diff --git a/tensorflow/contrib/lite/toco/BUILD 
b/tensorflow/contrib/lite/toco/BUILD index d552de313c815828a9a6f4867fe2e20643fc1bd7..3f73ef620e121bf3568db55e8a9fbe799900bc90 100644 --- a/tensorflow/contrib/lite/toco/BUILD +++ b/tensorflow/contrib/lite/toco/BUILD @@ -219,6 +219,8 @@ cc_library( "graph_transformations/drop_fake_quant.cc", "graph_transformations/drop_im2col_arrays.cc", "graph_transformations/ensure_bias_vectors.cc", + "graph_transformations/ensure_uint8_weights_safe_for_fast_int8_kernels.cc", + "graph_transformations/experimental_shuffle_fc_weights.cc", "graph_transformations/fuse_activation_functions.cc", "graph_transformations/fuse_binary_into_following_affine.cc", "graph_transformations/fuse_binary_into_preceding_affine.cc", @@ -234,9 +236,14 @@ cc_library( "graph_transformations/identify_relu1.cc", "graph_transformations/lstm_utils.cc", "graph_transformations/make_initial_dequantize_operator.cc", + "graph_transformations/merge_reshape_into_preceding_transpose.cc", "graph_transformations/propagate_activation_function_into_constants.cc", "graph_transformations/propagate_array_data_types.cc", + "graph_transformations/propagate_default_min_max.cc", + "graph_transformations/propagate_fake_quant_num_bits.cc", "graph_transformations/propagate_fixed_sizes.cc", + "graph_transformations/quantization_util.cc", + "graph_transformations/quantization_util.h", "graph_transformations/quantize.cc", "graph_transformations/read_fake_quant_min_max.cc", "graph_transformations/remove_final_dequantize_op.cc", @@ -245,13 +252,16 @@ cc_library( "graph_transformations/remove_trivial_binary.cc", "graph_transformations/remove_trivial_concatenation.cc", "graph_transformations/remove_trivial_concatenation_input.cc", + "graph_transformations/remove_trivial_fake_quant.cc", "graph_transformations/remove_trivial_passthrough.cc", "graph_transformations/remove_trivial_passthrough.h", "graph_transformations/remove_trivial_quantized_activation_func.cc", + "graph_transformations/remove_trivial_quantized_min_max.cc", 
"graph_transformations/remove_trivial_reshape.cc", "graph_transformations/remove_trivial_slice.cc", "graph_transformations/remove_unused_op.cc", - "graph_transformations/reorder_activation_functions.cc", + "graph_transformations/reorder_elementwise_unary.cc", + "graph_transformations/reorder_reshape_transpose.cc", "graph_transformations/resolve_batch_normalization.cc", "graph_transformations/resolve_batch_to_space_nd_attributes.cc", "graph_transformations/resolve_constant_binary.cc", @@ -259,7 +269,9 @@ cc_library( "graph_transformations/resolve_constant_fake_quant.cc", "graph_transformations/resolve_constant_fill.cc", "graph_transformations/resolve_constant_gather.cc", + "graph_transformations/resolve_constant_random_uniform.cc", "graph_transformations/resolve_constant_range.cc", + "graph_transformations/resolve_constant_reshape.cc", "graph_transformations/resolve_constant_shape_or_rank.cc", "graph_transformations/resolve_constant_stack.cc", "graph_transformations/resolve_constant_strided_slice.cc", @@ -295,7 +307,7 @@ cc_library( ":runtime", ":toco_port", ":tooling_util", - ":types_proto_cc", + "//tensorflow/contrib/lite/kernels/internal:quantization_util", "//tensorflow/core:lib", "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", @@ -370,7 +382,6 @@ cc_library( ":toco_graphviz_dump_options", ":toco_port", ":types_proto_cc", - "//tensorflow/contrib/lite/kernels/internal:quantization_util", "//tensorflow/core:lib", "@com_google_absl//absl/strings", "@protobuf_archive//:protobuf_headers", diff --git a/tensorflow/contrib/lite/toco/allocate_transient_arrays.cc b/tensorflow/contrib/lite/toco/allocate_transient_arrays.cc index 621fbcb98db049f819ebbbda8816ad4e30538530..1f3ea2e1c71e7de7e9ede2224796b489d7518d18 100644 --- a/tensorflow/contrib/lite/toco/allocate_transient_arrays.cc +++ b/tensorflow/contrib/lite/toco/allocate_transient_arrays.cc @@ -200,6 +200,12 @@ void DeallocateTransientArray(const Model& model, const string& array_name, 
allocator->Deallocate(*array->alloc); } +void PushBackIfNotFound(const string& s, std::vector* v) { + if (std::find(v->begin(), v->end(), s) == v->end()) { + v->push_back(s); + } +} + } // namespace void AllocateTransientArrays(Model* model, @@ -251,18 +257,12 @@ void AllocateTransientArrays(Model* model, std::vector arrays_to_allocate; for (const auto& input : op->inputs) { if (StartsAt(array_lifespans[input], op_index)) { - if (std::find(arrays_to_allocate.begin(), arrays_to_allocate.end(), - input) == arrays_to_allocate.end()) { - arrays_to_allocate.push_back(input); - } + PushBackIfNotFound(input, &arrays_to_allocate); } } for (const auto& output : op->outputs) { if (StartsAt(array_lifespans[output], op_index)) { - if (std::find(arrays_to_allocate.begin(), arrays_to_allocate.end(), - output) == arrays_to_allocate.end()) { - arrays_to_allocate.push_back(output); - } + PushBackIfNotFound(output, &arrays_to_allocate); } } for (const string& array : arrays_to_allocate) { @@ -274,18 +274,12 @@ void AllocateTransientArrays(Model* model, std::vector arrays_to_deallocate; for (const auto& input : op->inputs) { if (EndsAt(array_lifespans[input], op_index)) { - if (std::find(arrays_to_deallocate.begin(), arrays_to_deallocate.end(), - input) == arrays_to_deallocate.end()) { - arrays_to_deallocate.push_back(input); - } + PushBackIfNotFound(input, &arrays_to_deallocate); } } for (const auto& output : op->outputs) { if (EndsAt(array_lifespans[output], op_index)) { - if (std::find(arrays_to_deallocate.begin(), arrays_to_deallocate.end(), - output) == arrays_to_deallocate.end()) { - arrays_to_deallocate.push_back(output); - } + PushBackIfNotFound(output, &arrays_to_deallocate); } } for (const string& array : arrays_to_deallocate) { @@ -310,17 +304,21 @@ void AllocateTransientArrays(Model* model, // for each operator, compute the sum of the sizes of the array that must // be live during the execution of this operator, plus the size of // persistent arrays that must be live at 
all times. - std::size_t size = persistent_alloc_size; + std::vector non_persistent_edges; for (const auto& input : op->inputs) { if (!array_lifespans[input].persistent) { - size += TransientArraySize(*model, input, transient_data_alignment); + PushBackIfNotFound(input, &non_persistent_edges); } } for (const auto& output : op->outputs) { if (!array_lifespans[output].persistent) { - size += TransientArraySize(*model, output, transient_data_alignment); + PushBackIfNotFound(output, &non_persistent_edges); } } + std::size_t size = persistent_alloc_size; + for (const string& edge : non_persistent_edges) { + size += TransientArraySize(*model, edge, transient_data_alignment); + } // The optimal total size is the maximum of all operator-specific sizes. optimal_transient_alloc_size = std::max(optimal_transient_alloc_size, size); } diff --git a/tensorflow/contrib/lite/toco/args.h b/tensorflow/contrib/lite/toco/args.h index 52c789293cef51ac63e49244670b67188b39ca5a..fe30b88344c5340dc8647cd89e244987c86e47fe 100644 --- a/tensorflow/contrib/lite/toco/args.h +++ b/tensorflow/contrib/lite/toco/args.h @@ -202,6 +202,7 @@ struct ParsedModelFlags { Arg input_shape; Arg rnn_states; Arg model_checks; + Arg change_concat_input_ranges = Arg(true); // Debugging output options. // TODO(benoitjacob): these shouldn't be ModelFlags. 
Arg graphviz_first_array; @@ -211,6 +212,7 @@ struct ParsedModelFlags { Arg allow_nonexistent_arrays = Arg(false); Arg allow_nonascii_arrays = Arg(false); Arg arrays_extra_info_file; + Arg model_flags_file; }; // Flags that describe the operation you would like to do (what conversion @@ -225,6 +227,8 @@ struct ParsedTocoFlags { // TODO(aselle): command_line_flags doesn't support doubles Arg default_ranges_min = Arg(0.); Arg default_ranges_max = Arg(0.); + Arg default_int16_ranges_min = Arg(0.); + Arg default_int16_ranges_max = Arg(0.); Arg inference_type; Arg inference_input_type; Arg drop_fake_quant = Arg(false); @@ -235,6 +239,8 @@ struct ParsedTocoFlags { Arg input_types; Arg debug_disable_recurrent_cell_fusion = Arg(false); Arg drop_control_dependency = Arg(false); + Arg propagate_fake_quant_num_bits = Arg(false); + Arg allow_nudging_weights_to_use_fast_gemm_kernel = Arg(false); }; } // namespace toco diff --git a/tensorflow/contrib/lite/toco/dump_graphviz.cc b/tensorflow/contrib/lite/toco/dump_graphviz.cc index c8352741b44cd627ff9edb9c4677b994c4cb9a09..c289ddcd929c74e9a5b0eaf2e0c8fc1a6a01d320 100644 --- a/tensorflow/contrib/lite/toco/dump_graphviz.cc +++ b/tensorflow/contrib/lite/toco/dump_graphviz.cc @@ -95,10 +95,8 @@ Color GetColorForArray(const Model& model, const string& array_name) { array_name == dump_options.graphviz_last_array) { return Color(0x9E, 0x9E, 0x9E); } - for (const string& output_array : model.flags.output_arrays()) { - if (array_name == output_array) { - return Color(0x9E, 0x9E, 0x9E); - } + if (IsOutputArray(model, array_name)) { + return Color(0x9E, 0x9E, 0x9E); } // Remaining arrays are intermediate activation arrays. 
// Lighter tone of the same grey as for input/output arrays: @@ -119,6 +117,12 @@ void AppendArrayVal(string* string, Array const& array, int index) { return; } AppendF(string, "%d", data[index]); + } else if (array.buffer->type == ArrayDataType::kInt16) { + const auto& data = array.GetBuffer().data; + if (index >= data.size()) { + return; + } + AppendF(string, "%d", data[index]); } else if (array.buffer->type == ArrayDataType::kInt32) { const auto& data = array.GetBuffer().data; if (index >= data.size()) { diff --git a/tensorflow/contrib/lite/toco/export_tensorflow.cc b/tensorflow/contrib/lite/toco/export_tensorflow.cc index 22a23357b36c16ea937e726f1e49aa95d7f964e3..99ccfaea648077b7b72af30b32dd53b42b85d3a2 100644 --- a/tensorflow/contrib/lite/toco/export_tensorflow.cc +++ b/tensorflow/contrib/lite/toco/export_tensorflow.cc @@ -37,6 +37,7 @@ limitations under the License. using tensorflow::DT_BOOL; using tensorflow::DT_FLOAT; +using tensorflow::DT_INT16; using tensorflow::DT_INT32; using tensorflow::DT_INT64; using tensorflow::DT_UINT8; @@ -357,6 +358,14 @@ void ConvertConvOperator(const Model& model, const ConvOperator& src_op, strides.mutable_list()->add_i(src_op.stride_height); strides.mutable_list()->add_i(src_op.stride_width); strides.mutable_list()->add_i(1); + if ((src_op.dilation_width_factor != 1) || + (src_op.dilation_height_factor != 1)) { + auto& dilations = (*conv2d_op->mutable_attr())["dilations"]; + dilations.mutable_list()->add_i(1); + dilations.mutable_list()->add_i(src_op.dilation_height_factor); + dilations.mutable_list()->add_i(src_op.dilation_width_factor); + dilations.mutable_list()->add_i(1); + } string padding; if (src_op.padding.type == PaddingType::kSame) { padding = "SAME"; @@ -391,84 +400,6 @@ void ConvertConvOperator(const Model& model, const ConvOperator& src_op, } } -void ConvertDilatedConvOperator(const Model& model, const ConvOperator& src_op, - GraphDef* tensorflow_graph) { - CHECK((src_op.dilation_width_factor > 1) || - 
(src_op.dilation_height_factor > 1)) - << "Conv operator must have height or width dilation factor > 1. " - "Otherwise, use regular conv op."; - CHECK_EQ(src_op.stride_width, 1) - << "Dilated AND strided convolution is unsupported"; - CHECK_EQ(src_op.stride_height, 1) - << "Dilated AND strided convolution is unsupported"; - - // Emulate dilated convolution with a chain of SpaceToBatchND -> Conv -> - // BatchToSpaceND ops. - - // Compute padding - const auto& input_array = model.GetArray(src_op.inputs[0]); - const auto& input_shape = input_array.shape(); - CHECK_EQ(input_shape.dimensions_count(), 4); - int height_mod_dilation = input_shape.dims(1) % src_op.dilation_height_factor; - int pad_height; - if (height_mod_dilation) { - pad_height = src_op.dilation_height_factor - height_mod_dilation; - } else { - pad_height = 0; - } - int pad_width; - int width_mod_dilation = input_shape.dims(2) % src_op.dilation_width_factor; - if (width_mod_dilation) { - pad_width = src_op.dilation_width_factor - width_mod_dilation; - } else { - pad_width = 0; - } - - // SpaceToBatchND op "collapses" the spatially separated elements together - string stb_output = src_op.outputs[0] + "/dilated_conv_SpaceToBatch"; - auto* stb_op = tensorflow_graph->add_node(); - stb_op->set_op("SpaceToBatchND"); - stb_op->set_name(stb_output); - *stb_op->add_input() = src_op.inputs[0]; - (*stb_op->mutable_attr())["T"].set_type(DT_FLOAT); - string block_shape = src_op.outputs[0] + "/dilated_conv_block_shape"; - CreateIntTensorConst( - block_shape, - {src_op.dilation_height_factor, src_op.dilation_width_factor}, {2}, - tensorflow_graph); - *stb_op->add_input() = block_shape; - (*stb_op->mutable_attr())["Tblock_shape"].set_type(DT_INT32); - string stb_paddings = src_op.outputs[0] + "/dilated_conv_paddings"; - CreateIntTensorConst(stb_paddings, {0, pad_height, pad_width, 0}, {2, 2}, - tensorflow_graph); - *stb_op->add_input() = stb_paddings; - (*stb_op->mutable_attr())["Tpaddings"].set_type(DT_INT32); - - // 
Perform a regular conv on the "collapsed" elements - ConvOperator conv_op; - string conv_output = src_op.outputs[0] + "/dilated_conv_Conv2D"; - conv_op.inputs = src_op.inputs; - conv_op.inputs[0] = stb_output; - conv_op.outputs = {conv_output}; - conv_op.padding.type = src_op.padding.type; - conv_op.stride_width = src_op.stride_width; - conv_op.stride_height = src_op.stride_height; - conv_op.dilation_width_factor = 1; - conv_op.dilation_height_factor = 1; - ConvertConvOperator(model, conv_op, tensorflow_graph); - - // BatchToSpaceND op restores elements to their original layout - auto* bts_op = tensorflow_graph->add_node(); - bts_op->set_op("BatchToSpaceND"); - bts_op->set_name(src_op.outputs[0]); - *bts_op->add_input() = conv_output; - (*bts_op->mutable_attr())["T"].set_type(DT_FLOAT); - *bts_op->add_input() = block_shape; - (*bts_op->mutable_attr())["Tblock_shape"].set_type(DT_INT32); - *bts_op->add_input() = stb_paddings; - (*bts_op->mutable_attr())["Tcrops"].set_type(DT_INT32); -} - void ConvertDepthwiseConvOperator(const Model& model, const DepthwiseConvOperator& src_op, GraphDef* tensorflow_graph) { @@ -773,6 +704,15 @@ void ConvertRelu6Operator(const Relu6Operator& src_op, (*relu_op->mutable_attr())["T"].set_type(DT_FLOAT); } +void ConvertLogOperator(const LogOperator& src_op, GraphDef* tensorflow_graph) { + auto* op = tensorflow_graph->add_node(); + op->set_op("Log"); + op->set_name(src_op.outputs[0]); + CHECK_EQ(src_op.inputs.size(), 1); + *op->add_input() = src_op.inputs[0]; + (*op->mutable_attr())["T"].set_type(DT_FLOAT); +} + void ConvertLogisticOperator(const LogisticOperator& src_op, GraphDef* tensorflow_graph) { auto* relu_op = tensorflow_graph->add_node(); @@ -943,6 +883,9 @@ void ConvertFakeQuantOperator(const FakeQuantOperator& src_op, CHECK(src_op.minmax); (*fakequant_op->mutable_attr())["min"].set_f(src_op.minmax->min); (*fakequant_op->mutable_attr())["max"].set_f(src_op.minmax->max); + if (src_op.num_bits) { + 
(*fakequant_op->mutable_attr())["num_bits"].set_i(src_op.num_bits); + } } void ConvertMaxPoolOperator(const MaxPoolOperator& src_op, @@ -1711,6 +1654,23 @@ void ConvertTopKV2Operator(const Model& model, const TopKV2Operator& src_op, (*topk_op->mutable_attr())["sorted"].set_b(true); } +void ConvertRandomUniformOperator(const Model& model, + const RandomUniformOperator& src_op, + GraphDef* tensorflow_graph) { + CHECK(tensorflow_graph != nullptr); + auto* new_op = tensorflow_graph->add_node(); + new_op->set_op("RandomUniform"); + CHECK_EQ(src_op.inputs.size(), 1); + new_op->set_name(src_op.outputs[0]); + *new_op->add_input() = src_op.inputs[0]; + const auto shape_type = GetTensorFlowDataType(model, src_op.inputs[0]); + (*new_op->mutable_attr())["T"].set_type(shape_type); + (*new_op->mutable_attr())["dtype"].set_type( + GetTensorFlowDataType(src_op.dtype)); + (*new_op->mutable_attr())["seed"].set_i(src_op.seed); + (*new_op->mutable_attr())["seed2"].set_i(src_op.seed2); +} + void ConvertOperator(const Model& model, const Operator& src_op, GraphDef* tensorflow_graph) { if (src_op.fused_activation_function != FusedActivationFunctionType::kNone) { @@ -1719,13 +1679,8 @@ void ConvertOperator(const Model& model, const Operator& src_op, } if (src_op.type == OperatorType::kConv) { - const ConvOperator& conv_op = static_cast(src_op); - if ((conv_op.dilation_width_factor != 1) || - (conv_op.dilation_height_factor != 1)) { - return ConvertDilatedConvOperator(model, conv_op, tensorflow_graph); - } else { - ConvertConvOperator(model, conv_op, tensorflow_graph); - } + ConvertConvOperator(model, static_cast(src_op), + tensorflow_graph); } else if (src_op.type == OperatorType::kDepthwiseConv) { ConvertDepthwiseConvOperator( model, static_cast(src_op), @@ -1760,6 +1715,9 @@ void ConvertOperator(const Model& model, const Operator& src_op, } else if (src_op.type == OperatorType::kRelu6) { ConvertRelu6Operator(static_cast(src_op), tensorflow_graph); + } else if (src_op.type == 
OperatorType::kLog) { + ConvertLogOperator(static_cast(src_op), + tensorflow_graph); } else if (src_op.type == OperatorType::kLogistic) { ConvertLogisticOperator(static_cast(src_op), tensorflow_graph); @@ -1897,6 +1855,10 @@ void ConvertOperator(const Model& model, const Operator& src_op, ConvertTransposeConvOperator( model, static_cast(src_op), tensorflow_graph); + } else if (src_op.type == OperatorType::kRandomUniform) { + ConvertRandomUniformOperator( + model, static_cast(src_op), + tensorflow_graph); } else { LOG(FATAL) << "Unhandled operator type " << OperatorTypeName(src_op.type); } @@ -1922,6 +1884,9 @@ void AddPlaceholder(const string& name, ArrayDataType type, case ArrayDataType::kInt64: (*placeholder->mutable_attr())["dtype"].set_type(DT_INT64); break; + case ArrayDataType::kInt16: + (*placeholder->mutable_attr())["dtype"].set_type(DT_INT16); + break; default: LOG(FATAL) << "Unexpected data type in array \"" << name << "\""; } diff --git a/tensorflow/contrib/lite/toco/graph_transformations/convert_pure_conv_to_depthwise.cc b/tensorflow/contrib/lite/toco/graph_transformations/convert_pure_conv_to_depthwise.cc index d38db85280d7bd935a47cda70227d383a513fbac..0fffab574ddd8ad75ec07ae4442f363a36ed289e 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/convert_pure_conv_to_depthwise.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/convert_pure_conv_to_depthwise.cc @@ -33,6 +33,11 @@ bool ConvertPureConvToDepthwise::Run(Model* model, std::size_t op_index) { if (conv_op->stride_width != conv_op->stride_height) { return false; } + if ((conv_op->dilation_width_factor != 1) || + (conv_op->dilation_height_factor != 1)) { + // Depthwise conv does not support dilation + return false; + } auto& weights_array = model->GetArray(conv_op->inputs[1]); if (!weights_array.buffer) { // Yield until the weights are resolved as a constant array. 
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/ensure_bias_vectors.cc b/tensorflow/contrib/lite/toco/graph_transformations/ensure_bias_vectors.cc index badefeca883b1e1d67f7de5276389c5e6e7f7cd3..708ecf6e0a96811ab274fbb25f748f562cd3afad 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/ensure_bias_vectors.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/ensure_bias_vectors.cc @@ -47,7 +47,7 @@ bool EnsureBiasVectors::Run(Model* model, std::size_t op_index) { op->type == OperatorType::kDepthwiseConv || op->type == OperatorType::kFullyConnected) { if (ProcessLinearOperator(model, op)) { - AddMessageF("Added bias vector to %s", LogName(*op)); + AddMessageF("Added bias vector to %s as %s", LogName(*op), op->inputs[2]); return true; } } diff --git a/tensorflow/contrib/lite/toco/graph_transformations/ensure_uint8_weights_safe_for_fast_int8_kernels.cc b/tensorflow/contrib/lite/toco/graph_transformations/ensure_uint8_weights_safe_for_fast_int8_kernels.cc new file mode 100644 index 0000000000000000000000000000000000000000..394fa349e2663e2806344f27a96a5132a2d4a810 --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/ensure_uint8_weights_safe_for_fast_int8_kernels.cc @@ -0,0 +1,209 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/core/platform/logging.h" + +namespace toco { + +// === Summary === +// +// TLDR: Some of our 8-bit arithmetic operations require uint8 weight values +// to avoid the value 0, thus ranging only in [1, 255]. This enables faster +// runtime arithmetic kernels on ARM NEON. This is not relevant on most +// other hardware architectures, and will cease to be relevant on ARM NEON +// in the future. These topics are elaborated below ("Context"). +// +// Having just one isolated uint8 value equal to 0 is fine. The bad case is when +// two uint8 values are both zero and are less than 16 bytes apart. +// +// By default, toco generates a fatal error when that happens. The user may opt +// in to more lax behavior by passing +// --allow_nudging_weights_to_use_fast_gemm_kernel. +// This causes toco to nudge such bad 0 values into the value 1, thus avoiding +// the problem in exchange for compromising on accuracy. +// +// The present graph transformation implements both the default fatal-erroring +// behavior, and, when allow_nudging_weights is set, also the lax nudging +// behavior. +// +// +// === Context === +// +// Since March 2017, we have been using a trick to perform faster +// 8bit matrix multiplications, to our knowledge first implemented in gemmlowp +// here: +// https://github.com/google/gemmlowp/commit/25b2989415b99e797e1ab977837111b2e231f81f +// +// This trick is explained in Appendix B of our paper, +// https://arxiv.org/abs/1712.05877 +// +// Here is the relevant paragraph: +// +// For efficient NEON implementation of the matrix multiplication’s +// core accumulation, we use the following trick. 
+// In the multiply-add operation in (10), we first change the +// operands’ type from uint8 to int8 (which can be done by +// subtracting 128 from the quantized values and zero-points). +// Thus the core multiply-add becomes +// +// int32 += int8 * int8. (B.1) +// +// As mentioned in section 3, with a minor tweak of the quantized +// training process, we can ensure that the weights, once +// quantized as int8 values, never take the value −128. Hence, +// the product in (B.1) is never −128 ∗ −128, and is therefore +// always less than 2^14 in absolute value. Hence, (B.1) +// can accumulate two products on a local int16 accumulator +// before that needs to be accumulated into the true int32 accumulator. +// This allows the use of an 8-way SIMD multiplication +// (SMULL on int8 operands), followed by an 8-way +// SIMD multiply-add (SMLAL on int8 operands), followed +// by a pairwise-add-and-accumulate into the int32 accumulators +// (SADALP). +// +// As that paragraph notes, quantized training should be suitably modified to +// ensure that quantized uint8 weights value only range in [1, 255]. So the +// problem that we are dealing with is only about the existing 8-bit quantized +// models that haven't been trained specifically to get 8-bit weights only in +// [1, 255]. +// +// This spreadsheet shows the speed benefit of this trick across many existing +// ARM-architecture CPUs: +// +// https://docs.google.com/spreadsheets/d/1-0LjdMvW0XtH1bYknC0bQINoFaxjTuL9eplZZcitykI/edit?usp=sharing +// +// Compare Row 18 (fast int8 trick) to Row 20 (regular uint8 kernel). +// +// The introduction of the 'dotprod' extension to ARM NEON, specifically the +// SDOT instruction, renders this eventually moot. 
See the experimental +// kernels contributed by ARM here, +// +// https://github.com/google/gemmlowp/pull/116 +// +// However, as of April 2018, there don't seem to be any commercially available +// CPU supporting these instructions (yet); we are waiting for +// Cortex-A{75,55}-r1 to become available; the "-r1" is key here. Even if such +// CPUs become available soon, it will presumably take years for them to +// overtake the large volume of existing CPUs not supporting these new +// instructions, especially in current and future low-end devices. All in all, +// we can foresee these 'fast int8 kernels' to remain important to have into +// the 2020s. +// +bool EnsureUint8WeightsSafeForFastInt8Kernels::Run(Model* model, + std::size_t op_index) { + const auto& op = *model->operators[op_index]; + int weights_index = 0; + switch (op.type) { + case OperatorType::kConv: + weights_index = 1; + break; + case OperatorType::kLstmCell: + weights_index = 2; + break; + case OperatorType::kFullyConnected: { + weights_index = 1; + const auto& fc_op = static_cast(op); + CHECK(!fc_op.experimental_shuffled_weights) + << "This graph transformation expects to run before FC weights get " + "shuffled."; + break; + } + default: + // Other operator types are unaffected by this graph transformation, + // because their runtime implementations don't use the fast int8 trick. + // In particular that's the case of DepthwiseConv at the moment. + // We have to update this logic when that changes, e.g. if in the future + // some DepthwiseConv kernel wants to use the trick. + // + // The reason why that's not so likely, hence why it's fairly safe to + // stay conservative in the list of operators that we handle here, is that + // the fast int8 kernel trick is only applicable to ops that either are + // implemented as a GEMM, or use symmetric ranges for both weights and + // activations. 
The reason why GEMM is special (can use the trick even + // without symmetric ranges) is that it is so arithmetic-intense that + // it can use techniques reducing its implementation to the symmetric + // ranges case, with limited relative overhead (O(N^2) overhead vs + // O(N^3) GEMM cost). See https://arxiv.org/pdf/1712.05877, section + // 2.3 Efficient handling of zero-points. + // + // That's why at the moment we only handle operators that use a GEMM + // (Conv, fully-connected --- note that LSTM merely wraps a + // fully-connected operator). + return false; + } + + const string& name = op.inputs[weights_index]; + auto& array = model->GetArray(name); + if (!array.buffer) { + return false; + } + if (array.data_type != ArrayDataType::kUint8) { + return false; + } + auto& buffer_data = array.GetMutableBuffer().data; + + int count_bad = 0; + int index_of_previous_bad_value = 0; + bool changed = false; + + for (int i = 0; i < buffer_data.size(); i++) { + if (buffer_data[i] == 0) { + count_bad++; + if (count_bad > 1) { + const int distance = i - index_of_previous_bad_value; + // Semi-arbitrary threshold. The idea is that trouble only occurs + // when two bad values are very close to each other so that they + // are jointly used within registers inside some GEMM kernel. + // The details of that depend on the kernel. Our current fast ARM64 + // kernel, for instance, only has an issue when the distance between + // consecutive bad values is exactly 8. We do not want to track such + // kernel details too closely here, so we pick a threshold that's + // a bit larger than that, to give us room to change kernels in the + // future without worrying. 
+ static constexpr int kMinDistanceBetweenBadValues = 16; + if (distance < kMinDistanceBetweenBadValues) { + if (allow_nudging_weights()) { + buffer_data[i] = 1; + changed = true; + continue; + } + LOG(FATAL) << "Bad value for " << name << " at index " << i + << ", previous bad value at index " + << index_of_previous_bad_value << ", distance=" << distance + << ", kMinDistanceBetweenBadValues=" + << kMinDistanceBetweenBadValues << ". Consider passing " + << "--allow_nudging_weights_to_use_fast_gemm_kernel " + << "if you don't care about accuracy."; + } + } + index_of_previous_bad_value = i; + } + } + + if (changed) { + AddMessageF("Tweaked weights values for %s", LogName(op)); + } + + return changed; +} + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/experimental_shuffle_fc_weights.cc b/tensorflow/contrib/lite/toco/graph_transformations/experimental_shuffle_fc_weights.cc new file mode 100644 index 0000000000000000000000000000000000000000..f098981a5cf4b91df4c7798bd3db8563705a3bd0 --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/experimental_shuffle_fc_weights.cc @@ -0,0 +1,135 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/core/platform/logging.h" + +namespace toco { + +bool ExperimentalShuffleFCWeights::Run(Model* model, std::size_t op_index) { + Operator* op = model->operators[op_index].get(); + if (op->type != OperatorType::kFullyConnected) { + return false; + } + FullyConnectedOperator* fc_op = static_cast(op); + // Exit if this FC op already has shuffled weights + if (fc_op->experimental_shuffled_weights) { + return false; + } + const Array& input_array = model->GetArray(fc_op->inputs[0]); + const string& weights_name = fc_op->inputs[1]; + Array& weights_array = model->GetArray(weights_name); + const Array& output_array = model->GetArray(fc_op->outputs[0]); + // Exit if this FC op isn't quantized with uint8 inputs and int16 outputs, + // the only case where we are currently interested in providing a fast path + // with shuffled weights. + if (input_array.data_type != ArrayDataType::kUint8 || + weights_array.data_type != ArrayDataType::kUint8 || + output_array.data_type != ArrayDataType::kInt16 || + !input_array.quantization_params || !weights_array.quantization_params || + !output_array.quantization_params) { + return false; + } + // Exit if the shapes aren't known + if (!input_array.has_shape() || !weights_array.has_shape()) { + return false; + } + // Exit if, based on the known shapes, this FC op is not a GEMV. + // The shuffling of FC weights is only useful to enable fast GEMV paths. + const Shape& input_shape = input_array.shape(); + for (int i = 0; i < input_shape.dimensions_count() - 1; i++) { + if (input_shape.dims(i) != 1) { + // The input activations, shaped as a matrix, have multiple columns. 
+ // This FC op isn't a matrix*vector multiplication. + AddMessageF( + "Not applying experimental shuffling to the weights of %s because " + "it's not a matrix*vector product", + LogName(*op)); + return false; + } + } + // Exit if the weights shape isn't an integral multiple of the shuffled + // block shape, 4x16. We don't want to have to write code dealing with + // odd sizes, that would go un-exercised at the moment as the models + // for which we need this shuffling have shapes that are multiples of that + // 4x16 block size. In fact, much of the rationale for this shuffling is + // to avoid cache aliasin issue with large power-of-two depths, with our + // models motivating this shuffling having FC weights shapes like + // 4096x2048. Thus, if some model doesn't get the shuffling because of that + // size requirement, that might be just fine --- that model might just not + // suffer from that cache aliasing issue that we have with large powers of + // two. + const Shape& weights_shape = weights_array.shape(); + if (weights_shape.dimensions_count() != 2) { + return false; + } + const int rows = weights_shape.dims(0); + const int cols = weights_shape.dims(1); + if (rows % 4 || cols % 16) { + AddMessageF( + "Not applying experimental shuffling to the weights of %s because its " + "shape isn't a multiple of the shuffling block shape, 4x16", + LogName(*op)); + return false; + } + // Exit if the weights aren't already a constant array. + if (!weights_array.buffer) { + return false; + } + // Exit if the weights are used by more than one op. 
+ if (CountOpsWithInput(*model, weights_name) != 1) { + AddMessageF( + "Not applying experimental shuffling to the weights of %s because that " + "array is consumed by other operators", + LogName(*op)); + return false; + } + // Compute the shuffled weights + auto& weights_data = + weights_array.GetMutableBuffer().data; + CHECK_EQ(rows * cols, weights_data.size()); + std::vector shuffled_data(weights_data.size()); + uint8* shuffled_data_ptr = shuffled_data.data(); + for (int r = 0; r < rows; r += 4) { + for (int c = 0; c < cols; c += 16) { + for (int i = 0; i < 4; i++) { + const uint8* src_data_ptr = weights_data.data() + (r + i) * cols + c; + for (int j = 0; j < 16; j++) { + uint8 src_val = *src_data_ptr++; + // Flip the sign bit, so that the runtime will only need to + // reinterpret these uint8 values as int8, getting for free the + // subtraction of the zero_point value 128. + uint8 dst_val = src_val ^ 0x80; + *shuffled_data_ptr++ = dst_val; + } + } + } + } + CHECK_EQ(shuffled_data_ptr, shuffled_data.data() + rows * cols); + // Switch this FC op to using the shuffled weights. 
+ weights_data = std::move(shuffled_data); + fc_op->experimental_shuffled_weights = true; + AddMessageF("Applied experimental shuffling to the weights of %s", + LogName(*op)); + return true; +} + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h b/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h index 640afc7c74d7284fb9e212ab23d74a8215314add..72ffd51db45d0808feab3d07436c13db2420a680 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h +++ b/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h @@ -128,12 +128,14 @@ DECLARE_GRAPH_TRANSFORMATION(IdentifyL2Pool) DECLARE_GRAPH_TRANSFORMATION(IdentifyLstmCell) DECLARE_GRAPH_TRANSFORMATION(SplitLstmCellInputs) DECLARE_GRAPH_TRANSFORMATION(MergeLstmCellInputs) +DECLARE_GRAPH_TRANSFORMATION(MergeReshapeIntoPrecedingTranspose) DECLARE_GRAPH_TRANSFORMATION(IdentifyRelu1) DECLARE_GRAPH_TRANSFORMATION(IdentifyPRelu) DECLARE_GRAPH_TRANSFORMATION(IdentifyDilatedConv) DECLARE_GRAPH_TRANSFORMATION(MakeInitialDequantizeOperator) DECLARE_GRAPH_TRANSFORMATION(PropagateActivationFunctionIntoConstants) DECLARE_GRAPH_TRANSFORMATION(PropagateArrayDataTypes) +DECLARE_GRAPH_TRANSFORMATION(PropagateFakeQuantNumBits); DECLARE_GRAPH_TRANSFORMATION(PropagateFixedSizes) DECLARE_GRAPH_TRANSFORMATION(HardcodeMinMax) DECLARE_GRAPH_TRANSFORMATION(Quantize) @@ -143,8 +145,10 @@ DECLARE_GRAPH_TRANSFORMATION(RemoveTensorFlowIdentity) DECLARE_GRAPH_TRANSFORMATION(RemoveTrivialBinaryOperator) DECLARE_GRAPH_TRANSFORMATION(RemoveTrivialConcatenation) DECLARE_GRAPH_TRANSFORMATION(RemoveTrivialConcatenationInput) +DECLARE_GRAPH_TRANSFORMATION(RemoveTrivialFakeQuant) DECLARE_GRAPH_TRANSFORMATION(RemoveTrivialSlice) DECLARE_GRAPH_TRANSFORMATION(RemoveTrivialQuantizedActivationFunc) +DECLARE_GRAPH_TRANSFORMATION(RemoveTrivialQuantizedMinMax) DECLARE_GRAPH_TRANSFORMATION(RemoveUnusedOp) 
DECLARE_GRAPH_TRANSFORMATION(ResolveBatchNormalization) DECLARE_GRAPH_TRANSFORMATION(ResolveConstantBinaryOperator) @@ -152,7 +156,8 @@ DECLARE_GRAPH_TRANSFORMATION(ResolveConstantUnaryOperator) DECLARE_GRAPH_TRANSFORMATION(CreateIm2colArrays) DECLARE_GRAPH_TRANSFORMATION(DropIm2colArrays) DECLARE_GRAPH_TRANSFORMATION(ReadFakeQuantMinMax) -DECLARE_GRAPH_TRANSFORMATION(ReorderActivationFunctions) +DECLARE_GRAPH_TRANSFORMATION(ReorderElementwiseUnary) +DECLARE_GRAPH_TRANSFORMATION(ReorderReshapeTranspose) DECLARE_GRAPH_TRANSFORMATION(ResolveReorderAxes) DECLARE_GRAPH_TRANSFORMATION(ResolveTensorFlowConcat) DECLARE_GRAPH_TRANSFORMATION(ResolveTensorFlowMatMul) @@ -160,8 +165,8 @@ DECLARE_GRAPH_TRANSFORMATION(ResolveTensorFlowMerge) DECLARE_GRAPH_TRANSFORMATION(ResolveSqueezeAttributes) DECLARE_GRAPH_TRANSFORMATION(ResolveTensorFlowSwitch) DECLARE_GRAPH_TRANSFORMATION(ResolveTensorFlowTile) -DECLARE_GRAPH_TRANSFORMATION(ResolveConstantFakeQuant) DECLARE_GRAPH_TRANSFORMATION(ResolveConstantConcatenation) +DECLARE_GRAPH_TRANSFORMATION(ResolveConstantReshape) DECLARE_GRAPH_TRANSFORMATION(ResolveConstantTranspose) DECLARE_GRAPH_TRANSFORMATION(DropFakeQuant) DECLARE_GRAPH_TRANSFORMATION(UnfuseActivationFunctions) @@ -173,6 +178,7 @@ DECLARE_GRAPH_TRANSFORMATION(ResolveStridedSliceAttributes) DECLARE_GRAPH_TRANSFORMATION(ResolveSliceAttributes) DECLARE_GRAPH_TRANSFORMATION(ResolveMeanAttributes) DECLARE_GRAPH_TRANSFORMATION(ResolveTransposeAttributes) +DECLARE_GRAPH_TRANSFORMATION(ResolveConstantRandomUniform) DECLARE_GRAPH_TRANSFORMATION(ResolveConstantRange) DECLARE_GRAPH_TRANSFORMATION(ResolveConstantShapeOrRank) DECLARE_GRAPH_TRANSFORMATION(ResolveConstantStack) @@ -182,6 +188,25 @@ DECLARE_GRAPH_TRANSFORMATION(ResolveConstantGather) DECLARE_GRAPH_TRANSFORMATION(ResolveMultiplyByZero) DECLARE_GRAPH_TRANSFORMATION(Dequantize) DECLARE_GRAPH_TRANSFORMATION(UnpartitionEmbeddingLookup) +DECLARE_GRAPH_TRANSFORMATION(ExperimentalShuffleFCWeights) + +class PropagateDefaultMinMax 
: public GraphTransformation { + public: + bool Run(Model* model, std::size_t op_index) override; + const char* Name() const override { return "PropagateDefaultMinMax"; } + + bool has_any_ranges_defined() const { return !type_ranges_.empty(); } + void DefineTypeRange(ArrayDataType data_type, double min, double max) { + MinMax minmax; + minmax.min = min; + minmax.max = max; + type_ranges_.emplace_back(data_type, minmax); + } + + private: + bool SetArrayMinMax(const string& array_name, Array* array); + std::vector> type_ranges_; +}; class ResolveReshapeAttributes : public GraphTransformation { public: @@ -204,6 +229,36 @@ class RemoveTrivialReshape : public GraphTransformation { bool treat_expand_dims_as_trivial_ = false; }; +class ResolveConstantFakeQuant : public GraphTransformation { + public: + bool Run(Model* model, std::size_t op_index) override; + const char* Name() const override { return "ResolveConstantFakeQuant"; } + + // True if the num_bits should adjust the final data type. + bool propagate_fake_quant_num_bits() const { + return propagate_fake_quant_num_bits_; + } + void set_propagate_fake_quant_num_bits(bool val) { + propagate_fake_quant_num_bits_ = val; + } + + private: + bool propagate_fake_quant_num_bits_ = false; +}; + +class EnsureUint8WeightsSafeForFastInt8Kernels : public GraphTransformation { + public: + bool Run(Model* model, std::size_t op_index) override; + const char* Name() const override { + return "EnsureUint8WeightsSafeForFastInt8Kernels"; + } + bool allow_nudging_weights() const { return allow_nudging_weights_; } + void set_allow_nudging_weights(bool val) { allow_nudging_weights_ = val; } + + private: + bool allow_nudging_weights_ = false; +}; + #undef DECLARE_GRAPH_TRANSFORMATION } // end namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/hardcode_min_max.cc b/tensorflow/contrib/lite/toco/graph_transformations/hardcode_min_max.cc index 
7c97ef0d31993408bf0647d640ce91146bec2343..437e30a91803bfc847bf246875fa2924b7c0d3fe 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/hardcode_min_max.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/hardcode_min_max.cc @@ -95,30 +95,37 @@ bool HardcodeMinMaxForConcatenation(Model* model, Operator* op) { overall_minmax.min = overall_min; overall_minmax.max = overall_max; bool changed = false; - for (const auto& input : op->inputs) { - auto& array = model->GetArray(input); - if (!array.minmax) { - changed = true; - } else if (!(overall_minmax == array.GetMinMax())) { - changed = true; - LOG(WARNING) - << "Tweaking the MinMax of array " << input << ", which is " - << "an input to " << LogName(*op) << ", because we want all inputs " - << "and outputs of a Concatenation operator to have the same MinMax " - << "so that it can be implemented as a pure byte-copy, no " - "arithmetic."; + if (model->flags.change_concat_input_ranges()) { + for (const auto& input : op->inputs) { + auto& array = model->GetArray(input); + if (!array.minmax) { + changed = true; + } else if (!(overall_minmax == array.GetMinMax())) { + changed = true; + LOG(WARNING) + << "Tweaking the MinMax of array " << input << ", which is " + << "an input to " << LogName(*op) << ", because we want all inputs " + << "and outputs of a Concatenation operator to have the same " + << "MinMax so that it can be implemented as a pure byte-copy, no " + "arithmetic."; + } + array.GetOrCreateMinMax() = overall_minmax; } - array.GetOrCreateMinMax() = overall_minmax; } if (!output.minmax) { changed = true; } else if (!(overall_minmax == output.GetMinMax())) { - changed = true; - LOG(WARNING) - << "Tweaking the MinMax of the output array of " << LogName(*op) - << ", because we want all inputs " - << "and outputs of a Concatenation operator to have the same MinMax " - << "so that it can be implemented as a pure byte-copy, no arithmetic."; + if (model->flags.change_concat_input_ranges()) { + changed = 
true; + LOG(WARNING) + << "Tweaking the MinMax of the output array of " << LogName(*op) + << ", because we want all inputs " + << "and outputs of a Concatenation operator to have the same MinMax " + << "so that it can be implemented as a pure byte-copy, no " + << "arithmetic."; + } else { + return false; + } } output.GetOrCreateMinMax() = overall_minmax; @@ -223,8 +230,11 @@ bool PropagateMinMaxAmongArrays(Model* model, if (array.minmax) { CHECK(*array.minmax == *reference_minmax) << "Both the following arrays have minmax, and they disagree: " - << reference_array_name << " and " << array_name - << ". Expected that either only one of them would have minmax, or at " + << reference_array_name << " (" << reference_minmax->min << "," + << reference_minmax->max << ") and " << array_name << " (" + << array.minmax->min << "," << array.minmax->max + << "). Expected that either only one of them would have minmax, or " + "at " "least that they would agree."; } else { array.GetOrCreateMinMax() = *reference_minmax; diff --git a/tensorflow/contrib/lite/toco/graph_transformations/identify_lstm.cc b/tensorflow/contrib/lite/toco/graph_transformations/identify_lstm.cc index c363b93394f0af7bcfc37c1e8be5f98aca6667ae..e9842524c829b839b97b3453a36c41efe186efbb 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/identify_lstm.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/identify_lstm.cc @@ -306,6 +306,12 @@ bool IdentifyLstmCell::Run(Model* model, std::size_t op_index) { return false; } + if (static_cast(fully_connected) + ->experimental_shuffled_weights) { + // Not yet implemented: experimental shuffled weights in fused LSTM cell. 
+ return false; + } + // Emplace a new LSTM cell operator auto* lstm_cell_op = new LstmCellOperator; lstm_cell_op->inputs.resize(LstmCellOperator::NUM_INPUTS); diff --git a/tensorflow/contrib/lite/toco/graph_transformations/make_initial_dequantize_operator.cc b/tensorflow/contrib/lite/toco/graph_transformations/make_initial_dequantize_operator.cc index 935da9f966ca63095faa17476be3a559d1a0193a..45d9f73a1e6416b8f3fe3936c740da637961b7fc 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/make_initial_dequantize_operator.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/make_initial_dequantize_operator.cc @@ -18,6 +18,7 @@ limitations under the License. #include #include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/graph_transformations/quantization_util.h" #include "tensorflow/contrib/lite/toco/model.h" #include "tensorflow/contrib/lite/toco/model_flags.pb.h" #include "tensorflow/contrib/lite/toco/tooling_util.h" @@ -78,15 +79,21 @@ bool AddDequantizeOperatorToInput(const string& input_name, const Operator* op, image_input_op->outputs = {dequantized_input_name}; model->operators.emplace(model->operators.begin(), image_input_op); - CHECK(input_array.final_data_type == ArrayDataType::kUint8); - input_array.data_type = ArrayDataType::kUint8; dequantized_input_array.data_type = ArrayDataType::kFloat; const auto& input_minmax = input_array.GetMinMax(); auto& dequantized_input_minmax = dequantized_input_array.GetOrCreateMinMax(); dequantized_input_minmax = input_minmax; auto& input_qparams = input_array.GetOrCreateQuantizationParams(); - GetQuantizationParamsFromMinMax(input_minmax, - &input_qparams); + input_array.data_type = input_array.final_data_type; + if (input_array.data_type == ArrayDataType::kUint8) { + GetQuantizationParamsFromMinMax(input_minmax, + &input_qparams); + } else if (input_array.data_type == ArrayDataType::kInt16) { + 
GetQuantizationParamsFromMinMax(input_minmax, + &input_qparams); + } else { + LOG(FATAL) << "unhandled data type"; + } transformation->AddMessageF( "Created %s" diff --git a/tensorflow/contrib/lite/toco/graph_transformations/merge_reshape_into_preceding_transpose.cc b/tensorflow/contrib/lite/toco/graph_transformations/merge_reshape_into_preceding_transpose.cc new file mode 100644 index 0000000000000000000000000000000000000000..5065004093434475172a39efdcfd26c10c49148b --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/merge_reshape_into_preceding_transpose.cc @@ -0,0 +1,190 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_passthrough.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/runtime/types.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/core/platform/logging.h" + +namespace toco { + +namespace { + +bool OperatorReady(const Model& model, const Operator* op) { + if (!model.HasArray(op->inputs[0]) || !model.HasArray(op->inputs[1]) || + !model.HasArray(op->outputs[0])) { + // Arrays are missing. 
+ return false; + } + + if (!model.GetArray(op->inputs[0]).has_shape() || + !model.GetArray(op->outputs[0]).has_shape()) { + // Input and output needs the shape. + return false; + } + + if (!model.GetArray(op->inputs[1]).buffer) { + // Buffer needs to be a constant. + return false; + } + + return true; +} + +// Returns whether the reshape could be a transpose. +std::vector ReshapeToTranspose(const Model& model, + const TensorFlowReshapeOperator* op) { + CHECK(!op->shape.empty()); + CHECK(model.HasArray(op->inputs[0])); + CHECK(model.HasArray(op->outputs[0])); + + const auto& input_array = model.GetArray(op->inputs[0]); + const auto& output_array = model.GetArray(op->outputs[0]); + + CHECK(input_array.has_shape()); + CHECK(output_array.has_shape()); + + std::vector in_shape = input_array.shape().dims(); + std::vector out_shape = output_array.shape().dims(); + + std::vector one_indices; + std::vector not_one_indices; + + // Separate into one indices and not one indices. + for (int i = 0; i < in_shape.size(); i++) { + if (in_shape[i] == 1) { + one_indices.push_back(i); + } else { + not_one_indices.push_back(i); + } + } + + // Reorder the vertices. + std::vector perm; + perm.reserve(in_shape.size()); + int one_index = 0; + int not_one_index = 0; + for (const auto val : out_shape) { + if (val == 1) { + perm.push_back(one_indices[one_index]); + one_index++; + } else { + perm.push_back(not_one_indices[not_one_index]); + not_one_index++; + } + } + + return perm; +} + +} // namespace + +// When a transpose is fed into a reshape, it is possible for the two operators +// to be merged if the reshape does not affect memory ordering and does not +// affects the number of dimensions. This only occurs when only unary dimensions +// are shifting position. 
+bool MergeReshapeIntoPrecedingTranspose::Run(Model* model, + std::size_t op_index) { + auto it = model->operators.begin() + op_index; + auto* reshape_op = ConvertOperator( + it->get(), OperatorType::kTensorFlowReshape); + + if (reshape_op == nullptr) { + return false; + } + + if (!OperatorReady(*model, reshape_op) || reshape_op->shape.empty()) { + return false; + } + + const string intermediate_name = reshape_op->inputs[0]; + const string output_name = reshape_op->outputs[0]; + + // Guarantee the input is only consume by the reshape. + if (CountOpsWithInput(*model, intermediate_name) != 1) { + return false; + } + + // Check for the parent operator. + const auto& transpose_it = FindOpWithOutput(*model, intermediate_name); + if (transpose_it == model->operators.end()) { + return false; + } + + // Find the parent operator and guarantee it is a transpose. + TransposeOperator* transpose_op = ConvertOperator( + transpose_it->get(), OperatorType::kTranspose); + + if (transpose_op == nullptr) { + return false; + } + + if (!OperatorReady(*model, transpose_op) || transpose_op->perm.empty()) { + return false; + } + + if (!ReshapeIsEquivalentToTranspose(*model, reshape_op, + false /*allow_extra_unary_dimensions*/)) { + return false; + } + + // Check that the intermediate is not an output array. + if (!IsDiscardableArray(*model, intermediate_name)) { + AddMessageF( + "Cannot fuse %s and %s as it would invalidate the transpose " + "output array.", + LogName(*transpose_op), LogName(*reshape_op)); + return false; + } + + AddMessageF("Merging operations %s and %s", LogName(*transpose_op), + LogName(*reshape_op)); + + // const auto& intermediate_array = model->GetArray(intermediate_name); + // const auto& output_array = model->GetArray(output_name); + + auto merged_perm = ReshapeToTranspose(*model, reshape_op); + + // Combine the permutations. 
+ const auto& transpose_perm = transpose_op->perm; + for (int i = 0; i < merged_perm.size(); i++) { + merged_perm[i] = transpose_perm[merged_perm[i]]; + } + + // Remove the reshape as passthrough operation. + if (!RemoveTrivialPassthroughOp(this, model, op_index)) { + return false; + } + + // Update transpose_op's constant buffer to contain the new permutation. + model->GetArray(transpose_op->inputs[1]) + .GetMutableBuffer() + .data = merged_perm; + transpose_op->perm = merged_perm; + + // transpose_op's shape will likely have changed. + model->GetArray(transpose_op->outputs[0]).clear_shape(); + + return true; +} + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/propagate_array_data_types.cc b/tensorflow/contrib/lite/toco/graph_transformations/propagate_array_data_types.cc index 778da39bf13563cbbdbe54f1140595b057253ae3..89ad58f887f3644125b64e9341e572c6b435edab 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/propagate_array_data_types.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/propagate_array_data_types.cc @@ -50,78 +50,108 @@ bool PropagateArrayDataTypes::Run(Model* model, std::size_t op_index) { old_output_data_types[output] = model->GetArray(output).data_type; } // Do the actual output data types propagation. - if (op->type == OperatorType::kDequantize || - op->type == OperatorType::kResizeBilinear) { - // These operators unconditionally produce float outputs - SetDataTypeForAllOutputs(model, op, ArrayDataType::kFloat); - } else if (op->type == OperatorType::kTensorFlowLess || - op->type == OperatorType::kTensorFlowLessEqual || - op->type == OperatorType::kTensorFlowGreater || - op->type == OperatorType::kTensorFlowGreaterEqual) { - // These operators unconditionally produce bool outputs - SetDataTypeForAllOutputs(model, op, ArrayDataType::kBool); - } else if (op->type == OperatorType::kRank || - op->type == OperatorType::kTensorFlowShape) { - // These operators only produce int32 outputs.
- SetDataTypeForAllOutputs(model, op, ArrayDataType::kInt32); - } else if (op->type == OperatorType::kTensorFlowSplit || - op->type == OperatorType::kTensorFlowConcat || - op->type == OperatorType::kFill) { - // These operators produce an output with the same type as their 2nd input - CHECK_GE(op->inputs.size(), 2); - const ArrayDataType data_type = model->GetArray(op->inputs[1]).data_type; - SetDataTypeForAllOutputs(model, op, data_type); - } else if (op->type == OperatorType::kTransposeConv) { - // These operators produce an output with the same type as their 3rd input - CHECK_GE(op->inputs.size(), 3); - const ArrayDataType data_type = model->GetArray(op->inputs[2]).data_type; - SetDataTypeForAllOutputs(model, op, data_type); - } else if (op->type == OperatorType::kCast) { - // Data type of the Cast op is specified. - CHECK_EQ(op->outputs.size(), 1); - auto* cast_op = static_cast(op); - model->GetArray(op->outputs[0]).data_type = cast_op->dst_data_type; - } else if (op->type == OperatorType::kArgMax) { - // Data type of the ArgMax op is specified. 
- CHECK_EQ(op->outputs.size(), 1); - auto* argmax_op = static_cast(op); - model->GetArray(op->outputs[0]).data_type = argmax_op->output_data_type; - } else if (op->type == OperatorType::kRange) { - auto* range_op = static_cast(op); - // Output type of the Range op can be set via an attribute - ArrayDataType data_type; - if (range_op->dtype != ArrayDataType::kNone) { - // Use the type if specified - data_type = range_op->dtype; - } else { - // Otherwise use the first input - CHECK_GE(op->inputs.size(), 1); - data_type = model->GetArray(op->inputs[0]).data_type; + switch (op->type) { + case OperatorType::kDequantize: + case OperatorType::kResizeBilinear: + // These operators unconditionally produce float outputs + SetDataTypeForAllOutputs(model, op, ArrayDataType::kFloat); + break; + case OperatorType::kTensorFlowLess: + case OperatorType::kTensorFlowLessEqual: + case OperatorType::kTensorFlowGreater: + case OperatorType::kTensorFlowGreaterEqual: + // These operators unconditionally produce bool outputs + SetDataTypeForAllOutputs(model, op, ArrayDataType::kBool); + break; + case OperatorType::kRank: + case OperatorType::kTensorFlowShape: + // These operators only produce int32 outputs. + SetDataTypeForAllOutputs(model, op, ArrayDataType::kInt32); + break; + case OperatorType::kTensorFlowSplit: + case OperatorType::kTensorFlowConcat: + case OperatorType::kFill: { + // These operators produce an output with the same type as their 2nd input + CHECK_GE(op->inputs.size(), 2); + const ArrayDataType data_type = model->GetArray(op->inputs[1]).data_type; + SetDataTypeForAllOutputs(model, op, data_type); + break; } - CHECK_EQ(op->outputs.size(), 1); - SetDataTypeForAllOutputs(model, op, data_type); - } else if (op->type == OperatorType::kTensorFlowUnsupported) { - auto* unsupported_op = static_cast(op); - // Some output tensors from the op could be eliminated by optimization. - // This can make unsupported_op->output_data_types have more elements than - // op->outputs. 
- if (unsupported_op->output_data_types.size() < op->outputs.size()) { + case OperatorType::kTransposeConv: { + // These operators produce an output with the same type as their 3rd input + CHECK_GE(op->inputs.size(), 3); + const ArrayDataType data_type = model->GetArray(op->inputs[2]).data_type; + SetDataTypeForAllOutputs(model, op, data_type); + break; + } + case OperatorType::kCast: { + // Data type of the Cast op is specified. + CHECK_EQ(op->outputs.size(), 1); + auto* cast_op = static_cast(op); + model->GetArray(op->outputs[0]).data_type = cast_op->dst_data_type; + break; + } + case OperatorType::kArgMax: { + // Data type of the ArgMax op is specified. + CHECK_EQ(op->outputs.size(), 1); + auto* argmax_op = static_cast(op); + model->GetArray(op->outputs[0]).data_type = argmax_op->output_data_type; + break; + } + case OperatorType::kRange: { + auto* range_op = static_cast(op); + // Output type of the Range op can be set via an attribute + ArrayDataType data_type; + if (range_op->dtype != ArrayDataType::kNone) { + // Use the type if specified + data_type = range_op->dtype; + } else { + // Otherwise use the first input + CHECK_GE(op->inputs.size(), 1); + data_type = model->GetArray(op->inputs[0]).data_type; + } + CHECK_EQ(op->outputs.size(), 1); + SetDataTypeForAllOutputs(model, op, data_type); + break; + } + case OperatorType::kRandomUniform: { + auto* rand_op = static_cast(op); + // The output type of RandomUniform is specified with an attribute + if (rand_op->dtype == ArrayDataType::kNone) { + return false; + } + CHECK_EQ(op->outputs.size(), 1); + SetDataTypeForAllOutputs(model, op, rand_op->dtype); + break; + } + case OperatorType::kTensorFlowUnsupported: { + auto* unsupported_op = static_cast(op); + // Some output tensors from the op could be eliminated by optimization. + // This can make unsupported_op->output_data_types have more elements than + // op->outputs. 
+ if (unsupported_op->output_data_types.size() < op->outputs.size()) { + return false; + } + for (int i = 0; i < op->outputs.size(); ++i) { + auto output = op->outputs[i]; + auto data_type = unsupported_op->output_data_types[i]; + model->GetArray(output).data_type = data_type; + } + break; + } + case OperatorType::kExpandDims: { + // Yield on ExpandDim until it is converted to Reshape return false; } - for (int i = 0; i < op->outputs.size(); ++i) { - auto output = op->outputs[i]; - auto data_type = unsupported_op->output_data_types[i]; - model->GetArray(output).data_type = data_type; + default: { + // These operators produce outputs with the same type as their 1st input + CHECK_GT(op->inputs.size(), 0); + const ArrayDataType data_type = model->GetArray(op->inputs[0]).data_type; + SetDataTypeForAllOutputs(model, op, data_type); + break; } - } else if (op->type == OperatorType::kExpandDims) { - // Yield on ExpandDim until it is converted to Reshape - return false; - } else { - // These operators produce outputs with the same type as their 1st input - CHECK_GT(op->inputs.size(), 0); - const ArrayDataType data_type = model->GetArray(op->inputs[0]).data_type; - SetDataTypeForAllOutputs(model, op, data_type); } + // Return true if any output data type changed, false if none changed. for (const auto& output : op->outputs) { if (old_output_data_types[output] != model->GetArray(output).data_type) { diff --git a/tensorflow/contrib/lite/toco/graph_transformations/propagate_default_min_max.cc b/tensorflow/contrib/lite/toco/graph_transformations/propagate_default_min_max.cc new file mode 100644 index 0000000000000000000000000000000000000000..50b90e7c2bfddb0382a4d44ad6c90fc7f7701273 --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/propagate_default_min_max.cc @@ -0,0 +1,86 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/graph_transformations/quantization_util.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/core/platform/logging.h" + +namespace toco { + +// Propagates default min/max values to any operator input/output array that +// is missing them. +// +// When provided a set of min/max values for uint8 arrays this will rescale +// the values for other data types as required and preserving the floating point +// range within the new type. 
+bool PropagateDefaultMinMax::Run(Model* model, std::size_t op_index) { + const auto it = model->operators.begin() + op_index; + const auto* op = it->get(); + + bool did_change = false; + + for (const auto& input : op->inputs) { + auto& input_array = model->GetArray(input); + if (!input_array.minmax && !input_array.buffer) { + did_change |= SetArrayMinMax(input, &input_array); + } + } + + for (const auto& output : op->outputs) { + auto& output_array = model->GetArray(output); + if (!output_array.minmax && !output_array.buffer) { + did_change |= SetArrayMinMax(output, &output_array); + } + } + + return did_change; +} + +// Sets the min/max on the given array, adjusting the reference_minmax for the +// final data type of the array if it is already specified. +bool PropagateDefaultMinMax::SetArrayMinMax(const string& array_name, + Array* array) { + CHECK(!array->minmax); + + ArrayDataType quantized_data_type = + GetQuantizedDataType(*array, ArrayDataType::kUint8); + for (const auto& type_range : type_ranges_) { + if (type_range.first == quantized_data_type) { + array->GetOrCreateMinMax() = type_range.second; + break; + } + } + if (!array->minmax) { + AddMessageF( + "No defaults specified for quantized data type %s of array %s, " + "skipping", + ArrayDataTypeName(quantized_data_type), array_name); + return false; + } + + AddMessageF("Adding default minmax %g,%g to array %s when quantized as %s", + array->GetMinMax().min, array->GetMinMax().max, array_name, + ArrayDataTypeName(quantized_data_type)); + + return true; +} + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/propagate_fake_quant_num_bits.cc b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fake_quant_num_bits.cc new file mode 100644 index 0000000000000000000000000000000000000000..0bce183c1897dfba6f2c393ffc0306c054366725 --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fake_quant_num_bits.cc @@ -0,0 +1,307 @@ +/* Copyright 2018 
The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/graph_transformations/quantization_util.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/core/platform/logging.h" + +namespace toco { + +namespace { + +void ChangeArrayDataType(GraphTransformation* transformation, Array* array, + ArrayDataType new_data_type, + const MinMax* new_minmax) { + // Ensure the array ends up in the new type (if it hasn't yet been quantized). + array->final_data_type = new_data_type; + + if (array->minmax && array->quantization_params) { + // The array is already quantized and has min/max info. + // As we are changing the data type we need to fix up the existing min/max + // to the new data type range. 
+ + double old_quantized_min, old_quantized_max; + CHECK(GetQuantizedDataTypeNumericalRange( + array->data_type, &old_quantized_min, &old_quantized_max)) + << "Existing data type is not quantized: " + << ArrayDataTypeName(array->data_type); + double new_quantized_min, new_quantized_max; + CHECK(GetQuantizedDataTypeNumericalRange(new_data_type, &new_quantized_min, + &new_quantized_max)) + << "New data type is not quantized: " + << ArrayDataTypeName(new_data_type); + + // Compute new minmax values. + double min = (old_quantized_min - array->quantization_params->zero_point) * + array->quantization_params->scale; + double max = + (old_quantized_max + 1 - array->quantization_params->zero_point) * + array->quantization_params->scale; + max = max - 1.0 / (new_quantized_max + 1); + + auto& array_minmax = array->GetOrCreateMinMax(); + transformation->AddMessageF( + "Rescaling min/max from %g,%g (%s) to %g,%g (%s)", array_minmax.min, + array_minmax.max, ArrayDataTypeName(array->data_type), min, max, + ArrayDataTypeName(new_data_type)); + + array_minmax.min = min; + array_minmax.max = max; + GetQuantizationParamsFromMinMax( + array_minmax, array->quantization_params.get()); + + // Directly change the type as the array was already quantized. + array->data_type = new_data_type; + } else { + // Array has not yet been quantized so we can just set the final data type + // and assign the new min/max value (if provided). + CHECK(!array->quantization_params); + + if (!array->minmax && new_minmax) { + transformation->AddMessageF("Forcing new minmax to %g,%g (%s)", + new_minmax->min, new_minmax->max, + ArrayDataTypeName(new_data_type)); + auto& array_minmax = array->GetOrCreateMinMax(); + array_minmax.min = new_minmax->min; + array_minmax.max = new_minmax->max; + } + } +} + +// Returns true if the op blocks our backward recursive data type propagation. 
+bool DoesOpBlockBackwardPropagation(const Operator& op) { + switch (op.type) { + case OperatorType::kConcatenation: + case OperatorType::kTensorFlowConcat: + case OperatorType::kTensorFlowConcatV2: + // Concat shouldn't block propagation, but we do expect that all inputs + // have the same range. + return false; + case OperatorType::kDequantize: + // Dequantize ops are inserted between the value we care about and the + // FakeQuant so make sure we move across them. + case OperatorType::kGather: + // Gathers need their parameters changed to the appropriate data type. + case OperatorType::kTensorFlowReshape: + case OperatorType::kTranspose: + // Reshapes and transposes don't change values. + return false; + default: + return true; + } +} + +// Returns true if the input of an op blocks our backward recursive data type +// propagation. +bool DoesOpInputBlockBackwardPropagation(const Operator& op, int input_index) { + switch (op.type) { + case OperatorType::kGather: + // Ignore gather indices. + return input_index != 0; + break; + case OperatorType::kTensorFlowReshape: + case OperatorType::kTranspose: + // Ignore reshape/transpose shapes/dimensions. + return input_index != 0; + default: + return false; + } +} + +// Propagates the data type up into the input arrays if they are model inputs +// that may need their type changed. May act recursively if the inputs are +// produced by ops that we can move over (such as Dequantize). +bool RecursivelyBackwardPropagateDataType(GraphTransformation* transformation, + Model* model, Operator* op, + ArrayDataType new_data_type, + const MinMax& new_minmax) { + bool did_change = false; + for (int input_index = 0; input_index < op->inputs.size(); ++input_index) { + const auto& input = op->inputs[input_index]; + auto& input_array = model->GetArray(input); + if (input_array.final_data_type == new_data_type) { + // Final data type is already set - skip.
+ continue; + } + + // Prevent moving into constant param args that we don't want to modify. + if (DoesOpInputBlockBackwardPropagation(*op, input_index)) { + continue; + } + + if (input_array.final_data_type != new_data_type) { + transformation->AddMessageF( + "Adjusting input final data type of array %s from %s to %s", input, + ArrayDataTypeName(input_array.final_data_type), + ArrayDataTypeName(new_data_type)); + did_change = true; + ChangeArrayDataType(transformation, &input_array, new_data_type, + &new_minmax); + + // Walk up into all ops producing the inputs to this op. + for (auto& producing_op : model->operators) { + if (!DoesOpBlockBackwardPropagation(*producing_op)) { + for (const auto& output : producing_op->outputs) { + if (input == output) { + did_change |= RecursivelyBackwardPropagateDataType( + transformation, model, producing_op.get(), new_data_type, + new_minmax); + } + } + } + } + } + } + return did_change; +} + +// Returns true if the op blocks our forward recursive data type propagation. +bool DoesOpBlockForwardPropagation(const Operator& op) { + switch (op.type) { + case OperatorType::kFakeQuant: + // Always stop at another FakeQuant, as it will likely have different + // parameters. + return true; + default: + return false; + } +} + +// Recurses down the graph setting the data type of all arrays until an operator +// that blocks propagation (like another FakeQuant) or a final_data_type is +// already specified. +bool RecursivelyForwardPropagateDataType(GraphTransformation* transformation, + Model* model, Operator* op, + ArrayDataType new_data_type) { + bool did_change = false; + for (const auto& output : op->outputs) { + auto& output_array = model->GetArray(output); + if (output_array.final_data_type == new_data_type) { + // Final data type is already set - skip.
+ continue; + } + + if (output_array.final_data_type == ArrayDataType::kNone || + output_array.final_data_type != new_data_type) { + transformation->AddMessageF( + "Adjusting output final data type of array %s from %s to %s", output, + ArrayDataTypeName(output_array.final_data_type), + ArrayDataTypeName(new_data_type)); + did_change = true; + ChangeArrayDataType(transformation, &output_array, new_data_type, + nullptr); + + // Walk down into all ops consuming the output of this op. + for (auto& consuming_op : model->operators) { + if (!DoesOpBlockForwardPropagation(*consuming_op)) { + for (const auto& input : consuming_op->inputs) { + if (input == output) { + did_change |= RecursivelyForwardPropagateDataType( + transformation, model, consuming_op.get(), new_data_type); + } + } + } + } + } + } + return did_change; +} + +} // namespace + +// Propagates the num_bits on a FakeQuant operator into the final data types +// of inputs and outputs. For example, if FakeQuant.num_bits==16 then we know +// the output must be int16 and assume all inputs up until the preceding op are +// also 16. +// +// This can be thought of as a bidirectional flood-fill of the num_bits implied +// final_data_type that terminates at other FakeQuant ops (and a few others as +// determined by DoesOpBlockBackwardPropagation/DoesOpBlockForwardPropagation). +// Once all FakeQuant ops have been visited the arrays should all have +// appropriate final_data_types if the source graph was annotated with the +// proper FakeQuant ops.
+// +// Annotating a graph requires following a few hard rules: +// - every input MUST have a FakeQuant immediately following it +// - every output MUST have a FakeQuant immediately preceding it +// - important arithmetic ops (such as FullyConnected) SHOULD have a FakeQuant +// immediately following it +// - all trained weights (RHS of FullyConnected ops, params on Gather ops, etc) +// MUST have FakeQuants between them and the consuming op +// Additional FakeQuants may be used if desired, especially in areas that may +// suffer from large precision changes - such as between a Softmax and a +// FullyConnected. Only by validating accuracy differences between float +// inference with the FakeQuant ops simulating quantization and the actually +// quantized graph can you be sure the appropriate FakeQuant ops are present. +// +// You can tell if you're missing some FakeQuants by looking for warnings from +// quantize.cc about minmax ranges being determined by the contents of constant +// arrays. This will almost never produce functional models during inference. +// +// As this op may change the data types and ranges of input and output arrays +// downstream tools must also be sure to parse the output model flags to get the +// post-Transform values that may have changed due to this transformation. +// +// This isn't a GraphTransformation in the traditional respect as it affects ops +// outside of the one under transformation. This is primarily so that we can +// utilize the graph traversal and repeated pass system underlying the +// transformation system to exhaustively find all FakeQuant ops. It also gets us +// nice logging and integration with the graphviz video dumping mode. +// In general you should not copy this style of transformation and stick to +// local-only changes as seen in the other transformations. 
+bool PropagateFakeQuantNumBits::Run(Model* model, std::size_t op_index) { + auto it = model->operators.begin() + op_index; + auto* op = it->get(); + if (op->type != OperatorType::kFakeQuant) { + return false; + } + auto* fakequant_op = static_cast(op); + + ArrayDataType quantized_data_type = ArrayDataType::kNone; + if (!InferQuantizedDataTypeFromFakeQuant(*fakequant_op, + &quantized_data_type)) { + AddMessageF("FakeQuant op %s num_bits=%d is out of range, ignoring", + LogName(*op), fakequant_op->num_bits); + return false; + } + const auto& final_minmax = *fakequant_op->minmax; + + AddMessageF( + "Beginning propagation of fake quant %s num_bits=%d min=%g max=%g to %s", + LogName(*op), fakequant_op->num_bits, final_minmax.min, final_minmax.max, + ArrayDataTypeName(quantized_data_type)); + + bool did_change = false; + + // Propagate the FakeQuant information backward up the graph. + // This will possibly adjust input arrays or constant types (like Gather). + did_change |= RecursivelyBackwardPropagateDataType( + this, model, op, quantized_data_type, final_minmax); + + // Propagate the FakeQuant information forward down the graph. + // This will possibly adjust output arrays. 
+ did_change |= + RecursivelyForwardPropagateDataType(this, model, op, quantized_data_type); + + return did_change; +} + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc index 676736cfc523c03c9f4d99c404eb2b5209209945..ba244cf5ef56301e02e49635cc7d085ddfe931a4 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc @@ -38,6 +38,16 @@ void ComputeConvSizes(const Shape& input_shape, int output_depth, int kwidth, const int input_height = input_shape.dims(1); const int batch = input_shape.dims(0); + CHECK_GE(input_width, 1); + CHECK_GE(input_height, 1); + CHECK_GE(batch, 1); + CHECK_GE(kwidth, 1); + CHECK_GE(kheight, 1); + CHECK_GE(stride_width, 1); + CHECK_GE(stride_height, 1); + CHECK_GE(dilation_width_factor, 1); + CHECK_GE(dilation_height_factor, 1); + int dilated_kwidth = dilation_width_factor * (kwidth - 1) + 1; int dilated_kheight = dilation_height_factor * (kheight - 1) + 1; @@ -392,8 +402,7 @@ void ProcessSpaceToDepthOperator(Model* model, SpaceToDepthOperator* op) { depth * block_size * block_size})); } -void ProcessFillOperator(Model* model, FillOperator* op) { - CHECK_EQ(op->inputs.size(), 2); +void ProcessOpWithShapeInput(Model* model, Operator* op) { CHECK_EQ(op->outputs.size(), 1); auto& output_array = model->GetArray(op->outputs[0]); if (output_array.has_shape()) { @@ -1051,17 +1060,15 @@ void ProcessBatchToSpaceNDOperator(Model* model, BatchToSpaceNDOperator* op) { } QCHECK(crops_array.data_type == ArrayDataType::kInt32); const auto& crops_data = crops_array.GetBuffer().data; - // We don't support crops now. 
- QCHECK_EQ(crops_data[0], 0); - QCHECK_EQ(crops_data[1], 0); - QCHECK_EQ(crops_data[2], 0); - QCHECK_EQ(crops_data[3], 0); - + const int crops_top = crops_data[0]; + const int crops_bottom = crops_data[1]; + const int crops_left = crops_data[2]; + const int crops_right = crops_data[3]; + const int output_height = + input_height * block_height - crops_top - crops_bottom; + const int output_width = input_width * block_width - crops_left - crops_right; QCHECK_EQ(input_shape.dims(0) % (block_height * block_width), 0); - int output_height = input_height * block_height; - int output_width = input_width * block_width; - model->GetArray(op->outputs[0]) .copy_shape(Shape({input_shape.dims(0) / (block_height * block_width), output_height, output_width, input_shape.dims(3)})); @@ -1246,6 +1253,83 @@ void ProcessStackOperator(Model* model, StackOperator* op) { output_array.copy_shape(*stacked_shape); } +// These StridedSlice utility functions are essentially a COPY of those in +// reference_ops.h. See comments there. + +// Use until std::clamp() is available from C++17. +int Clamp(const int v, const int lo, const int hi) { + if (hi < v) return hi; + if (v < lo) return lo; + return v; +} + +int StartForAxis(StridedSliceOperator const& op, Shape const& input_shape, + int axis) { + // Begin with the specified index + int start = op.start_indices[axis]; + + // begin_mask override + if (op.begin_mask & 1 << axis) { + if (op.strides[axis] > 0) { + // Forward iteration - use the first element. These values will get + // clamped below (Note: We could have set them to 0 and axis_size-1, but + // use lowest() and max() to maintain symmetry with StopForAxis()) + start = std::numeric_limits::lowest(); + } else { + // Backward iteration - use the last element. 
+ start = std::numeric_limits::max(); + } + } + + // Handle negative indices + int axis_size = input_shape.dims(axis); + if (start < 0) { + start += axis_size; + } + + // Clamping + start = Clamp(start, 0, axis_size - 1); + + return start; +} + +int StopForAxis(StridedSliceOperator const& op, Shape const& input_shape, + int axis) { + // Begin with the specified index + int stop = op.stop_indices[axis]; + + // end_mask override + if (op.end_mask & (1 << axis)) { + if (op.strides[axis] > 0) { + // Forward iteration - use the last element. These values will get + // clamped below + stop = std::numeric_limits::max(); + } else { + // Backward iteration - use the first element. + stop = std::numeric_limits::lowest(); + } + } + + // Handle negative indices + int axis_size = input_shape.dims(axis); + if (stop < 0) { + stop += axis_size; + } + + // Clamping + // Because the end index points one past the last element, we need slightly + // different clamping ranges depending on the direction. + if (op.strides[axis] > 0) { + // Forward iteration + stop = Clamp(stop, 0, axis_size); + } else { + // Backward iteration + stop = Clamp(stop, -1, axis_size - 1); + } + + return stop; +} + void ProcessStridedSliceOperator(Model* model, StridedSliceOperator* op) { CHECK_GE(op->inputs.size(), 1); CHECK_EQ(op->outputs.size(), 1); @@ -1283,43 +1367,46 @@ void ProcessStridedSliceOperator(Model* model, StridedSliceOperator* op) { return; } - int dim_count = input_array.shape().dimensions_count(); - CHECK(op->start_indices.size() == dim_count) - << ": Incorrect number of start indices supplied to StridedSlice op with " - "output \"" - << op->outputs[0] << "\". Op requires " << dim_count << " start indices"; - CHECK(op->stop_indices.size() == dim_count) - << ": Incorrect number of stop indices supplied to StridedSlice op with " - "output \"" - << op->outputs[0] << "\". 
Op requires " << dim_count << " stop indices"; - CHECK(op->strides.size() == dim_count) - << ": Incorrect number of strides supplied to StridedSlice op with " - " output \"" - << op->outputs[0] << "\". Op requires " << dim_count << " strides"; + int num_input_axes = input_array.shape().dimensions_count(); + CHECK_LE(op->start_indices.size(), num_input_axes) + << "StridedSlice op with output \"" << op->outputs[0] + << "\", requires no more than " << num_input_axes << " start indices"; + CHECK_LE(op->stop_indices.size(), num_input_axes) + << "StridedSlice op with output \"" << op->outputs[0] + << "\", requires no more than " << num_input_axes << " stop indices"; + CHECK_LE(op->strides.size(), num_input_axes) + << "StridedSlice op with output \"" << op->outputs[0] + << "\", requires no more than " << num_input_axes << " strides"; + for (int i = 0; i < op->strides.size(); i++) { + CHECK_NE(op->strides[i], 0) << "Strides must be non-zero. Axis " << i + << " has stride=" << op->strides[i] << "."; + } + + // The TensorFlow documentation is not explicit on how it handles fewer + // supplied indices than dimensions, but they are accepted. We emulate TF's + // behavior by fully iterating over each "forgotten" dimension. + op->PadIndices(num_input_axes); // Create output shape std::vector* dims = output_array.mutable_shape()->mutable_dims(); // Compute output shape - for (int i = 0; i < dim_count; ++i) { - const int mask = 1 << i; - int start = (op->begin_mask & mask) ? 0 : op->start_indices[i]; - if (start < 0) { - // handle negative indices - start += input_array.shape().dims(i); - } - int stop = (op->end_mask & mask) ? input_array.shape().dims(i) - : op->stop_indices[i]; - if (stop < 0) { - // handle negative indices - stop += input_array.shape().dims(i); - } - - int dim_size = ceil((stop - start) / static_cast(op->strides[i])); - dim_size = dim_size < 0 ? 
0 : dim_size; - if (op->shrink_axis_mask & mask) { - CHECK_EQ(dim_size, 1) << "Output size for an axis must compute to 1 when " - "shrinking that axis"; + for (int axis = 0; axis < num_input_axes; ++axis) { + int start_index = StartForAxis(*op, input_array.shape(), axis); + int stop_index = StopForAxis(*op, input_array.shape(), axis); + int dim_size = + ceil(static_cast(stop_index - start_index) / op->strides[axis]); + + CHECK_GT(dim_size, 0) + << "Output size for an axis must be greater than 0. Axis " << axis + << " computes to size " << dim_size + << " for StridedSlice op with output \"" << op->outputs[0] << "\"."; + if (op->shrink_axis_mask & (1 << axis)) { + CHECK_EQ(dim_size, 1) + << "Output size for an axis must compute to 1 when shrinking an " + "axis. Axis " + << axis << " computes to size " << dim_size + << " for StridedSlice op with output \"" << op->outputs[0] << "\"."; } else { dims->push_back(dim_size); } @@ -1429,10 +1516,7 @@ void ProcessArgMaxOperator(Model* model, ArgMaxOperator* op) { return; } - // The current ArgMax implementation only supports 4-dimensional inputs with - // the last dimension as the axis to perform ArgMax for. 
const std::vector& input_dims = input_array.shape().dims(); - CHECK_EQ(input_dims.size(), 4); std::vector output_dims; output_dims.reserve(input_dims.size() - 1); @@ -1470,6 +1554,7 @@ bool PropagateFixedSizes::Run(Model* model, std::size_t op_index) { case OperatorType::kPRelu: case OperatorType::kSoftmax: case OperatorType::kLogSoftmax: + case OperatorType::kLog: case OperatorType::kLogistic: case OperatorType::kTanh: case OperatorType::kLocalResponseNormalization: @@ -1529,7 +1614,8 @@ bool PropagateFixedSizes::Run(Model* model, std::size_t op_index) { static_cast(op)); break; case OperatorType::kFill: - ProcessFillOperator(model, static_cast(op)); + CHECK_EQ(op->inputs.size(), 2); + ProcessOpWithShapeInput(model, op); break; case OperatorType::kFullyConnected: ProcessFullyConnectedOperator(model, @@ -1659,6 +1745,10 @@ bool PropagateFixedSizes::Run(Model* model, std::size_t op_index) { // transforms that remove them, so we avoid propagating shapes through // them and let things settle once they've been removed. break; + case OperatorType::kRandomUniform: + CHECK_EQ(op->inputs.size(), 1); + ProcessOpWithShapeInput(model, op); + break; default: // Unimplemented, another graph transformation should drop it. LOG(FATAL) << "Unhandled operator type " << OperatorTypeName(op->type); diff --git a/tensorflow/contrib/lite/toco/graph_transformations/quantization_util.cc b/tensorflow/contrib/lite/toco/graph_transformations/quantization_util.cc new file mode 100644 index 0000000000000000000000000000000000000000..d74cad9a626b3a472e2740d6bdaaaf7aab5bd484 --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/quantization_util.cc @@ -0,0 +1,261 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include + +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/graph_transformations/quantization_util.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/core/platform/logging.h" + +namespace toco { + +bool InferQuantizedDataTypeFromFakeQuant( + const FakeQuantOperator& op, ArrayDataType* out_quantized_data_type) { + if (op.num_bits <= 8) { + *out_quantized_data_type = ArrayDataType::kUint8; + return true; + } else if (op.num_bits <= 16) { + *out_quantized_data_type = ArrayDataType::kInt16; + return true; + } else { + *out_quantized_data_type = ArrayDataType::kNone; + return false; + } +} + +bool GetQuantizedDataTypeNumericalRange(ArrayDataType data_type, + double* out_min_value, + double* out_max_value) { + switch (data_type) { + case ArrayDataType::kUint8: + *out_min_value = 0; + *out_max_value = 255; + return true; + case ArrayDataType::kInt16: + *out_min_value = -32768; + *out_max_value = 32767; + return true; + default: + return false; + } +} + +ArrayDataType GetQuantizedDataType(const Array& array, + ArrayDataType default_type) { + switch (array.final_data_type) { + case ArrayDataType::kInt8: + case ArrayDataType::kUint8: + case ArrayDataType::kInt16: + case ArrayDataType::kUint16: + case ArrayDataType::kInt32: + case ArrayDataType::kUint32: + case ArrayDataType::kInt64: + case ArrayDataType::kUint64: + return array.final_data_type; 
+ case ArrayDataType::kFloat: + case ArrayDataType::kNone: + return default_type; + default: + LOG(FATAL) << "Unhandled final quantization type " + << static_cast(array.final_data_type); + } +} + +void GetQuantizationParams(ArrayDataType data_type, const MinMax& minmax, + QuantizationParams* quantization_params) { + switch (data_type) { + case ArrayDataType::kInt8: + GetQuantizationParamsFromMinMax( + minmax, quantization_params); + break; + case ArrayDataType::kUint8: + GetQuantizationParamsFromMinMax( + minmax, quantization_params); + break; + case ArrayDataType::kInt16: + GetQuantizationParamsFromMinMax( + minmax, quantization_params); + break; + case ArrayDataType::kUint16: + GetQuantizationParamsFromMinMax( + minmax, quantization_params); + break; + case ArrayDataType::kInt32: + GetQuantizationParamsFromMinMax( + minmax, quantization_params); + break; + case ArrayDataType::kUint32: + GetQuantizationParamsFromMinMax( + minmax, quantization_params); + break; + case ArrayDataType::kInt64: + GetQuantizationParamsFromMinMax( + minmax, quantization_params); + break; + case ArrayDataType::kUint64: + GetQuantizationParamsFromMinMax( + minmax, quantization_params); + break; + case ArrayDataType::kFloat: + case ArrayDataType::kNone: + default: + LOG(FATAL) << "Unhandled final quantization type " + << static_cast(data_type); + } +} + +namespace { + +template +std::unique_ptr QuantizeBuffer( + const GenericBuffer& buffer, + const QuantizationParams& quantization_params) { + const auto inverse_scale = 1. / quantization_params.scale; + CHECK(buffer.type == ArrayDataType::kFloat); + const auto& float_buffer = + static_cast&>(buffer); + auto* quantized_buffer = new Buffer; + quantized_buffer->data.resize(float_buffer.data.size()); + for (std::size_t i = 0; i < float_buffer.data.size(); i++) { + const float src_val = float_buffer.data[i]; + double scaled_val; // Astonishingly, using 'float' degrades accuracy just + // enough to make a few tests fail! 
+ if (quantization_params.scale == 0) { + CHECK_EQ(src_val, 0) << "The quantization scale for this array is 0, " + << "so all its values should be 0."; + scaled_val = quantization_params.zero_point; + } else { + scaled_val = quantization_params.zero_point + inverse_scale * src_val; + } + quantized_buffer->data[i] = + tflite::SafeCast>(std::round(scaled_val)); + } + return std::unique_ptr(quantized_buffer); +} + +template +void QuantizeArray(GraphTransformation* transformation, Model* model, + const string& name, + const QuantizationParams& quantization_params) { + auto& array = model->GetArray(name); + CHECK(array.data_type == ArrayDataType::kFloat); + CHECK(!array.quantization_params); + array.GetOrCreateQuantizationParams() = quantization_params; + if (array.buffer) { + array.buffer = QuantizeBuffer(*array.buffer, quantization_params); + } + array.data_type = A; + array.final_data_type = A; + transformation->AddMessageF( + "Quantized array %s to %s zero_point=%g, scale=%g", name, + ArrayDataTypeName(array.data_type), quantization_params.zero_point, + quantization_params.scale); +} + +} // namespace + +void QuantizeArray(GraphTransformation* transformation, Model* model, + const string& name, ArrayDataType quantized_data_type, + const QuantizationParams& quantization_params) { + ArrayDataType adjusted_data_type = quantized_data_type; + auto& array = model->GetArray(name); + if (array.final_data_type == ArrayDataType::kInt16) { + adjusted_data_type = array.final_data_type; + } + + switch (adjusted_data_type) { + case ArrayDataType::kUint8: + return QuantizeArray(transformation, model, name, + quantization_params); + case ArrayDataType::kInt16: + return QuantizeArray(transformation, model, name, + quantization_params); + case ArrayDataType::kInt32: + return QuantizeArray(transformation, model, name, + quantization_params); + default: + LOG(FATAL) << "Unhandled case."; + } +} + +bool IsArrayQuantizedRangeSubset(GraphTransformation* transformation, + const Array& 
array, double clamp_min, + double clamp_max) { + ArrayDataType quantized_data_type = + GetQuantizedDataType(array, array.data_type); + if (quantized_data_type == ArrayDataType::kNone || + quantized_data_type == ArrayDataType::kFloat) { + // The array is not (or never will be) quantized. + return false; + } + + QuantizationParams quantization_params; + if (!array.quantization_params) { + if (!array.minmax) { + transformation->AddMessageF("No quantization params and no minmax"); + return false; + } else { + // Work around cases where we are asking for this prior to the Quantize + // transformation having added the quantization_params. + GetQuantizationParams(quantized_data_type, *array.minmax, + &quantization_params); + transformation->AddMessageF( + "No quantization params - infering from data type %s with minmax " + "%g,%g as zero_point=%g, scale=%g", + ArrayDataTypeName(quantized_data_type), array.minmax->min, + array.minmax->max, quantization_params.zero_point, + quantization_params.scale); + } + } else { + quantization_params = array.GetQuantizationParams(); + } + + double quantized_min, quantized_max; + CHECK(GetQuantizedDataTypeNumericalRange(quantized_data_type, &quantized_min, + &quantized_max)) + << "Type is not quantized"; + + bool has_nontrivial_min_bound = false; + bool has_nontrivial_max_bound = false; + + double lowest_representable_output = + (quantized_min - quantization_params.zero_point) * + quantization_params.scale; + if (lowest_representable_output < clamp_min) { + has_nontrivial_min_bound = true; + transformation->AddMessageF( + "Quantized activation function is not trivial: " + "the lowest representable output value %g" + " less than the clamp min bound %g.", + lowest_representable_output, clamp_min); + } + + double highest_representable_output = + (quantized_max - quantization_params.zero_point) * + quantization_params.scale; + if (highest_representable_output > clamp_max) { + has_nontrivial_max_bound = true; + transformation->AddMessageF( + 
"Quantized activation function is not trivial: " + "the highest representable output value %g" + " is greater than the clamp max bound %g.", + highest_representable_output, clamp_max); + } + + return !has_nontrivial_min_bound && !has_nontrivial_max_bound; +} + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/quantization_util.h b/tensorflow/contrib/lite/toco/graph_transformations/quantization_util.h new file mode 100644 index 0000000000000000000000000000000000000000..79a2ce7e50887b4608b278471da0e5e63b5673e3 --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/quantization_util.h @@ -0,0 +1,73 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_CONTRIB_LITE_TOCO_GRAPH_TRANSFORMATIONS_QUANTIZATION_UTIL_H_ +#define TENSORFLOW_CONTRIB_LITE_TOCO_GRAPH_TRANSFORMATIONS_QUANTIZATION_UTIL_H_ + +#include "tensorflow/contrib/lite/kernels/internal/quantization_util.h" +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/model.h" + +namespace toco { + +// Gets the target quantized data type of an array based on the fake quant op. +// For example, if the num_bits is 8 the data type will be kUint8. 
+bool InferQuantizedDataTypeFromFakeQuant( + const FakeQuantOperator& op, ArrayDataType* out_quantized_data_type); + +// Gets the min/max numerical range for the given quantized data type. +// For example, kUint8 will return [0,255]. +// Returns true if the ranges were set and false if the type is not quantized. +bool GetQuantizedDataTypeNumericalRange(ArrayDataType data_type, + double* out_min_value, + double* out_max_value); + +// Returns the quantized data type of an array, falling back to the provided +// default data type. +ArrayDataType GetQuantizedDataType(const Array& array, + ArrayDataType default_type); + +// Returns the quantization params for the array with the given data type and +// minmax. +void GetQuantizationParams(ArrayDataType data_type, const MinMax& minmax, + QuantizationParams* quantization_params); + +// Returns the quantization params for the data type and minmax values. +template +void GetQuantizationParamsFromMinMax(const MinMax& minmax, + QuantizationParams* quantization_params) { + using Integer = DataType; + const double rmin = minmax.min; + const double rmax = minmax.max; + *quantization_params = + ::tflite::ChooseQuantizationParams(rmin, rmax); +} + +// Quantizes an array by setting its data type and (if constant) quantizing +// all values in the array. +void QuantizeArray(GraphTransformation* transformation, Model* model, + const string& name, ArrayDataType quantized_data_type, + const QuantizationParams& quantization_params); + +// Returns true if the given array, when quantized, contains only values between +// the provided clamp min/max. +// Either clamp_min or clamp_max may be +/-infinity to indicate that the value +// is unbounded on that side. 
+bool IsArrayQuantizedRangeSubset(GraphTransformation* transformation, + const Array& array, double clamp_min, + double clamp_max); + +} // namespace toco + +#endif // TENSORFLOW_CONTRIB_LITE_TOCO_GRAPH_TRANSFORMATIONS_QUANTIZATION_UTIL_H_ diff --git a/tensorflow/contrib/lite/toco/graph_transformations/quantize.cc b/tensorflow/contrib/lite/toco/graph_transformations/quantize.cc index 9fcc95e1feeb39d70c1c12eef5555187def69089..fa46e6bc3805d3f3a7c9223ff4b111a4ed8e8559 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/quantize.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/quantize.cc @@ -21,6 +21,7 @@ limitations under the License. #include #include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/graph_transformations/quantization_util.h" #include "tensorflow/contrib/lite/toco/model.h" #include "tensorflow/contrib/lite/toco/model_flags.pb.h" #include "tensorflow/contrib/lite/toco/tooling_util.h" @@ -44,6 +45,7 @@ bool SupportsQuantization(const Operator& op) { type == OperatorType::kTensorFlowMinimum || type == OperatorType::kTensorFlowMaximum || type == OperatorType::kLogistic || type == OperatorType::kSoftmax || + type == OperatorType::kLogSoftmax || type == OperatorType::kTensorFlowSplit || type == OperatorType::kSub || type == OperatorType::kSqueeze || type == OperatorType::kPad || type == OperatorType::kTensorFlowReshape || @@ -55,72 +57,6 @@ bool SupportsQuantization(const Operator& op) { type == OperatorType::kTranspose || type == OperatorType::kMean; } -template -std::unique_ptr QuantizeBuffer( - const GenericBuffer& buffer, - const QuantizationParams& quantization_params) { - const auto inverse_scale = 1. 
/ quantization_params.scale; - CHECK(buffer.type == ArrayDataType::kFloat); - const auto& float_buffer = - static_cast&>(buffer); - auto* quantized_buffer = new Buffer; - quantized_buffer->data.resize(float_buffer.data.size()); - for (std::size_t i = 0; i < float_buffer.data.size(); i++) { - const float src_val = float_buffer.data[i]; - double scaled_val; // Astonishingly, using 'float' degrades accuracy just - // enough to make a few tests fail! - if (quantization_params.scale == 0) { - CHECK_EQ(src_val, 0) << "The quantization scale for this array is 0, " - << "so all its values should be 0."; - scaled_val = quantization_params.zero_point; - } else { - scaled_val = quantization_params.zero_point + inverse_scale * src_val; - } - quantized_buffer->data[i] = - tflite::SafeCast>(std::round(scaled_val)); - } - return std::unique_ptr(quantized_buffer); -} - -template -void QuantizeArray(GraphTransformation* transformation, Model* model, - const string& name, - const QuantizationParams& quantization_params) { - auto& array = model->GetArray(name); - CHECK(array.data_type == ArrayDataType::kFloat); - CHECK(!array.quantization_params); - array.GetOrCreateQuantizationParams() = quantization_params; - if (array.buffer) { - array.buffer = QuantizeBuffer(*array.buffer, quantization_params); - } - array.data_type = A; - transformation->AddMessageF("Quantized array %s", name); -} - -void QuantizeArray(GraphTransformation* transformation, Model* model, - const string& name, ArrayDataType quantized_data_type, - const QuantizationParams& quantization_params) { - ArrayDataType adjusted_data_type = quantized_data_type; - auto& array = model->GetArray(name); - if (array.final_data_type == ArrayDataType::kInt16) { - adjusted_data_type = array.final_data_type; - } - - switch (adjusted_data_type) { - case ArrayDataType::kUint8: - return QuantizeArray(transformation, model, name, - quantization_params); - case ArrayDataType::kInt16: - return QuantizeArray(transformation, model, name, - 
quantization_params); - case ArrayDataType::kInt32: - return QuantizeArray(transformation, model, name, - quantization_params); - default: - LOG(FATAL) << "Unhandled case."; - } -} - const MinMax& GetOrComputeMinMax(Model* model, const string& array_name) { auto& array = model->GetArray(array_name); // Normally we should have a MinMax recorded on this Array, @@ -204,70 +140,6 @@ QuantizationPoints GetQuantizationPoints(ArrayDataType data_type) { } } -ArrayDataType GetQuantizedDataType(const Array& array, - ArrayDataType default_type) { - switch (array.final_data_type) { - case ArrayDataType::kInt8: - case ArrayDataType::kUint8: - case ArrayDataType::kInt16: - case ArrayDataType::kUint16: - case ArrayDataType::kInt32: - case ArrayDataType::kUint32: - case ArrayDataType::kInt64: - case ArrayDataType::kUint64: - return array.final_data_type; - case ArrayDataType::kFloat: - case ArrayDataType::kNone: - return default_type; - default: - LOG(FATAL) << "Unhandled final quantization type " - << static_cast(array.final_data_type); - } -} - -void GetQuantizationParams(ArrayDataType data_type, const MinMax& minmax, - QuantizationParams* quantization_params) { - switch (data_type) { - case ArrayDataType::kInt8: - GetQuantizationParamsFromMinMax( - minmax, quantization_params); - break; - case ArrayDataType::kUint8: - GetQuantizationParamsFromMinMax( - minmax, quantization_params); - break; - case ArrayDataType::kInt16: - GetQuantizationParamsFromMinMax( - minmax, quantization_params); - break; - case ArrayDataType::kUint16: - GetQuantizationParamsFromMinMax( - minmax, quantization_params); - break; - case ArrayDataType::kInt32: - GetQuantizationParamsFromMinMax( - minmax, quantization_params); - break; - case ArrayDataType::kUint32: - GetQuantizationParamsFromMinMax( - minmax, quantization_params); - break; - case ArrayDataType::kInt64: - GetQuantizationParamsFromMinMax( - minmax, quantization_params); - break; - case ArrayDataType::kUint64: - GetQuantizationParamsFromMinMax( 
- minmax, quantization_params); - break; - case ArrayDataType::kFloat: - case ArrayDataType::kNone: - default: - LOG(FATAL) << "Unhandled final quantization type " - << static_cast(data_type); - } -} - bool ChooseQuantizationForOperatorInput( GraphTransformation* transformation, Model* model, const Operator& op, std::size_t input_index, ArrayDataType* quantized_data_type, @@ -307,6 +179,8 @@ bool ChooseQuantizationForOperatorInput( const auto& input_weights = model->GetArray(op.inputs[weights_input_index]); if (!input_activations.quantization_params || !input_weights.quantization_params) { + transformation->AddMessageF( + "Input array %s is a bias vector but has no qparams", input); return false; } const auto input_activations_scale = @@ -335,12 +209,11 @@ bool ChooseQuantizationForOperatorInput( *quantized_data_type = GetQuantizedDataType(array, ArrayDataType::kUint8); GetQuantizationParams(*quantized_data_type, minmax, quantization_params); transformation->AddMessageF( - "For input array %s with min=%g" - ", max=%g" - ", chose to quantize as %s with zero_point=%d" - ", scale=%g", + "For input array %s with min=%g, max=%g, chose to quantize as %s (f=%s) " + "with zero_point=%d, scale=%g", input, minmax.min, minmax.max, ArrayDataTypeName(*quantized_data_type), - quantization_params->zero_point, quantization_params->scale); + ArrayDataTypeName(array.final_data_type), quantization_params->zero_point, + quantization_params->scale); return true; } @@ -394,6 +267,19 @@ bool ChooseHardcodedQuantizationForOperatorOutput( *quantization_params)); return true; } + if (op.type == OperatorType::kLogSoftmax) { + // LogSoftmax has range: [LogSoftmaxOperator::kOutputRangeMin, 0]. 
+ *quantized_data_type = GetQuantizedDataType(array, *quantized_data_type); + const QuantizationPoints qp = GetQuantizationPoints(*quantized_data_type); + quantization_params->zero_point = qp.max_value; + quantization_params->scale = + -LogSoftmaxOperator::kOutputRangeMin / (qp.max_value + 1); + // While not strictly necessary, it is easier to interpret output data and + // quantization if the scale is similar to others (such as power of 2). + CHECK(IsExactlyRepresentable(LogSoftmaxOperator::kOutputRangeMin / 2, + *quantized_data_type, *quantization_params)); + return true; + } if (op.type == OperatorType::kTanh) { // Tanh has the range: [-1, 1]. *quantized_data_type = GetQuantizedDataType(array, *quantized_data_type); @@ -416,6 +302,9 @@ bool ChooseQuantizationForOperatorOutput( const auto& output = op.outputs[output_index]; auto& array = model->GetArray(output); if (array.data_type != ArrayDataType::kFloat) { + transformation->AddMessageF("Array data type already set to %s, final=%s", + ArrayDataTypeName(array.data_type), + ArrayDataTypeName(array.final_data_type)); return false; } *quantized_data_type = model->GetArray(op.inputs[0]).data_type; @@ -431,7 +320,8 @@ bool ChooseQuantizationForOperatorOutput( (op.type == OperatorType::kSpaceToDepth) || (op.type == OperatorType::kTensorFlowReshape) || (op.type == OperatorType::kTensorFlowSplit) || - (op.type == OperatorType::kConcatenation)) { + (op.type == OperatorType::kConcatenation && + model->flags.change_concat_input_ranges())) { int data_input_index = 0; if (op.type == OperatorType::kTensorFlowSplit) { data_input_index = 1; @@ -472,6 +362,41 @@ bool ChooseQuantizationForOperatorOutput( return true; } + +// Fixes array minmax info to match the quantization parameters. +// This is required for when quantization parameters change for an array during +// quantization (such as ChooseQuantizationForOperatorOutput). 
+void FixMinMaxPostQuantization(GraphTransformation* transformation, + ArrayDataType quantized_data_type, + const QuantizationParams& quantization_params, + MinMax* minmax) { + double quantized_min, quantized_max; + if (!GetQuantizedDataTypeNumericalRange(quantized_data_type, &quantized_min, + &quantized_max)) { + // Not quantized - no update required. + return; + } + + // Compute new minmax values. + double min = (quantized_min - quantization_params.zero_point) * + quantization_params.scale; + double max = (quantized_max - quantization_params.zero_point) * + quantization_params.scale; + + // If we are close to the existing minmax values don't bother changing them. + // This prevents propagating small floating point precision errors. + constexpr double kMinMaxThreshold = 1e-5; + const double width = max - min; + if (std::abs(min - minmax->min) > kMinMaxThreshold * width || + std::abs(max - minmax->max) > kMinMaxThreshold * width) { + transformation->AddMessageF( + "Adjusting min/max from %g,%g to %g,%g to match quantization params", + minmax->min, minmax->max, min, max); + minmax->min = min; + minmax->max = max; + } +} + } // namespace bool Quantize::Run(Model* model, std::size_t op_index) { @@ -576,10 +501,33 @@ bool Quantize::Run(Model* model, std::size_t op_index) { // input instead. for (int i = 0; i < model->flags.output_arrays_size(); i++) { if (model->flags.output_arrays(i) == dequantize_op->outputs[0]) { - model->flags.set_output_arrays(i, dequantize_op->inputs[0]); + // TODO(b/78013785): never rename output arrays. + if (IsInputArray(*model, dequantize_op->inputs[0])) { + // The op input is an input array and the output is an output + // array and we can't have an array be both. Insert a copy + // op to ensure the two arrays stay separate. 
+ AddMessageF( + "Tried to rename output array %d while removing dequant " + "op %s but array is also an input; inserting copy %s " + "-> %s", + i, LogName(*dequantize_op), model->flags.output_arrays(i), + dequantize_op->inputs[0]); + InsertCopyOperator(model, dequantize_op->inputs[0], + dequantize_op->outputs[0]); + } else { + // Op output is strictly used as an output array, so we can + // just rename the array and directly bypass the op. + AddMessageF( + "Renaming output array %d after removing dequant op %s: " + "%s -> %s", + i, LogName(*dequantize_op), model->flags.output_arrays(i), + dequantize_op->inputs[0]); + model->flags.set_output_arrays(i, dequantize_op->inputs[0]); + model->EraseArray(dequantize_op->outputs[0]); + } + break; } } - model->EraseArray(dequantize_op->outputs[0]); model->operators.erase(dequantize_it); } changed = true; @@ -618,15 +566,25 @@ bool Quantize::Run(Model* model, std::size_t op_index) { &quantization_params)) { changed = true; const auto& output = op.outputs[output_index]; + auto& output_array = model->GetArray(output); + + // Fix up the min/max information on the output array to match the chosen + // quantization parameters. 
+ CHECK(output_array.minmax) + << "Output array named " << output << " lacks minmax"; + auto& output_minmax = output_array.GetMinMax(); + FixMinMaxPostQuantization(this, quantized_data_type, quantization_params, + &output_minmax); + QuantizeArray(this, model, output, quantized_data_type, quantization_params); + const auto& dequantized_output = AvailableArrayName(*model, output + "_dequantized"); - const auto& output_array = model->GetArray(output); - const auto& output_minmax = output_array.GetMinMax(); auto& dequantized_output_array = model->GetOrCreateArray(dequantized_output); dequantized_output_array.data_type = ArrayDataType::kFloat; + dequantized_output_array.final_data_type = output_array.data_type; auto& dequantized_output_minmax = dequantized_output_array.GetOrCreateMinMax(); dequantized_output_minmax.min = output_minmax.min; @@ -643,6 +601,12 @@ bool Quantize::Run(Model* model, std::size_t op_index) { dequantize_op->outputs = {dequantized_output}; for (int i = 0; i < model->flags.output_arrays_size(); i++) { if (model->flags.output_arrays(i) == output) { + // TODO(b/78013785): never rename output arrays. 
+ AddMessageF( + "Renaming output array %d after inserting dequant op %s: %s -> " + "%s", + i, LogName(*dequantize_op), model->flags.output_arrays(i), + dequantized_output); model->flags.set_output_arrays(i, dequantized_output); } } diff --git a/tensorflow/contrib/lite/toco/graph_transformations/read_fake_quant_min_max.cc b/tensorflow/contrib/lite/toco/graph_transformations/read_fake_quant_min_max.cc index 11f8d4b6eea836c5fe4efcbd5136e6183a59dc62..bdcca5b7caf61a62203debaa32c4d7a9b2eb43fa 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/read_fake_quant_min_max.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/read_fake_quant_min_max.cc @@ -72,6 +72,13 @@ bool ReadFakeQuantMinMax::Run(Model* model, std::size_t op_index) { minmax.min = min_array.GetBuffer().data[0]; minmax.max = max_array.GetBuffer().data[0]; // We always want [min, max] to contain 0. + if (minmax.min > 0 || minmax.max < 0) { + LOG(ERROR) << "For " << LogName(*fakequant_op) << " the MinMax range " + << "[" << minmax.min << ", " << minmax.max + << "] does not contain 0. " + << "Proceeding by tweaking it to contain 0, which will result " + "in poor accuracy."; + } minmax.min = std::min(minmax.min, 0.); minmax.max = std::max(minmax.max, 0.); diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_fake_quant.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_fake_quant.cc new file mode 100644 index 0000000000000000000000000000000000000000..2c8d04440f251f792d2a09155dd26fc01a732109 --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_fake_quant.cc @@ -0,0 +1,86 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_passthrough.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/core/platform/logging.h" + +namespace toco { + +namespace { + +bool IsFakeQuantTrivial(GraphTransformation* transformation, const Model& model, + const FakeQuantOperator& fakequant_op) { + CHECK(fakequant_op.type == OperatorType::kFakeQuant); + + if (!fakequant_op.minmax) { + // Require ReadFakeQuantMinMax to have run. + return false; + } + + // FakeQuants are trivial if they are taking input from another identical + // FakeQuant op. + auto* producing_op = GetOpWithOutput(model, fakequant_op.inputs[0]); + if (!producing_op || producing_op->type != OperatorType::kFakeQuant) { + return false; + } + const auto& producing_fakequant_op = + *static_cast(producing_op); + if (!producing_fakequant_op.minmax) { + // Require ReadFakeQuantMinMax to have run. 
+ return false; + } + + if (*fakequant_op.minmax == *producing_fakequant_op.minmax && + fakequant_op.num_bits == producing_fakequant_op.num_bits) { + transformation->AddMessageF( + "%s is trivial because it is preceded by an identical FakeQuant %s", + LogName(fakequant_op), LogName(producing_fakequant_op)); + return true; + } + + return false; +} + +} // namespace + +// Removes FakeQuant ops that are trivial (have no effect, are redundant, etc). +bool RemoveTrivialFakeQuant::Run(Model* model, std::size_t op_index) { + const auto op_it = model->operators.begin() + op_index; + auto* op = op_it->get(); + if (op->type != OperatorType::kFakeQuant) { + return false; + } + auto* fakequant_op = static_cast(op); + + if (!IsFakeQuantTrivial(this, *model, *fakequant_op)) { + AddMessageF("%s is not trivial", LogName(*fakequant_op)); + return false; + } + + AddMessageF("Removing trivial %s", LogName(*fakequant_op)); + + CHECK_EQ(fakequant_op->inputs.size(), 1); + return RemoveTrivialPassthroughOp(this, model, op_index); +} + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_passthrough.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_passthrough.cc index aa93ace03af300f9cbd3f9c6620a6a58b9329aa4..3e021b819fc82d66fb70596a62fd7cee4911d4e8 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_passthrough.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_passthrough.cc @@ -82,22 +82,13 @@ bool RemoveTrivialPassthroughOp(GraphTransformation* transformation, if (IsDiscardableArray(*model, output_name)) { transformation->AddMessageF( - "Removing %s, keeping its non-constant input array", - LogName(*passthru_op)); - for (const string& input : passthru_op->inputs) { - if (IsDiscardableArray(*model, input) && input != main_input_name && - CountOpsWithInput(*model, input) == 1) { - } - } + "Removing %s, keeping its non-constant input array %s and removing %s", + 
LogName(*passthru_op), main_input_name, output_name); RerouteEdges(output_name, main_input_name, model); } else if (IsDiscardableArray(*model, main_input_name)) { - transformation->AddMessageF("Removing %s, keeping its output array", - LogName(*passthru_op)); - for (const string& input : passthru_op->inputs) { - if (IsDiscardableArray(*model, input) && - (input == main_input_name || CountOpsWithInput(*model, input) == 1)) { - } - } + transformation->AddMessageF( + "Removing %s, keeping its output array %s and removing input %s", + LogName(*passthru_op), output_name, main_input_name); RerouteEdges(main_input_name, output_name, model); } else { transformation->AddMessageF( @@ -113,6 +104,16 @@ bool RemoveTrivialPassthroughOp(GraphTransformation* transformation, // Remove any array that is no longer used. for (const string& removal_candidate : removal_candidates) { bool is_referenced = false; + for (const auto& array : model->flags.input_arrays()) { + if (array.name() == removal_candidate) { + is_referenced = true; + } + } + for (const auto& array_name : model->flags.output_arrays()) { + if (array_name == removal_candidate) { + is_referenced = true; + } + } for (const auto& op : model->operators) { for (const string& input : op->inputs) { if (input == removal_candidate) { diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_quantized_activation_func.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_quantized_activation_func.cc index 9b65feaa6443cd32ac1bef961600ff225d52d4b2..752560e075a087bcc2b0a3cb19dad484fb582d42 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_quantized_activation_func.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_quantized_activation_func.cc @@ -18,6 +18,8 @@ limitations under the License. 
#include #include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/graph_transformations/quantization_util.h" +#include "tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_passthrough.h" #include "tensorflow/contrib/lite/toco/model.h" #include "tensorflow/contrib/lite/toco/runtime/types.h" #include "tensorflow/contrib/lite/toco/toco_types.h" @@ -26,27 +28,44 @@ limitations under the License. namespace toco { -bool RemoveTrivialQuantizedActivationFunc::Run(Model* model, - std::size_t op_index) { - const auto it = model->operators.begin() + op_index; - auto* op = it->get(); - if (op->fused_activation_function != FusedActivationFunctionType::kRelu && - op->fused_activation_function != FusedActivationFunctionType::kRelu1 && - op->fused_activation_function != FusedActivationFunctionType::kRelu6) { - return false; - } - const auto& output_array = model->GetArray(op->outputs[0]); - if (!output_array.quantization_params) { - return false; - } - if (output_array.data_type != ArrayDataType::kUint8) { - return false; +namespace { + +bool IsTrivialUnfusedActivationFunc(GraphTransformation* transformation, + const Model& model, OperatorType op_type, + const string& input_array_name) { + double clamp_min; + double clamp_max; + switch (op_type) { + case OperatorType::kRelu: + clamp_min = 0.0; + clamp_max = std::numeric_limits::infinity(); + break; + case OperatorType::kRelu1: + clamp_min = -1.0; + clamp_max = 1.0; + break; + case OperatorType::kRelu6: + clamp_min = 0.0; + clamp_max = 6.0; + break; + default: + return false; } - const auto& quantization_params = output_array.GetQuantizationParams(); + const auto& input_array = model.GetArray(input_array_name); + return IsArrayQuantizedRangeSubset(transformation, input_array, clamp_min, + clamp_max); +} + +bool IsTrivialFusedActivationFunc( + GraphTransformation* transformation, const Model& model, + FusedActivationFunctionType activation_function, + 
const string& output_array_name) { double clamp_min; double clamp_max; - switch (op->fused_activation_function) { + switch (activation_function) { + case FusedActivationFunctionType::kNone: + return false; case FusedActivationFunctionType::kRelu: clamp_min = 0.0; clamp_max = std::numeric_limits::infinity(); @@ -61,45 +80,46 @@ bool RemoveTrivialQuantizedActivationFunc::Run(Model* model, break; default: LOG(FATAL) << "Unsupported fused activation type: " - << static_cast(op->fused_activation_function); + << static_cast(activation_function); return false; } - bool has_nontrivial_min_bound = false; - bool has_nontrivial_max_bound = false; + const auto& output_array = model.GetArray(output_array_name); + return IsArrayQuantizedRangeSubset(transformation, output_array, clamp_min, + clamp_max); +} - double lowest_representable_output = - (0. - quantization_params.zero_point) * quantization_params.scale; - if (lowest_representable_output < clamp_min) { - has_nontrivial_min_bound = true; - AddMessageF( - "Quantized activation function is not trivial: " - "the lowest representable output value %g" - " less than the clamp min bound %g.", - lowest_representable_output, clamp_min); - } - double highest_representable_output = - (255. - quantization_params.zero_point) * quantization_params.scale; - if (highest_representable_output > clamp_max) { - has_nontrivial_max_bound = true; - AddMessageF( - "Quantized activation function is not trivial: " - "the highest representable output value %g" - " is greater than the clamp max bound %g.", - highest_representable_output, clamp_max); - } +} // namespace - if (has_nontrivial_min_bound || has_nontrivial_max_bound) { +// Attempts to remove both fused and unfused activation functions if the +// quantization params indicate that the representable values fall inside the +// activation range. 
+bool RemoveTrivialQuantizedActivationFunc::Run(Model* model, + std::size_t op_index) { + const auto it = model->operators.begin() + op_index; + auto* op = it->get(); + if (op->inputs.empty()) { return false; } - op->fused_activation_function = FusedActivationFunctionType::kNone; - AddMessageF( - "Removing trivial quantized activation function on %s" - " because the output quantization parameters imply at least as tight" - " a clamp anyway.", - LogName(*op)); - return true; + if (IsTrivialUnfusedActivationFunc(this, *model, op->type, op->inputs[0])) { + AddMessageF( + "Removing trivial unfused activation function %s because the input " + "minmax imply at least as tight a clamp anyway.", + LogName(*op)); + return RemoveTrivialPassthroughOp(this, model, op_index); + } + if (IsTrivialFusedActivationFunc(this, *model, op->fused_activation_function, + op->outputs[0])) { + op->fused_activation_function = FusedActivationFunctionType::kNone; + AddMessageF( + "Removing trivial quantized activation function on %s " + "because the output quantization parameters imply at least as tight " + "a clamp anyway.", + LogName(*op)); + return true; + } + return false; } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_quantized_min_max.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_quantized_min_max.cc new file mode 100644 index 0000000000000000000000000000000000000000..eaee1c662b7cedb2baec7be47e12e348c3e7b25c --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_quantized_min_max.cc @@ -0,0 +1,90 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/graph_transformations/quantization_util.h" +#include "tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_passthrough.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/runtime/types.h" +#include "tensorflow/contrib/lite/toco/toco_types.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/core/platform/logging.h" + +namespace toco { + +namespace { + +bool IsTrivialMinMax(GraphTransformation* transformation, const Model& model, + OperatorType op_type, const string& input_array_name, + const string& clamp_value_array_name) { + const auto& clamp_value_array = model.GetArray(clamp_value_array_name); + if (!IsConstantParameterArray(model, clamp_value_array_name)) { + transformation->AddMessageF("Clip value array %s is non-constant", + clamp_value_array_name); + return false; + } + const auto& clamp_value_buffer = + clamp_value_array.GetBuffer(); + CHECK_EQ(clamp_value_buffer.Length(), 1); + float clamp_value = clamp_value_buffer.data[0]; + + double clamp_min; + double clamp_max; + switch (op_type) { + case OperatorType::kTensorFlowMinimum: + clamp_min = -std::numeric_limits::infinity(); + clamp_max = clamp_value; + break; + case OperatorType::kTensorFlowMaximum: + clamp_min = clamp_value; + clamp_max = std::numeric_limits::infinity(); + break; + 
default: + CHECK(false); + return false; + } + + const auto& input_array = model.GetArray(input_array_name); + return IsArrayQuantizedRangeSubset(transformation, input_array, clamp_min, + clamp_max); +} + +} // namespace + +// Attempts to remove min/max functions if the quantization params indicate that +// the representable values fall inside the clip range. +bool RemoveTrivialQuantizedMinMax::Run(Model* model, std::size_t op_index) { + const auto it = model->operators.begin() + op_index; + auto* op = it->get(); + if ((op->type != OperatorType::kTensorFlowMinimum && + op->type != OperatorType::kTensorFlowMaximum) || + op->inputs.size() != 2) { + return false; + } + if (IsTrivialMinMax(this, *model, op->type, op->inputs[0], op->inputs[1])) { + AddMessageF( + "Removing trivial min/max %s because the quantization parameters imply " + "at least as tight a clamp anyway.", + LogName(*op)); + return RemoveTrivialPassthroughOp(this, model, op_index); + } + return false; +} + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_reshape.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_reshape.cc index 61477d59aea2f11c6347b84d8863763a86c43558..e28d8cf01eafee64e08ac2cc4b43ea7c227456c2 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_reshape.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_reshape.cc @@ -41,8 +41,8 @@ bool IsReshapeTrivial(const Model& model, const Operator& op, ShapesAgreeUpToExtending(input_array.shape(), output_array.shape())) { transformation->AddMessageF( "%s is trivial because its input and output shapes are equal up to " - "extending " - "by 1's, and we are told to aggressively discard such Reshape ops.", + "extending by 1's, and we are told to aggressively discard such " + "Reshape ops.", LogName(op)); return true; } @@ -80,6 +80,7 @@ bool RemoveTrivialReshape::Run(Model* model, std::size_t op_index) { } if 
(!IsReshapeTrivial(*model, *reshape_op, this)) { + AddMessageF("%s is not trivial", LogName(*reshape_op)); return false; } diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_unused_op.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_unused_op.cc index aa2c293382a98b476bee783ed8e177b19d35b858..8e6aaf544aa5310b4233d93e7bc8f484f6164b8a 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/remove_unused_op.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_unused_op.cc @@ -47,7 +47,8 @@ bool RemoveUnusedOp::Run(Model* model, std::size_t op_index) { bool found_output_as_rnn_state_array = false; for (const auto& rnn_state : model->flags.rnn_states()) { if (output == rnn_state.state_array()) { - CHECK(op->type == OperatorType::kFill); + CHECK(op->type == OperatorType::kFill || + op->type == OperatorType::kTensorFlowIdentity); found_output_as_rnn_state_array = true; break; } diff --git a/tensorflow/contrib/lite/toco/graph_transformations/reorder_activation_functions.cc b/tensorflow/contrib/lite/toco/graph_transformations/reorder_activation_functions.cc deleted file mode 100644 index 9852c86c21b9a0714bc728e60b5d9dfe61ff52d1..0000000000000000000000000000000000000000 --- a/tensorflow/contrib/lite/toco/graph_transformations/reorder_activation_functions.cc +++ /dev/null @@ -1,137 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#include -#include -#include -#include - -#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" -#include "tensorflow/contrib/lite/toco/model.h" -#include "tensorflow/contrib/lite/toco/runtime/types.h" -#include "tensorflow/contrib/lite/toco/tooling_util.h" -#include "tensorflow/core/platform/logging.h" - -namespace toco { - -bool ReorderActivationFunctions::Run(Model* model, std::size_t op_index) { - const auto ac_it = model->operators.begin() + op_index; - std::unique_ptr& ac_op = *ac_it; - DCHECK(ac_op); - - if (ac_op->type != OperatorType::kRelu6 && - ac_op->type != OperatorType::kRelu1 && - ac_op->type != OperatorType::kRelu) { - return false; - } - - auto exchange_it = FindOpWithOutput(*model, ac_op->inputs[0]); - if (exchange_it == model->operators.end()) return false; - // Find the op producing the array passed to this activation function - std::unique_ptr& exchange_op = *exchange_it; - DCHECK(exchange_op); - - // Allow activation functions to move up over any operator that does not - // change the values. 
- switch (exchange_op->type) { - case OperatorType::kExpandDims: - case OperatorType::kSqueeze: - case OperatorType::kTensorFlowReshape: - case OperatorType::kTranspose: - break; - default: - return false; - } - - DCHECK_EQ(exchange_op->outputs[0], ac_op->inputs[0]); - const auto exchange_op_input = exchange_op->inputs[0]; - const auto intermediate_array = exchange_op->outputs[0]; - const auto ac_op_output = ac_op->outputs[0]; - - int count_ops_consuming_output = - CountOpsWithInput(*model, intermediate_array); - DCHECK_GE(count_ops_consuming_output, 1); - if (count_ops_consuming_output > 1) { - AddMessageF( - "Not exchanging activation function with %s because it is consumed by " - "more than 1 other operator", - LogName(*exchange_op)); - return false; - } - - // If the ac_op was originally producing an output_array we can't trivially - // reorder as otherwise the output array name would change and break - // downstream assumptions. To work around that we perform some renaming below - // in that case at the cost of a bit more confusing array names in this rare - // case. - bool is_ac_op_output = - std::find(model->flags.output_arrays().begin(), - model->flags.output_arrays().end(), - ac_op_output) != model->flags.output_arrays().end(); - if (is_ac_op_output) { - // To preserve the output array name of the activation function we need to - // create a temporary to use to pass between ac->ex. 
- // - // Original: - // (a) -> EX -> (b) -> AC -> (c) - // Now: - // (a) -> AC -> (c') -> EX -> (c) - AddMessageF( - "Exchanging activation function %s with %s but renaming to preserve " - "output array %s", - LogName(*ac_op), LogName(*exchange_op), ac_op->outputs[0]); - - auto renamed_ac_op_output = - AvailableArrayName(*model, ac_op_output + "_exchange"); - ac_op->inputs[0] = exchange_op_input; - ac_op->outputs[0] = renamed_ac_op_output; - model->EraseArray(exchange_op->outputs[0]); - exchange_op->inputs[0] = renamed_ac_op_output; - exchange_op->outputs[0] = ac_op_output; - } else { - // Simply swap the order and update consumers to use the exchange_op output - // array (b). - // - // Original: - // (a) -> EX -> (b) -> AC -> (c) - // Now: - // (a) -> AC -> (c) -> EX -> (b) - AddMessageF("Exchanging activation function %s with %s", LogName(*ac_op), - LogName(*exchange_op)); - - Operator* consumer = GetFirstOpWithInput(*model, ac_op_output); - while (consumer) { - for (int i = 0; i < consumer->inputs.size(); ++i) { - if (consumer->inputs[i] == ac_op_output) { - consumer->inputs[i] = intermediate_array; - } - } - consumer = GetFirstOpWithInput(*model, ac_op_output); - } - ac_op->inputs[0] = exchange_op_input; - exchange_op->inputs[0] = ac_op_output; - } - - // Clear shapes; this will allow shape propagation to fix the sizes for us. - model->GetOrCreateArray(ac_op->outputs[0]).clear_shape(); - model->GetOrCreateArray(exchange_op->outputs[0]).clear_shape(); - - // Finally, reorder operators. Note that this only works when there are no - // other direct descendents of the exchange_op. 
- ac_op.swap(exchange_op); - - return true; -} - -} // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/reorder_elementwise_unary.cc b/tensorflow/contrib/lite/toco/graph_transformations/reorder_elementwise_unary.cc new file mode 100644 index 0000000000000000000000000000000000000000..9f5b7920cb937b021eb23fc1d5fdc3c1ff18a72d --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/reorder_elementwise_unary.cc @@ -0,0 +1,153 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/core/platform/logging.h" + +namespace toco { + +namespace { + +bool IsElementwiseOperator(OperatorType optype) { + switch (optype) { + case OperatorType::kCast: + case OperatorType::kExp: + case OperatorType::kFloor: + case OperatorType::kNeg: + case OperatorType::kRelu: + case OperatorType::kRelu1: + case OperatorType::kRelu6: + case OperatorType::kTanh: + case OperatorType::kTensorFlowSqrt: + case OperatorType::kTensorFlowSquare: + return true; + default: + return false; + } +} + +bool IsMoveOperator(OperatorType optype) { + switch (optype) { + case OperatorType::kDepthToSpace: + case OperatorType::kExpandDims: + case OperatorType::kSpaceToDepth: + case OperatorType::kSqueeze: + case OperatorType::kTensorFlowReshape: + case OperatorType::kTranspose: + return true; + default: + return false; + } +} + +} // namespace + +// Swap elementwise operators such that all value operators occur before all +// element move operators, e.g. negation then transpose. 
+bool ReorderElementwiseUnary::Run(Model* model, std::size_t op_index) { + const auto element_op_it = model->operators.begin() + op_index; + std::unique_ptr& element_op = *element_op_it; + if (!IsElementwiseOperator(element_op->type)) { + return false; + } + + const string intermediate_name = element_op->inputs[0]; + auto it = FindOpWithOutput(*model, intermediate_name); + if (it == model->operators.end()) { + AddMessageF("No preceding operator"); + return false; + } + + std::unique_ptr& move_op = *it; + if (!IsMoveOperator(move_op->type)) { + AddMessageF("Preceding operator is not a move operator"); + return false; + } + + if (CountOpsWithInput(*model, intermediate_name) != 1) { + AddMessageF("Input %s used elsewhere", intermediate_name); + return false; + } + + // Check that the intermediate is discardable. + if (!IsDiscardableArray(*model, intermediate_name)) { + AddMessageF( + "Cannot swap elementwise as it would invalidate %s which is " + "an output array.", + intermediate_name); + return false; + } + + // op->inputs may change so we need to keep a value by copy. + const string input_name = move_op->inputs[0]; + const string output_name = element_op->outputs[0]; + + AddMessageF("Swapping around operators with %s and %s", LogName(*element_op), + LogName(*move_op)); + + // If the output array is an exit node for the graph then we need to retain + // the name as an output node. This makes the naming scheme a little confusing + // but is required in this rare case. + if (!IsDiscardableArray(*model, output_name)) { + // The output name of the sequence needs to stay static, so create a new + // array new use for the intermediate. 
+ const auto new_intermediate_name = + AvailableArrayName(*model, element_op->outputs[0] + "_reorder"); + AddMessageF("Adding new array %s to preserve output array name %s", + new_intermediate_name, output_name); + + element_op->inputs[0] = input_name; + element_op->outputs[0] = new_intermediate_name; + model->EraseArray(intermediate_name); + move_op->inputs[0] = new_intermediate_name; + move_op->outputs[0] = output_name; + } else { + // The intermediate array is now the output array. + for (int i = 0; i < model->operators.size(); i++) { + Operator* consumer = model->operators[i].get(); + for (int j = 0; j < consumer->inputs.size(); j++) { + if (consumer->inputs[j] == output_name) { + consumer->inputs[j] = intermediate_name; + } + } + } + + element_op->inputs[0] = input_name; + move_op->inputs[0] = output_name; + } + + // Reset both arrays as shape, type, min/max, etc can all change because of + // the position swap. + model->EraseArray(element_op->outputs[0]); + model->EraseArray(move_op->outputs[0]); + + // Reconstruct. + model->GetOrCreateArray(element_op->outputs[0]); + model->GetOrCreateArray(move_op->outputs[0]); + + // Swap the order of the operators. + element_op.swap(move_op); + + return true; +} + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/reorder_reshape_transpose.cc b/tensorflow/contrib/lite/toco/graph_transformations/reorder_reshape_transpose.cc new file mode 100644 index 0000000000000000000000000000000000000000..9e7fe1b1ccd851dd998e59e75ff798f52f7c6e5a --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/reorder_reshape_transpose.cc @@ -0,0 +1,248 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/core/platform/logging.h" + +namespace toco { + +namespace { + +bool OperatorReady(const Model& model, const Operator* op) { + if (!model.HasArray(op->inputs[0]) || !model.HasArray(op->inputs[1]) || + !model.HasArray(op->outputs[0])) { + return false; + } + + if (!model.GetArray(op->inputs[0]).has_shape() || + !model.GetArray(op->outputs[0]).has_shape()) { + // Input and output needs the shape. + return false; + } + + if (!model.GetArray(op->inputs[1]).buffer) { + // Buffer needs to be a constant. + return false; + } + + return true; +} + +// Utility function to filter out a value. +void Filter(std::vector* vec, int value) { + vec->erase(std::remove(vec->begin(), vec->end(), value), vec->end()); +} + +// Computes a new permutation used to swap a reshape-transpose to a +// transpose-reshape. In this case the permutation operates on the intermediate +// shape. +std::vector ComputeNewPerm(std::vector input_dims, + std::vector intermediate_dims, + std::vector perm) { + // These are the major axis of the input. 
+ std::vector input_indices; + for (int i = 0; i < input_dims.size(); i++) { + if (input_dims[i] != 1) { + input_indices.push_back(i); + } + } + + // This maps which indices of the input produced the intermediate indices for + // non-unary dimensions. + std::unordered_map intermediate_to_input_indices_map; + for (int i = 0; i < intermediate_dims.size(); i++) { + if (intermediate_dims[i] != 1) { + intermediate_to_input_indices_map[i] = + input_indices[intermediate_to_input_indices_map.size()]; + } + } + + // Translate the transpose permutation to a new permutation starting with the + // major indices. + std::vector new_perm; + new_perm.reserve(input_dims.size()); + for (int i = 0; i < perm.size(); i++) { + if (intermediate_dims[perm[i]] == 1) continue; + + new_perm.push_back(intermediate_to_input_indices_map[perm[i]]); + } + + // Fill the rest of the transpose in with the ones. + for (int index = 0; index < input_dims.size(); index++) { + if (input_dims[index] == 1) { + new_perm.push_back(index); + } + } + + CHECK_EQ(new_perm.size(), input_dims.size()); + return new_perm; +} + +} // namespace + +// Swaps reshape-transpose to transpose-reshape whenever possible. This is +// possible when the reshape does not affect memory ordering. +bool ReorderReshapeTranspose::Run(Model* model, std::size_t op_index) { + auto transpose_it = model->operators.begin() + op_index; + + TransposeOperator* transpose_op = ConvertOperator( + transpose_it->get(), OperatorType::kTranspose); + + if (transpose_op == nullptr) { + return false; + } + + if (!OperatorReady(*model, transpose_op) || transpose_op->perm.empty()) { + // Wait for values to propagate. + return false; + } + + // Find the operator that produces the transpose op. 
+ auto reshape_it = FindOpWithOutput(*model, transpose_op->inputs[0]); + if (reshape_it == model->operators.end()) { + return false; + } + + TensorFlowReshapeOperator* reshape_op = + ConvertOperator( + reshape_it->get(), OperatorType::kTensorFlowReshape); + if (reshape_op == nullptr) { + return false; + } + + // Ignore if the reshape is uninitialized. + if (!OperatorReady(*model, reshape_op) || reshape_op->shape.empty()) { + return false; + } + + // Need to copy to keep static if permutated. + const string input_name = reshape_op->inputs[0]; + const string intermediate_name = reshape_op->outputs[0]; + const string output_name = transpose_op->outputs[0]; + + // Intermediate should not be consumed by any other operators. + if (CountOpsWithInput(*model, intermediate_name) != 1) { + AddMessageF("Input %s used elsewhere", intermediate_name); + return false; + } + + // Check that the intermediate is not an output array. + if (!IsDiscardableArray(*model, intermediate_name)) { + AddMessageF( + "Cannot reorder reshape-transpose as it would invalidate %s which is " + "an output array.", + intermediate_name); + return false; + } + + // Get the arrays. + const auto& input_array = model->GetArray(input_name); + const auto& intermediate_array = model->GetArray(intermediate_name); + const auto& output_array = model->GetArray(output_name); + + // Get the shapes of each array. + Shape input_shape = input_array.shape(); + Shape intermediate_shape = intermediate_array.shape(); + Shape output_shape = output_array.shape(); + + // Assign ids to non-unary indices. + std::vector input_dims = input_shape.dims(); + std::vector intermediate_dims = intermediate_shape.dims(); + std::vector output_dims = output_shape.dims(); + + // If the reshape is equivalent to a transpose with fewer/more unary + // dimensions then it can be moved between the transpose. 
+ if (!ReshapeIsEquivalentToTranspose(*model, reshape_op, + true /*allow_extra_unary_dims*/)) { + return false; + } + + if (!IsDiscardableArray(*model, output_name)) { + // The output name of the sequence needs to stay static, so create a new + // array new use for the intermediate. + const auto new_intermediate_name = + AvailableArrayName(*model, transpose_op->outputs[0] + "_exchange"); + AddMessageF("Adding new array %s to preserve output array name %s", + new_intermediate_name, transpose_op->outputs[0]); + transpose_op->inputs[0] = input_name; + transpose_op->outputs[0] = new_intermediate_name; + reshape_op->inputs[0] = new_intermediate_name; + reshape_op->outputs[0] = output_name; + model->EraseArray(intermediate_name); + } else { + // The intermediate array is now the output array. + for (int i = 0; i < model->operators.size(); i++) { + Operator* consumer = model->operators[i].get(); + for (int j = 0; j < consumer->inputs.size(); j++) { + if (consumer->inputs[j] == output_name) { + consumer->inputs[j] = intermediate_name; + } + } + } + + transpose_op->inputs[0] = input_name; + reshape_op->inputs[0] = output_name; + } + + // If transposes constant buffer is used elsewhere, make a new copy. + if (CountOpsWithInput(*model, transpose_op->inputs[1]) != 1) { + transpose_op->inputs[1] = + AvailableArrayName(*model, transpose_op->inputs[1] + "_copy"); + } + + // Make the new transpose permutation. + const std::vector new_perm = + ComputeNewPerm(input_dims, intermediate_dims, transpose_op->perm); + CHECK_EQ(input_dims.size(), new_perm.size()); + + auto& transpose_array = model->GetOrCreateArray(transpose_op->inputs[1]); + transpose_array.GetMutableBuffer().data = new_perm; + *(transpose_array.mutable_shape()->mutable_dims()) = { + static_cast(new_perm.size())}; + transpose_op->perm = new_perm; + + // If the reshape's constant buffer is reused, create a new one. 
+ if (CountOpsWithInput(*model, reshape_op->inputs[1]) != 1) { + reshape_op->inputs[1] = + AvailableArrayName(*model, reshape_op->inputs[1] + "_copy"); + } + + // We need to modify the reshape input array to target the new output size. + auto& reshape_array = model->GetOrCreateArray(reshape_op->inputs[1]); + reshape_array.GetMutableBuffer().data = output_dims; + *(reshape_array.mutable_shape()->mutable_dims()) = { + static_cast(output_shape.dimensions_count())}; + reshape_op->shape.clear(); + + AddMessageF("Swapping around operators between %s and %s", input_name, + output_name); + + model->GetOrCreateArray(transpose_op->outputs[0]).clear_shape(); + model->GetOrCreateArray(reshape_op->outputs[0]).clear_shape(); + + // Swap the order of the operators. + transpose_it->swap(*reshape_it); + + return true; +} + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_batch_normalization.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_batch_normalization.cc index fb109eb91b16e3a73005230f821c18b9ef82d2fb..2b3ee36ad10e24ab7367ca44c03a234688a63a9b 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_batch_normalization.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_batch_normalization.cc @@ -33,7 +33,7 @@ bool ResolveBatchNormalization::Run(Model* model, std::size_t op_index) { const auto* bn_op = static_cast(bn_it->get()); - const auto& mean_array = model->GetArray(bn_op->inputs[1]); + auto& mean_array = model->GetArray(bn_op->inputs[1]); const auto& multiplier_array = model->GetArray(bn_op->inputs[2]); const auto& offset_array = model->GetArray(bn_op->inputs[3]); @@ -49,6 +49,13 @@ bool ResolveBatchNormalization::Run(Model* model, std::size_t op_index) { CHECK(multiplier_array.data_type == ArrayDataType::kFloat); CHECK(offset_array.data_type == ArrayDataType::kFloat); + // This graph transformations will need to address constant buffers below, + // so we need to exit early if these 
buffers don't exist (i.e. if the params + // haven't yet been resolved as constants). + if (!mean_array.buffer || !multiplier_array.buffer || !offset_array.buffer) { + return false; + } + // Create the new Mul, Add operators auto* mul_op = new MulOperator; auto* add_op = new AddOperator; @@ -80,9 +87,15 @@ bool ResolveBatchNormalization::Run(Model* model, std::size_t op_index) { DCHECK_EQ(bn_it->get(), bn_op); // Create the new param arrays - const auto& mean_shape = mean_array.shape(); + auto& mean_shape = *mean_array.mutable_shape(); const auto& multiplier_shape = multiplier_array.shape(); const auto& offset_shape = offset_array.shape(); + if (mean_shape.dims().empty()) { + *mean_shape.mutable_dims() = multiplier_shape.dims(); + auto& data = mean_array.GetMutableBuffer().data; + CHECK_EQ(data.size(), 1); + data.resize(RequiredBufferSizeForShape(mean_shape), data[0]); + } CHECK(mean_shape.dims() == multiplier_shape.dims()); CHECK(mean_shape.dims() == offset_shape.dims()); const auto& param_shape = mean_shape; diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_concatenation.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_concatenation.cc index 064810b53e7c3bee4601204c9dbd976c374a6a60..d916ae0ddf017fe6a2fb2709db6e9de8c258adfc 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_concatenation.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_concatenation.cc @@ -18,6 +18,7 @@ limitations under the License. #include #include "absl/strings/str_cat.h" +#include "absl/strings/str_join.h" #include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" #include "tensorflow/contrib/lite/toco/model.h" #include "tensorflow/contrib/lite/toco/tooling_util.h" @@ -105,7 +106,8 @@ void ConcatenateTensorBuffers(const std::vector& input_arrays, // already set (e.g. because of previous pass in TOCO), it doesn't change it and // returns. 
Otherwise it uses the input arrays min and max values to compute the // concatenated array min and max. -void SetMinMaxForConcatenedArray(const std::vector& input_arrays, +void SetMinMaxForConcatenedArray(GraphTransformation* transformation, + const std::vector& input_arrays, Array* concatenated_array) { CHECK(concatenated_array->data_type == ArrayDataType::kFloat); // If the minmax is already set, use it @@ -125,6 +127,9 @@ void SetMinMaxForConcatenedArray(const std::vector& input_arrays, MinMax& minmax = concatenated_array->GetOrCreateMinMax(); minmax.min = concat_min; minmax.max = concat_max; + + transformation->AddMessageF("Setting concatenated array min/max to %g,%g", + concat_min, concat_max); } } // namespace @@ -161,11 +166,14 @@ bool ResolveConstantConcatenation::Run(Model* model, std::size_t op_index) { input_arrays.push_back(&model->GetArray(input_name)); } + AddMessageF("Performing constant concat of %s into %s", + absl::StrJoin(concat_op->inputs, ", "), concatenated_array_name); + switch (concatenated_array.data_type) { case ArrayDataType::kFloat: ConcatenateTensorBuffers( input_arrays, concatenation_axis, &concatenated_array); - SetMinMaxForConcatenedArray(input_arrays, &concatenated_array); + SetMinMaxForConcatenedArray(this, input_arrays, &concatenated_array); break; case ArrayDataType::kUint8: ConcatenateTensorBuffers( @@ -189,13 +197,13 @@ bool ResolveConstantConcatenation::Run(Model* model, std::size_t op_index) { // Remove all the resolved arrays. for (const string& input_name : concat_op->inputs) { - // Check to prevent removal of shared tensors + // Check to prevent removal of shared tensors. if (CountOpsWithInput(*model, input_name) == 1) { model->EraseArray(input_name); } } - // Remove concatenate operator + // Remove concatenate operator. 
model->operators.erase(concat_it); return true; } diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fake_quant.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fake_quant.cc index 625d90205a801ad7c3fc1026c9cedc9b509f920d..efb7bb218421dd045e3e8e2a38b9c70989f222e1 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fake_quant.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fake_quant.cc @@ -18,6 +18,7 @@ limitations under the License. #include #include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/graph_transformations/quantization_util.h" #include "tensorflow/contrib/lite/toco/model.h" #include "tensorflow/contrib/lite/toco/tooling_util.h" #include "tensorflow/core/platform/logging.h" @@ -45,9 +46,29 @@ bool ResolveConstantFakeQuant::Run(Model* model, std::size_t op_index) { } const auto& input_array = model->GetArray(fakequant_op->inputs[0]); + CHECK(input_array.data_type == ArrayDataType::kFloat); + + // Determine the final data type in the same way as PropagateFakeQuantNumBits. + ArrayDataType quantized_data_type = input_array.final_data_type; + if (!InferQuantizedDataTypeFromFakeQuant(*fakequant_op, + &quantized_data_type)) { + AddMessageF("Unsupported FakeQuant num_bits=%d", fakequant_op->num_bits); + return false; + } + + AddMessageF("Resolving constant %s", LogName(*fakequant_op)); + auto& output_array = model->GetArray(fakequant_op->outputs[0]); CHECK(input_array.data_type == ArrayDataType::kFloat); output_array.data_type = ArrayDataType::kFloat; + + // We'll set the final data type to what the fake quant indicates we should + // have (and would have been set if this stayed around until + // PropagateFakeQuantNumBits). 
+ if (propagate_fake_quant_num_bits()) { + output_array.final_data_type = quantized_data_type; + } + CHECK(!output_array.buffer); const auto& input_buffer = input_array.GetBuffer(); output_array.GetOrCreateMinMax() = *fakequant_op->minmax; @@ -66,7 +87,9 @@ bool ResolveConstantFakeQuant::Run(Model* model, std::size_t op_index) { const double dst_val = qparams.scale * (quantized_val - qparams.zero_point); output_buffer.data[i] = dst_val; } - if (CountOpsWithInput(*model, fakequant_op->inputs[0]) == 1) { + + if (IsDiscardableArray(*model, fakequant_op->inputs[0]) && + CountOpsWithInput(*model, fakequant_op->inputs[0]) == 1) { model->EraseArray(fakequant_op->inputs[0]); } model->operators.erase(fakequant_it); diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_gather.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_gather.cc index d999c2df9483e096f333c6af83e1d9fee873d4d6..debe298a5a93034bcb928d7384b5ec1fc7439e47 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_gather.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_gather.cc @@ -98,6 +98,16 @@ bool ResolveConstantGather::Run(Model* model, std::size_t op_index) { CHECK(coords_array.data_type == ArrayDataType::kInt32) << "Only int32 indices are supported"; + // Copy min/max info if present. The ranges of the selected values may be + // a subset of the original range but we want to ensure the quantization + // params stay the same. 
+ if (input_array.minmax) { + const auto& input_minmax = input_array.GetMinMax(); + auto& output_minmax = output_array.GetOrCreateMinMax(); + output_minmax.min = input_minmax.min; + output_minmax.max = input_minmax.max; + } + CHECK(!output_array.buffer); switch (output_array.data_type) { case ArrayDataType::kFloat: diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_random_uniform.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_random_uniform.cc new file mode 100644 index 0000000000000000000000000000000000000000..88d06d7dc75005c89a69b881aa0064d1162227d5 --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_random_uniform.cc @@ -0,0 +1,116 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include +#include + +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/core/platform/logging.h" + +#include "tensorflow/core/lib/random/philox_random.h" +#include "tensorflow/core/lib/random/random_distributions.h" + +namespace toco { + +template +bool ComputeRandomUniformArray(Model* model, RandomUniformOperator* op) { + typedef tensorflow::random::UniformDistribution< + tensorflow::random::PhiloxRandom, DataType> + Distribution; + + // Allocate output + auto& output_array = model->GetArray(op->outputs[0]); + CHECK(output_array.data_type == Type); + std::vector>& data = + output_array.GetMutableBuffer().data; + data.resize(RequiredBufferSizeForShape(output_array.shape())); + + // We use the same random number generator and distribution as TensorFlow to + // produce the exact same values given the same seeds. See + // tensorflow::functor::FillPhiloxRandomTask in + // //third_party/tensorflow/core/kernels/random_op.cc for the implementation. + tensorflow::random::PhiloxRandom generator(op->seed, op->seed2); + Distribution dist; + + // The generator creates Distribution::kResultElementCount samples at a time. 
+ size_t offset = 0; + size_t num_samples = Distribution::kResultElementCount; + while (offset < data.size()) { + const typename Distribution::ResultType samples = dist(&generator); + std::copy(&samples[0], + &samples[0] + std::min(num_samples, data.size() - offset), + &data[0] + offset); + offset += num_samples; + } + + return true; +} + +bool ResolveConstantRandomUniform::Run(Model* model, std::size_t op_index) { + const auto it = model->operators.begin() + op_index; + auto* base_op = it->get(); + if (base_op->type != OperatorType::kRandomUniform) { + return false; + } + auto* op = static_cast(base_op); + + CHECK_EQ(op->inputs.size(), 1); + CHECK_EQ(op->outputs.size(), 1); + + auto& output_array = model->GetArray(op->outputs[0]); + if (output_array.data_type == ArrayDataType::kNone) { + // Yield until the output type has been set by PropagateArrayDataTypes + return false; + } + + if (!output_array.has_shape()) { + // Yield until the output shape has been set by PropagateFixedShapes + return false; + } + + if ((op->seed == 0) && (op->seed2 == 0)) { + LOG(WARNING) << "RandomUniform op outputting \"" << op->outputs[0] + << "\" is truly random (using /dev/random system entropy). " + "Therefore, cannot resolve as constant. Set \"seed\" or " + "\"seed2\" attr non-zero to fix this"; + return false; + } + + switch (output_array.data_type) { + case ArrayDataType::kFloat: + if (!ComputeRandomUniformArray(model, op)) { + return false; + } + break; + // For future support of double or half. + // case ArrayDataType::kDouble... 
+ default: + LOG(FATAL) + << "Unsupported data type given to RandomUniform op with output \"" + << op->outputs[0] << "\""; + break; + } + + // Erase input arrays if no longer used + toco::DeleteArrayIfUsedOnce(op->inputs[0], model); + + // Erase the operator + model->operators.erase(it); + + return true; +} + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_reshape.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_reshape.cc new file mode 100644 index 0000000000000000000000000000000000000000..7e7ad383e7789891f5396845241e70143dc8b76f --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_reshape.cc @@ -0,0 +1,124 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include + +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/core/platform/logging.h" + +namespace toco { + +// Resolves a constant reshape operation by copying the buffer. 
+bool ResolveConstantReshape::Run(Model* model, std::size_t op_index) { + auto it = model->operators.begin() + op_index; + const auto* base_op = it->get(); + if (base_op->type != OperatorType::kTensorFlowReshape) { + return false; + } + const auto* op = static_cast(base_op); + + CHECK_EQ(op->inputs.size(), 2); + CHECK_EQ(op->outputs.size(), 1); + + // We require constant inputs. + if (!IsConstantParameterArray(*model, op->inputs[0]) || + !IsConstantParameterArray(*model, op->inputs[1])) { + return false; + } + + auto& output_array = model->GetArray(op->outputs[0]); + if (output_array.data_type == ArrayDataType::kNone) { + // Yield until the output type has been set by PropagateArrayDataTypes. + return false; + } + if (!output_array.has_shape()) { + // Yield until the output shape has been set by PropagateFixedShapes. + return false; + } + + const Array& input_array = model->GetArray(op->inputs[0]); + if (!ShapesAgreeUpToExtending(input_array.shape(), output_array.shape())) { + AddMessageF("Constant reshape is non-trivial (%s -> %s)", + ShapeToString(input_array.shape()), + ShapeToString(output_array.shape())); + return false; + } + + CHECK(!output_array.buffer); + switch (input_array.data_type) { + case ArrayDataType::kBool: + CopyArrayBuffer(input_array, &output_array); + break; + case ArrayDataType::kFloat: + CopyArrayBuffer(input_array, &output_array); + break; + case ArrayDataType::kInt8: + CopyArrayBuffer(input_array, &output_array); + break; + case ArrayDataType::kUint8: + CopyArrayBuffer(input_array, &output_array); + break; + case ArrayDataType::kInt16: + CopyArrayBuffer(input_array, &output_array); + break; + case ArrayDataType::kUint16: + CopyArrayBuffer(input_array, &output_array); + break; + case ArrayDataType::kInt32: + CopyArrayBuffer(input_array, &output_array); + break; + case ArrayDataType::kUint32: + CopyArrayBuffer(input_array, &output_array); + break; + case ArrayDataType::kInt64: + CopyArrayBuffer(input_array, &output_array); + break; + case 
ArrayDataType::kUint64: + CopyArrayBuffer(input_array, &output_array); + break; + case ArrayDataType::kString: + CopyArrayBuffer(input_array, &output_array); + break; + default: + LOG(FATAL) << "Unsupported data type: " + << ArrayDataTypeName(input_array.data_type); + return false; + } + + AddMessageF("Resolving constant reshape of %s", LogName(*op)); + + if (input_array.minmax) { + output_array.GetOrCreateMinMax() = input_array.GetMinMax(); + } + if (input_array.quantization_params) { + output_array.GetOrCreateQuantizationParams() = + input_array.GetQuantizationParams(); + } + + // Erase input arrays if no longer used. + for (const auto& input : op->inputs) { + if (IsDiscardableArray(*model, input) && + CountOpsWithInput(*model, input) == 1) { + model->EraseArray(input); + } + } + + // Erase the operator. + model->operators.erase(it); + return true; +} + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_strided_slice.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_strided_slice.cc index a0cfc3d59763dc1211ed4d1ac114d371a4a7ee0b..8df3c2f1955c23b301ca10f613b57a1fbe138593 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_strided_slice.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_strided_slice.cc @@ -23,40 +23,88 @@ namespace toco { namespace { +// These StridedSlice utility functions are essentially a COPY of those in +// reference_ops.h. See comments there. + +// Use until std::clamp() is available from C++17. 
+int Clamp(const int v, const int lo, const int hi) { + if (hi < v) return hi; + if (v < lo) return lo; + return v; +} + int StartForAxis(StridedSliceOperator const& op, Shape const& input_shape, int axis) { - int start; + // Begin with the specified index + int start = op.start_indices[axis]; + + // begin_mask override if (op.begin_mask & 1 << axis) { - // If begin mask bit is set, use the first element - start = 0; - } else { - // Otherwise, use the specified element - start = op.start_indices[axis]; - if (start < 0) { - // Handle negative indices - start += input_shape.dims(axis); + if (op.strides[axis] > 0) { + // Forward iteration - use the first element. These values will get + // clamped below (Note: We could have set them to 0 and axis_size-1, but + // use lowest() and max() to maintain symmetry with StopForAxis()) + start = std::numeric_limits::lowest(); + } else { + // Backward iteration - use the last element. + start = std::numeric_limits::max(); } } + + // Handle negative indices + int axis_size = input_shape.dims(axis); + if (start < 0) { + start += axis_size; + } + + // Clamping + start = Clamp(start, 0, axis_size - 1); + return start; } int StopForAxis(StridedSliceOperator const& op, Shape const& input_shape, int axis) { - int stop; + // Begin with the specified index + int stop = op.stop_indices[axis]; + + // end_mask override if (op.end_mask & (1 << axis)) { - // If end mask bit set, use the last element - stop = input_shape.dims(axis); - } else { - // Otherwise, use the specified element - stop = op.stop_indices[axis]; - if (stop < 0) { - // Handle negative indices - stop += input_shape.dims(axis); + if (op.strides[axis] > 0) { + // Forward iteration - use the last element. These values will get + // clamped below + stop = std::numeric_limits::max(); + } else { + // Backward iteration - use the first element. 
+ stop = std::numeric_limits::lowest(); } } + + // Handle negative indices + int axis_size = input_shape.dims(axis); + if (stop < 0) { + stop += axis_size; + } + + // Clamping + // Because the end index points one past the last element, we need slightly + // different clamping ranges depending on the direction. + if (op.strides[axis] > 0) { + // Forward iteration + stop = Clamp(stop, 0, axis_size); + } else { + // Backward iteration + stop = Clamp(stop, -1, axis_size - 1); + } + return stop; } +bool LoopCondition(int index, int stop, int stride) { + // True when we have reached the end of an axis and should loop. + return stride > 0 ? index >= stop : index <= stop; +} + template void StridedSlice(StridedSliceOperator const& op, Array const& input_array, Array* output_array) { @@ -73,9 +121,6 @@ void StridedSlice(StridedSliceOperator const& op, Array const& input_array, int num_input_axes = op.start_indices.size(); CHECK_EQ(num_input_axes, op.stop_indices.size()); CHECK_EQ(num_input_axes, op.strides.size()); - for (int i = 0; i < op.strides.size(); i++) { - CHECK_GE(op.strides[i], 0) << "Negative strides usupported"; - } // Create a buffer for the output array std::vector>& output_data = @@ -103,13 +148,15 @@ void StridedSlice(StridedSliceOperator const& op, Array const& input_array, // Compute next source input coordinates. bool carry = true; for (int axis = 0; axis < num_input_axes; axis++) { + int stride = op.strides[axis]; // Increment this axis if we carried from the previous one if (carry) { - src_coord[axis] += op.strides[axis]; + src_coord[axis] += stride; } // Check if we've overflowed. 
- if (src_coord[axis] >= StopForAxis(op, input_shape, axis)) { + int stop = StopForAxis(op, input_shape, axis); + if (LoopCondition(src_coord[axis], stop, stride)) { // Reset axis and set carry src_coord[axis] = StartForAxis(op, input_shape, axis); carry = true; diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_transpose.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_transpose.cc index 4f984bfde55b3457694bb411bbfdf30723c7066e..1fd20314b14d98bd82e2b20a4e70f5d9c2c3b298 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_transpose.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_transpose.cc @@ -131,6 +131,10 @@ bool ResolveConstantTranspose::Run(Model* model, std::size_t op_index) { if (input_array.minmax) { output_array.GetOrCreateMinMax() = input_array.GetMinMax(); } + if (input_array.quantization_params) { + output_array.GetOrCreateQuantizationParams() = + input_array.GetQuantizationParams(); + } if (op->perm.empty()) { // Yield until perm has been populated by ResolveTransposeAttributes. @@ -164,6 +168,8 @@ bool ResolveConstantTranspose::Run(Model* model, std::size_t op_index) { break; } + AddMessageF("Resolving constant transpose of %s", LogName(*op)); + // Erase input arrays if no longer used. for (const auto& input : op->inputs) { if (IsDiscardableArray(*model, input) && diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc index d4db6f1c009cd19515655fb31974a2e97cfa42e8..f6c8f79d8d3311dc2294e3ec406a184b2a16a6b5 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc @@ -51,6 +51,7 @@ bool ResolveConstantUnaryOperator::Run(Model* model, std::size_t op_index) { // Test for unary ops of types that we know how to resolve. 
switch (unary_op->type) { case OperatorType::kCast: + case OperatorType::kLog: case OperatorType::kNeg: case OperatorType::kTensorFlowRsqrt: case OperatorType::kTensorFlowSqrt: @@ -218,6 +219,7 @@ bool ResolveConstantUnaryOperator::Run(Model* model, std::size_t op_index) { } output_float_data[0] = max; } else if (unary_op->type == OperatorType::kNeg || + unary_op->type == OperatorType::kLog || unary_op->type == OperatorType::kTensorFlowRsqrt || unary_op->type == OperatorType::kTensorFlowSqrt || unary_op->type == OperatorType::kTensorFlowSquare) { @@ -231,6 +233,8 @@ bool ResolveConstantUnaryOperator::Run(Model* model, std::size_t op_index) { float outval = 0.f; if (unary_op->type == OperatorType::kNeg) { outval = -val; + } else if (unary_op->type == OperatorType::kLog) { + outval = std::log(val); } else if (unary_op->type == OperatorType::kTensorFlowRsqrt) { outval = 1.0f / std::sqrt(val); } else if (unary_op->type == OperatorType::kTensorFlowSqrt) { diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_matmul.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_matmul.cc index f38203c80fcb7ab8bc1639129fd98e4e342e5cb7..2a236d3f98784e8244942f94d5a250b5bc00a8ad 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_matmul.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_matmul.cc @@ -60,6 +60,13 @@ bool ResolveTensorFlowMatMul::Run(Model* model, std::size_t op_index) { string input_lhs = matmul_op->inputs[0]; string input_rhs = transpose_op->outputs[0]; + // Construct the new FullyConnectedOperator. + auto* fc_op = new FullyConnectedOperator; + fc_op->outputs = matmul_op->outputs; + + // Insert the newly constructed FullyConnectedOperator. 
+ model->operators.emplace(matmul_it, fc_op) + 1; + // Find the op producing the array passed to this MatMul auto previous_op_it = model->operators.begin(); bool found = false; @@ -76,13 +83,6 @@ bool ResolveTensorFlowMatMul::Run(Model* model, std::size_t op_index) { } Operator* previous_op = (found) ? previous_op_it->get() : nullptr; - // Construct the new FullyConnectedOperator. - auto* fc_op = new FullyConnectedOperator; - fc_op->outputs = matmul_op->outputs; - - // Insert the newly constructed FullyConnectedOperator. - model->operators.emplace(matmul_it, fc_op) + 1; - // Refresh iterator. matmul_it = model->operators.begin(); for (; matmul_it != model->operators.end(); ++matmul_it) { diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_merge.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_merge.cc index 477e7f13da3d88a68547d494011cd4984936b909..38e0005890ac10410df4ddb5290be8fcc948c349 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_merge.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_merge.cc @@ -32,7 +32,7 @@ bool ResolveTensorFlowMerge::Run(Model* model, std::size_t op_index) { } // We need to yield until this Merge node has only 1 input, which will mean - // that that is the selected input. Other graph transformations on other nodes + // that is the selected input. Other graph transformations on other nodes // such as ResolveTensorFlowSwitch, will take care of trimming the // non-selected inputs, so that at some point there will be only 1 input left. 
if (merge_op->inputs.size() > 1) { diff --git a/tensorflow/contrib/lite/toco/import_tensorflow.cc b/tensorflow/contrib/lite/toco/import_tensorflow.cc index c26e4bddffc0588106d1805cdd735f4aa121c144..155d890c9f23ba206f1f0e6db645a601308cea5b 100644 --- a/tensorflow/contrib/lite/toco/import_tensorflow.cc +++ b/tensorflow/contrib/lite/toco/import_tensorflow.cc @@ -74,7 +74,7 @@ const string& GetStringAttr(const NodeDef& node, const string& attr_name) { return attr.s(); } -int GetIntAttr(const NodeDef& node, const string& attr_name) { +int64 GetIntAttr(const NodeDef& node, const string& attr_name) { CHECK(HasAttr(node, attr_name)) << attr_name << " not found in:\n" << node.DebugString(); const auto& attr = node.attr().at(attr_name); @@ -569,6 +569,23 @@ void ConvertBiasAddOperator(const NodeDef& node, model->operators.emplace_back(biasadd); } +void ConvertRandomUniform(const NodeDef& node, + const TensorFlowImportFlags& tf_import_flags, + Model* model) { + CHECK_EQ(node.op(), "RandomUniform"); + CheckInputsCount(node, tf_import_flags, 1); + + CHECK_EQ(GetDataTypeAttr(node, "T"), DT_INT32); + auto op = absl::make_unique(); + op->inputs.push_back(node.input(0)); + op->outputs.push_back(node.name()); + op->dtype = ConvertDataType(GetDataTypeAttr(node, "dtype")); + op->seed = GetIntAttr(node, "seed"); + op->seed2 = GetIntAttr(node, "seed2"); + CHECK(model != nullptr); + model->operators.emplace_back(std::move(op)); +} + void ConvertReluOperator(const NodeDef& node, const TensorFlowImportFlags& tf_import_flags, Model* model) { @@ -594,6 +611,18 @@ void ConvertRelu6Operator(const NodeDef& node, model->operators.emplace_back(op); } +void ConvertLogOperator(const NodeDef& node, + const TensorFlowImportFlags& tf_import_flags, + Model* model) { + CHECK_EQ(node.op(), "Log"); + CheckInputsCount(node, tf_import_flags, 1); + + auto op = absl::make_unique(); + op->inputs.push_back(node.input(0)); + op->outputs.push_back(node.name()); + model->operators.emplace_back(std::move(op)); +} + 
void ConvertLogisticOperator(const NodeDef& node, const TensorFlowImportFlags& tf_import_flags, Model* model) { @@ -665,6 +694,8 @@ void ConvertFakeQuantWithMinMaxArgs( minmax.min = GetFloatAttr(node, "min"); minmax.max = GetFloatAttr(node, "max"); op->outputs.push_back(node.name()); + // tf.fake_quant_with_min_max_args num_bits defaults to 8. + op->num_bits = HasAttr(node, "num_bits") ? GetIntAttr(node, "num_bits") : 8; model->operators.emplace_back(op); } @@ -682,6 +713,7 @@ void ConvertFakeQuantWithMinMaxVars( op->inputs.push_back(node.input(i)); } op->outputs.push_back(node.name()); + op->num_bits = HasAttr(node, "num_bits") ? GetIntAttr(node, "num_bits") : 8; model->operators.emplace_back(op); } @@ -1931,7 +1963,7 @@ void ConvertTopKV2Operator(const NodeDef& node, // K can be encoded as attr (TopK) convert it to a const. if (HasAttr(node, "k")) { string k_array = CreateConstArray( - model, node.name() + "k", {GetIntAttr(node, "k")}); + model, node.name() + "k", {static_cast(GetIntAttr(node, "k"))}); op->inputs.push_back(k_array); } else { CheckInputsCount(node, tf_import_flags, 2); @@ -2074,6 +2106,8 @@ std::unique_ptr ImportTensorFlowGraphDef( ConvertLRNOperator(node, tf_import_flags, model); } else if (node.op() == "Softmax") { ConvertSoftmaxOperator(node, tf_import_flags, model); + } else if (node.op() == "Log") { + ConvertLogOperator(node, tf_import_flags, model); } else if (node.op() == "LogSoftmax") { ConvertLogSoftmaxOperator(node, tf_import_flags, model); } else if (node.op() == "All") { @@ -2168,6 +2202,8 @@ std::unique_ptr ImportTensorFlowGraphDef( } else if (node.op() == "DynamicStitch" || node.op() == "ParallelDynamicStitch") { ConvertDynamicStitchOperator(node, tf_import_flags, model); + } else if (node.op() == "RandomUniform") { + ConvertRandomUniform(node, tf_import_flags, model); } else { ConvertUnsupportedOperator(node, tf_import_flags, model); } diff --git a/tensorflow/contrib/lite/toco/model.h b/tensorflow/contrib/lite/toco/model.h index 
5199e292e19c2ac59dcfc2efd9947cc788b0299d..482cc71d8b34705041130c7518b58f7fe183bc3c 100644 --- a/tensorflow/contrib/lite/toco/model.h +++ b/tensorflow/contrib/lite/toco/model.h @@ -24,6 +24,7 @@ limitations under the License. #include "tensorflow/contrib/lite/toco/model_flags.pb.h" #include "tensorflow/contrib/lite/toco/runtime/types.h" +#include "tensorflow/contrib/lite/toco/toco_port.h" #include "tensorflow/contrib/lite/toco/toco_types.h" #include "tensorflow/core/platform/logging.h" @@ -56,10 +57,12 @@ enum class OperatorType { kL2Pool, kLstmCell, kLocalResponseNormalization, + kLog, kLogistic, kMaxPool, kFakeQuant, kMul, + kRandomUniform, kRange, kRank, kRelu, @@ -149,9 +152,9 @@ enum class AxesOrder { }; // The type of the scalars in an array. -// Note that that does not by itself tell whether the values in the array are -// real (are literally interpreted as real numbers) or quantized (only acquire -// a meaning as real numbers in conjunction with QuantizationParams). +// Note that the type does not by itself tell whether the values in the array +// are real (are literally interpreted as real numbers) or quantized (only +// acquire a meaning as real numbers in conjunction with QuantizationParams). // // In practice though: // float values are always real @@ -423,6 +426,7 @@ struct SpaceToDepthOperator : Operator { // input activations as a matrix, followed by a MatMul node. 
struct FullyConnectedOperator : Operator { FullyConnectedOperator() : Operator(OperatorType::kFullyConnected) {} + bool experimental_shuffled_weights = false; }; // Dequantization operator, converting a quantized array of integers with @@ -590,6 +594,17 @@ struct LogisticOperator : Operator { LogisticOperator() : Operator(OperatorType::kLogistic) {} }; +// Element-wise natural log operator: +// x -> ln(x) +// +// Inputs: +// inputs[0]: required: the input array +// +// TensorFlow equivalent: Log +struct LogOperator : Operator { + LogOperator() : Operator(OperatorType::kLog) {} +}; + // Element-wise Tanh operator: // x -> Tanh(x) = (exp(x) - exp(-x)) / (exp(x) + exp(-x)) // @@ -711,8 +726,7 @@ struct L2PoolOperator : Operator { // The expected [min, max] range of values in a given array. // Used for quantization only. // This information typically comes from special nodes found in quantized -// models, -// see FakeQuantOperator, and is used during quantization to resolve +// models, see FakeQuantOperator, and is used during quantization to resolve // actual quantization parameters (see QuantizationParams). struct MinMax { double min = 0.; @@ -740,6 +754,7 @@ inline bool operator==(const MinMax& m1, const MinMax& m2) { struct FakeQuantOperator : Operator { FakeQuantOperator() : Operator(OperatorType::kFakeQuant) {} std::unique_ptr minmax; + int num_bits = 8; }; // Element-wise division operator. 
@@ -831,6 +846,60 @@ struct StridedSliceOperator : Operator { int end_mask; int new_axis_mask; int shrink_axis_mask; + + StridedSliceOperator(const StridedSliceOperator& other) + : Operator(OperatorType::kStridedSlice) { + inputs = other.inputs; + outputs = other.outputs; + + start_indices = other.start_indices; + stop_indices = other.stop_indices; + strides = other.strides; + + begin_mask = other.begin_mask; + ellipsis_mask = other.ellipsis_mask; + end_mask = other.end_mask; + new_axis_mask = other.new_axis_mask; + shrink_axis_mask = other.shrink_axis_mask; + } + + void PadIndices(int dim_count) { + // Add indices and mask bits to fully include extra dimensions + CHECK_GE(dim_count, start_indices.size()); + CHECK_EQ(start_indices.size(), stop_indices.size()); + CHECK_EQ(stop_indices.size(), strides.size()); + + for (int i = start_indices.size(); i < dim_count; i++) { + start_indices.push_back(0); + stop_indices.push_back(0); + strides.push_back(1); + begin_mask |= 1 << i; + end_mask |= 1 << i; + } + } + + void ReverseIndices() { + CHECK_EQ(start_indices.size(), stop_indices.size()); + CHECK_EQ(stop_indices.size(), strides.size()); + + std::reverse(start_indices.begin(), start_indices.end()); + std::reverse(stop_indices.begin(), stop_indices.end()); + std::reverse(strides.begin(), strides.end()); + + begin_mask = toco::port::ReverseBits32(static_cast(begin_mask)) >> + (32 - start_indices.size()); + ellipsis_mask = + toco::port::ReverseBits32(static_cast(ellipsis_mask)) >> + (32 - start_indices.size()); + end_mask = toco::port::ReverseBits32(static_cast(end_mask)) >> + (32 - start_indices.size()); + new_axis_mask = + toco::port::ReverseBits32(static_cast(new_axis_mask)) >> + (32 - start_indices.size()); + shrink_axis_mask = + toco::port::ReverseBits32(static_cast(shrink_axis_mask)) >> + (32 - start_indices.size()); + } }; // Reshaping operator, reshaping its input array to a two-dimensional shape @@ -946,6 +1015,13 @@ struct FloorModOperator : Operator { 
FloorModOperator() : Operator(OperatorType::kFloorMod) {} }; +struct RandomUniformOperator : Operator { + RandomUniformOperator() : Operator(OperatorType::kRandomUniform) {} + ArrayDataType dtype = ArrayDataType::kNone; + int64 seed; + int64 seed2; +}; + // Creates a sequence of numbers that begins at start and extends by increments // of delta up to but not including limit. // @@ -1309,6 +1385,15 @@ struct SoftmaxOperator : Operator { // TensorFlow equivalent: LogSoftmax struct LogSoftmaxOperator : Operator { LogSoftmaxOperator() : Operator(OperatorType::kLogSoftmax) {} + + // LogSoftmax can in principal have very large negative output, depending on + // the input size. However, input x_i that is less than x_max-10 is + // accumulated as exp(x_i-x_max), which is truncated to zero. + // + // Since we effectively disregard smallish inputs in the normalizing factor, + // we also drop them in the output (set to minimum output), and in doing so + // make better use of the quantization range / resolution. + static constexpr float kOutputRangeMin = -16.0; }; // Cast operator. @@ -1391,8 +1476,7 @@ struct SpaceToBatchNDOperator : Operator { }; // BatchToSpaceND operator. Rearranges data from batch into blocks of -// spatial data. Currently, only 2-d blocks are supported. Cropping is not -// supported, either, and the crops array should be all zero. +// spatial data. Currently, only 2-d blocks are supported. // // Inputs: // inputs[0]: required: the input array @@ -1499,7 +1583,14 @@ class Shape { // We still have that one convenience accessor to avoid // the awkward double bracket issue: shape.dims()[i]. - int dims(int i) const { return dims_[i]; } + int dims(int i) const { + // Always check for out-of-bounds accesses, even in optimized builds where + // standard assertions are disabled. Out-of-bounds access here is a common + // occurrence. 
+ CHECK_GE(i, 0); + CHECK_GT(dims_.size(), i); + return dims_[i]; + } bool operator==(const Shape& comp) const { return (this->dims_ == comp.dims()); diff --git a/tensorflow/contrib/lite/toco/model_cmdline_flags.cc b/tensorflow/contrib/lite/toco/model_cmdline_flags.cc index 4264f21c76e6f4a26d1be710874c0edb96a6ca6d..7bbeab7c9d1e42d28f221f1a1134d9d05fe6ab51 100644 --- a/tensorflow/contrib/lite/toco/model_cmdline_flags.cc +++ b/tensorflow/contrib/lite/toco/model_cmdline_flags.cc @@ -160,6 +160,16 @@ bool ParseModelFlagsFromCommandLineFlags( "Path to an optional file containing a serialized ArraysExtraInfo " "proto allowing to pass extra information about arrays not specified " "in the input model file, such as extra MinMax information."), + Flag("model_flags_file", parsed_flags.model_flags_file.bind(), + parsed_flags.model_flags_file.default_value(), + "Path to an optional file containing a serialized ModelFlags proto. " + "Options specified on the command line will override the values in " + "the proto."), + Flag("change_concat_input_ranges", + parsed_flags.change_concat_input_ranges.bind(), + parsed_flags.change_concat_input_ranges.default_value(), + "Boolean to change the behavior of min/max ranges for inputs and" + " output of the concat operators."), }; bool asked_for_help = *argc == 2 && (!strcmp(argv[1], "--help") || !strcmp(argv[1], "-help")); @@ -182,7 +192,24 @@ void ReadModelFlagsFromCommandLineFlags( const ParsedModelFlags& parsed_model_flags, ModelFlags* model_flags) { toco::port::CheckInitGoogleIsDone("InitGoogle is not done yet"); -// "batch" flag only exists internally + // Load proto containing the initial model flags. + // Additional flags specified on the command line will overwrite the values. 
+ if (parsed_model_flags.model_flags_file.specified()) { + string model_flags_file_contents; + QCHECK(port::file::GetContents(parsed_model_flags.model_flags_file.value(), + &model_flags_file_contents, + port::file::Defaults()) + .ok()) + << "Specified --model_flags_file=" + << parsed_model_flags.model_flags_file.value() + << " was not found or could not be read"; + QCHECK(ParseFromStringEitherTextOrBinary(model_flags_file_contents, + model_flags)) + << "Specified --model_flags_file=" + << parsed_model_flags.model_flags_file.value() + << " could not be parsed"; + } + #ifdef PLATFORM_GOOGLE CHECK(!((base::SpecifiedOnCommandLine("batch") && parsed_model_flags.variable_batch.specified()))) @@ -377,12 +404,15 @@ void ReadModelFlagsFromCommandLineFlags( parsed_model_flags.allow_nonascii_arrays.value()); model_flags->set_allow_nonexistent_arrays( parsed_model_flags.allow_nonexistent_arrays.value()); + model_flags->set_change_concat_input_ranges( + parsed_model_flags.change_concat_input_ranges.value()); if (parsed_model_flags.arrays_extra_info_file.specified()) { string arrays_extra_info_file_contents; - port::file::GetContents(parsed_model_flags.arrays_extra_info_file.value(), - &arrays_extra_info_file_contents, - port::file::Defaults()); + CHECK(port::file::GetContents( + parsed_model_flags.arrays_extra_info_file.value(), + &arrays_extra_info_file_contents, port::file::Defaults()) + .ok()); ParseFromStringEitherTextOrBinary(arrays_extra_info_file_contents, model_flags->mutable_arrays_extra_info()); } diff --git a/tensorflow/contrib/lite/toco/model_flags.proto b/tensorflow/contrib/lite/toco/model_flags.proto index 42e0f54826dd809a801a8ac1bfd0a5a7660382a8..d23e80c464c9fe9d717d4af8093fa5dee04dca6d 100644 --- a/tensorflow/contrib/lite/toco/model_flags.proto +++ b/tensorflow/contrib/lite/toco/model_flags.proto @@ -98,8 +98,8 @@ message ArraysExtraInfo { message Entry { // Next ID to use: 7. 
optional string name = 1; - optional float min = 2; - optional float max = 3; + optional double min = 2; + optional double max = 3; optional IODataType data_type = 4; optional InputArrayShape shape = 5; optional float constant_float_value = 6; @@ -128,7 +128,7 @@ message ArraysExtraInfo { // optional int32 input_dims = 11 [ default = 4]; // repeated int32 input_shape = 13; // -// Next ID to USE: 19. +// Next ID to USE: 20. message ModelFlags { // Information about the input arrays, i.e. the arrays from which input // activations will be read. @@ -175,4 +175,8 @@ message ModelFlags { // If set, this ArraysExtraInfo allows to pass extra information about arrays // not specified in the input model file, such as extra MinMax information. optional ArraysExtraInfo arrays_extra_info = 18; + + // When set to false, toco will not change the input ranges and the output + // ranges of concat operator to the overlap of all input ranges. + optional bool change_concat_input_ranges = 19 [default = true]; } diff --git a/tensorflow/contrib/lite/toco/python/toco_from_protos_test.py b/tensorflow/contrib/lite/toco/python/toco_from_protos_test.py index c35b6f99259b762aa83d92d21512169a7ab50b70..3761e0095ebb06b9e26eca55a36718b92058e47b 100644 --- a/tensorflow/contrib/lite/toco/python/toco_from_protos_test.py +++ b/tensorflow/contrib/lite/toco/python/toco_from_protos_test.py @@ -50,6 +50,7 @@ class TocoFromProtosTest(googletest.TestCase): toco_flags.output_format = toco_flags_pb2.TFLITE toco_flags.inference_input_type = types_pb2.FLOAT toco_flags.inference_type = types_pb2.FLOAT + toco_flags.allow_custom_ops = True; model_flags = model_flags_pb2.ModelFlags() input_array = model_flags.input_arrays.add() input_array.name = TensorName(in_tensor) diff --git a/tensorflow/contrib/lite/toco/python/toco_python_api.cc b/tensorflow/contrib/lite/toco/python/toco_python_api.cc index 8a5e483f3f1676ebed3244bd6f7eb610fad21557..153c117d17e4564d7cb0aaea64d792f63a587d91 100644 --- 
a/tensorflow/contrib/lite/toco/python/toco_python_api.cc +++ b/tensorflow/contrib/lite/toco/python/toco_python_api.cc @@ -75,7 +75,8 @@ PyObject* TocoConvert(PyObject* model_flags_proto_txt_raw, toco::Import(toco_flags, model_flags, input_contents_txt); toco::Transform(toco_flags, model.get()); string output_file_contents_txt; - Export(toco_flags, *model, &output_file_contents_txt); + Export(toco_flags, *model, toco_flags.allow_custom_ops(), + &output_file_contents_txt); // Convert arguments back to byte (py3) or str (py2) return TOCO_FROM_CPPSTRING_TO_PY(output_file_contents_txt.data(), diff --git a/tensorflow/contrib/lite/toco/tflite/operator.cc b/tensorflow/contrib/lite/toco/tflite/operator.cc index 0cb348bda513e61da983ed20c972e7ae2618c47a..d2e14ac5e0d7b06451de295574f42c6139cb97a0 100644 --- a/tensorflow/contrib/lite/toco/tflite/operator.cc +++ b/tensorflow/contrib/lite/toco/tflite/operator.cc @@ -68,7 +68,9 @@ class Convolution auto activation_function = ActivationFunction::Serialize(op.fused_activation_function); return ::tflite::CreateConv2DOptions(*builder, padding, op.stride_width, - op.stride_height, activation_function); + op.stride_height, activation_function, + op.dilation_width_factor, + op.dilation_height_factor); } void ReadOptions(const TfLiteOptions& options, @@ -76,6 +78,8 @@ class Convolution op->padding.type = Padding::Deserialize(options.padding()); op->stride_width = options.stride_w(); op->stride_height = options.stride_h(); + op->dilation_width_factor = options.dilation_w_factor(); + op->dilation_height_factor = options.dilation_h_factor(); op->fused_activation_function = ActivationFunction::Deserialize(options.fused_activation_function()); } @@ -204,17 +208,22 @@ class BatchToSpaceND TocoOperator* op) const override {} }; -class Cast : public CustomOperator { +class Cast : public BuiltinOperator { public: - using CustomOperator::CustomOperator; - void WriteOptions(const TocoOperator& op, - flexbuffers::Builder* fbb) const override { - 
fbb->Int("src_data_type", DataType::Serialize(op.src_data_type)); - fbb->Int("dst_data_type", DataType::Serialize(op.dst_data_type)); + using BuiltinOperator::BuiltinOperator; + flatbuffers::Offset WriteOptions( + const TocoOperator& op, + flatbuffers::FlatBufferBuilder* builder) const override { + return ::tflite::CreateCastOptions(*builder, + DataType::Serialize(op.src_data_type), + DataType::Serialize(op.dst_data_type)); } - void ReadOptions(const flexbuffers::Map& m, TocoOperator* op) const override { - op->src_data_type = DataType::Deserialize(m["src_data_type"].AsInt64()); - op->dst_data_type = DataType::Deserialize(m["dst_data_type"].AsInt64()); + + void ReadOptions(const TfLiteOptions& options, + TocoOperator* op) const override { + op->src_data_type = DataType::Deserialize(options.in_data_type()); + op->dst_data_type = DataType::Deserialize(options.out_data_type()); } }; @@ -255,12 +264,15 @@ class FakeQuant : public CustomOperator { flexbuffers::Builder* fbb) const override { fbb->Float("min", op.minmax->min); fbb->Float("max", op.minmax->max); + fbb->Int("num_bits", op.num_bits); } void ReadOptions(const flexbuffers::Map& m, TocoOperator* op) const override { auto* minmax = new MinMax; minmax->min = m["min"].AsFloat(); minmax->max = m["max"].AsFloat(); op->minmax.reset(minmax); + const auto& num_bits = m["num_bits"]; + op->num_bits = num_bits.IsInt() ? 
num_bits.AsInt32() : 8; } }; @@ -657,6 +669,23 @@ class TopK_V2 : public BuiltinOperator { + public: + using BuiltinOperator::BuiltinOperator; + flatbuffers::Offset WriteOptions( + const TocoOperator& op, + flatbuffers::FlatBufferBuilder* builder) const override { + return ::tflite::CreateArgMaxOptions( + *builder, DataType::Serialize(op.output_data_type)); + } + + void ReadOptions(const TfLiteOptions& options, + TocoOperator* op) const override { + op->output_data_type = DataType::Deserialize(options.output_type()); + } +}; + class TensorFlowUnsupported : public BaseOperator { public: using BaseOperator::BaseOperator; @@ -827,9 +856,12 @@ std::vector> BuildOperatorList() { new TopK_V2(::tflite::BuiltinOperator_TOPK_V2, OperatorType::kTopK_V2)); ops.emplace_back( new Lstm(::tflite::BuiltinOperator_LSTM, OperatorType::kLstmCell)); + ops.emplace_back( + new Cast(::tflite::BuiltinOperator_CAST, OperatorType::kCast)); + ops.emplace_back( + new ArgMax(::tflite::BuiltinOperator_ARG_MAX, OperatorType::kArgMax)); // Custom Operators. 
- ops.emplace_back(new Cast("CAST", OperatorType::kCast)); ops.emplace_back( new DepthToSpace("DEPTH_TO_SPACE", OperatorType::kDepthToSpace)); ops.emplace_back(new FakeQuant("FAKE_QUANT", OperatorType::kFakeQuant)); @@ -865,6 +897,10 @@ std::vector> BuildOperatorList() { "LOG_SOFTMAX", OperatorType::kLogSoftmax)); ops.emplace_back(new SimpleOperator( "MAXIMUM", OperatorType::kTensorFlowMaximum)); + ops.emplace_back(new SimpleOperator( + "MINIMUM", OperatorType::kTensorFlowMinimum)); + ops.emplace_back(new SimpleOperator( + "LESS", OperatorType::kTensorFlowLess)); return ops; } diff --git a/tensorflow/contrib/lite/toco/tflite/operator_test.cc b/tensorflow/contrib/lite/toco/tflite/operator_test.cc index f7a213ecfc539e009f78e7c0e424d36a38b3486c..36ed741541eadbc9435a67bec15d389ba48350c1 100644 --- a/tensorflow/contrib/lite/toco/tflite/operator_test.cc +++ b/tensorflow/contrib/lite/toco/tflite/operator_test.cc @@ -111,6 +111,10 @@ TEST_F(OperatorTest, SimpleOperators) { OperatorType::kLogSoftmax); CheckSimpleOperator( "MAXIMUM", OperatorType::kTensorFlowMaximum); + CheckSimpleOperator( + "MINIMUM", OperatorType::kTensorFlowMinimum); + CheckSimpleOperator("LESS", + OperatorType::kTensorFlowLess); } TEST_F(OperatorTest, BuiltinAdd) { @@ -131,7 +135,7 @@ TEST_F(OperatorTest, BuiltinMean) { EXPECT_EQ(op.keep_dims, output_toco_op->keep_dims); } -TEST_F(OperatorTest, CustomCast) { +TEST_F(OperatorTest, BuiltinCast) { CastOperator op; op.src_data_type = ArrayDataType::kFloat; op.dst_data_type = ArrayDataType::kUint8; @@ -163,10 +167,12 @@ TEST_F(OperatorTest, CustomFakeQuant) { minmax->min = -10; minmax->max = 200; op.minmax.reset(minmax); + op.num_bits = 16; auto output_toco_op = SerializeAndDeserialize( GetOperator("FAKE_QUANT", OperatorType::kFakeQuant), op); EXPECT_EQ(op.minmax->min, output_toco_op->minmax->min); EXPECT_EQ(op.minmax->max, output_toco_op->minmax->max); + EXPECT_EQ(op.num_bits, output_toco_op->num_bits); } TEST_F(OperatorTest, CustomFullyConnected) { @@ 
-391,6 +397,13 @@ TEST_F(OperatorTest, BuiltinTopKV2) { ASSERT_NE(nullptr, output_toco_op.get()); } +TEST_F(OperatorTest, BuiltinArgMax) { + ArgMaxOperator op; + auto output_toco_op = SerializeAndDeserialize( + GetOperator("ARG_MAX", OperatorType::kArgMax), op); + EXPECT_EQ(op.output_data_type, output_toco_op->output_data_type); +} + TEST_F(OperatorTest, TensorFlowUnsupported) { TensorFlowUnsupportedOperator op; op.tensorflow_op = "MyCustomUnsupportedOp"; diff --git a/tensorflow/contrib/lite/toco/toco_cmdline_flags.cc b/tensorflow/contrib/lite/toco/toco_cmdline_flags.cc index cc7803dd866f0282f67d1d6f227cce0fdd8c7fd6..1611c4d0c0b148e00dbf0b21b9cd65d4c8163c2c 100644 --- a/tensorflow/contrib/lite/toco/toco_cmdline_flags.cc +++ b/tensorflow/contrib/lite/toco/toco_cmdline_flags.cc @@ -61,11 +61,21 @@ bool ParseTocoFlagsFromCommandLineFlags( Flag("default_ranges_min", parsed_flags.default_ranges_min.bind(), parsed_flags.default_ranges_min.default_value(), "If defined, will be used as the default value for the min bound " - "of min/max ranges used for quantization."), + "of min/max ranges used for quantization of uint8 arrays."), Flag("default_ranges_max", parsed_flags.default_ranges_max.bind(), parsed_flags.default_ranges_max.default_value(), "If defined, will be used as the default value for the max bound " - "of min/max ranges used for quantization."), + "of min/max ranges used for quantization of uint8 arrays."), + Flag("default_int16_ranges_min", + parsed_flags.default_int16_ranges_min.bind(), + parsed_flags.default_int16_ranges_min.default_value(), + "If defined, will be used as the default value for the min bound " + "of min/max ranges used for quantization of int16 arrays."), + Flag("default_int16_ranges_max", + parsed_flags.default_int16_ranges_max.bind(), + parsed_flags.default_int16_ranges_max.default_value(), + "If defined, will be used as the default value for the max bound " + "of min/max ranges used for quantization of int16 arrays."), 
Flag("inference_type", parsed_flags.inference_type.bind(), parsed_flags.inference_type.default_value(), "Target data type of arrays in the output file (for input_arrays, " @@ -126,6 +136,18 @@ bool ParseTocoFlagsFromCommandLineFlags( parsed_flags.debug_disable_recurrent_cell_fusion.default_value(), "If true, disable fusion of known identifiable cell subgraphs into " "cells. This includes, for example, specific forms of LSTM cell."), + Flag("propagate_fake_quant_num_bits", + parsed_flags.propagate_fake_quant_num_bits.bind(), + parsed_flags.propagate_fake_quant_num_bits.default_value(), + "If true, use FakeQuant* operator num_bits attributes to adjust " + "array data_types."), + Flag("allow_nudging_weights_to_use_fast_gemm_kernel", + parsed_flags.allow_nudging_weights_to_use_fast_gemm_kernel.bind(), + parsed_flags.allow_nudging_weights_to_use_fast_gemm_kernel + .default_value(), + "Some fast uint8 GEMM kernels require uint8 weights to avoid the " + "value 0. This flag allows nudging them to 1 to allow proceeding, " + "with moderate inaccuracy."), }; bool asked_for_help = *argc == 2 && (!strcmp(argv[1], "--help") || !strcmp(argv[1], "-help")); @@ -207,10 +229,16 @@ void ReadTocoFlagsFromCommandLineFlags(const ParsedTocoFlags& parsed_toco_flags, PARSE_TOCO_FLAG(IODataType, inference_input_type, FlagRequirement::kNone); READ_TOCO_FLAG(default_ranges_min, FlagRequirement::kNone); READ_TOCO_FLAG(default_ranges_max, FlagRequirement::kNone); + READ_TOCO_FLAG(default_int16_ranges_min, FlagRequirement::kNone); + READ_TOCO_FLAG(default_int16_ranges_max, FlagRequirement::kNone); READ_TOCO_FLAG(drop_fake_quant, FlagRequirement::kNone); READ_TOCO_FLAG(reorder_across_fake_quant, FlagRequirement::kNone); READ_TOCO_FLAG(allow_custom_ops, FlagRequirement::kNone); READ_TOCO_FLAG(drop_control_dependency, FlagRequirement::kNone); + READ_TOCO_FLAG(debug_disable_recurrent_cell_fusion, FlagRequirement::kNone); + READ_TOCO_FLAG(propagate_fake_quant_num_bits, FlagRequirement::kNone); + 
READ_TOCO_FLAG(allow_nudging_weights_to_use_fast_gemm_kernel, + FlagRequirement::kNone); // Deprecated flag handling. if (parsed_toco_flags.input_type.specified()) { diff --git a/tensorflow/contrib/lite/toco/toco_flags.proto b/tensorflow/contrib/lite/toco/toco_flags.proto index 3237147a736f97f65953ca965420fcea934820a4..a04017a6bf05fb5a1ae324051375340a0adaccc6 100644 --- a/tensorflow/contrib/lite/toco/toco_flags.proto +++ b/tensorflow/contrib/lite/toco/toco_flags.proto @@ -37,7 +37,7 @@ enum FileFormat { // of as properties of models, instead describing how models are to be // processed in the context of the present tooling job. // -// Next ID to use: 14. +// Next ID to use: 18. message TocoFlags { // Input file format optional FileFormat input_format = 1; @@ -103,8 +103,14 @@ message TocoFlags { // for experimentation purposes only and should not be used in production: // they make it easy to quantize models, but the resulting quantized model // will be inaccurate. + // + // These values only apply to arrays quantized with the kUint8 data type. optional float default_ranges_min = 5; optional float default_ranges_max = 6; + // Equivalent versions of default_ranges_min/_max for arrays quantized with + // the kInt16 data type. + optional float default_int16_ranges_min = 15; + optional float default_int16_ranges_max = 16; // Ignore and discard FakeQuant nodes. For instance, that can be used to // generate plain float code without fake-quantization from a quantized @@ -141,4 +147,18 @@ message TocoFlags { // Disables transformations that fuse subgraphs such as known LSTMs (not all // LSTMs are identified). optional bool debug_disable_recurrent_cell_fusion = 13; + + // Uses the FakeQuantWithMinMaxArgs.num_bits attribute to adjust quantized + // array data types throughout the graph. The graph must be properly annotated + // with FakeQuant* ops on at least the edges and may contain additional ops on + // the interior of the graph to widen/narrow as desired. 
+ // + // Input and output array data types may change because of this propagation + // and users must be sure to query the final data_type values. + optional bool propagate_fake_quant_num_bits = 14; + + // Some fast uint8 GEMM kernels require uint8 weights to avoid the value 0. + // This flag allows nudging them to 1 to allow proceeding, with moderate + // inaccuracy. + optional bool allow_nudging_weights_to_use_fast_gemm_kernel = 17; } diff --git a/tensorflow/contrib/lite/toco/toco_port.h b/tensorflow/contrib/lite/toco/toco_port.h index 4be3b5a0bf00ed204a1218545d9e66f7685a50d7..2d5c231bef350884cd2b0bb62cfdbddebfed7f58 100644 --- a/tensorflow/contrib/lite/toco/toco_port.h +++ b/tensorflow/contrib/lite/toco/toco_port.h @@ -75,6 +75,14 @@ Status Exists(const string& filename, const Options& options); void CopyToBuffer(const ::Cord& src, char* dest); #endif // PLATFORM_GOOGLE void CopyToBuffer(const string& src, char* dest); + +inline uint32 ReverseBits32(uint32 n) { + n = ((n >> 1) & 0x55555555) | ((n & 0x55555555) << 1); + n = ((n >> 2) & 0x33333333) | ((n & 0x33333333) << 2); + n = ((n >> 4) & 0x0F0F0F0F) | ((n & 0x0F0F0F0F) << 4); + return (((n & 0xFF) << 24) | ((n & 0xFF00) << 8) | ((n & 0xFF0000) >> 8) | + ((n & 0xFF000000) >> 24)); +} } // namespace port inline bool ParseFromStringOverload(const std::string& in, diff --git a/tensorflow/contrib/lite/toco/toco_tooling.cc b/tensorflow/contrib/lite/toco/toco_tooling.cc index 30dd6fab9ebbad9c2add7f830f9b58a73f41714b..7252ec2ea4d8864334fc989c91fc8c26efe2efea 100644 --- a/tensorflow/contrib/lite/toco/toco_tooling.cc +++ b/tensorflow/contrib/lite/toco/toco_tooling.cc @@ -18,6 +18,7 @@ limitations under the License. 
#include #include +#include "absl/memory/memory.h" #include "absl/strings/str_join.h" #include "tensorflow/contrib/lite/toco/allocate_transient_arrays.h" #include "tensorflow/contrib/lite/toco/dump_graphviz.h" @@ -66,6 +67,7 @@ void MakeGeneralGraphTransformationsSet( transformations->Add(new RemoveTensorFlowIdentity); transformations->Add(new RemoveTrivialConcatenation); transformations->Add(new RemoveTrivialConcatenationInput); + transformations->Add(new RemoveTrivialFakeQuant); transformations->Add(new RemoveTrivialSlice); transformations->Add(new RemoveUnusedOp); transformations->Add(new EnsureBiasVectors); @@ -74,12 +76,16 @@ void MakeGeneralGraphTransformationsSet( transformations->Add(new ResolveTensorFlowMatMul); transformations->Add(new FuseBinaryIntoPrecedingAffine); transformations->Add(new FuseBinaryIntoFollowingAffine); - transformations->Add(new ReorderActivationFunctions); + transformations->Add(new MergeReshapeIntoPrecedingTranspose); + transformations->Add(new ReorderElementwiseUnary); + transformations->Add(new ReorderReshapeTranspose); transformations->Add(new ResolveBatchNormalization); transformations->Add(new ResolveConstantBinaryOperator); transformations->Add(new ResolveConstantFill); transformations->Add(new ResolveConstantGather); + transformations->Add(new ResolveConstantRandomUniform); transformations->Add(new ResolveConstantRange); + transformations->Add(new ResolveConstantReshape); transformations->Add(new ResolveConstantStack); transformations->Add(new ResolveConstantStridedSlice); transformations->Add(new ResolveConstantTranspose); @@ -105,7 +111,6 @@ void MakeGeneralGraphTransformationsSet( transformations->Add(new ResolveMeanAttributes); transformations->Add(new ResolveConstantShapeOrRank); transformations->Add(new MakeInitialDequantizeOperator); - transformations->Add(new ResolveConstantFakeQuant); transformations->Add(new UnpartitionEmbeddingLookup); } @@ -127,20 +132,26 @@ bool SupportsPreallocatedWorkspace(FileFormat format) { 
} bool IsRealValued(toco::ArrayDataType type) { + // TODO(benoitjacob) - this is hardcoding that uint8 and int16 are only used + // for quantized real-number values, and no other integer type is ever used + // for that. This is dirty, should be resolved as part of a more general push + // to more explicitly distinguish between true-integers and + // integers used as quantized values representing real numbers. return static_cast(type == toco::ArrayDataType::kFloat || - type == toco::ArrayDataType::kUint8); + type == toco::ArrayDataType::kUint8 || + type == toco::ArrayDataType::kInt16); } void SetFinalDataTypeOnInputs(const TocoFlags& toco_flags, Model* model) { const FileFormat output_format = toco_flags.output_format(); ArrayDataType type; - if (toco_flags.has_inference_input_type()) { + if (!SupportsQuantization(output_format)) { + // Data type is implicitly float for non-quantized formats + type = ArrayDataType::kFloat; + } else if (toco_flags.has_inference_input_type()) { type = ConvertIODataTypeToArrayDataType(toco_flags.inference_input_type()); } else if (toco_flags.has_inference_type()) { type = ConvertIODataTypeToArrayDataType(toco_flags.inference_type()); - } else if (!SupportsQuantization(output_format)) { - // Data type is implicitly float for non-quantized formats - type = ArrayDataType::kFloat; } else { // Nothing to do. Data types stay as-is. return; @@ -195,11 +206,6 @@ std::unique_ptr Import(const TocoFlags& toco_flags, } void Transform(const TocoFlags& toco_flags, Model* model) { - // Clean up after import. - SetFinalDataTypeOnInputs(toco_flags, model); - UseArraysExtraInfo(model); - FinishBuildingRNNStates(model); - const FileFormat output_format = toco_flags.output_format(); const IODataType inference_type = toco_flags.inference_type(); @@ -212,6 +218,11 @@ void Transform(const TocoFlags& toco_flags, Model* model) { << "Quantized inference is not allowed with float inputs."; } + // Clean up after import. 
+ SetFinalDataTypeOnInputs(toco_flags, model); + UseArraysExtraInfo(model, quantize_output); + FinishBuildingRNNStates(model); + // Remove unused ops before performing any other optimizations. This is to // stop optimizations from crossing the input/output boundaries. For example // this will stop BatchNorm fusing if the output node is in between a conv @@ -223,6 +234,12 @@ void Transform(const TocoFlags& toco_flags, Model* model) { MakeGeneralGraphTransformationsSet(&transformations); auto* remove_trivial_reshape = new RemoveTrivialReshape; transformations.Add(remove_trivial_reshape); + auto* resolve_constant_fake_quant = new ResolveConstantFakeQuant; + if (quantize_output) { + resolve_constant_fake_quant->set_propagate_fake_quant_num_bits( + toco_flags.propagate_fake_quant_num_bits()); + } + transformations.Add(resolve_constant_fake_quant); if (SupportsFusedActivationFunction(output_format)) { transformations.Add(new FuseActivationFunctions); } else { @@ -255,25 +272,63 @@ void Transform(const TocoFlags& toco_flags, Model* model) { transformations); if (quantize_output) { + if (toco_flags.propagate_fake_quant_num_bits()) { + RunGraphTransformations(model, + "fake quant propagation graph transformations", + {new PropagateFakeQuantNumBits}); + } RunGraphTransformations(model, "pre-quantization graph transformations", - {new HardcodeMinMax, new DropFakeQuant}); + { + new HardcodeMinMax, + new DropFakeQuant, + }); } + // Fix any issues with IO edges. This must happen after any transform that + // may modify the structure of the edges. + FixEdgeArrays(model); + if (quantize_output) { + // If the user specified default min/max ranges we need to set all arrays + // that didn't either have a min/max specified or get one set via + // HardcodeMinMax or PropagateFakeQuantNumBits. This may require running + // HardcodeMinMax to move changes through the graph as we make changes. 
+ auto propagate_default_min_max = + absl::make_unique(); if (toco_flags.has_default_ranges_min() && toco_flags.has_default_ranges_max()) { - UseDefaultMinMaxRangeValues(model, toco_flags.default_ranges_min(), - toco_flags.default_ranges_max()); - // The new MinMax info may need to be propagated a bit. + propagate_default_min_max->DefineTypeRange( + ArrayDataType::kUint8, toco_flags.default_ranges_min(), + toco_flags.default_ranges_max()); + } + if (toco_flags.has_default_int16_ranges_min() && + toco_flags.has_default_int16_ranges_max()) { + propagate_default_min_max->DefineTypeRange( + ArrayDataType::kInt16, toco_flags.default_int16_ranges_min(), + toco_flags.default_int16_ranges_max()); + } + if (propagate_default_min_max->has_any_ranges_defined()) { RunGraphTransformations( model, "default min-max range propagation graph transformations", - {new HardcodeMinMax}); + { + propagate_default_min_max.release(), + new HardcodeMinMax, + }); } + CheckIsReadyForQuantization(*model); - RunGraphTransformations( - model, "quantization graph transformations", - {new Quantize, new RemoveTrivialQuantizedActivationFunc, - new RemoveFinalDequantizeOp}); + auto* ensure_safe_for_int8_kernels = + new EnsureUint8WeightsSafeForFastInt8Kernels; + ensure_safe_for_int8_kernels->set_allow_nudging_weights( + toco_flags.allow_nudging_weights_to_use_fast_gemm_kernel()); + RunGraphTransformations(model, "quantization graph transformations", + { + new RemoveTrivialQuantizedActivationFunc, + new RemoveTrivialQuantizedMinMax, + new Quantize, + new RemoveFinalDequantizeOp, + ensure_safe_for_int8_kernels, + }); } else { GraphTransformationsSet dequantization_transformations{new Dequantize}; // Dequantize creates FakeQuant nodes. We may want to discard @@ -290,10 +345,6 @@ void Transform(const TocoFlags& toco_flags, Model* model) { EncodeConstantArraysMinMaxByWrappingThemInFakeQuantNodes(model); } - // Fix any issues with IO edges. 
This must happen after any transform that - // may modify the structure of the edges. - FixEdgeArrays(model); - LogDump(kLogLevelModelChanged, "AFTER TRANSFORMATIONS", *model); if (output_format != GRAPHVIZ_DOT && output_format != TFLITE) { diff --git a/tensorflow/contrib/lite/toco/tooling_util.cc b/tensorflow/contrib/lite/toco/tooling_util.cc index f3f50487ff74904bf3708fa4c86f522997b55ca0..cf2cbeedc77c382b84f80f11a371f300ea4282ae 100644 --- a/tensorflow/contrib/lite/toco/tooling_util.cc +++ b/tensorflow/contrib/lite/toco/tooling_util.cc @@ -93,9 +93,18 @@ string ArrayDataTypeName(ArrayDataType data_type) { } } -bool IsInputArray(const Model& model, const string& name) { +bool IsInputArray(const Model& model, const string& array_name) { for (const auto& input_array : model.flags.input_arrays()) { - if (input_array.name() == name) { + if (array_name == input_array.name()) { + return true; + } + } + return false; +} + +bool IsOutputArray(const Model& model, const string& array_name) { + for (const auto& output_array : model.flags.output_arrays()) { + if (array_name == output_array) { return true; } } @@ -106,10 +115,8 @@ bool IsArrayConsumed(const Model& model, const string& name) { if (GetOpWithInput(model, name)) { return true; } - for (const string& model_output : model.flags.output_arrays()) { - if (model_output == name) { - return true; - } + if (IsOutputArray(model, name)) { + return true; } for (const auto& rnn_state : model.flags.rnn_states()) { if (rnn_state.back_edge_source_array() == name) { @@ -291,12 +298,14 @@ const char* OperatorTypeName(OperatorType type) { HANDLE_OPERATORTYPENAME_CASE(Dequantize) HANDLE_OPERATORTYPENAME_CASE(L2Normalization) HANDLE_OPERATORTYPENAME_CASE(LocalResponseNormalization) + HANDLE_OPERATORTYPENAME_CASE(Log) HANDLE_OPERATORTYPENAME_CASE(Logistic) HANDLE_OPERATORTYPENAME_CASE(LstmCell) HANDLE_OPERATORTYPENAME_CASE(MaxPool) HANDLE_OPERATORTYPENAME_CASE(L2Pool) HANDLE_OPERATORTYPENAME_CASE(FakeQuant) 
HANDLE_OPERATORTYPENAME_CASE(Mul) + HANDLE_OPERATORTYPENAME_CASE(RandomUniform) HANDLE_OPERATORTYPENAME_CASE(Relu) HANDLE_OPERATORTYPENAME_CASE(Relu1) HANDLE_OPERATORTYPENAME_CASE(Relu6) @@ -377,6 +386,7 @@ string HelpfulOperatorTypeName(const Operator& op) { bool OperatorSupportsFusedActivation(OperatorType type) { switch (type) { case OperatorType::kConcatenation: + case OperatorType::kFakeQuant: case OperatorType::kGather: case OperatorType::kSlice: case OperatorType::kSqueeze: @@ -1062,16 +1072,38 @@ void FixEdgeArrays(Model* model) { } } +namespace { +void CopyArrayAttribs(const Array& source_array, Array* target_array) { + target_array->data_type = source_array.data_type; + target_array->final_data_type = source_array.final_data_type; + target_array->copy_shape(source_array.shape()); + + if (source_array.minmax) { + target_array->GetOrCreateMinMax() = source_array.GetMinMax(); + } else { + target_array->minmax.reset(); + } + + if (source_array.quantization_params) { + target_array->GetOrCreateQuantizationParams() = + source_array.GetQuantizationParams(); + } else { + target_array->quantization_params.reset(); + } +} +} // namespace + void InsertCopyOperator(Model* model, const string& source_array_name, const string& target_array_name) { + // Reshape to the same size. This should be a no-op. + const Array& source_array = model->GetArray(source_array_name); + std::vector shape = source_array.shape().dims(); + // Drop constant data from the target array as the copy will be done at // runtime. Array& target_array = model->GetOrCreateArray(target_array_name); target_array.buffer.reset(); - - // Reshape to the same size. This should be a no-op. - const Array& source_array = model->GetArray(source_array_name); - std::vector shape = source_array.shape().dims(); + CopyArrayAttribs(source_array, &target_array); // Insert copy operator. 
auto* copy_op = new TensorFlowReshapeOperator; @@ -1082,22 +1114,30 @@ void InsertCopyOperator(Model* model, const string& source_array_name, model->operators.emplace_back(copy_op); } -namespace { -template -void CopyArrayBuffer(const Array& source_array, Array* target_array) { - if (source_array.buffer) { - const auto& source_buffer = source_array.GetBuffer(); - auto& target_buffer = target_array->GetMutableBuffer(); - target_buffer.data = source_buffer.data; - } -} -} // namespace - void CloneArray(Model* model, const string& source_array_name, const string& target_array_name) { CHECK(!model->HasArray(target_array_name)); const Array& source_array = model->GetArray(source_array_name); Array& target_array = model->GetOrCreateArray(target_array_name); + CopyArrayAttribs(source_array, &target_array); + + if (source_array.minmax) { + const auto& smm = source_array.GetMinMax(); + auto& tmm = target_array.GetOrCreateMinMax(); + tmm.min = smm.min; + tmm.max = smm.max; + } + + if (source_array.quantization_params) { + const auto& sqp = source_array.GetQuantizationParams(); + auto& tqp = target_array.GetOrCreateQuantizationParams(); + tqp.zero_point = sqp.zero_point; + tqp.scale = sqp.scale; + } + + target_array.data_type = source_array.data_type; + target_array.final_data_type = source_array.final_data_type; + target_array.copy_shape(source_array.shape()); switch (source_array.data_type) { case ArrayDataType::kBool: @@ -1138,25 +1178,6 @@ void CloneArray(Model* model, const string& source_array_name, << ArrayDataTypeName(source_array.data_type); return; } - - if (source_array.minmax) { - const auto& smm = source_array.GetMinMax(); - auto& tmm = target_array.GetOrCreateMinMax(); - tmm.min = smm.min; - tmm.max = smm.max; - } - - if (source_array.quantization_params) { - const auto& sqp = source_array.GetQuantizationParams(); - auto& tqp = target_array.GetOrCreateQuantizationParams(); - tqp.zero_point = sqp.zero_point; - tqp.scale = sqp.scale; - } - - target_array.data_type 
= source_array.data_type; - target_array.final_data_type = source_array.final_data_type; - - target_array.copy_shape(source_array.shape()); } void MakeArrayDims(int num_dims, int batch, int height, int width, int depth, @@ -1377,12 +1398,22 @@ void ResolveModelFlags(const ModelFlags& model_flags, Model* model) { const float mean_value = input_array_proto.mean_value(); const float std_value = input_array_proto.std_value(); MinMax input_minmax; - input_minmax.min = (0.f - mean_value) / std_value; - input_minmax.max = (255.f - mean_value) / std_value; + float qmin = 0, qmax = 255; + if (input_array.data_type == ArrayDataType::kInt16) { + qmin = -32768; + qmax = 32767; + } + input_minmax.min = (qmin - mean_value) / std_value; + input_minmax.max = (qmax - mean_value) / std_value; if (input_array.minmax) { if (input_array_proto.has_mean_value() || input_array_proto.has_std_value()) { - CHECK(input_minmax == *input_array.minmax) + const double width = input_minmax.max - input_minmax.min; + const double kMinMaxAllowedDiff = 1e-6 * width; + CHECK(std::abs(input_minmax.min - input_array.minmax->min) < + kMinMaxAllowedDiff && + std::abs(input_minmax.max - input_array.minmax->max) < + kMinMaxAllowedDiff) << input_minmax.min << ", " << input_minmax.max << " != " << input_array.minmax->min << ", " << input_array.minmax->max; @@ -1402,7 +1433,8 @@ void ResolveModelFlags(const ModelFlags& model_flags, Model* model) { CHECK(input_array.shape().dims_size()); } } - + model->flags.set_change_concat_input_ranges( + model_flags.change_concat_input_ranges()); model->flags.set_allow_nonascii_arrays(model_flags.allow_nonascii_arrays()); model->flags.set_allow_nonexistent_arrays( model_flags.allow_nonexistent_arrays()); @@ -1442,28 +1474,6 @@ void CheckIsReadyForQuantization(const Model& model) { } } -void UseDefaultMinMaxRangeValues(Model* model, double default_ranges_min, - double default_ranges_max) { - for (const auto& op : model->operators) { - for (const auto& input : op->inputs) { - 
auto& input_array = model->GetArray(input); - if (!input_array.minmax && !input_array.buffer) { - auto& minmax = input_array.GetOrCreateMinMax(); - minmax.min = default_ranges_min; - minmax.max = default_ranges_max; - } - } - for (const auto& output : op->outputs) { - auto& output_array = model->GetArray(output); - if (!output_array.minmax && !output_array.buffer) { - auto& minmax = output_array.GetOrCreateMinMax(); - minmax.min = default_ranges_min; - minmax.max = default_ranges_max; - } - } - } -} - int ElementSize(ArrayDataType data_type) { switch (data_type) { case ArrayDataType::kBool: @@ -1512,14 +1522,9 @@ bool IsAllocatableTransientArray(const Model& model, const string& array_name) { if (model.IsOptionalArray(array_name)) return false; // The model's input and output arrays are externally allocated. // They are not transient arrays. - if (IsInputArray(model, array_name)) { + if (IsInputArray(model, array_name) || IsOutputArray(model, array_name)) { return false; } - for (const string& output_array : model.flags.output_arrays()) { - if (array_name == output_array) { - return false; - } - } const auto& array = &model.GetArray(array_name); // An array with a constant buffer isn't a transient array. 
if (!!array->buffer) { @@ -1897,15 +1902,8 @@ int AxesCount(AxesOrder axes_order) { } bool IsDiscardableArray(const Model& model, const string& array_name) { - for (const auto& input_array : model.flags.input_arrays()) { - if (array_name == input_array.name()) { - return false; - } - } - for (const string& output_array : model.flags.output_arrays()) { - if (array_name == output_array) { - return false; - } + if (IsInputArray(model, array_name) || IsOutputArray(model, array_name)) { + return false; } for (const auto& rnn_state : model.flags.rnn_states()) { if (!rnn_state.discardable()) { @@ -1920,6 +1918,35 @@ bool IsDiscardableArray(const Model& model, const string& array_name) { return true; } +bool ReshapeIsEquivalentToTranspose(const Model& model, + const TensorFlowReshapeOperator* op, + bool allow_extra_unary_dims) { + CHECK(!op->shape.empty()); + CHECK(model.HasArray(op->inputs[0])); + CHECK(model.HasArray(op->outputs[0])); + + const auto& input_array = model.GetArray(op->inputs[0]); + const auto& output_array = model.GetArray(op->outputs[0]); + + CHECK(input_array.has_shape()); + CHECK(output_array.has_shape()); + + std::vector in_shape = input_array.shape().dims(); + std::vector out_shape = output_array.shape().dims(); + + // If the reshape changes the number of dimensions so it cannot be interpreted + // as a transpose. 
+ if (!allow_extra_unary_dims && in_shape.size() != out_shape.size()) { + return false; + } + + in_shape.erase(std::remove(in_shape.begin(), in_shape.end(), 1), + in_shape.end()); + out_shape.erase(std::remove(out_shape.begin(), out_shape.end(), 1), + out_shape.end()); + return in_shape == out_shape; +} + void CheckFinalDataTypesSatisfied(const Model& model) { for (const auto& array_entry : model.GetArrayMap()) { const auto& array = *array_entry.second; @@ -1930,8 +1957,8 @@ void CheckFinalDataTypesSatisfied(const Model& model) { CHECK(array.final_data_type == array.data_type) << "Array \"" << array_entry.first << "\" has mis-matching actual and final data types (" - << static_cast(array.data_type) << "," - << static_cast(array.final_data_type) << ")."; + << ArrayDataTypeName(array.data_type) << "," + << ArrayDataTypeName(array.final_data_type) << ")."; } } } @@ -1970,19 +1997,19 @@ void FinishBuildingRNNStates(Model* model) { } } -void UseArraysExtraInfo(Model* model) { +void UseArraysExtraInfo(Model* model, bool quantize_output) { for (const auto& entry : model->flags.arrays_extra_info().entries()) { if (!model->HasArray(entry.name())) { continue; } auto& array = model->GetArray(entry.name()); - auto& minmax = array.GetOrCreateMinMax(); if (entry.has_min() || entry.has_max()) { CHECK_EQ(entry.has_min(), entry.has_max()); + auto& minmax = array.GetOrCreateMinMax(); minmax.min = entry.min(); minmax.max = entry.max(); } - if (entry.has_data_type()) { + if (entry.has_data_type() && quantize_output) { array.final_data_type = ConvertIODataTypeToArrayDataType(entry.data_type()); } @@ -1997,11 +2024,12 @@ void UseArraysExtraInfo(Model* model) { } if (entry.has_constant_float_value()) { CHECK(array.has_shape()); - CHECK(array.data_type == ArrayDataType::kFloat); - auto& data = array.GetMutableBuffer().data; - data.resize(RequiredBufferSizeForShape(array.shape())); - for (float& f : data) { - f = entry.constant_float_value(); + if (array.data_type == ArrayDataType::kFloat) 
{ + auto& data = array.GetMutableBuffer().data; + data.resize(RequiredBufferSizeForShape(array.shape())); + for (float& f : data) { + f = entry.constant_float_value(); + } } } } diff --git a/tensorflow/contrib/lite/toco/tooling_util.h b/tensorflow/contrib/lite/toco/tooling_util.h index d3b7224fe3a773e389ad8fc9a40f0a0fad4debe5..5cc15fa57b3ea4ecb74c3f81af14e4fea95f6232 100644 --- a/tensorflow/contrib/lite/toco/tooling_util.h +++ b/tensorflow/contrib/lite/toco/tooling_util.h @@ -28,7 +28,6 @@ limitations under the License. #if TOCO_SUPPORT_PORTABLE_PROTOS #include "third_party/protobuf/src/google/protobuf/text_format.h" #endif // TOCO_SUPPORT_PORTABLE_PROTOS -#include "tensorflow/contrib/lite/kernels/internal/quantization_util.h" #include "tensorflow/contrib/lite/toco/model.h" #include "tensorflow/contrib/lite/toco/model_flags.pb.h" #include "tensorflow/contrib/lite/toco/runtime/types.h" @@ -57,7 +56,11 @@ string LogName(const Operator& op); string ArrayDataTypeName(ArrayDataType data_type); -bool IsInputArray(const Model& model, const string& name); +// Returns true if the given array is specified as a model input array. +bool IsInputArray(const Model& model, const string& array_name); +// Returns true if the given array is specified as a model output array. +bool IsOutputArray(const Model& model, const string& array_name); + bool IsArrayConsumed(const Model& model, const string& name); int CountTrueOutputs(const Model& model, const Operator& op); @@ -147,6 +150,23 @@ void FixNoOrphanedArray(Model* model); // Fixes input/output arrays that may have issues during export or inference. void FixEdgeArrays(Model* model); +// Copies the contents of an array into another. +// Expects that the shape and data type match. 
+template +void CopyArrayBuffer(const Array& source_array, Array* target_array) { + int source_buffer_size = RequiredBufferSizeForShape(source_array.shape()); + int target_buffer_size = RequiredBufferSizeForShape(target_array->shape()); + CHECK_EQ(source_buffer_size, target_buffer_size) + << "Buffer sizes must match in element count"; + CHECK(source_array.data_type == target_array->data_type) + << "Data types must match"; + if (source_array.buffer) { + const auto& source_buffer = source_array.GetBuffer(); + auto& target_buffer = target_array->GetMutableBuffer(); + target_buffer.data = source_buffer.data; + } +} + // Inserts a no-op reshape operator between the source array and the target // array. This effectively just copies the data. void InsertCopyOperator(Model* model, const string& source_array_name, @@ -158,20 +178,20 @@ void CloneArray(Model* model, const string& source_array_name, void ResolveModelFlags(const ModelFlags& model_flags, Model* model); -template -void GetQuantizationParamsFromMinMax(const MinMax& minmax, - QuantizationParams* quantization_params) { - using Integer = DataType; - const double rmin = minmax.min; - const double rmax = minmax.max; - - *quantization_params = - ::tflite::ChooseQuantizationParams(rmin, rmax); +template +T ConvertOperator(Operator* o, OperatorType type) { + if (o != nullptr && o->type == type) { + return static_cast(o); + } + + return nullptr; } void CheckIsReadyForQuantization(const Model& model); -void UseDefaultMinMaxRangeValues(Model* model, double default_ranges_min, - double default_ranges_max); + +bool ReshapeIsEquivalentToTranspose(const Model& model, + const TensorFlowReshapeOperator* op, + bool allow_extra_unary_dims); inline int Offset(const Shape& shape, const std::vector& indices) { DCHECK_EQ(shape.dimensions_count(), indices.size()); @@ -272,7 +292,7 @@ ArrayDataType ConvertIODataTypeToArrayDataType(IODataType type); // already quantized, then case (a) should hold. 
void FinishBuildingRNNStates(Model* model); -void UseArraysExtraInfo(Model* model); +void UseArraysExtraInfo(Model* model, bool quantize_output); } // namespace toco diff --git a/tensorflow/contrib/lite/tools/BUILD b/tensorflow/contrib/lite/tools/BUILD index 44fde69a1e1536b8d2ecff16876248cfe66a9b8a..7b3569ea9c8b15959b15e8ba46cf44d159d5528c 100644 --- a/tensorflow/contrib/lite/tools/BUILD +++ b/tensorflow/contrib/lite/tools/BUILD @@ -78,6 +78,9 @@ cc_test( "//tensorflow/contrib/lite:testdata/test_model.bin", "//tensorflow/contrib/lite:testdata/test_model_broken.bin", ], + tags = [ + "tflite_not_portable_android", + ], deps = [ ":gen_op_registration", "@com_google_googletest//:gtest", diff --git a/tensorflow/contrib/lookup/BUILD b/tensorflow/contrib/lookup/BUILD index 02b4f802528bbfa3f8aab0b01dfaa8206ce9408b..e3928a82a2d453fdd36cb861ce178a776574269c 100644 --- a/tensorflow/contrib/lookup/BUILD +++ b/tensorflow/contrib/lookup/BUILD @@ -28,7 +28,7 @@ py_library( tf_py_test( name = "lookup_ops_test", - size = "small", + size = "medium", srcs = ["lookup_ops_test.py"], additional_deps = [ ":lookup_py", @@ -46,4 +46,5 @@ tf_py_test( "//tensorflow/python:variables", ], grpc_enabled = True, + tags = ["no_windows"], # TODO: needs investigation on Windows ) diff --git a/tensorflow/contrib/losses/python/losses/loss_ops.py b/tensorflow/contrib/losses/python/losses/loss_ops.py index 8c3a8afe7a0f6f5ad9ceae566288ba60be73d339..bdad34a665e47a4e060fcaddfffecfdc876a8fb0 100644 --- a/tensorflow/contrib/losses/python/losses/loss_ops.py +++ b/tensorflow/contrib/losses/python/losses/loss_ops.py @@ -29,6 +29,7 @@ from tensorflow.python.ops import nn from tensorflow.python.ops import nn_ops from tensorflow.python.util.deprecation import deprecated from tensorflow.python.util.deprecation import deprecated_args +from tensorflow.python.util.deprecation import deprecated_argument_lookup __all__ = [ "absolute_difference", "add_loss", "cosine_distance", @@ -651,11 +652,9 @@ def 
cosine_distance(predictions, ValueError: If `predictions` shape doesn't match `labels` shape, or `weights` is `None`. """ - if dim is not None: - if axis is not None: - raise ValueError("Cannot specify both 'axis' and 'dim'") - axis = dim - if axis is None and dim is None: + axis = deprecated_argument_lookup( + "axis", axis, "dim", dim) + if axis is None: raise ValueError("You must specify 'axis'.") with ops.name_scope(scope, "cosine_distance_loss", [predictions, labels, weights]) as scope: diff --git a/tensorflow/contrib/losses/python/metric_learning/metric_loss_ops.py b/tensorflow/contrib/losses/python/metric_learning/metric_loss_ops.py index 2b9eee4ef7b418e2b90d388d2f165537b8660a9a..de76acb51ffe985162a66c617b266f47c5216b19 100644 --- a/tensorflow/contrib/losses/python/metric_learning/metric_loss_ops.py +++ b/tensorflow/contrib/losses/python/metric_learning/metric_loss_ops.py @@ -711,7 +711,7 @@ def _find_loss_augmented_facility_idx(pairwise_distances, labels, chosen_ids, candidate_scores, margin_multiplier * nmi_scores) argmax_index = math_ops.to_int32( - math_ops.argmax(candidate_scores, dimension=0)) + math_ops.argmax(candidate_scores, axis=0)) return candidate_ids[argmax_index] @@ -811,7 +811,7 @@ def update_medoid_per_cluster(pairwise_distances, pairwise_distances_subset, candidate_scores = math_ops.add(scores_fac, margin_multiplier * scores_margin) argmax_index = math_ops.to_int32( - math_ops.argmax(candidate_scores, dimension=0)) + math_ops.argmax(candidate_scores, axis=0)) best_medoid = math_ops.to_int32(cluster_member_ids[argmax_index]) chosen_ids = update_1d_tensor(chosen_ids, cluster_idx, best_medoid) diff --git a/tensorflow/contrib/makefile/Makefile b/tensorflow/contrib/makefile/Makefile index 05e8d9064bea748c935859f5f9b4c7e646f504cf..1a1ab54a53dd5866ca8357067846c002c5d5e9c1 100644 --- a/tensorflow/contrib/makefile/Makefile +++ b/tensorflow/contrib/makefile/Makefile @@ -89,6 +89,7 @@ HOST_INCLUDES := \ -I$(MAKEFILE_DIR)/downloads/gemmlowp \ 
-I$(MAKEFILE_DIR)/downloads/nsync/public \ -I$(MAKEFILE_DIR)/downloads/fft2d \ +-I$(MAKEFILE_DIR)/downloads/double_conversion \ -I$(HOST_GENDIR) ifeq ($(HAS_GEN_HOST_PROTOC),true) HOST_INCLUDES += -I$(MAKEFILE_DIR)/gen/protobuf-host/include @@ -125,7 +126,9 @@ PROTO_TEXT := $(HOST_BINDIR)proto_text # The list of dependencies is derived from the Bazel build file by running # the gen_file_lists.sh script on a system with a working Bazel setup. PROTO_TEXT_CC_FILES := $(shell cat $(MAKEFILE_DIR)/proto_text_cc_files.txt) -PROTO_TEXT_PB_CC_LIST := $(shell cat $(MAKEFILE_DIR)/proto_text_pb_cc_files.txt) +PROTO_TEXT_PB_CC_LIST := \ + $(shell cat $(MAKEFILE_DIR)/proto_text_pb_cc_files.txt) \ + $(wildcard tensorflow/contrib/makefile/downloads/double_conversion/double-conversion/*.cc) PROTO_TEXT_PB_H_LIST := $(shell cat $(MAKEFILE_DIR)/proto_text_pb_h_files.txt) # Locations of the intermediate files proto_text generates. @@ -171,6 +174,7 @@ INCLUDES := \ -I$(MAKEFILE_DIR)/downloads/gemmlowp \ -I$(MAKEFILE_DIR)/downloads/nsync/public \ -I$(MAKEFILE_DIR)/downloads/fft2d \ +-I$(MAKEFILE_DIR)/downloads/double_conversion \ -I$(PROTOGENDIR) \ -I$(PBTGENDIR) ifeq ($(HAS_GEN_HOST_PROTOC),true) @@ -326,6 +330,7 @@ $(MARCH_OPTION) \ -I$(MAKEFILE_DIR)/downloads/gemmlowp \ -I$(MAKEFILE_DIR)/downloads/nsync/public \ -I$(MAKEFILE_DIR)/downloads/fft2d \ +-I$(MAKEFILE_DIR)/downloads/double_conversion \ -I$(MAKEFILE_DIR)/gen/protobuf_android/$(ANDROID_ARCH)/include \ -I$(PROTOGENDIR) \ -I$(PBTGENDIR) @@ -603,6 +608,7 @@ $(wildcard tensorflow/core/platform/*/*.cc) \ $(wildcard tensorflow/core/platform/*/*/*.cc) \ $(wildcard tensorflow/core/util/*.cc) \ $(wildcard tensorflow/core/util/*/*.cc) \ +$(wildcard tensorflow/contrib/makefile/downloads/double_conversion/double-conversion/*.cc) \ tensorflow/core/util/version_info.cc # Remove duplicates (for version_info.cc) CORE_CC_ALL_SRCS := $(sort $(CORE_CC_ALL_SRCS)) diff --git a/tensorflow/contrib/makefile/download_dependencies.sh 
b/tensorflow/contrib/makefile/download_dependencies.sh index 8b415e6527f85a5a7844b9d4156fd39ecb1b637a..eff9081e35c285027c764c5bdbaf14f78bc5f512 100755 --- a/tensorflow/contrib/makefile/download_dependencies.sh +++ b/tensorflow/contrib/makefile/download_dependencies.sh @@ -27,12 +27,15 @@ if [ ! -f $BZL_FILE_PATH ]; then fi EIGEN_URL="$(grep -o 'http.*bitbucket.org/eigen/eigen/get/.*tar\.gz' "${BZL_FILE_PATH}" | grep -v mirror.bazel | head -n1)" -GEMMLOWP_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/gemmlowp/.*zip' "${BZL_FILE_PATH}" | head -n1)" +# TODO (yongtang): Replace the following with 'https://mirror.bazel.build/github.com/google/gemmlowp/.*zip' once +# the archive has been propagated in mirror.bazel.build. +GEMMLOWP_URL="$(grep -o 'https://github.com/google/gemmlowp/.*zip' "${BZL_FILE_PATH}" | head -n1)" GOOGLETEST_URL="https://github.com/google/googletest/archive/release-1.8.0.tar.gz" NSYNC_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/nsync/.*tar\.gz' "${BZL_FILE_PATH}" | head -n1)" PROTOBUF_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/protobuf/.*tar\.gz' "${BZL_FILE_PATH}" | head -n1)" RE2_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/re2/.*tar\.gz' "${BZL_FILE_PATH}" | head -n1)" -FFT2D_URL="$(grep -o 'http.*fft\.tgz' "${BZL_FILE_PATH}" | grep -v mirror.bazel | head -n1)" +FFT2D_URL="$(grep -o 'http.*fft\.tgz' "${BZL_FILE_PATH}" | grep -v bazel-mirror | head -n1)" +DOUBLE_CONVERSION_URL="$(grep -o "https.*google/double-conversion.*\.zip" "${BZL_FILE_PATH}" | head -n1)" ABSL_URL="$(grep -o 'https://github.com/abseil/abseil-cpp/.*tar.gz' "${BZL_FILE_PATH}" | head -n1)" CUB_URL="$(grep -o 'https.*cub/archive.*zip' "${BZL_FILE_PATH}" | grep -v mirror.bazel | head -n1)" @@ -87,6 +90,7 @@ download_and_extract "${NSYNC_URL}" "${DOWNLOADS_DIR}/nsync" download_and_extract "${PROTOBUF_URL}" "${DOWNLOADS_DIR}/protobuf" download_and_extract "${RE2_URL}" "${DOWNLOADS_DIR}/re2" download_and_extract 
"${FFT2D_URL}" "${DOWNLOADS_DIR}/fft2d" +download_and_extract "${DOUBLE_CONVERSION_URL}" "${DOWNLOADS_DIR}/double_conversion" download_and_extract "${ABSL_URL}" "${DOWNLOADS_DIR}/absl" download_and_extract "${CUB_URL}" "${DOWNLOADS_DIR}/cub/external/cub_archive" diff --git a/tensorflow/contrib/makefile/proto_text_cc_files.txt b/tensorflow/contrib/makefile/proto_text_cc_files.txt index 77c936d8c5b99033ff5c5e149a6ce6613b603132..76428bc1d4e682e000998a6e28fc290e218c2341 100644 --- a/tensorflow/contrib/makefile/proto_text_cc_files.txt +++ b/tensorflow/contrib/makefile/proto_text_cc_files.txt @@ -12,6 +12,7 @@ tensorflow/core/platform/posix/env.cc tensorflow/core/platform/posix/load_library.cc tensorflow/core/platform/posix/env_time.cc tensorflow/core/platform/file_system.cc +tensorflow/core/platform/file_system_helper.cc tensorflow/core/platform/env.cc tensorflow/core/platform/env_time.cc tensorflow/core/platform/setround.cc diff --git a/tensorflow/contrib/makefile/tf_op_files.txt b/tensorflow/contrib/makefile/tf_op_files.txt index b6acf71b9d446de6f57a7a7f077cc07276db2b17..d4c3f2eda8be0c70e961afe582983b9f73769c77 100644 --- a/tensorflow/contrib/makefile/tf_op_files.txt +++ b/tensorflow/contrib/makefile/tf_op_files.txt @@ -151,6 +151,7 @@ tensorflow/core/kernels/decode_bmp_op.cc tensorflow/core/kernels/depthtospace_op.cc tensorflow/core/kernels/data_format_ops.cc tensorflow/core/kernels/spacetodepth_op.cc +tensorflow/core/kernels/dense_update_functor.cc tensorflow/core/kernels/dense_update_ops.cc tensorflow/core/kernels/deep_conv2d.cc tensorflow/core/kernels/decode_wav_op.cc @@ -301,3 +302,5 @@ tensorflow/core/kernels/warn_about_ints.cc tensorflow/core/kernels/segment_reduction_ops.cc tensorflow/core/kernels/batch_util.cc tensorflow/core/ops/audio_ops.cc +tensorflow/core/kernels/decode_proto_op.cc +tensorflow/core/kernels/encode_proto_op.cc diff --git a/tensorflow/contrib/meta_graph_transform/meta_graph_transform.py 
b/tensorflow/contrib/meta_graph_transform/meta_graph_transform.py index ff88b4fa841673fc52b9f6fdc5ca43d30c44bbfd..f37a2593e2695ec55a10f69803cc7f446b0862df 100644 --- a/tensorflow/contrib/meta_graph_transform/meta_graph_transform.py +++ b/tensorflow/contrib/meta_graph_transform/meta_graph_transform.py @@ -30,7 +30,7 @@ from tensorflow.python.framework import importer as _importer from tensorflow.python.framework import ops as _ops from tensorflow.python.saved_model import constants as _saved_model_constants from tensorflow.python.training import saver as _saver_lib -from tensorflow.python.util import compat +from tensorflow.python.util import compat as _compat from tensorflow.tools import graph_transforms as _graph_transforms @@ -161,7 +161,7 @@ def _clean_save_and_restore(graph_def, op, removed_op_names): shapes = [] dtypes = [] for index, value in enumerate(name_op_value_tensor.string_val): - if not _is_removed(compat.as_str(value), removed_op_names): + if not _is_removed(_compat.as_str(value), removed_op_names): names.append(value) shapes.append(shape_op_value_tensor.string_val[index]) dtypes.append(op.attr['dtypes'].list.type[index]) @@ -348,7 +348,7 @@ def _freeze_graph_with_def_protos(input_graph_def, output_node_names, input_saver_def, input_checkpoint): """Converts all variables in a graph and checkpoint into constants. - During this process, we need to retain certain initialzer nodes (e.g. table + During this process, we need to retain certain initializer nodes (e.g. table initializer nodes). Instead of determining which dependencies of the shared initializer node (e.g. group_deps) to keep, we reconstruct the connections between the individual initializer nodes and @@ -651,7 +651,7 @@ def _is_removed_mentioned(s, removed_op_names): # /foo/bar. This regex ensures that we handle these two nodes # as separate entities. It matches on nodes having names in the form of # '/foo/bar_x' as well as nodes having names in the form of 'foo.' 
- s_names = _re.findall(r'((?:[\/]?[a-zA-Z0-9\_]*)*)', compat.as_str_any(s)) + s_names = _re.findall(r'((?:[\/]?[a-zA-Z0-9\_]*)*)', _compat.as_str_any(s)) for removed_op_name in removed_op_names: for s_name in s_names: if s_name.endswith(removed_op_name): @@ -737,9 +737,9 @@ def meta_graph_transform( for tag in tags: meta_graph_def.meta_info_def.tags.append(tag) - base_op_names = [compat.as_str(node.name) + base_op_names = [_compat.as_str(node.name) for node in base_meta_graph_def.graph_def.node] - retained_op_names = [compat.as_str(node.name) + retained_op_names = [_compat.as_str(node.name) for node in meta_graph_def.graph_def.node] removed_op_names = set(base_op_names) - set(retained_op_names) diff --git a/tensorflow/contrib/metrics/__init__.py b/tensorflow/contrib/metrics/__init__.py index de02dc8f457364450929776035829d86035d706b..5effea3596bb83a08e0a8627e411684262aef5f7 100644 --- a/tensorflow/contrib/metrics/__init__.py +++ b/tensorflow/contrib/metrics/__init__.py @@ -71,6 +71,7 @@ See the @{$python/contrib.metrics} guide. 
@@count @@precision_recall_at_equal_thresholds @@recall_at_precision +@@precision_at_recall """ from __future__ import absolute_import @@ -87,6 +88,7 @@ from tensorflow.contrib.metrics.python.ops.metric_ops import aggregate_metrics from tensorflow.contrib.metrics.python.ops.metric_ops import auc_with_confidence_intervals from tensorflow.contrib.metrics.python.ops.metric_ops import cohen_kappa from tensorflow.contrib.metrics.python.ops.metric_ops import count +from tensorflow.contrib.metrics.python.ops.metric_ops import precision_at_recall from tensorflow.contrib.metrics.python.ops.metric_ops import precision_recall_at_equal_thresholds from tensorflow.contrib.metrics.python.ops.metric_ops import recall_at_precision from tensorflow.contrib.metrics.python.ops.metric_ops import sparse_recall_at_top_k diff --git a/tensorflow/contrib/metrics/python/ops/metric_ops.py b/tensorflow/contrib/metrics/python/ops/metric_ops.py index 81f05e7ce587ed1da67a17efbbeb809dbe7fc0b3..00a933e5e0c537033573b225d43581f74557b240 100644 --- a/tensorflow/contrib/metrics/python/ops/metric_ops.py +++ b/tensorflow/contrib/metrics/python/ops/metric_ops.py @@ -63,6 +63,8 @@ def _safe_div(numerator, denominator, name): name=name) +@deprecated(None, 'Please switch to tf.metrics.true_positives. Note that the ' + 'order of the labels and predictions arguments has been switched.') def streaming_true_positives(predictions, labels, weights=None, @@ -107,6 +109,8 @@ def streaming_true_positives(predictions, name=name) +@deprecated(None, 'Please switch to tf.metrics.true_negatives. Note that the ' + 'order of the labels and predictions arguments has been switched.') def streaming_true_negatives(predictions, labels, weights=None, @@ -151,6 +155,8 @@ def streaming_true_negatives(predictions, name=name) +@deprecated(None, 'Please switch to tf.metrics.false_positives. 
Note that the ' + 'order of the labels and predictions arguments has been switched.') def streaming_false_positives(predictions, labels, weights=None, @@ -195,6 +201,8 @@ def streaming_false_positives(predictions, name=name) +@deprecated(None, 'Please switch to tf.metrics.false_negatives. Note that the ' + 'order of the labels and predictions arguments has been switched.') def streaming_false_negatives(predictions, labels, weights=None, @@ -238,6 +246,7 @@ def streaming_false_negatives(predictions, name=name) +@deprecated(None, 'Please switch to tf.metrics.mean') def streaming_mean(values, weights=None, metrics_collections=None, @@ -287,6 +296,7 @@ def streaming_mean(values, name=name) +@deprecated(None, 'Please switch to tf.metrics.mean_tensor') def streaming_mean_tensor(values, weights=None, metrics_collections=None, @@ -340,9 +350,8 @@ def streaming_mean_tensor(values, name=name) -@deprecated(None, - 'Please switch to tf.metrics.accuracy. Note that the order of the ' - 'labels and predictions arguments has been switched.') +@deprecated(None, 'Please switch to tf.metrics.accuracy. Note that the order ' + 'of the labels and predictions arguments has been switched.') def streaming_accuracy(predictions, labels, weights=None, @@ -400,6 +409,8 @@ def streaming_accuracy(predictions, name=name) +@deprecated(None, 'Please switch to tf.metrics.precision. Note that the order ' + 'of the labels and predictions arguments has been switched.') def streaming_precision(predictions, labels, weights=None, @@ -456,6 +467,8 @@ def streaming_precision(predictions, name=name) +@deprecated(None, 'Please switch to tf.metrics.recall. Note that the order ' + 'of the labels and predictions arguments has been switched.') def streaming_recall(predictions, labels, weights=None, @@ -975,8 +988,8 @@ def streaming_curve_points(labels=None, return points, update_op -@deprecated(None, 'Please switch to tf.metrics.auc. 
Note that the order of the ' - 'labels and predictions arguments has been switched.') +@deprecated(None, 'Please switch to tf.metrics.auc. Note that the order of ' + 'the labels and predictions arguments has been switched.') def streaming_auc(predictions, labels, weights=None, @@ -1797,9 +1810,9 @@ def streaming_sensitivity_at_specificity(predictions, name=name) -@deprecated( - None, 'Please switch to tf.metrics.precision_at_thresholds. Note that the ' - 'order of the labels and predictions arguments has been switched.') +@deprecated(None, + 'Please switch to tf.metrics.precision_at_thresholds. Note that ' + 'the order of the labels and predictions arguments are switched.') def streaming_precision_at_thresholds(predictions, labels, thresholds, @@ -2575,6 +2588,121 @@ def recall_at_precision(labels, return recall, update_op +def precision_at_recall(labels, + predictions, + target_recall, + weights=None, + num_thresholds=200, + metrics_collections=None, + updates_collections=None, + name=None): + """Computes the precision at a given recall. + + This function creates variables to track the true positives, false positives, + true negatives, and false negatives at a set of thresholds. Among those + thresholds where recall is at least `target_recall`, precision is computed + at the threshold where recall is closest to `target_recall`. + + For estimation of the metric over a stream of data, the function creates an + `update_op` operation that updates these variables and returns the + precision at `target_recall`. `update_op` increments the counts of true + positives, false positives, true negatives, and false negatives with the + weight of each case found in the `predictions` and `labels`. + + If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. 
+ + For additional information about precision and recall, see + http://en.wikipedia.org/wiki/Precision_and_recall + + Args: + labels: The ground truth values, a `Tensor` whose dimensions must match + `predictions`. Will be cast to `bool`. + predictions: A floating point `Tensor` of arbitrary shape and whose values + are in the range `[0, 1]`. + target_recall: A scalar value in range `[0, 1]`. + weights: Optional `Tensor` whose rank is either 0, or the same rank as + `labels`, and must be broadcastable to `labels` (i.e., all dimensions must + be either `1`, or the same as the corresponding `labels` dimension). + num_thresholds: The number of thresholds to use for matching the given + recall. + metrics_collections: An optional list of collections to which `precision` + should be added. + updates_collections: An optional list of collections to which `update_op` + should be added. + name: An optional variable_scope name. + + Returns: + precision: A scalar `Tensor` representing the precision at the given + `target_recall` value. + update_op: An operation that increments the variables for tracking the + true positives, false positives, true negatives, and false negatives and + whose value matches `precision`. + + Raises: + ValueError: If `predictions` and `labels` have mismatched shapes, if + `weights` is not `None` and its shape doesn't match `predictions`, or if + `target_recall` is not between 0 and 1, or if either `metrics_collections` + or `updates_collections` are not a list or tuple. + RuntimeError: If eager execution is enabled. + """ + if context.executing_eagerly(): + raise RuntimeError('tf.metrics.precision_at_recall is not ' + 'supported when eager execution is enabled.') + + if target_recall < 0 or target_recall > 1: + raise ValueError('`target_recall` must be in the range [0, 1].') + + with variable_scope.variable_scope(name, 'precision_at_recall', + (predictions, labels, weights)): + kepsilon = 1e-7 # Used to avoid division by zero. 
+ thresholds = [ + (i + 1) * 1.0 / (num_thresholds - 1) for i in range(num_thresholds - 2) + ] + thresholds = [0.0 - kepsilon] + thresholds + [1.0 + kepsilon] + + values, update_ops = _streaming_confusion_matrix_at_thresholds( + predictions, labels, thresholds, weights) + + def compute_precision_at_recall(tp, fp, fn, name): + """Computes the precision at a given recall. + + Args: + tp: True positives. + fp: False positives. + fn: False negatives. + name: A name for the operation. + + Returns: + The precision at the desired recall. + """ + recalls = math_ops.div(tp, tp + fn + kepsilon) + + # Because recall is monotone decreasing as a function of the threshold, + # the smallest recall exceeding target_recall occurs at the largest + # threshold where recall >= target_recall. + admissible_recalls = math_ops.cast( + math_ops.greater_equal(recalls, target_recall), dtypes.int64) + tf_index = math_ops.reduce_sum(admissible_recalls) - 1 + + # Now we have the threshold at which to compute precision: + return math_ops.div(tp[tf_index] + kepsilon, + tp[tf_index] + fp[tf_index] + kepsilon, + name) + + precision_value = compute_precision_at_recall( + values['tp'], values['fp'], values['fn'], 'value') + update_op = compute_precision_at_recall( + update_ops['tp'], update_ops['fp'], update_ops['fn'], 'update_op') + + if metrics_collections: + ops.add_to_collections(metrics_collections, precision_value) + + if updates_collections: + ops.add_to_collections(updates_collections, update_op) + + return precision_value, update_op + + def streaming_sparse_average_precision_at_k(predictions, labels, k, @@ -2706,7 +2834,9 @@ def streaming_sparse_average_precision_at_top_k(top_k_predictions, name=name) -@deprecated(None, 'Please switch to tf.metrics.mean.') +@deprecated(None, + 'Please switch to tf.metrics.mean_absolute_error. 
Note that the ' + 'order of the labels and predictions arguments has been switched.') def streaming_mean_absolute_error(predictions, labels, weights=None, @@ -2825,7 +2955,9 @@ def streaming_mean_relative_error(predictions, updates_collections=updates_collections, name=name) - +@deprecated(None, + 'Please switch to tf.metrics.mean_squared_error. Note that the ' + 'order of the labels and predictions arguments has been switched.') def streaming_mean_squared_error(predictions, labels, weights=None, @@ -2883,7 +3015,10 @@ def streaming_mean_squared_error(predictions, updates_collections=updates_collections, name=name) - +@deprecated( + None, + 'Please switch to tf.metrics.root_mean_squared_error. Note that the ' + 'order of the labels and predictions arguments has been switched.') def streaming_root_mean_squared_error(predictions, labels, weights=None, @@ -3223,7 +3358,7 @@ def streaming_mean_cosine_distance(predictions, radial_diffs = math_ops.reduce_sum( radial_diffs, reduction_indices=[ dim, - ], keep_dims=True) + ], keepdims=True) mean_distance, update_op = streaming_mean(radial_diffs, weights, None, None, name or 'mean_cosine_distance') mean_distance = math_ops.subtract(1.0, mean_distance) diff --git a/tensorflow/contrib/metrics/python/ops/metric_ops_test.py b/tensorflow/contrib/metrics/python/ops/metric_ops_test.py index 33eb655fb660f0ecdfe1c5ab870d7f17690ae3ff..76420db8bda39435bcc2be2fd3d8c3467d6753e2 100644 --- a/tensorflow/contrib/metrics/python/ops/metric_ops_test.py +++ b/tensorflow/contrib/metrics/python/ops/metric_ops_test.py @@ -3380,6 +3380,138 @@ class RecallAtPrecisionTest(test.TestCase): self.assertAlmostEqual(target_recall, recall.eval()) +class PrecisionAtRecallTest(test.TestCase): + + def setUp(self): + np.random.seed(1) + ops.reset_default_graph() + + def testVars(self): + metrics.precision_at_recall( + predictions=array_ops.ones((10, 1)), + labels=array_ops.ones((10, 1)), + target_recall=0.7) + _assert_metric_variables(self, + 
('precision_at_recall/true_positives:0', + 'precision_at_recall/false_negatives:0', + 'precision_at_recall/false_positives:0', + 'precision_at_recall/true_negatives:0')) + + def testMetricsCollection(self): + my_collection_name = '__metrics__' + mean, _ = metrics.precision_at_recall( + predictions=array_ops.ones((10, 1)), + labels=array_ops.ones((10, 1)), + target_recall=0.7, + metrics_collections=[my_collection_name]) + self.assertListEqual(ops.get_collection(my_collection_name), [mean]) + + def testUpdatesCollection(self): + my_collection_name = '__updates__' + _, update_op = metrics.precision_at_recall( + predictions=array_ops.ones((10, 1)), + labels=array_ops.ones((10, 1)), + target_recall=0.7, + updates_collections=[my_collection_name]) + self.assertListEqual(ops.get_collection(my_collection_name), [update_op]) + + def testValueTensorIsIdempotent(self): + predictions = random_ops.random_uniform( + (10, 3), maxval=1, dtype=dtypes_lib.float32, seed=1) + labels = random_ops.random_uniform( + (10, 3), maxval=2, dtype=dtypes_lib.int64, seed=1) + precision, update_op = metrics.precision_at_recall( + labels, predictions, target_recall=0.7) + + with self.test_session() as sess: + sess.run(variables.local_variables_initializer()) + + # Run several updates. + for _ in range(10): + sess.run(update_op) + + # Then verify idempotency. 
+ initial_precision = precision.eval() + for _ in range(10): + self.assertAlmostEqual(initial_precision, precision.eval(), places=5) + + def testAllCorrect(self): + inputs = np.random.randint(0, 2, size=(100, 1)) + + predictions = constant_op.constant(inputs, dtype=dtypes_lib.float32) + labels = constant_op.constant(inputs) + precision, update_op = metrics.precision_at_recall( + labels, predictions, target_recall=0.7) + + with self.test_session() as sess: + sess.run(variables.local_variables_initializer()) + self.assertEqual(1, sess.run(update_op)) + self.assertEqual(1, precision.eval()) + + def testAllIncorrect(self): + inputs = np.random.randint(0, 2, size=(100, 1)) + + predictions = constant_op.constant(inputs, dtype=dtypes_lib.float32) + labels = 1.0 - predictions + label_prior = math_ops.reduce_mean(labels) + precision, update_op = metrics.precision_at_recall( + labels, predictions, target_recall=0.2) + + with self.test_session() as sess: + sess.run(variables.local_variables_initializer()) + self.assertEqual(sess.run(label_prior), sess.run(update_op)) + self.assertEqual(sess.run(label_prior), precision.eval()) + + def testSomeCorrectHighRecall(self): + predictions_values = [0.1, 0.2, 0.5, 0.3, 0.0, 0.1, 0.45, 0.5, 0.8, 0.9] + labels_values = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1] + + predictions = constant_op.constant( + predictions_values, dtype=dtypes_lib.float32) + labels = constant_op.constant(labels_values) + precision, update_op = metrics.precision_at_recall( + labels, predictions, target_recall=0.8) + + with self.test_session() as sess: + sess.run(variables.local_variables_initializer()) + self.assertAlmostEqual(0.8, sess.run(update_op)) + self.assertAlmostEqual(0.8, precision.eval()) + + def testSomeCorrectLowRecall(self): + predictions_values = [0.1, 0.2, 0.7, 0.3, 0.0, 0.1, 0.45, 0.5, 0.6, 0.9] + labels_values = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1] + + predictions = constant_op.constant( + predictions_values, dtype=dtypes_lib.float32) + labels = 
constant_op.constant(labels_values) + precision, update_op = metrics.precision_at_recall( + labels, predictions, target_recall=0.4) + + with self.test_session() as sess: + sess.run(variables.local_variables_initializer()) + self.assertAlmostEqual(2.0/3, sess.run(update_op)) + self.assertAlmostEqual(2.0/3, precision.eval()) + + def testWeighted_multipleLabelDtypes(self): + for label_dtype in (dtypes_lib.bool, dtypes_lib.int32, dtypes_lib.float32): + predictions_values = [ + 0.0, 0.1, 0.2, 0.3, 0.4, 0.1, 0.22, 0.25, 0.31, 0.35] + labels_values = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1] + weights_values = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + + predictions = constant_op.constant( + predictions_values, dtype=dtypes_lib.float32) + labels = math_ops.cast(labels_values, dtype=label_dtype) + weights = constant_op.constant(weights_values) + precision, update_op = metrics.precision_at_recall( + labels, predictions, target_recall=0.8, weights=weights) + + with self.test_session() as sess: + sess.run(variables.local_variables_initializer()) + self.assertAlmostEqual(34.0/43, sess.run(update_op)) + self.assertAlmostEqual(34.0/43, precision.eval()) + + class StreamingFNRThresholdsTest(test.TestCase): def setUp(self): diff --git a/tensorflow/contrib/model_pruning/BUILD b/tensorflow/contrib/model_pruning/BUILD index f50575b2cf311e33f7b7c77488bc94b8d24c70ec..54bd39afacbec07f054f61b72eda0a3654858aa7 100644 --- a/tensorflow/contrib/model_pruning/BUILD +++ b/tensorflow/contrib/model_pruning/BUILD @@ -71,6 +71,17 @@ py_library( ], ) +py_library( + name = "pruning_utils", + srcs = ["python/pruning_utils.py"], + srcs_version = "PY2AND3", + visibility = ["//visibility:public"], + deps = [ + "//tensorflow/python:platform", + "//third_party/py/numpy", + ], +) + py_library( name = "pruning", srcs = ["python/pruning.py"], @@ -78,9 +89,20 @@ py_library( visibility = ["//visibility:public"], deps = [ ":core_layers", + ":pruning_utils", "//tensorflow/contrib/training:training_py", 
"//tensorflow/python:platform", - "//third_party/py/numpy", + ], +) + +py_test( + name = "pruning_utils_test", + size = "small", + srcs = ["python/pruning_utils_test.py"], + srcs_version = "PY2AND3", + deps = [ + ":pruning_utils", + "//tensorflow/python:client_testlib", ], ) diff --git a/tensorflow/contrib/model_pruning/README.md b/tensorflow/contrib/model_pruning/README.md index 52b659c69fdfc507e6259e928d79c65471f2f025..86f4fd6adf60d8fa54c13989bf4087e28f1e006f 100644 --- a/tensorflow/contrib/model_pruning/README.md +++ b/tensorflow/contrib/model_pruning/README.md @@ -45,7 +45,7 @@ The pruning library allows for specification of the following hyper parameters: | do_not_prune | list of strings | [""] | list of layers names that are not pruned | | threshold_decay | float | 0.9 | The decay factor to use for exponential decay of the thresholds | | pruning_frequency | integer | 10 | How often should the masks be updated? (in # of global_steps) | -| nbins | integer | 255 | Number of bins to use for histogram computation | +| nbins | integer | 256 | Number of bins to use for histogram computation | | block_height|integer | 1 | Number of rows in a block for block sparse matrices| | block_width |integer | 1 | Number of cols in a block for block sparse matrices| | block_pooling_function| string | AVG | The function to use to pool weight values in a block: average (AVG) or max (MAX)| diff --git a/tensorflow/contrib/model_pruning/python/pruning.py b/tensorflow/contrib/model_pruning/python/pruning.py index 5146a4a2de7806041991c04958de378b2d3dc810..ea6032e588cf398deaf497fb99087436ce1cb2e8 100644 --- a/tensorflow/contrib/model_pruning/python/pruning.py +++ b/tensorflow/contrib/model_pruning/python/pruning.py @@ -33,12 +33,14 @@ # Returns a list of all the weight tensors that have been masked get_weights() - The Pruning class uses a proto (defined in pruning.proto) to set up the - parameters for a pruning specification. 
Here's a typical usage: + The Pruning class uses a tf.hparams object to set up the + parameters for a model pruning. Here's a typical usage: - # Initialize a pruning spec from a proto - pruning_spec = '/tmp/pruning.pb' - p = Pruning(pruning_spec) + # Parse pruning hyperparameters + pruning_hparams = pruning.get_pruning_hparams().parse(FLAGS.pruning_hparams) + + # Create a pruning object using the pruning_hparams + p = pruning.Pruning(pruning_hparams) # Add mask update ops to the graph mask_update_op = p.conditional_mask_update_op() @@ -51,24 +53,20 @@ # An object of the pruning also accepts externally defined sparsity: sparsity = tf.Variable(0.5, name = "ConstantSparsity") - pruning_spec = '/tmp/pruning.pb' - p = Pruning(pruning_spec, sparsity=sparsity) - + p = pruning.Pruning(pruning_hparams, sparsity=sparsity) """ # pylint: disable=missing-docstring from __future__ import absolute_import from __future__ import division from __future__ import print_function -import numpy as np - +from tensorflow.contrib.model_pruning.python import pruning_utils from tensorflow.contrib.model_pruning.python.layers import core_layers as core from tensorflow.contrib.training.python.training import hparam +from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops -from tensorflow.python.ops import clip_ops from tensorflow.python.ops import control_flow_ops -from tensorflow.python.ops import gen_array_ops from tensorflow.python.ops import init_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn_impl @@ -87,172 +85,18 @@ _WEIGHT_COLLECTION = core.WEIGHT_COLLECTION _MASKED_WEIGHT_NAME = core.MASKED_WEIGHT_NAME -def _weight_mask_variable(var, scope): - """Create a mask for the weights. - - This function adds a variable 'mask' to the graph. 
- - Args: - var: the weight variable that needs to be masked - scope: The variable scope of the variable var - - Returns: - the mask variable of the same size and shape as var, initialized to all 1s. - """ - with variable_scope.variable_scope(scope): - mask = variable_scope.get_variable( - 'mask', - var.get_shape(), - initializer=init_ops.ones_initializer(), - trainable=False, - dtype=var.dtype) - return mask - - -def _weight_threshold_variable(var, scope): - """Create a scalar threshold for the weights. - - This function adds a variable - 'threshold' to the graph. - - Args: - var: The weight variable that needs to be masked - scope: The variable scope of the variable var - - Returns: - a scalar threshold variable initialized to 0. - """ - with variable_scope.variable_scope(scope): - threshold = variable_scope.get_variable( - 'threshold', [], - initializer=init_ops.zeros_initializer(), - trainable=False, - dtype=var.dtype) - return threshold - - -def _kronecker_product(mat1, mat2): - """Computes the Kronecker product of two matrices mat1 and mat2. - - Args: - mat1: A matrix of size m x n - mat2: A matrix of size p x q - Returns: - Kronecker product of matrices mat1 and mat2 of size mp x nq - """ - - m1, n1 = mat1.get_shape().as_list() - mat1_rsh = array_ops.reshape(mat1, [m1, 1, n1, 1]) - m2, n2 = mat2.get_shape().as_list() - mat2_rsh = array_ops.reshape(mat2, [1, m2, 1, n2]) - return array_ops.reshape(mat1_rsh * mat2_rsh, [m1 * m2, n1 * n2]) - - -def _histogram(values, value_range, nbins=100, dtype=np.int32, name=None): - """Return histogram of values. - - Given the tensor `values`, this operation returns a rank 1 histogram counting - the number of entries in `values` that fell into every bin. The bins are - equal width and determined by the arguments `value_range` and `nbins`. - - Args: - values: Numeric `Tensor`. - value_range: Shape [2] `Tensor` of same `dtype` as `values`. 
- values <= value_range[0] will be mapped to hist[0], - values >= value_range[1] will be mapped to hist[-1]. - nbins: Scalar `int32 Tensor`. Number of histogram bins. - dtype: dtype for returned histogram. - name: A name for this operation (defaults to 'histogram'). - - Returns: - A 1-D `Tensor` holding histogram of values. - - """ - with ops.name_scope(name, 'histogram', [values, value_range, nbins]) as scope: - values = ops.convert_to_tensor(values, name='values') - values = gen_array_ops.reshape(values, [-1]) - value_range = ops.convert_to_tensor(value_range, name='value_range') - nbins = ops.convert_to_tensor(nbins, dtype=np.int32, name='nbins') - nbins_float = math_ops.cast(nbins, values.dtype) - - # Map tensor values that fall within value_range to [0, 1]. - scaled_values = math_ops.truediv( - values - value_range[0], - value_range[1] - value_range[0], - name='scaled_values') - - # map tensor values within the open interval value_range to {0,.., nbins-1}, - # values outside the open interval will be zero or less, or nbins or more. - indices = math_ops.floor(nbins_float * scaled_values, name='indices') - - # Clip edge cases (e.g. value = value_range[1]) or "outliers." 
- indices = math_ops.cast( - clip_ops.clip_by_value(indices, 0, nbins_float - 1), np.int32) - - return math_ops.unsorted_segment_sum( - array_ops.ones_like(indices, dtype=dtype), indices, nbins, name=scope) - - -def _determine_partitioned_axis(partitioned_variable): - partitioned_axis = 0 - concatenated_variable_shape = partitioned_variable.get_shape() - for partition in partitioned_variable: - partition_shape = partition.get_shape() - maybe_partitioned_axis = np.less(partition_shape, - concatenated_variable_shape) - # Sanity check: make sure number of partitioned axis == 1 - if np.count_nonzero(maybe_partitioned_axis) != 1: - raise ValueError('Number of partitioned axes %s not equal to 1' % - np.count_nonzero(maybe_partitioned_axis)) - partitioned_axis = np.where(maybe_partitioned_axis)[0][0] - return partitioned_axis - - -def _variable_assign(var, new_value): - return state_ops.assign(var, new_value, name=var.op.name + '_assign') - - -def _partitioned_variable_assign(partitioned_var, new_value): - """Assign op for partitioned variables. - - Args: - partitioned_var: A partitioned tensorflow variable - new_value: Value to be assigned to the variable var - - Returns: - A tensorflow op that groups the assign ops for each of the variable slices - """ - # Determine which axis was used to partition the variable. Currently - # tensorflow allows partitioning variable only along 1 axis. 
- axis = 0 if len(partitioned_var) == 1 else _determine_partitioned_axis( - partitioned_var) - - partition_sizes = np.array( - [partition.get_shape()[axis] for partition in partitioned_var]) - new_partitioned_values = array_ops.split( - new_value, - ops.convert_to_tensor(partition_sizes, dtype=np.int32), - axis=axis) - op_list = [] - for partition in partitioned_var: - op_list.append( - _variable_assign(partition, new_partitioned_values[len(op_list)])) - return control_flow_ops.group( - *op_list, name=partitioned_var.name + '_group_assign') - - def apply_mask(x, scope=''): """Apply mask to a given weight tensor. Args: x: Input weight tensor - scope: The current variable scope. Defaults to "" + scope: The current variable scope. Defaults to "". Returns: Tensor representing masked_weights """ - mask = _weight_mask_variable(x, scope) - threshold = _weight_threshold_variable(x, scope) + mask = pruning_utils.weight_mask_variable(x, scope) + threshold = pruning_utils.weight_threshold_variable(x, scope) # Add masked_weights in the weights namescope so as to make it easier # for the quantization library to add quant ops. masked_weights = math_ops.multiply(mask, x, _MASKED_WEIGHT_NAME) @@ -335,6 +179,8 @@ def get_pruning_hparams(): sparsity_function_exponent: float exponent = 1 is linearly varying sparsity between initial and final. 
exponent > 1 varies more slowly towards the end than the beginning + use_tpu: False + Indicates whether to use TPU We use the following sparsity function: @@ -357,7 +203,7 @@ def get_pruning_hparams(): do_not_prune=[''], threshold_decay=0.9, pruning_frequency=10, - nbins=255, + nbins=256, block_height=1, block_width=1, block_pooling_function='AVG', @@ -365,7 +211,8 @@ def get_pruning_hparams(): target_sparsity=0.5, sparsity_function_begin_step=0, sparsity_function_end_step=100, - sparsity_function_exponent=3) + sparsity_function_exponent=3, + use_tpu=False) class Pruning(object): @@ -414,7 +261,7 @@ class Pruning(object): if graph_global_step is None: graph_global_step = training_util.get_global_step() - return math_ops.cast(graph_global_step, np.int32) + return math_ops.cast(graph_global_step, dtypes.int32) def _setup_sparsity(self): begin_step = self._spec.sparsity_function_begin_step @@ -429,13 +276,13 @@ class Pruning(object): (begin_step, end_step)) with ops.name_scope(self._spec.name): - p = math_ops.minimum(1.0, - math_ops.maximum( - 0.0, - math_ops.div( - math_ops.cast(self._global_step - begin_step, - np.float32), - end_step - begin_step))) + p = math_ops.minimum( + 1.0, + math_ops.maximum( + 0.0, + math_ops.div( + math_ops.cast(self._global_step - begin_step, dtypes.float32), + end_step - begin_step))) sparsity = math_ops.add( math_ops.multiply(initial_sparsity - target_sparsity, math_ops.pow(1 - p, exponent)), @@ -445,17 +292,18 @@ class Pruning(object): return sparsity def _setup_last_update_step(self): - with variable_scope.variable_scope(self._spec.name) as scope: + with variable_scope.variable_scope( + self._spec.name, use_resource=self._spec.use_tpu) as scope: try: last_update_step = variable_scope.get_variable( 'last_mask_update_step', [], initializer=init_ops.zeros_initializer(), trainable=False, - dtype=np.int32) + dtype=dtypes.int32) except ValueError: scope.reuse_variables() last_update_step = variable_scope.get_variable( - 
'last_mask_update_step', dtype=np.int32) + 'last_mask_update_step', dtype=dtypes.int32) return last_update_step def _exists_in_do_not_prune_list(self, tensor_name): @@ -497,18 +345,16 @@ class Pruning(object): with ops.name_scope(weights.op.name + '_pruning_ops'): abs_weights = math_ops.abs(weights) max_value = math_ops.reduce_max(abs_weights) - histogram = _histogram( - abs_weights, [0.0, max_value], - nbins=self._spec.nbins, - dtype=np.float32) + cdf_fn = pruning_utils.compute_cdf_from_histogram + if self._spec.use_tpu: + cdf_fn = pruning_utils.compute_cdf - cdf = math_ops.cumsum(histogram) - norm_cdf = math_ops.div(cdf, math_ops.reduce_sum(histogram)) + norm_cdf = cdf_fn(abs_weights, [0.0, max_value], nbins=self._spec.nbins) current_threshold = math_ops.multiply( math_ops.div( math_ops.reduce_sum( math_ops.cast( - math_ops.less(norm_cdf, self._sparsity), np.float32)), + math_ops.less(norm_cdf, self._sparsity), dtypes.float32)), float(self._spec.nbins)), max_value) smoothed_threshold = math_ops.add_n([ @@ -516,7 +362,7 @@ class Pruning(object): math_ops.multiply(threshold, self._spec.threshold_decay) ]) new_mask = math_ops.cast( - math_ops.greater(abs_weights, smoothed_threshold), np.float32) + math_ops.greater(abs_weights, smoothed_threshold), dtypes.float32) return smoothed_threshold, new_mask def _maybe_update_block_mask(self, weights, threshold): @@ -572,8 +418,8 @@ class Pruning(object): new_mask, [pooled_weights.get_shape()[1], pooled_weights.get_shape()[2]]) - updated_mask = _kronecker_product(reshaped_mask, - array_ops.ones(self._block_dim)) + updated_mask = pruning_utils.kronecker_product( + reshaped_mask, array_ops.ones(self._block_dim)) sliced_mask = array_ops.slice( updated_mask, [0, 0], [squeezed_weights.get_shape()[0], @@ -608,11 +454,12 @@ class Pruning(object): continue new_threshold, new_mask = self._maybe_update_block_mask(weight, threshold) - self._assign_ops.append(_variable_assign(threshold, new_threshold)) + self._assign_ops.append( + 
pruning_utils.variable_assign(threshold, new_threshold)) self._assign_ops.append( - _partitioned_variable_assign(mask, new_mask) - if is_partitioned else _variable_assign(mask, new_mask)) + pruning_utils.partitioned_variable_assign(mask, new_mask) + if is_partitioned else pruning_utils.variable_assign(mask, new_mask)) def mask_update_op(self): with ops.name_scope(self._spec.name): diff --git a/tensorflow/contrib/model_pruning/python/pruning_test.py b/tensorflow/contrib/model_pruning/python/pruning_test.py index 89e65713197afc6ed37346cb67a6e9be3fa9290f..f80b7c52c000f13b5ce98dd442ff21abfac37761 100644 --- a/tensorflow/contrib/model_pruning/python/pruning_test.py +++ b/tensorflow/contrib/model_pruning/python/pruning_test.py @@ -110,12 +110,12 @@ class PruningTest(test.TestCase): self.assertAllEqual(np.count_nonzero(masked_weights_val), 100) session.run(mask_update_op) masked_weights_val = masked_weights.eval() - self.assertAllEqual(np.count_nonzero(masked_weights_val), 51) + self.assertAllEqual(np.count_nonzero(masked_weights_val), 50) def _blockMasking(self, hparams, weights, expected_mask): threshold = variables.Variable(0.0, name="threshold") - sparsity = variables.Variable(0.51, name="sparsity") + sparsity = variables.Variable(0.5, name="sparsity") test_spec = ",".join(hparams) pruning_hparams = pruning.get_pruning_hparams().parse(test_spec) @@ -138,7 +138,8 @@ class PruningTest(test.TestCase): weights_max = constant_op.constant( [[0.1, 0.0, 0.2, 0.0], [0.0, -0.1, 0.0, -0.2], [0.3, 0.0, 0.4, 0.0], [0.0, -0.3, 0.0, -0.4]]) - expected_mask = [[0, 0, 0, 0], [0, 0, 0, 0], [1, 1, 1, 1], [1, 1, 1, 1]] + expected_mask = [[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], + [1., 1., 1., 1.], [1., 1., 1., 1.]] self._blockMasking(param_list + ["block_pooling_function=MAX"], weights_max, expected_mask) @@ -155,7 +156,8 @@ class PruningTest(test.TestCase): weights_max = constant_op.constant( [[[0.1, 0.0, 0.2, 0.0], [0.0, -0.1, 0.0, -0.2], [0.3, 0.0, 0.4, 0.0], [0.0, -0.3, 0.0, 
-0.4]]]) - expected_mask = [[[0, 0, 0, 0], [0, 0, 0, 0], [1, 1, 1, 1], [1, 1, 1, 1]]] + expected_mask = [[[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], + [1., 1., 1., 1.], [1., 1., 1., 1.]]] self._blockMasking(param_list + ["block_pooling_function=MAX"], weights_max, expected_mask) @@ -178,11 +180,12 @@ class PruningTest(test.TestCase): masked_weights_val = masked_weights.eval() session.run(mask_update_op) masked_weights_val = masked_weights.eval() - self.assertAllEqual(np.count_nonzero(masked_weights_val), 51) + self.assertAllEqual(np.count_nonzero(masked_weights_val), 50) def testConditionalMaskUpdate(self): param_list = [ - "pruning_frequency=2", "begin_pruning_step=1", "end_pruning_step=6" + "pruning_frequency=2", "begin_pruning_step=1", "end_pruning_step=6", + "nbins=100" ] test_spec = ",".join(param_list) pruning_hparams = pruning.get_pruning_hparams().parse(test_spec) diff --git a/tensorflow/contrib/model_pruning/python/pruning_utils.py b/tensorflow/contrib/model_pruning/python/pruning_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..56d3dcef20d1b1c34d6b04535e2b4dc7be7f7320 --- /dev/null +++ b/tensorflow/contrib/model_pruning/python/pruning_utils.py @@ -0,0 +1,269 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Utility functions for adding pruning related ops to the graph. 
+""" +# pylint: disable=missing-docstring +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import clip_ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import init_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import state_ops +from tensorflow.python.ops import variable_scope + +_NBINS = 256 + + +def weight_mask_variable(var, scope): + """Create a mask for the weights. + + This function adds a variable 'mask' to the graph. + + Args: + var: the weight variable that needs to be masked + scope: The variable scope of the variable var + + Returns: + the mask variable of the same size and shape as var, initialized to all 1s. + """ + with variable_scope.variable_scope(scope): + mask = variable_scope.get_variable( + 'mask', + var.get_shape(), + initializer=init_ops.ones_initializer(), + trainable=False, + dtype=var.dtype) + return mask + + +def weight_threshold_variable(var, scope): + """Create a scalar threshold for the weights. + + This function adds a variable + 'threshold' to the graph. + + Args: + var: The weight variable that needs to be masked + scope: The variable scope of the variable var + + Returns: + a scalar threshold variable initialized to 0. + """ + with variable_scope.variable_scope(scope): + threshold = variable_scope.get_variable( + 'threshold', [], + initializer=init_ops.zeros_initializer(), + trainable=False, + dtype=var.dtype) + return threshold + + +def kronecker_product(mat1, mat2): + """Computes the Kronecker product of two matrices mat1 and mat2. 
+ + Args: + mat1: A matrix of size m x n + mat2: A matrix of size p x q + Returns: + Kronecker product of matrices mat1 and mat2 of size mp x nq + """ + + m1, n1 = mat1.get_shape().as_list() + mat1_rsh = array_ops.reshape(mat1, [m1, 1, n1, 1]) + m2, n2 = mat2.get_shape().as_list() + mat2_rsh = array_ops.reshape(mat2, [1, m2, 1, n2]) + return array_ops.reshape(mat1_rsh * mat2_rsh, [m1 * m2, n1 * n2]) + + +def _histogram(values, value_range, nbins=100, dtype=dtypes.int32, name=None): + """Return histogram of values. + + Given the tensor `values`, this operation returns a rank 1 histogram counting + the number of entries in `values` that fell into every bin. The bins are + equal width and determined by the arguments `value_range` and `nbins`. + + Args: + values: Numeric `Tensor`. + value_range: Shape [2] `Tensor` of same `dtype` as `values`. + values <= value_range[0] will be mapped to hist[0], + values >= value_range[1] will be mapped to hist[-1]. + nbins: Scalar `int32 Tensor`. Number of histogram bins. + dtype: dtype for returned histogram. + name: A name for this operation (defaults to 'histogram'). + + Returns: + A 1-D `Tensor` holding histogram of values. + + """ + with ops.name_scope(name, 'histogram', [values, value_range, nbins]) as scope: + values = ops.convert_to_tensor(values, name='values') + values = array_ops.reshape(values, [-1]) + value_range = ops.convert_to_tensor(value_range, name='value_range') + nbins_float = np.float32(nbins) + + # Map tensor values that fall within value_range to [0, 1]. + scaled_values = math_ops.truediv( + values - value_range[0], + value_range[1] - value_range[0], + name='scaled_values') + + # map tensor values within the open interval value_range to {0,.., nbins-1}, + # values outside the open interval will be zero or less, or nbins or more. + indices = math_ops.floor(nbins_float * scaled_values, name='indices') + + # Clip edge cases (e.g. value = value_range[1]) or "outliers." 
+ indices = math_ops.cast( + clip_ops.clip_by_value(indices, 0, nbins_float - 1), dtypes.int32) + + return math_ops.unsorted_segment_sum( + array_ops.ones_like(indices, dtype=dtype), indices, nbins, name=scope) + + +def compute_cdf_from_histogram(values, value_range, **kwargs): + """Returns the normalized cumulative distribution of the given values tensor. + + Computes the histogram and uses tf.cumsum to arrive at cdf + + Args: + values: Numeric `Tensor`. + value_range: Shape [2] `Tensor` of same `dtype` as `values`. + **kwargs: keyword arguments: nbins, name + + Returns: + A 1-D `Tensor` holding normalized cdf of values. + + """ + nbins = kwargs.get('nbins', _NBINS) + name = kwargs.get('name', None) + with ops.name_scope(name, 'cdf', [values, value_range, nbins]): + histogram = _histogram( + values, value_range, dtype=dtypes.float32, nbins=nbins) + cdf = math_ops.cumsum(histogram) + return math_ops.div(cdf, math_ops.reduce_max(cdf)) + + +def compute_cdf(values, value_range, **kwargs): + """Returns the normalized cumulative distribution of the given values tensor. + + Uses tf.while_loop to directly compute the cdf of the values. Number of bins + for histogram is fixed at _NBINS=256 + + Args: + values: Numeric `Tensor`. + value_range: Shape [2] `Tensor` of same `dtype` as `values` + **kwargs: keyword arguments: name + + Returns: + A 1-D `Tensor` holding normalized cdf of values. + + """ + nbins = _NBINS + name = kwargs.get('name', None) + with ops.name_scope(name, 'cdf', [values, value_range, nbins]): + values = ops.convert_to_tensor(values, name='values') + value_range = ops.convert_to_tensor(value_range, name='value_range') + nbins_float = np.float32(nbins) + + # Map tensor values that fall within value_range to [0, 1]. 
+ scaled_values = math_ops.truediv( + values - value_range[0], + value_range[1] - value_range[0], + name='scaled_values') + + # map tensor values within the open interval value_range to {0,.., nbins-1}, + # values outside the open interval will be zero or less, or nbins or more. + indices = math_ops.floor(nbins_float * scaled_values, name='indices') + + # Clip edge cases (e.g. value = value_range[1]) or "outliers." + indices = math_ops.cast( + clip_ops.clip_by_value(indices, 0, nbins_float - 1), dtypes.int32) + + cdf = array_ops.zeros(nbins) + i = constant_op.constant(0) + + def loop_cond(loop_count, _): + return math_ops.less(loop_count, nbins) + + def loop_body(loop_count, cdf): + temp = math_ops.reduce_sum( + math_ops.cast( + math_ops.less_equal(indices, loop_count), dtypes.float32)) + cdf = math_ops.add( + cdf, + array_ops.one_hot( + loop_count, depth=_NBINS, on_value=temp, off_value=0.0)) + return [loop_count + 1, cdf] + + _, cdf = control_flow_ops.while_loop( + loop_cond, loop_body, [i, cdf], maximum_iterations=nbins) + + return math_ops.div(cdf, math_ops.reduce_max(cdf)) + + +def determine_partitioned_axis(partitioned_variable): + partitioned_axis = 0 + concatenated_variable_shape = partitioned_variable.get_shape() + for partition in partitioned_variable: + partition_shape = partition.get_shape() + maybe_partitioned_axis = np.less(partition_shape, + concatenated_variable_shape) + # Sanity check: make sure number of partitioned axis == 1 + if np.count_nonzero(maybe_partitioned_axis) != 1: + raise ValueError('Number of partitioned axes %s not equal to 1' % + np.count_nonzero(maybe_partitioned_axis)) + partitioned_axis = np.where(maybe_partitioned_axis)[0][0] + return partitioned_axis + + +def variable_assign(var, new_value): + return state_ops.assign(var, new_value, name=var.op.name + '_assign') + + +def partitioned_variable_assign(partitioned_var, new_value): + """Assign op for partitioned variables. 
+ + Args: + partitioned_var: A partitioned tensorflow variable + new_value: Value to be assigned to the variable var + + Returns: + A tensorflow op that groups the assign ops for each of the variable slices + """ + # Determine which axis was used to partition the variable. Currently + # tensorflow allows partitioning variable only along 1 axis. + axis = 0 if len(partitioned_var) == 1 else determine_partitioned_axis( + partitioned_var) + + partition_sizes = np.array( + [partition.get_shape()[axis] for partition in partitioned_var]) + new_partitioned_values = array_ops.split( + new_value, + ops.convert_to_tensor(partition_sizes, dtype=dtypes.int32), + axis=axis) + op_list = [] + for partition in partitioned_var: + op_list.append( + variable_assign(partition, new_partitioned_values[len(op_list)])) + return control_flow_ops.group( + *op_list, name=partitioned_var.name + '_group_assign') diff --git a/tensorflow/contrib/model_pruning/python/pruning_utils_test.py b/tensorflow/contrib/model_pruning/python/pruning_utils_test.py new file mode 100644 index 0000000000000000000000000000000000000000..10e1dd0a8eee88f357fbe60bf00f180c05f2c4d2 --- /dev/null +++ b/tensorflow/contrib/model_pruning/python/pruning_utils_test.py @@ -0,0 +1,86 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for utility functions in pruning_utils.py.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.contrib.model_pruning.python import pruning_utils +from tensorflow.python.framework import constant_op +from tensorflow.python.ops import init_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import variable_scope +from tensorflow.python.ops import variables +from tensorflow.python.platform import test + + +class PruningUtilsTest(test.TestCase): + + def testHistogram(self): + width = 10 + height = 10 + nbins = 100 + expected_histogram = np.full(nbins, 1.0) + init = init_ops.constant_initializer(np.linspace(0.0, 1.0, width * height)) + weights = variable_scope.get_variable( + "weights", [width, height], initializer=init) + histogram = pruning_utils._histogram( + weights, [0, 1.0], nbins, dtype=np.float32) + with self.test_session(): + variables.global_variables_initializer().run() + computed_histogram = histogram.eval() + self.assertAllEqual(expected_histogram, computed_histogram) + + def testCDF(self): + nbins = 5 + weights = constant_op.constant([-1, 0, 1, 1.5, 2, 3, 4, 5, 10, 100]) + abs_weights = math_ops.abs(weights) + norm_cdf = pruning_utils.compute_cdf_from_histogram( + abs_weights, [0.0, 5.0], nbins=nbins) + expected_cdf = np.array([0.1, 0.4, 0.5, 0.6, 1.0], dtype=np.float32) + with self.test_session() as sess: + variables.global_variables_initializer().run() + norm_cdf_val = sess.run(norm_cdf) + self.assertAllEqual(len(norm_cdf_val), nbins) + self.assertAllEqual(expected_cdf, norm_cdf_val) + + def _compare_cdf(self, values): + abs_values = math_ops.abs(values) + max_value = math_ops.reduce_max(abs_values) + with self.test_session(): + variables.global_variables_initializer().run() + cdf_from_histogram = 
pruning_utils.compute_cdf_from_histogram( + abs_values, [0.0, max_value], nbins=pruning_utils._NBINS) + cdf = pruning_utils.compute_cdf(abs_values, [0.0, max_value]) + return cdf.eval(), cdf_from_histogram.eval() + + def testCDFEquivalence2D(self): + width = 100 + height = 100 + weights = variable_scope.get_variable("weights", shape=[width, height]) + cdf_val, cdf_from_histogram_val = self._compare_cdf(weights) + self.assertAllEqual(cdf_val, cdf_from_histogram_val) + + def testCDFEquivalence4D(self): + weights = variable_scope.get_variable("weights", shape=[5, 5, 128, 128]) + cdf_val, cdf_from_histogram_val = self._compare_cdf(weights) + self.assertAllEqual(cdf_val, cdf_from_histogram_val) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/nccl/BUILD b/tensorflow/contrib/nccl/BUILD index 6cbfd03881ce0619c049a7ed208659b6f30e5c5b..334e70318dd88185cecd93ebeb2587861b7999b9 100644 --- a/tensorflow/contrib/nccl/BUILD +++ b/tensorflow/contrib/nccl/BUILD @@ -31,7 +31,7 @@ tf_custom_op_library( "kernels/nccl_ops.cc", ], deps = if_cuda([ - "@nccl_archive//:nccl", + "@local_config_nccl//:nccl", "//tensorflow/core:gpu_headers_lib", ]), ) @@ -61,7 +61,7 @@ tf_cuda_cc_test( "//tensorflow/core:test", "//tensorflow/core:test_main", "//tensorflow/core:testlib", - "@nccl_archive//:nccl", + "@local_config_nccl//:nccl", ], ) @@ -80,7 +80,7 @@ tf_kernel_library( "//tensorflow/core:lib", "//tensorflow/core:proto_text", "//tensorflow/core:stream_executor", - "@nccl_archive//:nccl", + "@local_config_nccl//:nccl", ], alwayslink = 1, ) diff --git a/tensorflow/contrib/nccl/kernels/nccl_manager.cc b/tensorflow/contrib/nccl/kernels/nccl_manager.cc index 913935b38246f1c5c0f7da4c1ea1f986bc00891b..b9b482a6981e03144c6d00f2a38b71959b4b3621 100644 --- a/tensorflow/contrib/nccl/kernels/nccl_manager.cc +++ b/tensorflow/contrib/nccl/kernels/nccl_manager.cc @@ -76,6 +76,8 @@ struct NcclManager::Communicator { namespace { ncclDataType_t ToNcclType(DataType t) { switch (t) { + 
case DT_HALF: + return ncclHalf; case DT_FLOAT: return ncclFloat; case DT_DOUBLE: diff --git a/tensorflow/contrib/nccl/kernels/nccl_manager.h b/tensorflow/contrib/nccl/kernels/nccl_manager.h index bb219e0edc8a2c4ba0ce0583cbe4018a4fa3a1d1..6ff8cea84eb912d5e5c891c40efc617661725a63 100644 --- a/tensorflow/contrib/nccl/kernels/nccl_manager.h +++ b/tensorflow/contrib/nccl/kernels/nccl_manager.h @@ -20,7 +20,7 @@ limitations under the License. #include #include -#include "src/nccl.h" +#include "third_party/nccl/nccl.h" #include "tensorflow/core/common_runtime/gpu/gpu_event_mgr.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/platform/mutex.h" diff --git a/tensorflow/contrib/nccl/kernels/nccl_manager_test.cc b/tensorflow/contrib/nccl/kernels/nccl_manager_test.cc index 985b2bae2566c38dfb2c71a899e4b03bbb8fa55d..06ca65e33ad6f5fb6620144231dd368379dcc190 100644 --- a/tensorflow/contrib/nccl/kernels/nccl_manager_test.cc +++ b/tensorflow/contrib/nccl/kernels/nccl_manager_test.cc @@ -48,35 +48,9 @@ static std::vector GetGPUDevices() { return gpus; } +template class NcclManagerTest : public ::testing::Test { - protected: - static void SetUpTestCase() { - setenv("NCCL_DEBUG", "INFO", 1 /* replace */); - devices = new std::vector(GetGPUDevices()); - CHECK(!devices->empty()); - LOG(ERROR) << "Running test with " << devices->size() << " gpus"; - } - static void TearDownTestCase() { - for (auto device : *devices) delete device; - delete devices; - } - - static Allocator* gpu_allocator(BaseGPUDevice* device) { - return device->GetStepAllocator(AllocatorAttributes(), - nullptr /* step_resource_manager */); - } - - static std::vector* devices; - - template - perftools::gputools::DeviceMemory AsDeviceMemory( - const Scalar* cuda_memory) { - perftools::gputools::DeviceMemoryBase wrapped( - const_cast(cuda_memory)); - perftools::gputools::DeviceMemory typed(wrapped); - return typed; - } - + public: // A single all-reduce to apply. 
struct TestCase { string key; @@ -89,42 +63,52 @@ class NcclManagerTest : public ::testing::Test { int num_completed = 0; }; + static void SetUpTestCase() { + setenv("NCCL_DEBUG", "INFO", 1 /* replace */); + devices_ = new std::vector(GetGPUDevices()); + CHECK(!devices_->empty()); + LOG(ERROR) << "Running test with " << devices_->size() << " gpus"; + } + + static void TearDownTestCase() { + for (auto device : *devices_) delete device; + delete devices_; + } + TestCase* MakeTestCase(int num_ranks, ncclRedOp_t reduction_op, TensorShape shape, float value_offset) { TestCase* test_case = new TestCase(); - test_case->expected = Tensor(DT_FLOAT, shape); + test_case->expected = Tensor(data_type_, shape); if (reduction_op == ncclProd) { - test::FillFn(&test_case->expected, [](int) { return 1; }); + test::FillFn(&test_case->expected, + [](int) { return static_cast(1); }); } else if (reduction_op == ncclSum) { - test::FillFn(&test_case->expected, [](int) { return 0; }); + test::FillFn(&test_case->expected, + [](int) { return static_cast(0); }); } else if (reduction_op == ncclMax) { - test::FillFn(&test_case->expected, [](int) { - return -1 * std::numeric_limits::max(); - }); + test::FillFn(&test_case->expected, [](int) { return -max_; }); } else if (reduction_op == ncclMin) { - test::FillFn(&test_case->expected, [](int) { - return std::numeric_limits::max(); - }); + test::FillFn(&test_case->expected, [](int) { return max_; }); } else { LOG(FATAL) << "Invalid reduction_op " << reduction_op; } - int mult = 1; - for (int i = 0; i < num_ranks; ++i) { - auto* device = devices->at(i % devices->size()); + float value_scale = 0.01; // Small scale to avoid fp16 overflow. 
+ for (int rank = 0; rank < num_ranks; ++rank) { + auto* device = GetDevice(rank); auto* stream = device->tensorflow_gpu_device_info()->stream; - Tensor in_cpu(DT_FLOAT, shape); - test::FillFn(&in_cpu, [mult, value_offset](int index) { - return value_offset + (index + 1) * mult; + Tensor in_cpu(data_type_, shape); + test::FillFn(&in_cpu, [&](int index) { + return static_cast((index + 1) * value_scale + value_offset); }); for (int j = 0; j < shape.num_elements(); ++j) { - auto in_val = in_cpu.flat()(j); - auto out_expr = test_case->expected.flat(); + auto in_val = in_cpu.flat()(j); + auto out_expr = test_case->expected.template flat(); if (reduction_op == ncclProd) { - out_expr(j) *= in_val; + out_expr(j) = out_expr(j) * in_val; } else if (reduction_op == ncclSum) { - out_expr(j) += in_val; + out_expr(j) = out_expr(j) + in_val; } else if (reduction_op == ncclMax) { if (in_val > out_expr(j)) { out_expr(j) = in_val; @@ -136,26 +120,18 @@ class NcclManagerTest : public ::testing::Test { } } - mult *= 10; - test_case->ins.emplace_back(gpu_allocator(device), DT_FLOAT, shape); - test_case->outs.emplace_back(gpu_allocator(device), DT_FLOAT, shape); + value_scale *= 10; + test_case->ins.emplace_back(GpuAllocator(device), data_type_, shape); + test_case->outs.emplace_back(GpuAllocator(device), data_type_, shape); const Tensor& in_gpu = test_case->ins.back(); - auto in_gpu_mem = AsDeviceMemory(in_gpu.flat().data()); - stream->ThenMemcpy(&in_gpu_mem, in_cpu.flat().data(), + auto in_gpu_mem = AsDeviceMemory(in_gpu.flat().data()); + stream->ThenMemcpy(&in_gpu_mem, in_cpu.flat().data(), in_cpu.TotalBytes()); } return test_case; } - NcclManager::DoneCallback CreateDoneCallback(TestCase* test_case) { - return [this, test_case](Status s) { - mutex_lock l(test_case->mu); - ++test_case->num_completed; - test_case->final_status.Update(s); - }; - } - void VerifyResults(const string& case_label, TestCase* test_case) { // Wait for the done callback to be called. 
{ @@ -168,41 +144,84 @@ class NcclManagerTest : public ::testing::Test { test_case->mu.unlock(); } // Copy memory to host and verify. - for (int i = 0; i < test_case->outs.size(); ++i) { - auto* device = devices->at(i % devices->size()); + for (int rank = 0; rank < test_case->outs.size(); ++rank) { + auto* device = GetDevice(rank); auto* stream = device->tensorflow_gpu_device_info()->stream; - const Tensor& out_gpu = test_case->outs[i]; - Tensor out_cpu(DT_FLOAT, out_gpu.shape()); - auto out_gpu_mem = AsDeviceMemory(out_gpu.flat().data()); - stream->ThenMemcpy(out_cpu.flat().data(), out_gpu_mem, + const Tensor& out_gpu = test_case->outs[rank]; + Tensor out_cpu(data_type_, out_gpu.shape()); + auto out_gpu_mem = AsDeviceMemory(out_gpu.flat().data()); + stream->ThenMemcpy(out_cpu.flat().data(), out_gpu_mem, out_cpu.TotalBytes()); SE_ASSERT_OK(stream->BlockHostUntilDone()); - test::ExpectTensorEqual(test_case->expected, out_cpu); + test::ExpectTensorNear(test_case->expected, out_cpu, 0.01); } } + + NcclManager::DoneCallback CreateDoneCallback(TestCase* test_case) { + return [this, test_case](Status s) { + mutex_lock l(test_case->mu); + ++test_case->num_completed; + test_case->final_status.Update(s); + }; + } + + static BaseGPUDevice* GetDevice(size_t rank) { + return devices_->at(rank % devices_->size()); + } + + private: + static Allocator* GpuAllocator(BaseGPUDevice* device) { + return device->GetStepAllocator(AllocatorAttributes(), + nullptr /* step_resource_manager */); + } + + static perftools::gputools::DeviceMemory AsDeviceMemory( + const Scalar* cuda_memory) { + perftools::gputools::DeviceMemoryBase wrapped( + const_cast(cuda_memory)); + perftools::gputools::DeviceMemory typed(wrapped); + return typed; + } + + private: + static std::vector* devices_; + static const DataType data_type_; + static const Scalar max_; }; -std::vector* NcclManagerTest::devices = nullptr; + +template +std::vector* NcclManagerTest::devices_ = nullptr; +template +const DataType 
NcclManagerTest::data_type_ = + DataTypeToEnum::value; +template +const Scalar NcclManagerTest::max_ = + Eigen::NumTraits::highest(); + +// Instantiate tests for float and half. +using TypeList = ::testing::Types; +TYPED_TEST_CASE(NcclManagerTest, TypeList); // Test basic sum reduction. -TEST_F(NcclManagerTest, BasicSumReduction) { +TYPED_TEST(NcclManagerTest, BasicSumReduction) { const int num_ranks = 3; for (int op = 0; op < 4; ++op) { ncclRedOp_t reduction_op = static_cast(op); - std::unique_ptr test_case( - MakeTestCase(num_ranks, reduction_op, TensorShape({2, 3}), 0)); - for (int device_num = 0; device_num < num_ranks; ++device_num) { - auto* device = devices->at(device_num % devices->size()); + std::unique_ptr test_case( + this->MakeTestCase(num_ranks, reduction_op, TensorShape({2, 3}), 0.0f)); + for (int rank = 0; rank < num_ranks; ++rank) { + auto* device = this->GetDevice(rank); auto* event_mgr = device->tensorflow_gpu_device_info()->event_mgr; auto* stream = device->tensorflow_gpu_device_info()->stream; NcclManager::instance()->AddToAllReduce( num_ranks, "allreduce", reduction_op, device->executor(), - device->gpu_id(), event_mgr, stream, &test_case->ins[device_num], - &test_case->outs[device_num], CreateDoneCallback(test_case.get())); + device->gpu_id(), event_mgr, stream, &test_case->ins[rank], + &test_case->outs[rank], this->CreateDoneCallback(test_case.get())); } LOG(ERROR) << "Verifying results"; - VerifyResults("test_case", test_case.get()); + this->VerifyResults("test_case", test_case.get()); } } @@ -213,7 +232,7 @@ TEST_F(NcclManagerTest, BasicSumReduction) { // with num_ranks > devices->size(), for some GPUs (e.g. K20m). // To test the higher settings, increase num_ranks, // num_collectives_per_iteration and time_limit_micros. 
-TEST_F(NcclManagerTest, MultipleCallers) { +TYPED_TEST(NcclManagerTest, MultipleCallers) { const int num_ranks = 1; // 2; const int num_collectives_per_iteration = 1; // 1000; const int num_threads = 3; @@ -223,49 +242,49 @@ TEST_F(NcclManagerTest, MultipleCallers) { srand(Env::Default()->NowMicros()); for (;;) { - std::vector> case_and_device_num; - std::vector> test_cases; + std::vector> case_and_rank; + std::vector> test_cases; for (int i = 0; i < num_collectives_per_iteration; ++i) { - test_cases.emplace_back( - MakeTestCase(num_ranks, ncclSum, - TensorShape({100, i % 5 + 1, i % 3 + 1}), i + 0.1 * i)); + test_cases.emplace_back(this->MakeTestCase( + num_ranks, ncclSum, TensorShape({100, i % 5 + 1, i % 3 + 1}), + 1.1f * i)); for (int j = 0; j < num_ranks; ++j) { - case_and_device_num.emplace_back(i, j); + case_and_rank.emplace_back(i, j); } } - for (int i = 0; i < num_ranks; ++i) { - auto* device = devices->at(i % devices->size()); + for (int rank = 0; rank < num_ranks; ++rank) { + auto* device = this->GetDevice(rank); auto* stream = device->tensorflow_gpu_device_info()->stream; SE_ASSERT_OK(stream->BlockHostUntilDone()); } - std::shuffle(case_and_device_num.begin(), case_and_device_num.end(), + std::shuffle(case_and_rank.begin(), case_and_rank.end(), std::mt19937(std::random_device()())); - mutex mu; // guards case_and_device_num. + mutex mu; // guards case_and_rank. 
std::unique_ptr pool( new thread::ThreadPool(Env::Default(), "test", num_threads)); - const int to_schedule = case_and_device_num.size(); + const int to_schedule = case_and_rank.size(); for (int i = 0; i < to_schedule; ++i) { auto fn = [&]() { - int device_num; + int rank; int test_num; { mutex_lock l(mu); - test_num = case_and_device_num.back().first; - device_num = case_and_device_num.back().second; - case_and_device_num.pop_back(); + test_num = case_and_rank.back().first; + rank = case_and_rank.back().second; + case_and_rank.pop_back(); } - auto* device = devices->at(device_num % devices->size()); + auto* device = this->GetDevice(rank); auto* event_mgr = device->tensorflow_gpu_device_info()->event_mgr; auto* stream = device->tensorflow_gpu_device_info()->stream; - TestCase* test_case = test_cases[test_num].get(); + typename TestFixture::TestCase* test_case = test_cases[test_num].get(); NcclManager::instance()->AddToAllReduce( num_ranks, strings::StrCat("allreduce", test_num), ncclSum, device->executor(), device->gpu_id(), event_mgr, stream, - &test_case->ins[device_num], &test_case->outs[device_num], - CreateDoneCallback(test_case)); + &test_case->ins[rank], &test_case->outs[rank], + this->CreateDoneCallback(test_case)); }; pool->Schedule(fn); } @@ -274,7 +293,8 @@ TEST_F(NcclManagerTest, MultipleCallers) { LOG(ERROR) << "Verifying results for " << num_collectives_per_iteration << " collectives"; for (int i = 0; i < test_cases.size(); ++i) { - VerifyResults(strings::StrCat("collective", i), test_cases[i].get()); + this->VerifyResults(strings::StrCat("collective", i), + test_cases[i].get()); } int64 delta = Env::Default()->NowMicros() - start; diff --git a/tensorflow/contrib/nccl/kernels/nccl_ops.cc b/tensorflow/contrib/nccl/kernels/nccl_ops.cc index 266d4f6f0de0274dca2bfc9022503f09b0ca7d42..c2b76caef38a4af248387b65701b8f8936e8431f 100644 --- a/tensorflow/contrib/nccl/kernels/nccl_ops.cc +++ b/tensorflow/contrib/nccl/kernels/nccl_ops.cc @@ -17,7 +17,7 @@ 
limitations under the License. #include -#include "src/nccl.h" +#include "third_party/nccl/nccl.h" #include "tensorflow/contrib/nccl/kernels/nccl_manager.h" #include "tensorflow/core/framework/op_kernel.h" diff --git a/tensorflow/contrib/nccl/kernels/nccl_rewrite.cc b/tensorflow/contrib/nccl/kernels/nccl_rewrite.cc index a4de46a93fab1dfe93b47f2789cc533bc447e43a..4676e937e56e35cdec5d2ac57fa07b7bda5fe291 100644 --- a/tensorflow/contrib/nccl/kernels/nccl_rewrite.cc +++ b/tensorflow/contrib/nccl/kernels/nccl_rewrite.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#include "tensorflow/core/lib/strings/str_util.h" #if GOOGLE_CUDA #include @@ -254,7 +255,7 @@ class NcclReplacePass : public GraphOptimizationPass { // Find reduction and broadcast ops and replace them with Send/Recv ops. for (Node* node : graph->op_nodes()) { StringPiece type = node->type_string(); - if (!type.starts_with("Nccl")) { + if (!str_util::StartsWith(type, "Nccl")) { continue; } if (type == "NcclReduce") { diff --git a/tensorflow/contrib/nccl/ops/nccl_ops.cc b/tensorflow/contrib/nccl/ops/nccl_ops.cc index 8eb804c2e988f313ba1b340217cae20f1f5502c7..a353a34b80add119fcdc8bc4230eddf5a77b30e8 100644 --- a/tensorflow/contrib/nccl/ops/nccl_ops.cc +++ b/tensorflow/contrib/nccl/ops/nccl_ops.cc @@ -25,7 +25,7 @@ REGISTER_OP("NcclAllReduce") .Input("input: T") .Output("data: T") .Attr("reduction: {'min', 'max', 'prod', 'sum'}") - .Attr("T: {float, float64, int32, int64}") + .Attr("T: {half, float, float64, int32, int64}") .Attr("num_devices: int") .Attr("shared_name: string") .SetIsStateful() @@ -51,7 +51,7 @@ REGISTER_OP("NcclReduce") .Input("input: num_devices * T") .Output("data: T") .Attr("reduction: {'min', 'max', 'prod', 'sum'}") - .Attr("T: {float, float64, int32, int64}") + .Attr("T: {half, float, float64, int32, int64}") .Attr("num_devices: int") 
.SetIsStateful() .SetShapeFn(shape_inference::UnchangedShape) @@ -69,7 +69,7 @@ reduction: the reduction operation to perform. REGISTER_OP("_NcclReduceSend") .Input("input: T") .Attr("reduction: {'min', 'max', 'prod', 'sum'}") - .Attr("T: {float, float64, int32, int64}") + .Attr("T: {half, float, float64, int32, int64}") .Attr("num_devices: int") .Attr("shared_name: string") .SetIsStateful() @@ -92,7 +92,7 @@ REGISTER_OP("_NcclReduceRecv") .Input("input: T") .Output("data: T") .Attr("reduction: {'min', 'max', 'prod', 'sum'}") - .Attr("T: {float, float64, int32, int64}") + .Attr("T: {half, float, float64, int32, int64}") .Attr("num_devices: int") .Attr("shared_name: string") .SetIsStateful() @@ -118,7 +118,7 @@ shared_name: Identifier that is shared between ops of the same reduce. REGISTER_OP("NcclBroadcast") .Input("input: T") .Output("output: T") - .Attr("T: {float, float64, int32, int64}") + .Attr("T: {half, float, float64, int32, int64}") .Attr("shape: shape") .SetIsStateful() .SetShapeFn(shape_inference::UnchangedShape) @@ -135,7 +135,7 @@ shape: The shape of the input tensor. REGISTER_OP("_NcclBroadcastSend") .Input("input: T") - .Attr("T: {float, float64, int32, int64}") + .Attr("T: {half, float, float64, int32, int64}") .Attr("num_devices: int") .Attr("shared_name: string") .SetIsStateful() @@ -157,7 +157,7 @@ shared_name: Identifier that is shared between ops of the same broadcast. 
REGISTER_OP("_NcclBroadcastRecv") .Input("shape: int32") .Output("output: T") - .Attr("T: {float, float64, int32, int64}") + .Attr("T: {half, float, float64, int32, int64}") .Attr("num_devices: int") .Attr("shared_name: string") .SetIsStateful() diff --git a/tensorflow/contrib/nccl/python/ops/nccl_ops_test.py b/tensorflow/contrib/nccl/python/ops/nccl_ops_test.py index 98fe394c5b38294700617591992d3207b0a4706b..423a8689aeee062fb58eaf9d6d9b980b0998754e 100644 --- a/tensorflow/contrib/nccl/python/ops/nccl_ops_test.py +++ b/tensorflow/contrib/nccl/python/ops/nccl_ops_test.py @@ -72,7 +72,7 @@ class NcclTestCase(test.TestCase): two. device_sets: Tuple of virtual devices to run test on. """ - for dtype in [np.float32, np.int32, np.int64, np.float64]: + for dtype in [np.float16, np.float32, np.int32, np.int64, np.float64]: # Create session inside outer loop to test use of # same communicator across multiple sessions. with self.test_session(use_gpu=True) as sess: diff --git a/tensorflow/contrib/nn/python/ops/sampling_ops.py b/tensorflow/contrib/nn/python/ops/sampling_ops.py index 63fc487dca69a4777821595a0366d0ae0b393ce2..e65925610c5f5125c2d2e92edc1cf708c54255d4 100644 --- a/tensorflow/contrib/nn/python/ops/sampling_ops.py +++ b/tensorflow/contrib/nn/python/ops/sampling_ops.py @@ -88,7 +88,7 @@ def _rank_resample(weights, biases, inputs, sampled_values, num_resampled, return math_ops.reduce_logsumexp( math_ops.matmul(embeddings, reweighted_inputs, transpose_b=True), axis=1, - keep_dims=False) + keepdims=False) # Calling this protected form of embedding_lookup allows co-locating # the logsumexp computation with the partitioned weights, which yields diff --git a/tensorflow/contrib/opt/BUILD b/tensorflow/contrib/opt/BUILD index c57c5e3f29f1e36fa2f36f8113cb208be6c6be3e..612ecc3e63891f4dabf97828fe75672dd7877a91 100644 --- a/tensorflow/contrib/opt/BUILD +++ b/tensorflow/contrib/opt/BUILD @@ -14,6 +14,7 @@ py_library( name = "opt_py", srcs = [ "__init__.py", + 
"python/training/adamax.py", "python/training/addsign.py", "python/training/drop_stale_gradient_optimizer.py", "python/training/elastic_average_optimizer.py", @@ -43,11 +44,27 @@ py_library( "//tensorflow/python:util", "//tensorflow/python:variable_scope", "//tensorflow/python:variables", + "//tensorflow/python/eager:context", "//third_party/py/numpy", "@six_archive//:six", ], ) +py_test( + name = "adamax_test", + srcs = ["python/training/adamax_test.py"], + srcs_version = "PY2AND3", + deps = [ + ":opt_py", + "//tensorflow/python:array_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:math_ops", + "//tensorflow/python:training", + "//third_party/py/numpy", + ], +) + py_test( name = "external_optimizer_test", srcs = ["python/training/external_optimizer_test.py"], diff --git a/tensorflow/contrib/opt/__init__.py b/tensorflow/contrib/opt/__init__.py index 6c1bb1adc096f5b8e6945ea1492727d16cf29e65..4c13c8e247185213b798eb733ddcf65a07a8f64d 100644 --- a/tensorflow/contrib/opt/__init__.py +++ b/tensorflow/contrib/opt/__init__.py @@ -19,6 +19,7 @@ from __future__ import division from __future__ import print_function # pylint: disable=wildcard-import +from tensorflow.contrib.opt.python.training.adamax import * from tensorflow.contrib.opt.python.training.addsign import * from tensorflow.contrib.opt.python.training.drop_stale_gradient_optimizer import * from tensorflow.contrib.opt.python.training.external_optimizer import * @@ -36,6 +37,7 @@ from tensorflow.python.util.all_util import remove_undocumented _allowed_symbols = [ + 'AdaMaxOptimizer', 'PowerSignOptimizer', 'AddSignOptimizer', 'DelayCompensatedGradientDescentOptimizer', diff --git a/tensorflow/contrib/opt/python/training/adamax.py b/tensorflow/contrib/opt/python/training/adamax.py new file mode 100644 index 0000000000000000000000000000000000000000..686bac0d8408cdb0d2937bbd1de8535e216f8bf0 --- /dev/null +++ 
b/tensorflow/contrib/opt/python/training/adamax.py @@ -0,0 +1,191 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""AdaMax for TensorFlow.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.eager import context +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import resource_variable_ops +from tensorflow.python.ops import state_ops +from tensorflow.python.training import adam +from tensorflow.python.training import training_ops + + +class AdaMaxOptimizer(adam.AdamOptimizer): + """Optimizer that implements the AdaMax algorithm. + + Adamax is sometimes superior to adam, especially in models with embeddings, + see [Kingma et al., 2014](http://arxiv.org/abs/1412.6980) + ([pdf](http://arxiv.org/pdf/1412.6980.pdf)). + """ + + def __init__(self, learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8, + use_locking=False, name="AdaMax"): + """Construct a new AdaMax optimizer. 
+ + Initialization: + + ``` + m_0 <- 0 (Initialize initial 1st moment vector) + v_0 <- 0 (Initialize the exponentially weighted infinity norm) + t <- 0 (Initialize timestep) + ``` + + The update rule for `variable` with gradient `g` uses an optimization + described at the end of section 7.1 of the paper: + + ``` + t <- t + 1 + + m_t <- beta1 * m_{t-1} + (1 - beta1) * g + v_t <- max(beta2 * v_{t-1}, abs(g)) + variable <- variable - learning_rate / (1 - beta1^t) * m_t / (v_t + epsilon) + ``` + + Similar to AdamOptimizer, the epsilon is added for numerical stability + (especially to get rid of division by zero when v_t = 0). + + In contrast to AdamOptimizer, the sparse implementation of this algorithm + (used when the gradient is an IndexedSlices object, typically because of + `tf.gather` or an embedding lookup in the forward pass) only updates + variable slices and corresponding `m_t`, `v_t` terms when that part of + the variable was used in the forward pass. This means that the sparse + behavior is in contrast to the dense behavior (similar to some momentum + implementations which ignore momentum unless a variable slice was actually + used). + + Args: + learning_rate: A Tensor or a floating point value. The learning rate. + beta1: A float value or a constant float tensor. + The exponential decay rate for the 1st moment estimates. + beta2: A float value or a constant float tensor. + The exponential decay rate for the exponentially weighted infinity norm. + epsilon: A small constant for numerical stability. + use_locking: If True use locks for update operations. + name: Optional name for the operations created when applying gradients. + Defaults to "AdaMax". 
+ """ + super(AdaMaxOptimizer, self).__init__(learning_rate, beta1, beta2, + epsilon, use_locking, name) + + def _get_beta_accumulators(self): + if context.executing_eagerly(): + graph = None + else: + graph = ops.get_default_graph() + return self._get_non_slot_variable("beta1_power", graph=graph) + + def _create_slots(self, var_list): + # Create the beta1 accumulators on the same device as the first + # variable. Sort the var_list to make sure this device is consistent across + # workers (these need to go on the same PS, otherwise some updates are + # silently ignored). + first_var = min(var_list, key=lambda x: x.name) + self._create_non_slot_variable(initial_value=self._beta1, + name="beta1_power", + colocate_with=first_var) + + # Create slots for the first and second moments. + for v in var_list: + self._zeros_slot(v, "m", self._name) + self._zeros_slot(v, "v", self._name) + + def _apply_dense(self, grad, var): + m = self.get_slot(var, "m") + v = self.get_slot(var, "v") + beta1_power = self._get_beta_accumulators() + return training_ops.apply_ada_max( + var, m, v, + math_ops.cast(beta1_power, var.dtype.base_dtype), + math_ops.cast(self._lr_t, var.dtype.base_dtype), + math_ops.cast(self._beta1_t, var.dtype.base_dtype), + math_ops.cast(self._beta2_t, var.dtype.base_dtype), + math_ops.cast(self._epsilon_t, var.dtype.base_dtype), + grad, use_locking=self._use_locking).op + + def _resource_apply_dense(self, grad, var): + m = self.get_slot(var, "m") + v = self.get_slot(var, "v") + beta1_power = self._get_beta_accumulators() + return training_ops.resource_apply_ada_max( + var.handle, m.handle, v.handle, + math_ops.cast(beta1_power, grad.dtype.base_dtype), + math_ops.cast(self._lr_t, grad.dtype.base_dtype), + math_ops.cast(self._beta1_t, grad.dtype.base_dtype), + math_ops.cast(self._beta2_t, grad.dtype.base_dtype), + math_ops.cast(self._epsilon_t, grad.dtype.base_dtype), + grad, use_locking=self._use_locking) + + def _apply_sparse_shared(self, grad, var, indices, + 
scatter_add, scatter_update): + beta1_power = self._get_beta_accumulators() + beta1_power = math_ops.cast(beta1_power, var.dtype.base_dtype) + lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype) + beta1_t = math_ops.cast(self._beta1_t, var.dtype.base_dtype) + beta2_t = math_ops.cast(self._beta2_t, var.dtype.base_dtype) + epsilon_t = math_ops.cast(self._epsilon_t, var.dtype.base_dtype) + # m_t = beta1 * m + (1 - beta1) * g_t + m = self.get_slot(var, "m") + m_slice = array_ops.gather(m, indices) + m_t_slice = m_slice * beta1_t + grad * (1 - beta1_t) + with ops.control_dependencies([m_t_slice]): + m_t = scatter_update(m, indices, m_t_slice) + # u_t = max(beta2 * u, abs(g_t)) + v = self.get_slot(var, "v") + v_slice = array_ops.gather(v, indices) + v_t_slice = math_ops.maximum(v_slice * beta2_t, math_ops.abs(grad)) + with ops.control_dependencies([v_t_slice]): + v_t = scatter_update(v, indices, v_t_slice) + # theta_t = theta - lr / (1 - beta1^t) * m_t / u_t + var_slice = -lr_t / (1 - beta1_power) * (m_t_slice / + (v_t_slice + epsilon_t)) + with ops.control_dependencies([var_slice]): + var_update = scatter_add(var, indices, var_slice) + return control_flow_ops.group(*[var_update, m_t, v_t]) + + def _apply_sparse(self, grad, var): + return self._apply_sparse_shared( + grad.values, var, grad.indices, + lambda x, i, v: state_ops.scatter_add( # pylint: disable=g-long-lambda + x, i, v, use_locking=self._use_locking), + lambda x, i, v: state_ops.scatter_update( # pylint: disable=g-long-lambda + x, i, v, use_locking=self._use_locking)) + + def _resource_scatter_update(self, x, i, v): + with ops.control_dependencies( + [resource_variable_ops.resource_scatter_update( + x.handle, i, v)]): + return x.value() + + def _resource_apply_sparse(self, grad, var, indices): + return self._apply_sparse_shared( + grad, var, indices, + self._resource_scatter_add, self._resource_scatter_update) + + def _finish(self, update_ops, name_scope): + # Update the power accumulators. 
+ with ops.control_dependencies(update_ops): + beta1_power = self._get_beta_accumulators() + with ops.colocate_with(beta1_power): + update_beta1 = beta1_power.assign( + beta1_power * self._beta1_t, use_locking=self._use_locking) + return control_flow_ops.group(*update_ops + [update_beta1], + name=name_scope) diff --git a/tensorflow/contrib/opt/python/training/adamax_test.py b/tensorflow/contrib/opt/python/training/adamax_test.py new file mode 100644 index 0000000000000000000000000000000000000000..bc92a7006f1a0a56adafc486a75afa94e965cb2c --- /dev/null +++ b/tensorflow/contrib/opt/python/training/adamax_test.py @@ -0,0 +1,348 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for AdaMax.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.contrib.opt.python.training import adamax +from tensorflow.python.client import session +from tensorflow.python.eager import context +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.framework import test_util +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import resource_variable_ops +from tensorflow.python.ops import variables +from tensorflow.python.platform import test + + +def adamax_update_numpy(param, + g_t, + t, + m, + v, + alpha=0.001, + beta1=0.9, + beta2=0.999, + epsilon=1e-8): + m_t = beta1 * m + (1 - beta1) * g_t + v_t = np.maximum(beta2 * v, np.abs(g_t)) + param_t = param - (alpha / (1 - beta1**t)) * (m_t / (v_t + epsilon)) + return param_t, m_t, v_t + + +def adamax_sparse_update_numpy(param, + indices, + g_t, + t, + m, + v, + alpha=0.001, + beta1=0.9, + beta2=0.999, + epsilon=1e-8): + m_t, v_t, param_t = np.copy(m), np.copy(v), np.copy(param) + m_t_slice = beta1 * m[indices] + (1 - beta1) * g_t + v_t_slice = np.maximum(beta2 * v[indices], np.abs(g_t)) + param_t_slice = param[indices] - ((alpha / (1 - beta1**t)) * + (m_t_slice / (v_t_slice + epsilon))) + m_t[indices] = m_t_slice + v_t[indices] = v_t_slice + param_t[indices] = param_t_slice + return param_t, m_t, v_t + + +class AdaMaxOptimizerTest(test.TestCase): + + def doTestSparse(self, use_resource=False): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + with self.test_session(): + # Initialize variables for numpy implementation. 
+ zero_slots = lambda: np.zeros((3), dtype=dtype.as_numpy_dtype) + m0, v0, m1, v1 = zero_slots(), zero_slots(), zero_slots(), zero_slots() + var0_np = np.array([1.0, 2.0, 3.0], dtype=dtype.as_numpy_dtype) + grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) + var1_np = np.array([4.0, 5.0, 6.0], dtype=dtype.as_numpy_dtype) + grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) + + if use_resource: + var0 = resource_variable_ops.ResourceVariable(var0_np) + var1 = resource_variable_ops.ResourceVariable(var1_np) + else: + var0 = variables.Variable(var0_np) + var1 = variables.Variable(var1_np) + grads0_np_indices = np.array([0, 1], dtype=np.int32) + grads0 = ops.IndexedSlices( + constant_op.constant(grads0_np), + constant_op.constant(grads0_np_indices), constant_op.constant([2])) + grads1_np_indices = np.array([2, 1], dtype=np.int32) + grads1 = ops.IndexedSlices( + constant_op.constant(grads1_np), + constant_op.constant(grads1_np_indices), constant_op.constant([2])) + opt = adamax.AdaMaxOptimizer() + update = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) + variables.global_variables_initializer().run() + + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0, 3.0], var0.eval()) + self.assertAllClose([4.0, 5.0, 6.0], var1.eval()) + + beta1_power = opt._get_beta_accumulators() + + # Run 3 steps of AdaMax + for t in range(1, 4): + self.assertAllCloseAccordingToType(0.9**t, beta1_power.eval()) + update.run() + + var0_np, m0, v0 = adamax_sparse_update_numpy( + var0_np, grads0_np_indices, grads0_np, t, m0, v0) + var1_np, m1, v1 = adamax_sparse_update_numpy( + var1_np, grads1_np_indices, grads1_np, t, m1, v1) + + # Validate updated params + self.assertAllCloseAccordingToType(var0_np, var0.eval()) + self.assertAllCloseAccordingToType(var1_np, var1.eval()) + + def testSparse(self): + self.doTestSparse(use_resource=False) + + def testResourceSparse(self): + self.doTestSparse(use_resource=True) + + def 
testSparseDevicePlacement(self): + for index_dtype in [dtypes.int32, dtypes.int64]: + with self.test_session(force_gpu=test.is_gpu_available()): + # If a GPU is available, tests that all optimizer ops can be placed on + # it (i.e. they have GPU kernels). + var = variables.Variable([[1.0], [2.0]]) + indices = constant_op.constant([0, 1], dtype=index_dtype) + gathered_sum = math_ops.reduce_sum(array_ops.gather(var, indices)) + optimizer = adamax.AdaMaxOptimizer(3.0) + minimize_op = optimizer.minimize(gathered_sum) + variables.global_variables_initializer().run() + minimize_op.run() + + def testSparseRepeatedIndices(self): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + with self.test_session(): + repeated_index_update_var = variables.Variable( + [[1.0], [2.0]], dtype=dtype) + aggregated_update_var = variables.Variable( + [[1.0], [2.0]], dtype=dtype) + grad_repeated_index = ops.IndexedSlices( + constant_op.constant( + [0.1, 0.1], shape=[2, 1], dtype=dtype), + constant_op.constant([1, 1]), + constant_op.constant([2, 1])) + grad_aggregated = ops.IndexedSlices( + constant_op.constant( + [0.2], shape=[1, 1], dtype=dtype), + constant_op.constant([1]), + constant_op.constant([2, 1])) + repeated_update = adamax.AdaMaxOptimizer().apply_gradients( + [(grad_repeated_index, repeated_index_update_var)]) + aggregated_update = adamax.AdaMaxOptimizer().apply_gradients( + [(grad_aggregated, aggregated_update_var)]) + variables.global_variables_initializer().run() + self.assertAllClose(aggregated_update_var.eval(), + repeated_index_update_var.eval()) + for _ in range(3): + repeated_update.run() + aggregated_update.run() + self.assertAllClose(aggregated_update_var.eval(), + repeated_index_update_var.eval()) + + def doTestBasic(self, use_resource=False): + for i, dtype in enumerate([dtypes.half, dtypes.float32, dtypes.float64]): + with self.test_session(graph=ops.Graph()): + # Initialize variables for numpy implementation. 
+ m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0 + var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) + grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) + var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) + grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) + + if use_resource: + var0 = resource_variable_ops.ResourceVariable( + var0_np, name="var0_%d" % i) + var1 = resource_variable_ops.ResourceVariable( + var1_np, name="var1_%d" % i) + else: + var0 = variables.Variable(var0_np) + var1 = variables.Variable(var1_np) + grads0 = constant_op.constant(grads0_np) + grads1 = constant_op.constant(grads1_np) + + opt = adamax.AdaMaxOptimizer() + update = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) + opt_variables = opt.variables() + beta1_power = opt._get_beta_accumulators() + self.assertTrue(beta1_power is not None) + self.assertIn(beta1_power, opt_variables) + + with ops.Graph().as_default(): + # Shouldn't return non-slot variables from other graphs. + self.assertEqual(0, len(opt.variables())) + + if not context.executing_eagerly(): + self.evaluate(variables.global_variables_initializer()) + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0], self.evaluate(var0)) + self.assertAllClose([3.0, 4.0], self.evaluate(var1)) + + beta1_power = opt._get_beta_accumulators() + + # Run 3 steps of AdaMax + for t in range(1, 4): + if not context.executing_eagerly(): + self.evaluate(update) + elif t > 1: + opt.apply_gradients(zip([grads0, grads1], [var0, var1])) + + self.assertAllCloseAccordingToType(0.9**(t + 1), + self.evaluate(beta1_power)) + + var0_np, m0, v0 = adamax_update_numpy(var0_np, grads0_np, t, m0, v0) + var1_np, m1, v1 = adamax_update_numpy(var1_np, grads1_np, t, m1, v1) + + # Validate updated params + self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0)) + self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1)) + if use_resource: + self.assertEqual("var0_%d/AdaMax:0" % (i,), + 
opt.get_slot(var=var0, name="m").name) + + def testBasic(self): + with self.test_session(): + self.doTestBasic(use_resource=False) + + @test_util.run_in_graph_and_eager_modes(reset_test=True) + def testResourceBasic(self): + self.doTestBasic(use_resource=True) + + def testTensorLearningRate(self): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + with self.test_session(): + # Initialize variables for numpy implementation. + m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0 + var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) + grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) + var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) + grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) + + var0 = variables.Variable(var0_np) + var1 = variables.Variable(var1_np) + grads0 = constant_op.constant(grads0_np) + grads1 = constant_op.constant(grads1_np) + opt = adamax.AdaMaxOptimizer(constant_op.constant(0.001)) + update = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) + variables.global_variables_initializer().run() + + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0], var0.eval()) + self.assertAllClose([3.0, 4.0], var1.eval()) + + beta1_power = opt._get_beta_accumulators() + + # Run 3 steps of AdaMax + for t in range(1, 4): + self.assertAllCloseAccordingToType(0.9**t, beta1_power.eval()) + update.run() + + var0_np, m0, v0 = adamax_update_numpy(var0_np, grads0_np, t, m0, v0) + var1_np, m1, v1 = adamax_update_numpy(var1_np, grads1_np, t, m1, v1) + + # Validate updated params + self.assertAllCloseAccordingToType(var0_np, var0.eval()) + self.assertAllCloseAccordingToType(var1_np, var1.eval()) + + def testSharing(self): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + with self.test_session(): + # Initialize variables for numpy implementation. 
+ m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0 + var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) + grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) + var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) + grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) + + var0 = variables.Variable(var0_np) + var1 = variables.Variable(var1_np) + grads0 = constant_op.constant(grads0_np) + grads1 = constant_op.constant(grads1_np) + opt = adamax.AdaMaxOptimizer() + update1 = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) + update2 = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) + variables.global_variables_initializer().run() + + beta1_power = opt._get_beta_accumulators() + + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0], var0.eval()) + self.assertAllClose([3.0, 4.0], var1.eval()) + + # Run 3 steps of intertwined AdaMax1 and AdaMax2. + for t in range(1, 4): + self.assertAllCloseAccordingToType(0.9**t, beta1_power.eval()) + if t % 2 == 0: + update1.run() + else: + update2.run() + + var0_np, m0, v0 = adamax_update_numpy(var0_np, grads0_np, t, m0, v0) + var1_np, m1, v1 = adamax_update_numpy(var1_np, grads1_np, t, m1, v1) + + # Validate updated params + self.assertAllCloseAccordingToType(var0_np, var0.eval()) + self.assertAllCloseAccordingToType(var1_np, var1.eval()) + + def testTwoSessions(self): + optimizer = adamax.AdaMaxOptimizer() + g = ops.Graph() + with g.as_default(): + with session.Session(): + var0 = variables.Variable(np.array([1.0, 2.0]), name="v0") + grads0 = constant_op.constant(np.array([0.1, 0.1])) + optimizer.apply_gradients([(grads0, var0)]) + + gg = ops.Graph() + with gg.as_default(): + with session.Session(): + var0 = variables.Variable(np.array([1.0, 2.0]), name="v0") + grads0 = constant_op.constant(np.array([0.1, 0.1])) + + # If the optimizer saves any state not keyed by graph the following line + # fails. 
+ optimizer.apply_gradients([(grads0, var0)]) + + def testSlotsUniqueEager(self): + with context.eager_mode(): + v1 = resource_variable_ops.ResourceVariable(1.) + v2 = resource_variable_ops.ResourceVariable(1.) + opt = adamax.AdaMaxOptimizer(1.) + opt.minimize(lambda: v1 + v2) + # There should be one non-slot variable (beta1_power), and two unique + # slot variables (m and v) for each of v1 and v2. + self.assertEqual(5, len(set(opt.variables()))) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/opt/python/training/lazy_adam_optimizer.py b/tensorflow/contrib/opt/python/training/lazy_adam_optimizer.py index aeca900bc8ff4c4cc26da490ce43dfec70fd9f11..72117c1e81a164b0517fabeaddec3ea5132af5a9 100644 --- a/tensorflow/contrib/opt/python/training/lazy_adam_optimizer.py +++ b/tensorflow/contrib/opt/python/training/lazy_adam_optimizer.py @@ -56,21 +56,21 @@ class LazyAdamOptimizer(adam.AdamOptimizer): epsilon_t = math_ops.cast(self._epsilon_t, var.dtype.base_dtype) lr = (lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power)) - # m := beta1 * m + (1 - beta1) * g_t + # \\(m := beta1 * m + (1 - beta1) * g_t\\) m = self.get_slot(var, "m") m_t = state_ops.scatter_update(m, grad.indices, beta1_t * array_ops.gather(m, grad.indices) + (1 - beta1_t) * grad.values, use_locking=self._use_locking) - # v := beta2 * v + (1 - beta2) * (g_t * g_t) + # \\(v := beta2 * v + (1 - beta2) * (g_t * g_t)\\) v = self.get_slot(var, "v") v_t = state_ops.scatter_update(v, grad.indices, beta2_t * array_ops.gather(v, grad.indices) + (1 - beta2_t) * math_ops.square(grad.values), use_locking=self._use_locking) - # variable -= learning_rate * m_t / (epsilon_t + sqrt(v_t)) + # \\(variable -= learning_rate * m_t / (epsilon_t + sqrt(v_t))\\) m_t_slice = array_ops.gather(m_t, grad.indices) v_t_slice = array_ops.gather(v_t, grad.indices) denominator_slice = math_ops.sqrt(v_t_slice) + epsilon_t diff --git a/tensorflow/contrib/opt/python/training/moving_average_optimizer_test.py 
b/tensorflow/contrib/opt/python/training/moving_average_optimizer_test.py index 85e3e8d3791f2331ed249c0b7f67a3dbde4fca08..ac04ad99110b016b62e091aa10c7f565e5093bc1 100644 --- a/tensorflow/contrib/opt/python/training/moving_average_optimizer_test.py +++ b/tensorflow/contrib/opt/python/training/moving_average_optimizer_test.py @@ -85,7 +85,7 @@ class MovingAverageOptimizerTest(test.TestCase): state_ops.assign_add(ema_var1, [4.0, 4.0]) ]) - # Test taht saver with missing ema variables will fail. + # Test that saver with missing ema variables will fail. with self.assertRaisesRegexp(ValueError, r'Variable to swap'): opt.swapping_saver(var_list=[var0]) @@ -123,7 +123,7 @@ class MovingAverageOptimizerTest(test.TestCase): self.assertAllCloseAccordingToType([0.9, 1.9], ema_var0.eval()) self.assertAllCloseAccordingToType([4.98, 5.98], var1.eval()) self.assertAllCloseAccordingToType([6.99, 7.99], ema_var1.eval()) - # Restore back to previou state. + # Restore back to previous state. train_saver.restore(sess, save_path) # If updates are parallel, this is not always true after the 1st step. 
diff --git a/tensorflow/contrib/optimizer_v2/BUILD b/tensorflow/contrib/optimizer_v2/BUILD index 26ea9135f57fb9fe95e61023bccb97d1d4f5ea1c..5225ecc14fef3cec9506eceb776805b74a87714e 100644 --- a/tensorflow/contrib/optimizer_v2/BUILD +++ b/tensorflow/contrib/optimizer_v2/BUILD @@ -48,6 +48,7 @@ py_library( srcs_version = "PY2AND3", deps = [ "//tensorflow/python:control_flow_ops", + "//tensorflow/python:distribute", "//tensorflow/python:framework", "//tensorflow/python:math_ops", "//tensorflow/python:resource_variable_ops", @@ -114,7 +115,6 @@ cuda_py_test( additional_deps = [ ":training", "@six_archive//:six", - "//tensorflow/contrib/eager/python:checkpointable_utils", "//tensorflow/python:constant_op", "//tensorflow/python:dtypes", "//tensorflow/python:framework_ops", @@ -202,4 +202,5 @@ cuda_py_test( "//tensorflow/python:client_testlib", "//third_party/py/numpy", ], + tags = ["optonly"], ) diff --git a/tensorflow/contrib/optimizer_v2/adam.py b/tensorflow/contrib/optimizer_v2/adam.py index 42b7f92a76c1971e2a63722d769ee006c3f3210b..d538ad0fb02699ed8514f512208914f629a47436 100644 --- a/tensorflow/contrib/optimizer_v2/adam.py +++ b/tensorflow/contrib/optimizer_v2/adam.py @@ -40,23 +40,19 @@ class AdamOptimizer(optimizer_v2.OptimizerV2): Initialization: - ``` - m_0 <- 0 (Initialize initial 1st moment vector) - v_0 <- 0 (Initialize initial 2nd moment vector) - t <- 0 (Initialize timestep) - ``` + $$m_0 := 0 (Initialize initial 1st moment vector)$$ + $$v_0 := 0 (Initialize initial 2nd moment vector)$$ + $$t := 0 (Initialize timestep)$$ The update rule for `variable` with gradient `g` uses an optimization described at the end of section2 of the paper: - ``` - t <- t + 1 - lr_t <- learning_rate * sqrt(1 - beta2^t) / (1 - beta1^t) + $$t := t + 1$$ + $$lr_t := \text{learning_rate} * \sqrt{1 - beta_2^t} / (1 - beta_1^t)$$ - m_t <- beta1 * m_{t-1} + (1 - beta1) * g - v_t <- beta2 * v_{t-1} + (1 - beta2) * g * g - variable <- variable - lr_t * m_t / (sqrt(v_t) + epsilon) - ``` 
+ $$m_t := beta_1 * m_{t-1} + (1 - beta_1) * g$$ + $$v_t := beta_2 * v_{t-1} + (1 - beta_2) * g * g$$ + $$variable := variable - lr_t * m_t / (\sqrt{v_t} + \epsilon)$$ The default value of 1e-8 for epsilon might not be a good default in general. For example, when training an Inception network on ImageNet a diff --git a/tensorflow/contrib/optimizer_v2/checkpointable_utils_test.py b/tensorflow/contrib/optimizer_v2/checkpointable_utils_test.py index 08f9699e850a6519dbb5de3bbf0d8b8de01c61b2..8ac9b581455f8f4c7af1a66432169ae179de1634 100644 --- a/tensorflow/contrib/optimizer_v2/checkpointable_utils_test.py +++ b/tensorflow/contrib/optimizer_v2/checkpointable_utils_test.py @@ -24,23 +24,24 @@ import os import six -from tensorflow.contrib.eager.python import checkpointable_utils from tensorflow.contrib.optimizer_v2 import adam from tensorflow.python.client import session as session_lib from tensorflow.python.eager import backprop from tensorflow.python.eager import context +from tensorflow.python.eager import function from tensorflow.python.eager import test from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import test_util from tensorflow.python.keras._impl.keras.engine import training -from tensorflow.python.layers import core +from tensorflow.python.keras._impl.keras.layers import core from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import resource_variable_ops from tensorflow.python.ops import state_ops from tensorflow.python.ops import variable_scope from tensorflow.python.training import checkpointable +from tensorflow.python.training import checkpointable_utils from tensorflow.python.training import saver as core_saver from tensorflow.python.training import training_util @@ -372,6 +373,50 @@ class CheckpointingTests(test.TestCase): self.assertEqual(training_continuation + 1, self.evaluate(root.save_counter)) + # 
pylint: disable=cell-var-from-loop + @test_util.run_in_graph_and_eager_modes() + def testWithDefun(self): + num_training_steps = 2 + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + for training_continuation in range(3): + with ops.Graph().as_default(), self.test_session( + graph=ops.get_default_graph()), test_util.device(use_gpu=True): + model = MyModel() + # Don't actually train so we can test variable values + optimizer = adam.AdamOptimizer(0.) + root = checkpointable_utils.Checkpoint( + optimizer=optimizer, model=model, + global_step=training_util.get_or_create_global_step()) + checkpoint_path = core_saver.latest_checkpoint(checkpoint_directory) + status = root.restore(save_path=checkpoint_path) + def train_fn(): + @function.defun + def _call_model(x): + return model(x) + with backprop.GradientTape() as tape: + loss = _call_model(constant_op.constant([[3.]])) + gradients = tape.gradient(loss, model.variables) + return optimizer.apply_gradients(zip(gradients, model.variables), + global_step=root.global_step) + if not context.executing_eagerly(): + train_fn = functools.partial( + self.evaluate, train_fn()) + status.initialize_or_restore() + for _ in range(num_training_steps): + train_fn() + if training_continuation > 0: + status.assert_consumed() + self.assertAllClose([[42.]], self.evaluate(model.variables[0])) + else: + self.evaluate(model.variables[0].assign([[42.]])) + root.save(file_prefix=checkpoint_prefix) + self.assertEqual((training_continuation + 1) * num_training_steps, + self.evaluate(root.global_step)) + self.assertEqual(training_continuation + 1, + self.evaluate(root.save_counter)) + # pylint: enable=cell-var-from-loop + def _get_checkpoint_name(self, name): root = checkpointable.Checkpointable() checkpointable_utils.add_variable( @@ -411,7 +456,7 @@ class CheckpointingTests(test.TestCase): optimizer.apply_gradients( [(g, v) for g, v in zip(grad, model.vars)]) - 
@test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True) + @test_util.run_in_graph_and_eager_modes() def testDeferredSlotRestoration(self): checkpoint_directory = self.get_temp_dir() diff --git a/tensorflow/contrib/optimizer_v2/momentum_test.py b/tensorflow/contrib/optimizer_v2/momentum_test.py index f37eb48181d6bef195215b86f14f69d3df65a8ac..26724f66c2a1db1d01577b31b739af18f51d3976 100644 --- a/tensorflow/contrib/optimizer_v2/momentum_test.py +++ b/tensorflow/contrib/optimizer_v2/momentum_test.py @@ -237,7 +237,17 @@ class MomentumOptimizerTest(test.TestCase): @test_util.run_in_graph_and_eager_modes(reset_test=True) def testMinimizeSparseResourceVariable(self): for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: - var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]], dtype=dtype) + # This test invokes the ResourceSparseApplyMomentum operation, which + # did not have a registered GPU kernel as of April 2018. With graph + # execution, the placement algorithm notices this and automatically + # places the variable in CPU (host) memory. With eager execution, + # the variable would be placed in GPU memory if available, which + # would then conflict with the future invocation of the + # ResourceSparseApplyMomentum operation. + # To work around this discrepancy, for now we force the variable + # to be placed on CPU. + with ops.device("/cpu:0"): + var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]], dtype=dtype) # pylint: disable=cell-var-from-loop def loss(): @@ -256,7 +266,17 @@ class MomentumOptimizerTest(test.TestCase): @test_util.run_in_graph_and_eager_modes(reset_test=True) def testMinimizeWith2DIndiciesForEmbeddingLookup(self): - var0 = resource_variable_ops.ResourceVariable(array_ops.ones([2, 2])) + # This test invokes the ResourceSparseApplyMomentum operation, which + # did not have a registered GPU kernel as of April 2018. 
With graph + # execution, the placement algorithm notices this and automatically + # places the variable in CPU (host) memory. With eager execution, + # the variable would be placed in GPU memory if available, which + # would then conflict with the future invocation of the + # ResourceSparseApplyMomentum operation. + # To work around this discrepancy, for now we force the variable + # to be placed on CPU. + with ops.device("/cpu:0"): + var0 = resource_variable_ops.ResourceVariable(array_ops.ones([2, 2])) def loss(): return math_ops.reduce_sum(embedding_ops.embedding_lookup(var0, [[1]])) diff --git a/tensorflow/contrib/optimizer_v2/optimizer_v2.py b/tensorflow/contrib/optimizer_v2/optimizer_v2.py index 471992fdac528316084cb402e51342cc54d4836b..ce15db6f1ec067e5aeb6ddbc8939d2b773692269 100644 --- a/tensorflow/contrib/optimizer_v2/optimizer_v2.py +++ b/tensorflow/contrib/optimizer_v2/optimizer_v2.py @@ -579,7 +579,7 @@ class OptimizerV2(optimizer_v1.Optimizer): ### State - Internal methods apre passed a `state` argument with the correct + Internal methods are passed a `state` argument with the correct values to use for the slot and non-slot variables, and the hyper parameters. """ @@ -866,7 +866,7 @@ class OptimizerV2(optimizer_v1.Optimizer): raise ValueError("No gradients provided for any variable: %s." % ([str(v) for _, v in grads_and_vars],)) return distribute_lib.get_tower_context().merge_call( - self.distributed_apply, filtered, global_step=global_step, name=name) + self._distributed_apply, filtered, global_step=global_step, name=name) def _get_or_create_state(self, var_list=None): """Either looks up or creates `_OptimizerV2State`. 
@@ -899,7 +899,7 @@ class OptimizerV2(optimizer_v1.Optimizer): self._per_graph_state[graph_key] = per_graph_state return per_graph_state - def distributed_apply(self, distribution, grads_and_vars, global_step, name): + def _distributed_apply(self, distribution, grads_and_vars, global_step, name): """`apply_gradients` for use with a `DistributionStrategy`.""" reduced_grads = distribution.batch_reduce("sum", grads_and_vars) var_list = [v for _, v in grads_and_vars] diff --git a/tensorflow/contrib/proto/BUILD b/tensorflow/contrib/proto/BUILD new file mode 100644 index 0000000000000000000000000000000000000000..3e9b1a0b8d8ec7c3c5fe5d1f2cf896dbb6c3de72 --- /dev/null +++ b/tensorflow/contrib/proto/BUILD @@ -0,0 +1,32 @@ +package(default_visibility = ["//visibility:public"]) + +licenses(["notice"]) # Apache 2.0 + +exports_files(["LICENSE"]) + +load("//tensorflow/core:platform/default/build_config_root.bzl", "if_static") + +py_library( + name = "proto", + srcs = [ + "__init__.py", + ], + deps = [ + "//tensorflow/contrib/proto/python/ops:decode_proto_op_py", + "//tensorflow/contrib/proto/python/ops:encode_proto_op_py", + ], +) + +py_library( + name = "proto_pip", + data = [ + "//tensorflow/contrib/proto/python/kernel_tests:test_messages", + ] + if_static( + [], + otherwise = ["//tensorflow/contrib/proto/python/kernel_tests:libtestexample.so"], + ), + deps = [ + ":proto", + "//tensorflow/contrib/proto/python/kernel_tests:py_test_deps", + ], +) diff --git a/tensorflow/contrib/proto/__init__.py b/tensorflow/contrib/proto/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..bc5a49de78e251cb4a854fc11a7b13b39820127d --- /dev/null +++ b/tensorflow/contrib/proto/__init__.py @@ -0,0 +1,28 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Ops and modules related to proto. + +@@decode_proto +@@encode_proto +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.proto.python.ops.decode_proto_op import decode_proto +from tensorflow.contrib.proto.python.ops.encode_proto_op import encode_proto + +from tensorflow.python.util.all_util import remove_undocumented +remove_undocumented(__name__) diff --git a/tensorflow/contrib/proto/python/kernel_tests/BUILD b/tensorflow/contrib/proto/python/kernel_tests/BUILD new file mode 100644 index 0000000000000000000000000000000000000000..a380a131f86abc8dd921a123afdb964bf6c2466c --- /dev/null +++ b/tensorflow/contrib/proto/python/kernel_tests/BUILD @@ -0,0 +1,86 @@ +package(default_visibility = ["//visibility:public"]) + +licenses(["notice"]) # Apache 2.0 + +exports_files(["LICENSE"]) + +# Much of the work in this BUILD file actually happens in the corresponding +# build_defs.bzl, which creates an individual testcase for each example .pbtxt +# file in this directory. +# +load(":build_defs.bzl", "decode_proto_test_suite") +load(":build_defs.bzl", "encode_proto_test_suite") + +# This expands to a tf_py_test for each test file. +# It defines the test_suite :decode_proto_tests. +decode_proto_test_suite( + name = "decode_proto_tests", + examples = glob(["*.pbtxt"]), +) + +# This expands to a tf_py_test for each test file. +# It defines the test_suite :encode_proto_tests. 
+encode_proto_test_suite( + name = "encode_proto_tests", + examples = glob(["*.pbtxt"]), +) + +# Below here are tests that are not tied to an example text proto. +filegroup( + name = "test_messages", + srcs = glob(["*.pbtxt"]), +) + +load("//tensorflow:tensorflow.bzl", "tf_py_test") +load("//tensorflow:tensorflow.bzl", "tf_cc_shared_object") +load("//tensorflow/core:platform/default/build_config_root.bzl", "if_static") +load("//tensorflow/core:platform/default/build_config.bzl", "tf_proto_library") + +tf_py_test( + name = "decode_proto_fail_test", + size = "small", + srcs = ["decode_proto_fail_test.py"], + additional_deps = [ + ":py_test_deps", + "//third_party/py/numpy", + "//tensorflow/contrib/proto:proto", + "//tensorflow/contrib/proto/python/ops:decode_proto_op_py", + ], + data = if_static( + [], + otherwise = [":libtestexample.so"], + ), + tags = [ + "no_pip", # TODO(b/78026780) + "no_windows", # TODO(b/78028010) + ], +) + +py_library( + name = "test_case", + srcs = ["test_case.py"], + deps = ["//tensorflow/python:client_testlib"], +) + +py_library( + name = "py_test_deps", + deps = [ + ":test_case", + ":test_example_proto_py", + ], +) + +tf_proto_library( + name = "test_example_proto", + srcs = ["test_example.proto"], + cc_api_version = 2, + protodeps = ["//tensorflow/core:protos_all"], +) + +tf_cc_shared_object( + name = "libtestexample.so", + linkstatic = 1, + deps = [ + ":test_example_proto_cc", + ], +) diff --git a/tensorflow/contrib/proto/python/kernel_tests/build_defs.bzl b/tensorflow/contrib/proto/python/kernel_tests/build_defs.bzl new file mode 100644 index 0000000000000000000000000000000000000000..f425601691e21b36914f340d53ccadf9b4e3641f --- /dev/null +++ b/tensorflow/contrib/proto/python/kernel_tests/build_defs.bzl @@ -0,0 +1,89 @@ +"""BUILD rules for generating file-driven proto test cases. + +The decode_proto_test_suite() and encode_proto_test_suite() rules take a list +of text protos and generate a tf_py_test() for each one. 
+""" + +load("//tensorflow:tensorflow.bzl", "tf_py_test") +load("//tensorflow:tensorflow.bzl", "register_extension_info") +load("//tensorflow/core:platform/default/build_config_root.bzl", "if_static") + +def _test_name(test, path): + return "%s_%s_test" % (test, path.split("/")[-1].split(".")[0]) + +def decode_proto_test_suite(name, examples): + """Build the decode_proto py_test for each test filename.""" + for test_filename in examples: + tf_py_test( + name = _test_name("decode_proto", test_filename), + srcs = ["decode_proto_op_test.py"], + size = "small", + data = [test_filename] + if_static( + [], + otherwise = [":libtestexample.so"], + ), + main = "decode_proto_op_test.py", + args = [ + "--message_text_file=\"%s/%s\"" % (native.package_name(), test_filename), + ], + additional_deps = [ + ":py_test_deps", + "//third_party/py/numpy", + "//tensorflow/contrib/proto:proto", + "//tensorflow/contrib/proto/python/ops:decode_proto_op_py", + ], + tags = [ + "no_pip", # TODO(b/78026780) + "no_windows", # TODO(b/78028010) + ], + ) + native.test_suite( + name = name, + tests = [":" + _test_name("decode_proto", test_filename) + for test_filename in examples], + ) + +def encode_proto_test_suite(name, examples): + """Build the encode_proto py_test for each test filename.""" + for test_filename in examples: + tf_py_test( + name = _test_name("encode_proto", test_filename), + srcs = ["encode_proto_op_test.py"], + size = "small", + data = [test_filename] + if_static( + [], + otherwise = [":libtestexample.so"], + ), + main = "encode_proto_op_test.py", + args = [ + "--message_text_file=\"%s/%s\"" % (native.package_name(), test_filename), + ], + additional_deps = [ + ":py_test_deps", + "//third_party/py/numpy", + "//tensorflow/contrib/proto:proto", + "//tensorflow/contrib/proto/python/ops:decode_proto_op_py", + "//tensorflow/contrib/proto/python/ops:encode_proto_op_py", + ], + tags = [ + "no_pip", # TODO(b/78026780) + "no_windows", # TODO(b/78028010) + ], + ) + native.test_suite( + 
name = name, + tests = [":" + _test_name("encode_proto", test_filename) + for test_filename in examples], + ) + +register_extension_info( + extension_name = "decode_proto_test_suite", + label_regex_map = { + "deps": "deps:decode_example_.*", + }) + +register_extension_info( + extension_name = "encode_proto_test_suite", + label_regex_map = { + "deps": "deps:encode_example_.*", + }) diff --git a/tensorflow/contrib/proto/python/kernel_tests/decode_proto_fail_test.py b/tensorflow/contrib/proto/python/kernel_tests/decode_proto_fail_test.py new file mode 100644 index 0000000000000000000000000000000000000000..5298342ee79b08a50b13ce8715e891a332efb3bc --- /dev/null +++ b/tensorflow/contrib/proto/python/kernel_tests/decode_proto_fail_test.py @@ -0,0 +1,68 @@ +# ============================================================================= +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +# Python3 preparedness imports. 
+from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.contrib.proto.python.kernel_tests import test_case +from tensorflow.contrib.proto.python.ops import decode_proto_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors +from tensorflow.python.platform import test + + +class DecodeProtoFailTest(test_case.ProtoOpTestCase): + """Test failure cases for DecodeToProto.""" + + def _TestCorruptProtobuf(self, sanitize): + """Test failure cases for DecodeToProto.""" + + # The goal here is to check the error reporting. + # Testing against a variety of corrupt protobufs is + # done by fuzzing. + corrupt_proto = 'This is not a binary protobuf' + + # Numpy silently truncates the strings if you don't specify dtype=object. + batch = np.array(corrupt_proto, dtype=object) + msg_type = 'tensorflow.contrib.proto.TestCase' + field_names = ['sizes'] + field_types = [dtypes.int32] + + with self.test_session() as sess: + ctensor, vtensor = decode_proto_op.decode_proto( + batch, + message_type=msg_type, + field_names=field_names, + output_types=field_types, + sanitize=sanitize) + with self.assertRaisesRegexp(errors.DataLossError, + 'Unable to parse binary protobuf' + '|Failed to consume entire buffer'): + _ = sess.run([ctensor] + vtensor) + + def testCorrupt(self): + self._TestCorruptProtobuf(sanitize=False) + + def testSanitizerCorrupt(self): + self._TestCorruptProtobuf(sanitize=True) + + +if __name__ == '__main__': + test.main() diff --git a/tensorflow/contrib/proto/python/kernel_tests/decode_proto_op_test.py b/tensorflow/contrib/proto/python/kernel_tests/decode_proto_op_test.py new file mode 100644 index 0000000000000000000000000000000000000000..d1c13c82bc264bc8bcc721eb68ee3916f32ef7a8 --- /dev/null +++ b/tensorflow/contrib/proto/python/kernel_tests/decode_proto_op_test.py @@ -0,0 +1,300 @@ +# 
============================================================================= +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= +"""Table-driven test for decode_proto op. + +This test is run once with each of the *.TestCase.pbtxt files +in the test directory. +""" +# Python3 preparedness imports. +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from google.protobuf import text_format + +from tensorflow.contrib.proto.python.kernel_tests import test_case +from tensorflow.contrib.proto.python.kernel_tests import test_example_pb2 +from tensorflow.contrib.proto.python.ops import decode_proto_op +from tensorflow.python.framework import dtypes +from tensorflow.python.platform import flags +from tensorflow.python.platform import test + +FLAGS = flags.FLAGS + +flags.DEFINE_string('message_text_file', None, + 'A file containing a text serialized TestCase protobuf.') + + +class DecodeProtoOpTest(test_case.ProtoOpTestCase): + + def _compareValues(self, fd, vs, evs): + """Compare lists/arrays of field values.""" + + if len(vs) != len(evs): + self.fail('Field %s decoded %d outputs, expected %d' % + (fd.name, len(vs), len(evs))) + for i, ev in enumerate(evs): + # Special case fuzzy match for float32. 
TensorFlow seems to mess with + # MAX_FLT slightly and the test doesn't work otherwise. + # TODO(nix): ask on TF list about why MAX_FLT doesn't pass through. + if fd.cpp_type == fd.CPPTYPE_FLOAT: + # Numpy isclose() is better than assertIsClose() which uses an absolute + # value comparison. + self.assertTrue( + np.isclose(vs[i], ev), 'expected %r, actual %r' % (ev, vs[i])) + elif fd.cpp_type == fd.CPPTYPE_STRING: + # In Python3 string tensor values will be represented as bytes, so we + # reencode the proto values to match that. + self.assertEqual(vs[i], ev.encode('ascii')) + else: + # Doubles and other types pass through unscathed. + self.assertEqual(vs[i], ev) + + def _compareRepeatedPrimitiveValue(self, batch_shape, sizes, fields, + field_dict): + """Compare protos of type RepeatedPrimitiveValue. + + Args: + batch_shape: the shape of the input tensor of serialized messages. + sizes: int matrix of repeat counts returned by decode_proto + fields: list of test_example_pb2.FieldSpec (types and expected values) + field_dict: map from field names to decoded numpy tensors of values + """ + + # Check that expected values match. + for field in fields: + values = field_dict[field.name] + self.assertEqual(dtypes.as_dtype(values.dtype), field.dtype) + + fd = field.expected.DESCRIPTOR.fields_by_name[field.name] + + # Values has the same shape as the input plus an extra + # dimension for repeats. + self.assertEqual(list(values.shape)[:-1], batch_shape) + + # Nested messages are represented as TF strings, requiring + # some special handling. + if field.name == 'message_value': + vs = [] + for buf in values.flat: + msg = test_example_pb2.PrimitiveValue() + msg.ParseFromString(buf) + vs.append(msg) + evs = getattr(field.expected, field.name) + if len(vs) != len(evs): + self.fail('Field %s decoded %d outputs, expected %d' % + (fd.name, len(vs), len(evs))) + for v, ev in zip(vs, evs): + self.assertEqual(v, ev) + continue + + # This can be a little confusing. 
For testing we are using + # RepeatedPrimitiveValue in two ways: it's the proto that we + # decode for testing, and it's used in the expected value as a + # union type. The two cases are slightly different: this is the + # second case. + # We may be fetching the uint64_value from the test proto, but + # in the expected proto we store it in the int64_value field + # because TensorFlow doesn't support unsigned int64. + tf_type_to_primitive_value_field = { + dtypes.float32: + 'float_value', + dtypes.float64: + 'double_value', + dtypes.int32: + 'int32_value', + dtypes.uint8: + 'uint8_value', + dtypes.int8: + 'int8_value', + dtypes.string: + 'string_value', + dtypes.int64: + 'int64_value', + dtypes.bool: + 'bool_value', + # Unhandled TensorFlow types: + # DT_INT16 DT_COMPLEX64 DT_QINT8 DT_QUINT8 DT_QINT32 + # DT_BFLOAT16 DT_QINT16 DT_QUINT16 DT_UINT16 + } + tf_field_name = tf_type_to_primitive_value_field.get(field.dtype) + if tf_field_name is None: + self.fail('Unhandled tensorflow type %d' % field.dtype) + + self._compareValues(fd, values.flat, + getattr(field.expected, tf_field_name)) + + def _runDecodeProtoTests(self, fields, case_sizes, batch_shape, batch, + message_type, message_format, sanitize, + force_disordered=False): + """Run decode tests on a batch of messages. + + Args: + fields: list of test_example_pb2.FieldSpec (types and expected values) + case_sizes: expected sizes array + batch_shape: the shape of the input tensor of serialized messages + batch: list of serialized messages + message_type: descriptor name for messages + message_format: format of messages, 'text' or 'binary' + sanitize: whether to sanitize binary protobuf inputs + force_disordered: whether to force fields encoded out of order. + """ + + if force_disordered: + # Exercise code path that handles out-of-order fields by prepending extra + # fields with tag numbers higher than any real field. 
Note that this won't + # work with sanitization because that forces reserialization using a + # trusted decoder and encoder. + assert not sanitize + extra_fields = test_example_pb2.ExtraFields() + extra_fields.string_value = 'IGNORE ME' + extra_fields.bool_value = False + extra_msg = extra_fields.SerializeToString() + batch = [extra_msg + msg for msg in batch] + + # Numpy silently truncates the strings if you don't specify dtype=object. + batch = np.array(batch, dtype=object) + batch = np.reshape(batch, batch_shape) + + field_names = [f.name for f in fields] + output_types = [f.dtype for f in fields] + + with self.test_session() as sess: + sizes, vtensor = decode_proto_op.decode_proto( + batch, + message_type=message_type, + field_names=field_names, + output_types=output_types, + message_format=message_format, + sanitize=sanitize) + + vlist = sess.run([sizes] + vtensor) + sizes = vlist[0] + # Values is a list of tensors, one for each field. + value_tensors = vlist[1:] + + # Check that the repeat sizes are correct. + self.assertTrue( + np.all(np.array(sizes.shape) == batch_shape + [len(field_names)])) + + # Check that the decoded sizes match the expected sizes. 
+ self.assertEqual(len(sizes.flat), len(case_sizes)) + self.assertTrue( + np.all(sizes.flat == np.array( + case_sizes, dtype=np.int32))) + + field_dict = dict(zip(field_names, value_tensors)) + + self._compareRepeatedPrimitiveValue(batch_shape, sizes, fields, + field_dict) + + def testBinary(self): + with open(FLAGS.message_text_file, 'r') as fp: + case = text_format.Parse(fp.read(), test_example_pb2.TestCase()) + + batch = [primitive.SerializeToString() for primitive in case.primitive] + self._runDecodeProtoTests( + case.field, + case.sizes, + list(case.shape), + batch, + 'tensorflow.contrib.proto.RepeatedPrimitiveValue', + 'binary', + sanitize=False) + + def testBinaryDisordered(self): + with open(FLAGS.message_text_file, 'r') as fp: + case = text_format.Parse(fp.read(), test_example_pb2.TestCase()) + + batch = [primitive.SerializeToString() for primitive in case.primitive] + self._runDecodeProtoTests( + case.field, + case.sizes, + list(case.shape), + batch, + 'tensorflow.contrib.proto.RepeatedPrimitiveValue', + 'binary', + sanitize=False, + force_disordered=True) + + def testPacked(self): + with open(FLAGS.message_text_file, 'r') as fp: + case = text_format.Parse(fp.read(), test_example_pb2.TestCase()) + + # Now try with the packed serialization. + # We test the packed representations by loading the same test cases + # using PackedPrimitiveValue instead of RepeatedPrimitiveValue. + # To do this we rely on the text format being the same for packed and + # unpacked fields, and reparse the test message using the packed version + # of the proto. + packed_batch = [ + # Note: float_format='.17g' is necessary to ensure preservation of + # doubles and floats in text format. 
+ text_format.Parse( + text_format.MessageToString( + primitive, float_format='.17g'), + test_example_pb2.PackedPrimitiveValue()).SerializeToString() + for primitive in case.primitive + ] + + self._runDecodeProtoTests( + case.field, + case.sizes, + list(case.shape), + packed_batch, + 'tensorflow.contrib.proto.PackedPrimitiveValue', + 'binary', + sanitize=False) + + def testText(self): + with open(FLAGS.message_text_file, 'r') as fp: + case = text_format.Parse(fp.read(), test_example_pb2.TestCase()) + + # Note: float_format='.17g' is necessary to ensure preservation of + # doubles and floats in text format. + text_batch = [ + text_format.MessageToString( + primitive, float_format='.17g') for primitive in case.primitive + ] + + self._runDecodeProtoTests( + case.field, + case.sizes, + list(case.shape), + text_batch, + 'tensorflow.contrib.proto.RepeatedPrimitiveValue', + 'text', + sanitize=False) + + def testSanitizerGood(self): + with open(FLAGS.message_text_file, 'r') as fp: + case = text_format.Parse(fp.read(), test_example_pb2.TestCase()) + + batch = [primitive.SerializeToString() for primitive in case.primitive] + self._runDecodeProtoTests( + case.field, + case.sizes, + list(case.shape), + batch, + 'tensorflow.contrib.proto.RepeatedPrimitiveValue', + 'binary', + sanitize=True) + + +if __name__ == '__main__': + test.main() diff --git a/tensorflow/contrib/proto/python/kernel_tests/defaut_values.TestCase.pbtxt b/tensorflow/contrib/proto/python/kernel_tests/defaut_values.TestCase.pbtxt new file mode 100644 index 0000000000000000000000000000000000000000..4e316819077c7dbb28beefd4dc260568f26da680 --- /dev/null +++ b/tensorflow/contrib/proto/python/kernel_tests/defaut_values.TestCase.pbtxt @@ -0,0 +1,94 @@ +primitive { + # No fields specified, so we get all defaults +} +shape: 1 +sizes: 0 +field { + name: "double_default" + dtype: DT_DOUBLE + expected { double_value: 1.0 } +} +sizes: 0 +field { + name: "float_default" + dtype: DT_DOUBLE # Try casting the float field to 
double. + expected { double_value: 2.0 } +} +sizes: 0 +field { + name: "int64_default" + dtype: DT_INT64 + expected { int64_value: 3 } +} +sizes: 0 +field { + name: "uint64_default" + dtype: DT_INT64 + expected { int64_value: 4 } +} +sizes: 0 +field { + name: "int32_default" + dtype: DT_INT32 + expected { int32_value: 5 } +} +sizes: 0 +field { + name: "fixed64_default" + dtype: DT_INT64 + expected { int64_value: 6 } +} +sizes: 0 +field { + name: "fixed32_default" + dtype: DT_INT32 + expected { int32_value: 7 } +} +sizes: 0 +field { + name: "bool_default" + dtype: DT_BOOL + expected { bool_value: true } +} +sizes: 0 +field { + name: "string_default" + dtype: DT_STRING + expected { string_value: "a" } +} +sizes: 0 +field { + name: "bytes_default" + dtype: DT_STRING + expected { string_value: "a longer default string" } +} +sizes: 0 +field { + name: "uint32_default" + dtype: DT_INT32 + expected { int32_value: -1 } +} +sizes: 0 +field { + name: "sfixed32_default" + dtype: DT_INT32 + expected { int32_value: 10 } +} +sizes: 0 +field { + name: "sfixed64_default" + dtype: DT_INT64 + expected { int64_value: 11 } +} +sizes: 0 +field { + name: "sint32_default" + dtype: DT_INT32 + expected { int32_value: 12 } +} +sizes: 0 +field { + name: "sint64_default" + dtype: DT_INT64 + expected { int64_value: 13 } +} diff --git a/tensorflow/contrib/proto/python/kernel_tests/encode_proto_op_test.py b/tensorflow/contrib/proto/python/kernel_tests/encode_proto_op_test.py new file mode 100644 index 0000000000000000000000000000000000000000..30e58e6336dc66830418c7cd2b3111a851d691b6 --- /dev/null +++ b/tensorflow/contrib/proto/python/kernel_tests/encode_proto_op_test.py @@ -0,0 +1,180 @@ +# ============================================================================= +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= +"""Table-driven test for encode_proto op. + +This test is run once with each of the *.TestCase.pbtxt files +in the test directory. + +It tests that encode_proto is a lossless inverse of decode_proto +(for the specified fields). +""" +# Python3 readiness boilerplate +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from google.protobuf import text_format + +from tensorflow.contrib.proto.python.kernel_tests import test_case +from tensorflow.contrib.proto.python.kernel_tests import test_example_pb2 +from tensorflow.contrib.proto.python.ops import decode_proto_op +from tensorflow.contrib.proto.python.ops import encode_proto_op +from tensorflow.python.framework import dtypes +from tensorflow.python.ops import array_ops +from tensorflow.python.platform import flags +from tensorflow.python.platform import test + +FLAGS = flags.FLAGS + +flags.DEFINE_string('message_text_file', None, + 'A file containing a text serialized TestCase protobuf.') + + +class EncodeProtoOpTest(test_case.ProtoOpTestCase): + + def testBadInputs(self): + # Invalid field name + with self.test_session(): + with self.assertRaisesOpError('Unknown field: non_existent_field'): + encode_proto_op.encode_proto( + sizes=[[1]], + values=[np.array([[0.0]], dtype=np.int32)], + message_type='tensorflow.contrib.proto.RepeatedPrimitiveValue', + field_names=['non_existent_field']).eval() + + # Incorrect types. 
+ with self.test_session(): + with self.assertRaisesOpError( + 'Incompatible type for field double_value.'): + encode_proto_op.encode_proto( + sizes=[[1]], + values=[np.array([[0.0]], dtype=np.int32)], + message_type='tensorflow.contrib.proto.RepeatedPrimitiveValue', + field_names=['double_value']).eval() + + # Incorrect shapes of sizes. + with self.test_session(): + with self.assertRaisesOpError( + r'sizes should be batch_size \+ \[len\(field_names\)\]'): + sizes = array_ops.placeholder(dtypes.int32) + values = array_ops.placeholder(dtypes.float64) + encode_proto_op.encode_proto( + sizes=sizes, + values=[values], + message_type='tensorflow.contrib.proto.RepeatedPrimitiveValue', + field_names=['double_value']).eval(feed_dict={ + sizes: [[[0, 0]]], + values: [[0.0]] + }) + + # Inconsistent shapes of values. + with self.test_session(): + with self.assertRaisesOpError( + 'Values must match up to the last dimension'): + sizes = array_ops.placeholder(dtypes.int32) + values1 = array_ops.placeholder(dtypes.float64) + values2 = array_ops.placeholder(dtypes.int32) + (encode_proto_op.encode_proto( + sizes=[[1, 1]], + values=[values1, values2], + message_type='tensorflow.contrib.proto.RepeatedPrimitiveValue', + field_names=['double_value', 'int32_value']).eval(feed_dict={ + values1: [[0.0]], + values2: [[0], [0]] + })) + + def _testRoundtrip(self, in_bufs, message_type, fields): + + field_names = [f.name for f in fields] + out_types = [f.dtype for f in fields] + + with self.test_session() as sess: + sizes, field_tensors = decode_proto_op.decode_proto( + in_bufs, + message_type=message_type, + field_names=field_names, + output_types=out_types) + + out_tensors = encode_proto_op.encode_proto( + sizes, + field_tensors, + message_type=message_type, + field_names=field_names) + + out_bufs, = sess.run([out_tensors]) + + # Check that the re-encoded tensor has the same shape. + self.assertEqual(in_bufs.shape, out_bufs.shape) + + # Compare the input and output. 
+ for in_buf, out_buf in zip(in_bufs.flat, out_bufs.flat): + in_obj = test_example_pb2.RepeatedPrimitiveValue() + in_obj.ParseFromString(in_buf) + + out_obj = test_example_pb2.RepeatedPrimitiveValue() + out_obj.ParseFromString(out_buf) + + # Check that the deserialized objects are identical. + self.assertEqual(in_obj, out_obj) + + # Check that the input and output serialized messages are identical. + # If we fail here, there is a difference in the serialized + # representation but the new serialization still parses. This could + # be harmless (a change in map ordering?) or it could be bad (e.g. + # loss of packing in the encoding). + self.assertEqual(in_buf, out_buf) + + def testRoundtrip(self): + with open(FLAGS.message_text_file, 'r') as fp: + case = text_format.Parse(fp.read(), test_example_pb2.TestCase()) + + in_bufs = [primitive.SerializeToString() for primitive in case.primitive] + + # np.array silently truncates strings if you don't specify dtype=object. + in_bufs = np.reshape(np.array(in_bufs, dtype=object), list(case.shape)) + return self._testRoundtrip( + in_bufs, 'tensorflow.contrib.proto.RepeatedPrimitiveValue', case.field) + + def testRoundtripPacked(self): + with open(FLAGS.message_text_file, 'r') as fp: + case = text_format.Parse(fp.read(), test_example_pb2.TestCase()) + + # Now try with the packed serialization. + # We test the packed representations by loading the same test cases + # using PackedPrimitiveValue instead of RepeatedPrimitiveValue. + # To do this we rely on the text format being the same for packed and + # unpacked fields, and reparse the test message using the packed version + # of the proto. + in_bufs = [ + # Note: float_format='.17g' is necessary to ensure preservation of + # doubles and floats in text format. 
+ text_format.Parse( + text_format.MessageToString( + primitive, float_format='.17g'), + test_example_pb2.PackedPrimitiveValue()).SerializeToString() + for primitive in case.primitive + ] + + # np.array silently truncates strings if you don't specify dtype=object. + in_bufs = np.reshape(np.array(in_bufs, dtype=object), list(case.shape)) + return self._testRoundtrip( + in_bufs, 'tensorflow.contrib.proto.PackedPrimitiveValue', case.field) + + +if __name__ == '__main__': + test.main() diff --git a/tensorflow/contrib/proto/python/kernel_tests/minmax.TestCase.pbtxt b/tensorflow/contrib/proto/python/kernel_tests/minmax.TestCase.pbtxt new file mode 100644 index 0000000000000000000000000000000000000000..b170f89c0f00dd9dffd5785197bb3bfd1ca2cfee --- /dev/null +++ b/tensorflow/contrib/proto/python/kernel_tests/minmax.TestCase.pbtxt @@ -0,0 +1,161 @@ +primitive { + double_value: -1.7976931348623158e+308 + double_value: 2.2250738585072014e-308 + double_value: 1.7976931348623158e+308 + float_value: -3.402823466e+38 + float_value: 1.175494351e-38 + float_value: 3.402823466e+38 + int64_value: -9223372036854775808 + int64_value: 9223372036854775807 + uint64_value: 0 + uint64_value: 18446744073709551615 + int32_value: -2147483648 + int32_value: 2147483647 + fixed64_value: 0 + fixed64_value: 18446744073709551615 + fixed32_value: 0 + fixed32_value: 4294967295 + bool_value: false + bool_value: true + string_value: "" + string_value: "I refer to the infinite." 
+ uint32_value: 0 + uint32_value: 4294967295 + sfixed32_value: -2147483648 + sfixed32_value: 2147483647 + sfixed64_value: -9223372036854775808 + sfixed64_value: 9223372036854775807 + sint32_value: -2147483648 + sint32_value: 2147483647 + sint64_value: -9223372036854775808 + sint64_value: 9223372036854775807 +} +shape: 1 +sizes: 3 +sizes: 3 +sizes: 2 +sizes: 2 +sizes: 2 +sizes: 2 +sizes: 2 +sizes: 2 +sizes: 2 +sizes: 2 +sizes: 2 +sizes: 2 +sizes: 2 +sizes: 2 +field { + name: "double_value" + dtype: DT_DOUBLE + expected { + double_value: -1.7976931348623158e+308 + double_value: 2.2250738585072014e-308 + double_value: 1.7976931348623158e+308 + } +} +field { + name: "float_value" + dtype: DT_FLOAT + expected { + float_value: -3.402823466e+38 + float_value: 1.175494351e-38 + float_value: 3.402823466e+38 + } +} +field { + name: "int64_value" + dtype: DT_INT64 + expected { + int64_value: -9223372036854775808 + int64_value: 9223372036854775807 + } +} +field { + name: "uint64_value" + dtype: DT_INT64 + expected { + int64_value: 0 + int64_value: -1 + } +} +field { + name: "int32_value" + dtype: DT_INT32 + expected { + int32_value: -2147483648 + int32_value: 2147483647 + } +} +field { + name: "fixed64_value" + dtype: DT_INT64 + expected { + int64_value: 0 + int64_value: -1 # unsigned is 18446744073709551615 + } +} +field { + name: "fixed32_value" + dtype: DT_INT32 + expected { + int32_value: 0 + int32_value: -1 # unsigned is 4294967295 + } +} +field { + name: "bool_value" + dtype: DT_BOOL + expected { + bool_value: false + bool_value: true + } +} +field { + name: "string_value" + dtype: DT_STRING + expected { + string_value: "" + string_value: "I refer to the infinite." 
+ } +} +field { + name: "uint32_value" + dtype: DT_INT32 + expected { + int32_value: 0 + int32_value: -1 # unsigned is 4294967295 + } +} +field { + name: "sfixed32_value" + dtype: DT_INT32 + expected { + int32_value: -2147483648 + int32_value: 2147483647 + } +} +field { + name: "sfixed64_value" + dtype: DT_INT64 + expected { + int64_value: -9223372036854775808 + int64_value: 9223372036854775807 + } +} +field { + name: "sint32_value" + dtype: DT_INT32 + expected { + int32_value: -2147483648 + int32_value: 2147483647 + } +} +field { + name: "sint64_value" + dtype: DT_INT64 + expected { + int64_value: -9223372036854775808 + int64_value: 9223372036854775807 + } +} diff --git a/tensorflow/contrib/proto/python/kernel_tests/nested.TestCase.pbtxt b/tensorflow/contrib/proto/python/kernel_tests/nested.TestCase.pbtxt new file mode 100644 index 0000000000000000000000000000000000000000..c664e52851b5bb3c439544537ce6402fc7cf3362 --- /dev/null +++ b/tensorflow/contrib/proto/python/kernel_tests/nested.TestCase.pbtxt @@ -0,0 +1,16 @@ +primitive { + message_value { + double_value: 23.5 + } +} +shape: 1 +sizes: 1 +field { + name: "message_value" + dtype: DT_STRING + expected { + message_value { + double_value: 23.5 + } + } +} diff --git a/tensorflow/contrib/proto/python/kernel_tests/optional.TestCase.pbtxt b/tensorflow/contrib/proto/python/kernel_tests/optional.TestCase.pbtxt new file mode 100644 index 0000000000000000000000000000000000000000..125651d7eaa1901e4804712bb807322b02ed5bc6 --- /dev/null +++ b/tensorflow/contrib/proto/python/kernel_tests/optional.TestCase.pbtxt @@ -0,0 +1,20 @@ +primitive { + bool_value: true +} +shape: 1 +sizes: 1 +sizes: 0 +field { + name: "bool_value" + dtype: DT_BOOL + expected { + bool_value: true + } +} +field { + name: "double_value" + dtype: DT_DOUBLE + expected { + double_value: 0.0 + } +} diff --git a/tensorflow/contrib/proto/python/kernel_tests/promote_unsigned.TestCase.pbtxt 
b/tensorflow/contrib/proto/python/kernel_tests/promote_unsigned.TestCase.pbtxt new file mode 100644 index 0000000000000000000000000000000000000000..bc07efc8f3038c6c540855c97b2254575e517ef3 --- /dev/null +++ b/tensorflow/contrib/proto/python/kernel_tests/promote_unsigned.TestCase.pbtxt @@ -0,0 +1,29 @@ +primitive { + fixed32_value: 4294967295 + uint32_value: 4294967295 +} +shape: 1 +sizes: 1 +field { + name: "fixed32_value" + dtype: DT_INT64 + expected { + int64_value: 4294967295 + } +} +sizes: 1 +field { + name: "uint32_value" + dtype: DT_INT64 + expected { + int64_value: 4294967295 + } +} +sizes: 0 +field { + name: "uint32_default" + dtype: DT_INT64 + expected { + int64_value: 4294967295 # Comes from an explicitly-specified default + } +} diff --git a/tensorflow/contrib/proto/python/kernel_tests/ragged.TestCase.pbtxt b/tensorflow/contrib/proto/python/kernel_tests/ragged.TestCase.pbtxt new file mode 100644 index 0000000000000000000000000000000000000000..61c7ac53f72b0764a0d57241cbdcdd93fcbd9279 --- /dev/null +++ b/tensorflow/contrib/proto/python/kernel_tests/ragged.TestCase.pbtxt @@ -0,0 +1,32 @@ +primitive { + double_value: 23.5 + double_value: 123.0 + bool_value: true +} +primitive { + double_value: 3.1 + bool_value: false +} +shape: 2 +sizes: 2 +sizes: 1 +sizes: 1 +sizes: 1 +field { + name: "double_value" + dtype: DT_DOUBLE + expected { + double_value: 23.5 + double_value: 123.0 + double_value: 3.1 + double_value: 0.0 + } +} +field { + name: "bool_value" + dtype: DT_BOOL + expected { + bool_value: true + bool_value: false + } +} diff --git a/tensorflow/contrib/proto/python/kernel_tests/shaped_batch.TestCase.pbtxt b/tensorflow/contrib/proto/python/kernel_tests/shaped_batch.TestCase.pbtxt new file mode 100644 index 0000000000000000000000000000000000000000..f4828076d52dc5d03a887c4a445dbcf52414c361 --- /dev/null +++ b/tensorflow/contrib/proto/python/kernel_tests/shaped_batch.TestCase.pbtxt @@ -0,0 +1,62 @@ +primitive { + double_value: 23.5 + bool_value: true +} 
+primitive { + double_value: 44.0 + bool_value: false +} +primitive { + double_value: 3.14159 + bool_value: true +} +primitive { + double_value: 1.414 + bool_value: true +} +primitive { + double_value: -32.2 + bool_value: false +} +primitive { + double_value: 0.0001 + bool_value: true +} +shape: 3 +shape: 2 +sizes: 1 +sizes: 1 +sizes: 1 +sizes: 1 +sizes: 1 +sizes: 1 +sizes: 1 +sizes: 1 +sizes: 1 +sizes: 1 +sizes: 1 +sizes: 1 +field { + name: "double_value" + dtype: DT_DOUBLE + expected { + double_value: 23.5 + double_value: 44.0 + double_value: 3.14159 + double_value: 1.414 + double_value: -32.2 + double_value: 0.0001 + } +} +field { + name: "bool_value" + dtype: DT_BOOL + expected { + bool_value: true + bool_value: false + bool_value: true + bool_value: true + bool_value: false + bool_value: true + } +} diff --git a/tensorflow/contrib/proto/python/kernel_tests/simple.TestCase.pbtxt b/tensorflow/contrib/proto/python/kernel_tests/simple.TestCase.pbtxt new file mode 100644 index 0000000000000000000000000000000000000000..dc20ac147b0e772f05b4fc614f9f56513aceb1d5 --- /dev/null +++ b/tensorflow/contrib/proto/python/kernel_tests/simple.TestCase.pbtxt @@ -0,0 +1,21 @@ +primitive { + double_value: 23.5 + bool_value: true +} +shape: 1 +sizes: 1 +sizes: 1 +field { + name: "double_value" + dtype: DT_DOUBLE + expected { + double_value: 23.5 + } +} +field { + name: "bool_value" + dtype: DT_BOOL + expected { + bool_value: true + } +} diff --git a/tensorflow/contrib/proto/python/kernel_tests/test_case.py b/tensorflow/contrib/proto/python/kernel_tests/test_case.py new file mode 100644 index 0000000000000000000000000000000000000000..b95202c5df654cfc02339477b242b2c58575a4d5 --- /dev/null +++ b/tensorflow/contrib/proto/python/kernel_tests/test_case.py @@ -0,0 +1,35 @@ +# ============================================================================= +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= +"""Test case base for testing proto operations.""" + +# Python3 preparedness imports. +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import ctypes as ct +import os + +from tensorflow.python.platform import test + + +class ProtoOpTestCase(test.TestCase): + + def __init__(self, methodName='runTest'): # pylint: disable=invalid-name + super(ProtoOpTestCase, self).__init__(methodName) + lib = os.path.join(os.path.dirname(__file__), 'libtestexample.so') + if os.path.isfile(lib): + ct.cdll.LoadLibrary(lib) diff --git a/tensorflow/contrib/proto/python/kernel_tests/test_example.proto b/tensorflow/contrib/proto/python/kernel_tests/test_example.proto new file mode 100644 index 0000000000000000000000000000000000000000..a2c88e372bf7c6b7f14c5bb55776b66c4c06bcd4 --- /dev/null +++ b/tensorflow/contrib/proto/python/kernel_tests/test_example.proto @@ -0,0 +1,182 @@ +// Test description and protos to work with it. +// +// Many of the protos in this file are for unit tests that haven't been written yet. + +syntax = "proto2"; + +import "tensorflow/core/framework/types.proto"; + +package tensorflow.contrib.proto; + +// A TestCase holds a proto and a bunch of assertions +// about how it should decode. +message TestCase { + // A batch of primitives to be serialized and decoded. 
+ repeated RepeatedPrimitiveValue primitive = 1; + // The shape of the batch. + repeated int32 shape = 2; + // Expected sizes for each field. + repeated int32 sizes = 3; + // Expected values for each field. + repeated FieldSpec field = 4; +}; + +// FieldSpec describes the expected output for a single field. +message FieldSpec { + optional string name = 1; + optional tensorflow.DataType dtype = 2; + optional RepeatedPrimitiveValue expected = 3; +}; + +message TestValue { + optional PrimitiveValue primitive_value = 1; + optional EnumValue enum_value = 2; + optional MessageValue message_value = 3; + optional RepeatedMessageValue repeated_message_value = 4; + optional RepeatedPrimitiveValue repeated_primitive_value = 6; +} + +message PrimitiveValue { + optional double double_value = 1; + optional float float_value = 2; + optional int64 int64_value = 3; + optional uint64 uint64_value = 4; + optional int32 int32_value = 5; + optional fixed64 fixed64_value = 6; + optional fixed32 fixed32_value = 7; + optional bool bool_value = 8; + optional string string_value = 9; + optional bytes bytes_value = 12; + optional uint32 uint32_value = 13; + optional sfixed32 sfixed32_value = 15; + optional sfixed64 sfixed64_value = 16; + optional sint32 sint32_value = 17; + optional sint64 sint64_value = 18; +} + +// NOTE: This definition must be kept in sync with PackedPrimitiveValue. 
+message RepeatedPrimitiveValue { + repeated double double_value = 1; + repeated float float_value = 2; + repeated int64 int64_value = 3; + repeated uint64 uint64_value = 4; + repeated int32 int32_value = 5; + repeated fixed64 fixed64_value = 6; + repeated fixed32 fixed32_value = 7; + repeated bool bool_value = 8; + repeated string string_value = 9; + repeated bytes bytes_value = 12; + repeated uint32 uint32_value = 13; + repeated sfixed32 sfixed32_value = 15; + repeated sfixed64 sfixed64_value = 16; + repeated sint32 sint32_value = 17; + repeated sint64 sint64_value = 18; + repeated PrimitiveValue message_value = 19; + + // Optional fields with explicitly-specified defaults. + optional double double_default = 20 [default = 1.0]; + optional float float_default = 21 [default = 2.0]; + optional int64 int64_default = 22 [default = 3]; + optional uint64 uint64_default = 23 [default = 4]; + optional int32 int32_default = 24 [default = 5]; + optional fixed64 fixed64_default = 25 [default = 6]; + optional fixed32 fixed32_default = 26 [default = 7]; + optional bool bool_default = 27 [default = true]; + optional string string_default = 28 [default = "a"]; + optional bytes bytes_default = 29 [default = "a longer default string"]; + optional uint32 uint32_default = 30 [default = 4294967295]; + optional sfixed32 sfixed32_default = 31 [default = 10]; + optional sfixed64 sfixed64_default = 32 [default = 11]; + optional sint32 sint32_default = 33 [default = 12]; + optional sint64 sint64_default = 34 [default = 13]; +} + +// A PackedPrimitiveValue looks exactly the same as a RepeatedPrimitiveValue +// in the text format, but the binary serialization is different. +// We test the packed representations by loading the same test cases +// using this definition instead of RepeatedPrimitiveValue. +// NOTE: This definition must be kept in sync with RepeatedPrimitiveValue +// in every way except the packed=true declaration.
+message PackedPrimitiveValue { + repeated double double_value = 1 [packed = true]; + repeated float float_value = 2 [packed = true]; + repeated int64 int64_value = 3 [packed = true]; + repeated uint64 uint64_value = 4 [packed = true]; + repeated int32 int32_value = 5 [packed = true]; + repeated fixed64 fixed64_value = 6 [packed = true]; + repeated fixed32 fixed32_value = 7 [packed = true]; + repeated bool bool_value = 8 [packed = true]; + repeated string string_value = 9; + repeated bytes bytes_value = 12; + repeated uint32 uint32_value = 13 [packed = true]; + repeated sfixed32 sfixed32_value = 15 [packed = true]; + repeated sfixed64 sfixed64_value = 16 [packed = true]; + repeated sint32 sint32_value = 17 [packed = true]; + repeated sint64 sint64_value = 18 [packed = true]; + repeated PrimitiveValue message_value = 19; + + optional double double_default = 20 [default = 1.0]; + optional float float_default = 21 [default = 2.0]; + optional int64 int64_default = 22 [default = 3]; + optional uint64 uint64_default = 23 [default = 4]; + optional int32 int32_default = 24 [default = 5]; + optional fixed64 fixed64_default = 25 [default = 6]; + optional fixed32 fixed32_default = 26 [default = 7]; + optional bool bool_default = 27 [default = true]; + optional string string_default = 28 [default = "a"]; + optional bytes bytes_default = 29 [default = "a longer default string"]; + optional uint32 uint32_default = 30 [default = 4294967295]; + optional sfixed32 sfixed32_default = 31 [default = 10]; + optional sfixed64 sfixed64_default = 32 [default = 11]; + optional sint32 sint32_default = 33 [default = 12]; + optional sint64 sint64_default = 34 [default = 13]; +} + +message EnumValue { + enum Color { + RED = 0; + ORANGE = 1; + YELLOW = 2; + GREEN = 3; + BLUE = 4; + INDIGO = 5; + VIOLET = 6; + }; + optional Color enum_value = 14; + repeated Color repeated_enum_value = 15; +} + + +message InnerMessageValue { + optional float float_value = 2; + repeated bytes bytes_values = 8; +} + 
+message MiddleMessageValue { + repeated int32 int32_values = 5; + optional InnerMessageValue message_value = 11; + optional uint32 uint32_value = 13; +} + +message MessageValue { + optional double double_value = 1; + optional MiddleMessageValue message_value = 11; +} + +message RepeatedMessageValue { + message NestedMessageValue { + optional float float_value = 2; + repeated bytes bytes_values = 8; + } + + repeated NestedMessageValue message_values = 11; +} + +// Message containing fields with field numbers higher than any field above. An +// instance of this message is prepended to each binary message in the test to +// exercise the code path that handles fields encoded out of order of field +// number. +message ExtraFields { + optional string string_value = 1776; + optional bool bool_value = 1777; +} diff --git a/tensorflow/contrib/proto/python/ops/BUILD b/tensorflow/contrib/proto/python/ops/BUILD new file mode 100644 index 0000000000000000000000000000000000000000..f17065477e1e14d24a16338b1a11d98da44639fe --- /dev/null +++ b/tensorflow/contrib/proto/python/ops/BUILD @@ -0,0 +1,44 @@ +package(default_visibility = ["//visibility:public"]) + +licenses(["notice"]) # Apache 2.0 + +exports_files(["LICENSE"]) + +load( + "//tensorflow:tensorflow.bzl", + "tf_gen_op_wrapper_py", +) + +py_library( + name = "decode_proto_op_py", + srcs = ["decode_proto_op.py"], + deps = [ + ":gen_decode_proto_op_py", + "//tensorflow/python:framework_ops", + ], +) + +tf_gen_op_wrapper_py( + name = "gen_decode_proto_op_py", + out = "gen_decode_proto_op.py", + deps = [ + "//tensorflow/core:decode_proto_ops_op_lib", + ], +) + +py_library( + name = "encode_proto_op_py", + srcs = ["encode_proto_op.py"], + deps = [ + ":gen_encode_proto_op_py", + "//tensorflow/python:framework_ops", + ], +) + +tf_gen_op_wrapper_py( + name = "gen_encode_proto_op_py", + out = "gen_encode_proto_op.py", + deps = [ + "//tensorflow/core:encode_proto_ops_op_lib", + ], +) diff --git 
a/tensorflow/contrib/proto/python/ops/decode_proto_op.py b/tensorflow/contrib/proto/python/ops/decode_proto_op.py new file mode 100644 index 0000000000000000000000000000000000000000..7dc000ebe49724e7571ade500eb29de25be89485 --- /dev/null +++ b/tensorflow/contrib/proto/python/ops/decode_proto_op.py @@ -0,0 +1,25 @@ +# ============================================================================= +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +# pylint: disable=wildcard-import,unused-import +"""Protocol Buffer decoding from tensors.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.proto.python.ops.gen_decode_proto_op import decode_proto_v2 as decode_proto +from tensorflow.python.framework import ops +ops.NotDifferentiable("DecodeProtoV2") diff --git a/tensorflow/contrib/proto/python/ops/encode_proto_op.py b/tensorflow/contrib/proto/python/ops/encode_proto_op.py new file mode 100644 index 0000000000000000000000000000000000000000..ac12198b2e462b73238778e91c1e9b6be156182c --- /dev/null +++ b/tensorflow/contrib/proto/python/ops/encode_proto_op.py @@ -0,0 +1,25 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +# pylint: disable=wildcard-import,unused-import +"""Protocol Buffer encoding from tensors.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.proto.python.ops.gen_encode_proto_op import encode_proto +from tensorflow.python.framework import ops + +ops.NotDifferentiable("EncodeProto") diff --git a/tensorflow/contrib/quantize/README.md b/tensorflow/contrib/quantize/README.md index 348c824a4072c3329ac4a3441c19c71598bc9c03..c83623ec947c1550991352a9dd9a5c6ee9282290 100644 --- a/tensorflow/contrib/quantize/README.md +++ b/tensorflow/contrib/quantize/README.md @@ -2,14 +2,17 @@ tf.contrib.quantize provides tools for transforming graphs to include ops to model quantization of weights, biases and activations during both training and -inference. This is done using the +inference. The details of the transformation implemented in this package are +described here [1]. + +This is done using the [fake quantization op](https://www.tensorflow.org/versions/r0.12/api_docs/python/array_ops/fake_quantization). -Recent literature has shown that fixed point networks provide comparable -performance to floating point networks [1]. This is achieved by modeling the -quantization operation during training in both the forward and backward passes.
+Literature has shown that fixed point networks provide comparable performance to +floating point networks [2]. This is achieved by modeling the quantization +operation during training in both the forward and backward passes. The fake quantization operator achieves this by modeling the quantizer as a pass -through estimator [2]. Note that during back propagation, the parameters are +through estimator [3]. Note that during back propagation, the parameters are updated at high precision as this is needed to ensure sufficient precision in accumulating tiny adjustments to the parameters. However, for the forward pass, the parameters and activations are quantized to the desired lower precision. @@ -61,9 +64,11 @@ These rewrites are an active area of research and experimentation, so the rewrites and quantized training will likely not work across all models, though we hope to work towards generalizing these techniques. +[1] B.Jacob et al., "Quantization and Training of Neural Networks for Efficient +Integer-Arithmetic-Only Inference", https://arxiv.org/abs/1712.05877 -[1] P.Gysel, "HARDWARE-ORIENTED APPROXIMATION OF CONVOLUTIONAL +[2] P.Gysel et al., "HARDWARE-ORIENTED APPROXIMATION OF CONVOLUTIONAL NEURAL NETWORKS", https://arxiv.org/pdf/1604.03168.pdf -[2] Y.Bengio, "Estimating or Propagating Gradients Through Stochastic Neurons -for Conditional Computation", https://arxiv.org/abs/1308.3432 +[3] Y.Bengio et al., "Estimating or Propagating Gradients Through Stochastic +Neurons for Conditional Computation", https://arxiv.org/abs/1308.3432 diff --git a/tensorflow/contrib/quantize/python/fold_batch_norms.py b/tensorflow/contrib/quantize/python/fold_batch_norms.py index 4a8f8a04cc521d9ee7885b4318814a6f15008eef..aa0ef643088ef36b84596d08f78c29594ceca2d6 100644 --- a/tensorflow/contrib/quantize/python/fold_batch_norms.py +++ b/tensorflow/contrib/quantize/python/fold_batch_norms.py @@ -545,7 +545,7 @@ def _GetBatchNormParams(graph, context, has_scaling): gamma_tensor = 
graph.get_tensor_by_name(op.name + ':0') if not has_scaling: - gamma_tensor = array_ops.ones(batch_mean_tensor.shape) + gamma_tensor = array_ops.ones(moving_mean_tensor.shape) return _BatchNormMatch( layer_op=None, diff --git a/tensorflow/contrib/quantize/python/quant_ops.py b/tensorflow/contrib/quantize/python/quant_ops.py index a4f7b1b22139588be29171126d43b872d6658168..5c0e17dc8646ce7850e26ffaa80c0201cea456af 100644 --- a/tensorflow/contrib/quantize/python/quant_ops.py +++ b/tensorflow/contrib/quantize/python/quant_ops.py @@ -51,7 +51,6 @@ def LastValueQuantize(inputs, per_channel=False, init_min=-6.0, init_max=6.0, - updates_collection=ops.GraphKeys.UPDATE_OPS, vars_collection=ops.GraphKeys.MOVING_AVERAGE_VARIABLES, name_prefix='LastValueQuant', reuse=None, @@ -69,8 +68,6 @@ def LastValueQuantize(inputs, quantization ranges per output channel. init_min: a float scalar, the initial value for variable min. init_max: a float scalar, the initial value for variable max. - updates_collection: (Optional) collections to collect the update ops for - computation. vars_collection: (Optional) collection where to store variables for quantization interval ends. name_prefix: name_prefix for created nodes. @@ -133,7 +130,6 @@ def LastValueQuantize(inputs, # TFLite requires that 0.0 if always in the [min; max] range. batch_min = math_ops.minimum(batch_min, 0.0) assign_min = state_ops.assign(min_var, batch_min, name='AssignMinLast') - ops.add_to_collection(updates_collection, assign_min.op) if per_channel: if input_dim >= 2: @@ -146,7 +142,6 @@ def LastValueQuantize(inputs, # TFLite requires that 0.0 if always in the [min; max] range. 
batch_max = math_ops.maximum(batch_max, 0.0) assign_max = state_ops.assign(max_var, batch_max, name='AssignMaxLast') - ops.add_to_collection(updates_collection, assign_max.op) return _FakeQuantWithMinMaxVars( inputs, @@ -163,7 +158,6 @@ def MovingAvgQuantize(inputs, init_min=-6.0, init_max=6.0, ema_decay=0.999, - updates_collection=ops.GraphKeys.UPDATE_OPS, vars_collection=ops.GraphKeys.MOVING_AVERAGE_VARIABLES, name_prefix='MovingAvgQuantize', reuse=None, @@ -182,8 +176,6 @@ def MovingAvgQuantize(inputs, init_min: a float scalar, the initial value for variable min. init_max: a float scalar, the initial value for variable max. ema_decay: EMA decay parameter. - updates_collection: (Optional) collections to collect the update ops for - computation. vars_collection: (Optional) collection where to store variables for quantization interval ends. name_prefix: name_prefix for created nodes. @@ -246,7 +238,6 @@ def MovingAvgQuantize(inputs, batch_min = math_ops.minimum(batch_min, 0.0) assign_min = moving_averages.assign_moving_average( min_var, batch_min, ema_decay, name='AssignMinEma') - ops.add_to_collection(updates_collection, assign_min.op) if per_channel: if input_dim >= 2: @@ -260,7 +251,6 @@ def MovingAvgQuantize(inputs, batch_max = math_ops.maximum(batch_max, 0.0) assign_max = moving_averages.assign_moving_average( max_var, batch_max, ema_decay, name='AssignMaxEma') - ops.add_to_collection(updates_collection, assign_max.op) return _FakeQuantWithMinMaxVars( inputs, diff --git a/tensorflow/contrib/quantize/python/quantize.py b/tensorflow/contrib/quantize/python/quantize.py index 2889016a84352cdc336bb7c7369b627ebf6ed46d..d2d0426d233aaadb4ffd0fb222c77ade0a98278c 100644 --- a/tensorflow/contrib/quantize/python/quantize.py +++ b/tensorflow/contrib/quantize/python/quantize.py @@ -27,6 +27,7 @@ from tensorflow.contrib.quantize.python import quant_ops from tensorflow.python.framework import ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops 
import math_ops +from tensorflow.python.platform import tf_logging as logging # Quantizable operation types that are supported by the quantization rewrite. _QUANTIZABLE_TYPES = {'Conv2D', 'MatMul', 'DepthwiseConv2dNative'} @@ -41,9 +42,16 @@ def Quantize(graph, activation_bits=8, ema_decay=0.999, quant_delay=None, - vars_collection=ops.GraphKeys.GLOBAL_VARIABLES): + vars_collection=ops.GraphKeys.GLOBAL_VARIABLES, + scope=None): """Updates graph with quantization operations. + Currently we quantize the following tensors: + * Conv/MatMul: Quantize the weights if it matches. + * Activation: Quantize the output if it matches. + * Bypass/Post-activation Bypass: Quantize both input and output + if it matches. + Args: graph: Graph to modify. is_training: Whether quantizing training graph or eval graph. @@ -57,13 +65,21 @@ def Quantize(graph, training. vars_collection: (Optional) Collection where to store the variables for quantization interval ends. + scope: The scope to be transformed. If it's not None, only the ops which + are in this scope will be transformed. Raises: ValueError: When quantization fails. """ + if scope and not scope.endswith('/'): + scope += '/' + input_to_ops_map = input_to_ops.InputToOps(graph) for layer_match in _FindLayersToQuantize(graph): # Quantize the weights. context = _GetContextFromOp(layer_match.layer_op) + + # If `scope` is given, only quantize it if the consumer of weights + # (the layer op) is in the right scope. _InsertQuantOp( context, 'weights_quant', @@ -74,7 +90,8 @@ def Quantize(graph, quant_delay=quant_delay, narrow_range=True, vars_collection=vars_collection, - bits=weight_bits) + bits=weight_bits, + consumer_scope=scope) # Quantize the activations. 
consumer_ops = input_to_ops_map.ConsumerOperations( @@ -82,6 +99,9 @@ def Quantize(graph, add_context = context if layer_match.bypass_op: add_context = re.search(r'^(.*)/([^/]+)', context).group(1) + + # If `scope` is given, only quantize it if the producer of weights + # (usually it's the layer op) is in the right scope. _InsertQuantOp( add_context, 'act_quant', @@ -93,11 +113,14 @@ def Quantize(graph, quant_delay=quant_delay, vars_collection=vars_collection, bits=activation_bits, - init_min=0.0) + init_min=0.0, + producer_scope=scope) # Quantize the inputs and output to the bypass (if it exists). The input to # the bypass is the bias add, and the output is the activation. if layer_match.bypass_op is not None: + # If `scope` is given, only quantize it if the both the producer and the + # consumer are in the right scope. _InsertQuantOp( context, 'conv_quant', @@ -107,7 +130,9 @@ def Quantize(graph, ema_decay=ema_decay, quant_delay=quant_delay, vars_collection=vars_collection, - bits=activation_bits) + bits=activation_bits, + producer_scope=scope, + consumer_scope=scope) _InsertQuantOp( add_context, 'add_quant', @@ -118,12 +143,16 @@ def Quantize(graph, ema_decay=ema_decay, quant_delay=quant_delay, vars_collection=vars_collection, - bits=activation_bits) + bits=activation_bits, + producer_scope=scope, + consumer_scope=scope) # Quantize bypass ops that occur after the activation. if layer_match.post_activation_bypass_op is not None: post_activation_bypass_context = re.search( r'^(.*)/([^/]+)', layer_match.post_activation_bypass_op.name).group(1) + # If `scope` is given, only quantize it if the producer is in the right + # scope. 
_InsertQuantOp( post_activation_bypass_context, 'post_activation_bypass_quant', @@ -135,7 +164,8 @@ def Quantize(graph, ema_decay=ema_decay, quant_delay=quant_delay, vars_collection=vars_collection, - bits=activation_bits) + bits=activation_bits, + producer_scope=scope) def _FindLayersToQuantize(graph): @@ -382,7 +412,9 @@ def _InsertQuantOp(context, ema_decay=0.999, quant_delay=None, vars_collection=ops.GraphKeys.GLOBAL_VARIABLES, - narrow_range=False): + narrow_range=False, + producer_scope=None, + consumer_scope=None): """Inserts a quant op between a producer op and (multiple) consumer ops. Args: @@ -407,16 +439,42 @@ def _InsertQuantOp(context, quantization interval ends. narrow_range: Whether to use the narrow quantization range [1; 2^bits - 1] or wide range [0; 2^bits - 1]. + producer_scope: The restriction of producer scope. If not None, the new op + will be inserted only when the producer is in this scope. + consumer_scope: The restriction of producer scope. If not None, the new op + will be inserted only when all the consumers are in this scope. Raises: ValueError: When producer operation is not directly connected to the consumer operation. 
""" + if producer_scope and not producer.name.startswith(producer_scope): + logging.info( + '_InsertQuantOp ignores context="%s" name="%s" ' + 'because producer "%s" is not in scope "%s"', + context, name, producer.name, producer_scope) + return + + if consumer_scope: + consumers_in_scope = [] + for consumer in consumers: + if consumer.name.startswith(consumer_scope): + consumers_in_scope.append(consumer) + else: + logging.info( + '_InsertQuantOp context="%s" name="%s" ignores ' + 'consumer "%s" because it is not in scope "%s"', + context, name, consumer.name, consumer_scope) + return + consumers = consumers_in_scope + name_prefix = _AddContextToName(context, name) # This is needed on TPU where name_scope == 'TPUReplicate/loop', and # name_prefix starts with 'TPUReplicate/loop/'; without dropping it # variables are created as TPUReplicate/loop/TPUReplicate/loop/..., which # breaks things later. - name_prefix = common.DropStringPrefix(name_prefix, ops.get_name_scope() + '/') + name_scope = ops.get_name_scope() + if name_scope: + name_prefix = common.DropStringPrefix(name_prefix, name_scope + '/') inputs = producer.outputs[0] # Prevent ops from being quantized multiple times. Bypass ops can sometimes diff --git a/tensorflow/contrib/quantize/python/quantize_graph.py b/tensorflow/contrib/quantize/python/quantize_graph.py index 0b74b438ac317967bbe10ad936b451de6f69d62c..11d052d7f491dc029d1bda9b47364d6e9c880a67 100644 --- a/tensorflow/contrib/quantize/python/quantize_graph.py +++ b/tensorflow/contrib/quantize/python/quantize_graph.py @@ -28,7 +28,8 @@ def _create_graph(input_graph=None, weight_bits=8, activation_bits=8, quant_delay=None, - freeze_bn_delay=None): + freeze_bn_delay=None, + scope=None): """Rewrites an input_graph in place for simulated quantization. The graph has fake quantization ops inserted to simulate the error @@ -48,6 +49,8 @@ def _create_graph(input_graph=None, frozen and used instead of batch statistics during training. 
freeze_bn_delay should be greater than quant_delay and should correspond to the number of steps when training has almost converged + scope: The scope to be transformed. If it's not None, only the ops which + are in this scope will be transformed. Raises: ValueError: If elements contains an element that isn't a tf.Tensor or @@ -66,7 +69,8 @@ def _create_graph(input_graph=None, is_training, quant_delay=quant_delay, weight_bits=weight_bits, - activation_bits=activation_bits) + activation_bits=activation_bits, + scope=scope) def create_training_graph(input_graph=None, quant_delay=0): @@ -133,7 +137,8 @@ def experimental_create_training_graph(input_graph=None, weight_bits=8, activation_bits=8, quant_delay=0, - freeze_bn_delay=None): + freeze_bn_delay=None, + scope=None): """Rewrites a training input_graph in place for simulated quantization. Variables added by the rewrite get added to the global variables collection. @@ -165,6 +170,8 @@ def experimental_create_training_graph(input_graph=None, frozen and used instead of batch statistics during training. freeze_bn_delay should be greater than quant_delay and should correspond to when training has almost converged + scope: The scope to be transformed. If it's not None, only the ops which + are in this scope will be transformed. Raises: ValueError: If elements contains an element that isn't a tf.Tensor or @@ -177,12 +184,14 @@ def experimental_create_training_graph(input_graph=None, weight_bits=weight_bits, activation_bits=activation_bits, quant_delay=quant_delay, - freeze_bn_delay=freeze_bn_delay) + freeze_bn_delay=freeze_bn_delay, + scope=scope) def experimental_create_eval_graph(input_graph=None, weight_bits=8, - activation_bits=8): + activation_bits=8, + scope=None): """Rewrites an eval input_graph in place for simulated quantization. Variables added by the rewrite get added to the global variables collection. @@ -200,8 +209,8 @@ def experimental_create_eval_graph(input_graph=None, default graph. 
weight_bits: Number of bits to use for quantizing weights. activation_bits: Number of bits to use for quantizing activations. - - + scope: The scope to be transformed. If it's not None, only the ops which + are in this scope will be transformed. Raises: ValueError: If elements contains an element that isn't a tf.Tensor or @@ -211,4 +220,5 @@ def experimental_create_eval_graph(input_graph=None, input_graph=input_graph, is_training=False, weight_bits=weight_bits, - activation_bits=activation_bits) + activation_bits=activation_bits, + scope=scope) diff --git a/tensorflow/contrib/quantize/python/quantize_graph_test.py b/tensorflow/contrib/quantize/python/quantize_graph_test.py index b9d03c1bc059fe7bcce75978f503cbbf76090dbd..caf8ff28d50d2880d491d04c1ed368597519dcd7 100644 --- a/tensorflow/contrib/quantize/python/quantize_graph_test.py +++ b/tensorflow/contrib/quantize/python/quantize_graph_test.py @@ -66,6 +66,20 @@ class QuantizeGraphTest(test_util.TensorFlowTestCase): for fn in rewrite_fns: test_fn(fn) + def _RunTestOverExperimentalRewritesWithScope(self, test_fn, scope): + def with_absent_scope(fn): + def fn_with_absent_scope(*args): + fn(*args, scope=scope) + return fn_with_absent_scope + rewrite_fns = [ + with_absent_scope( + quantize_graph.experimental_create_training_graph), + with_absent_scope( + quantize_graph.experimental_create_eval_graph), + ] + for fn in rewrite_fns: + test_fn(fn) + def testRewrite(self): self._RunTestOverAllRewrites(self._TestRewrite) @@ -99,6 +113,34 @@ class QuantizeGraphTest(test_util.TensorFlowTestCase): # Ensure that variables were added. self.assertTrue(len(orig_variable_names) < len(q_variables)) + def testWithPreActivationBypass(self): + self._RunTestOverAllRewrites(self._TestWithPreActivationBypass) + + def _TestWithPreActivationBypass(self, rewrite_fn): + # Tests that the default graph is correctly used when no args are provided + # to rewrite_fn. 
+ with ops.Graph().as_default() as g: + self._ConvLayer(pre_activation_bypass=True, scope='scope1') + rewrite_fn() + + op_names = [op.name for op in g.get_operations()] + self.assertTrue( + any('scope1/add_quant/' in name for name in op_names)) + + def testWithPostActivationBypass(self): + self._RunTestOverAllRewrites(self._TestWithPostActivationBypass) + + def _TestWithPostActivationBypass(self, rewrite_fn): + # Tests that the default graph is correctly used when no args are provided + # to rewrite_fn. + with ops.Graph().as_default() as g: + self._ConvLayer(post_activation_bypass=True, scope='scope1') + rewrite_fn() + + op_names = [op.name for op in g.get_operations()] + self.assertTrue(any( + 'scope1/post_activation_bypass_quant/' in name for name in op_names)) + def testQuantDelay(self): self._RunTestOverTrainingRewrites(self._TestQuantDelay) @@ -224,20 +266,66 @@ class QuantizeGraphTest(test_util.TensorFlowTestCase): graph_def_after = str(g.as_graph_def()) self.assertEqual(graph_def_before, graph_def_after) - def _ConvLayer(self): + def testRewriteWithScope(self): + self._RunTestOverExperimentalRewritesWithScope( + self._TestRewriteWithScope, 'scope1') + + def _TestRewriteWithScope(self, rewrite_fn): + graph = ops.Graph() + with graph.as_default(): + scope1_output = self._ConvLayer(scope='scope1') + self._ConvLayer(input_tensor=scope1_output, scope='scope2') + + rewrite_fn(graph) + + op_names = [op.name for op in graph.get_operations()] + # The weights and activation of scope1 is quantized, but not scope2. 
+ self.assertTrue( + any('scope1/Conv/act_quant' in name for name in op_names)) + self.assertTrue( + any('scope1/Conv/weights_quant' in name for name in op_names)) + self.assertFalse( + any('scope2/Conv/act_quant' in name for name in op_names)) + self.assertFalse( + any('scope2/Conv/weights_quant' in name for name in op_names)) + + def testRewriteWithNonMatchingScope(self): + self._RunTestOverExperimentalRewritesWithScope( + self._TestRewriteWithNonMatchingScope, 'NonExistingScope') + + def _TestRewriteWithNonMatchingScope(self, rewrite_fn): + graph = ops.Graph() + with graph.as_default(): + self._ConvLayer() + + op_names_before_rewrite = set([op.name for op in graph.get_operations()]) + rewrite_fn(graph) + op_names_after_rewrite = set([op.name for op in graph.get_operations()]) + + # No ops should be inserted or removed. + self.assertEqual(op_names_before_rewrite, op_names_after_rewrite) + + def _ConvLayer( + self, input_tensor=None, scope='test', pre_activation_bypass=False, + post_activation_bypass=False): """Add a basic convolution layer to the default graph.""" batch_size, height, width, depth = 5, 128, 128, 3 - inputs = array_ops.zeros((batch_size, height, width, depth)) + if input_tensor is None: + input_tensor = array_ops.zeros((batch_size, height, width, depth)) weight_init = init_ops.truncated_normal_initializer - conv = layers.conv2d( - inputs, - 32, [5, 5], - stride=2, - padding='SAME', - weights_initializer=weight_init(0.09), - activation_fn=None, - scope='test') - _ = nn_ops.relu6(conv) + with ops.name_scope(scope): + output = layers.conv2d( + input_tensor, + depth, [5, 5], + padding='SAME', + weights_initializer=weight_init(0.09), + activation_fn=None) + if pre_activation_bypass: + output += input_tensor + output = nn_ops.relu6(output) + if post_activation_bypass: + output += input_tensor + return output if __name__ == '__main__': diff --git a/tensorflow/contrib/quantize/python/quantize_test.py b/tensorflow/contrib/quantize/python/quantize_test.py 
index 98f05c8bfc13094aff2839b2a6aa0da5c653da2b..d37c83d6839f02c52a72cac97c9238c135dc2f66 100644 --- a/tensorflow/contrib/quantize/python/quantize_test.py +++ b/tensorflow/contrib/quantize/python/quantize_test.py @@ -247,6 +247,53 @@ class QuantizeTest(test_util.TensorFlowTestCase): self.assertTrue(not op.name.startswith('name_scope/name_scope/'), 'Broken op: %s' % op.name) + def testWithNullNameScope(self): + self._RunTestOverParameters(self._TestWithNullNameScope) + + def _TestWithNullNameScope(self, is_training): + graph = ops.Graph() + with graph.as_default(): + with graph.name_scope(None): + batch_size, height, width, depth = 5, 128, 128, 32 + input1 = array_ops.zeros((batch_size, height, width, depth)) + _ = conv2d( + input1, + 32, [5, 5], + padding='SAME', + weights_initializer=self._WeightInit(0.09), + activation_fn=None, + scope='test') + + quantize.Quantize(graph, is_training, weight_bits=8, activation_bits=8) + # Passes if Quantize() does not crash. + + def testWithNonMatchingNameScope(self): + self._RunTestOverParameters(self._testWithNonMatchingNameScope) + + def _testWithNonMatchingNameScope(self, is_training): + graph = ops.Graph() + with graph.as_default(): + with graph.name_scope('name_scope'): + batch_size, height, width, depth = 5, 128, 128, 3 + input1 = array_ops.zeros((batch_size, height, width, depth)) + _ = conv2d( + input1, + 32, [5, 5], + stride=2, + padding='SAME', + weights_initializer=self._WeightInit(0.09), + activation_fn=None, + scope='test') + + op_names_before_quantize = set([op.name for op in graph.get_operations()]) + quantize.Quantize( + graph, is_training, weight_bits=8, activation_bits=8, + scope='NonExisting/') + op_names_after_quantize = set([op.name for op in graph.get_operations()]) + + # No ops should be inserted or removed. + self.assertEqual(op_names_before_quantize, op_names_after_quantize) + def _WeightInit(self, stddev): """Returns truncated normal variable initializer. 
diff --git a/tensorflow/contrib/recurrent/BUILD b/tensorflow/contrib/recurrent/BUILD new file mode 100644 index 0000000000000000000000000000000000000000..b3cb04ce26d96333f516f1298c8d5c331964f05b --- /dev/null +++ b/tensorflow/contrib/recurrent/BUILD @@ -0,0 +1,106 @@ +# Recurrent library. + +package(default_visibility = ["//visibility:public"]) + +licenses(["notice"]) # Apache 2.0 + +exports_files(["LICENSE"]) + +load("//tensorflow:tensorflow.bzl", "cuda_py_tests") + +py_library( + name = "recurrent_py", + srcs = ["python/recurrent_api.py"], + srcs_version = "PY2AND3", + deps = [ + ":functional_rnn_ops_py", + ":recurrent_ops_py", + ], +) + +py_library( + name = "recurrent_ops_py", + srcs = ["python/ops/recurrent.py"], + srcs_version = "PY2AND3", + deps = [ + "//tensorflow/contrib/framework:framework_py", + "//tensorflow/contrib/util:util_py", + "//tensorflow/core:protos_all_py", + "//tensorflow/python:dtypes", + "//tensorflow/python:framework_ops", + "//tensorflow/python:function", + "//tensorflow/python:functional_ops", + "//tensorflow/python:math_ops", + "//tensorflow/python:platform", + ], +) + +py_library( + name = "functional_rnn_ops_py", + srcs = ["python/ops/functional_rnn.py"], + srcs_version = "PY2AND3", + deps = [ + ":recurrent_ops_py", + "//tensorflow/contrib/framework:framework_py", + "//tensorflow/contrib/util:util_py", + "//tensorflow/core:protos_all_py", + "//tensorflow/python:dtypes", + "//tensorflow/python:framework_ops", + "//tensorflow/python:function", + "//tensorflow/python:gradients", + "//tensorflow/python:math_ops", + "//tensorflow/python:standard_ops", + ], +) + +cuda_py_tests( + name = "recurrent_ops_test", + size = "small", + srcs = ["python/kernel_tests/recurrent_test.py"], + additional_deps = [ + ":recurrent_ops_py", + "//third_party/py/numpy", + "//tensorflow/core:protos_all_py", + "//tensorflow/python:array_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:constant_op", + "//tensorflow/python:dtypes", + 
"//tensorflow/python:errors", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:framework_ops", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:math_ops", + "//tensorflow/python:platform", + "//tensorflow/python:random_ops", + "//tensorflow/python:random_seed", + "//tensorflow/python:script_ops", + "//tensorflow/python:variables", + ], + tags = ["nopip"], +) + +cuda_py_tests( + name = "functional_rnn_ops_test", + size = "small", + srcs = ["python/kernel_tests/functional_rnn_test.py"], + additional_deps = [ + ":functional_rnn_ops_py", + "//third_party/py/numpy", + "//tensorflow/contrib/layers:layers_py", + "//tensorflow/contrib/tpu:tpu", + "//tensorflow/core:protos_all_py", + "//tensorflow/python:array_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:gradients", + "//tensorflow/python:init_ops", + "//tensorflow/python:platform_test", + "//tensorflow/python:rnn", + "//tensorflow/python:rnn_cell", + "//tensorflow/python:util", + "//tensorflow/python:variable_scope", + "//tensorflow/python:variables", + ], + tags = ["nopip"], +) diff --git a/tensorflow/contrib/recurrent/README.md b/tensorflow/contrib/recurrent/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86e10eee517f69d1316b76af85cb13a2bfea2984 --- /dev/null +++ b/tensorflow/contrib/recurrent/README.md @@ -0,0 +1,13 @@ +# Recurrent computation library + +The recurrent computation library contains code to perform recurrent +computations. + +Its chief application is to implement recurrent neural networks (RNNs, LSTMs, +etc), which is implemented in `functional_rnn.py`. Similar techniques may be +used to implement deep networks. + +The computation saves the activations in the forward pass, and computes the +gradients in the backward pass using a single accumulator. 
+ +The `functional_rnn` interface is compatible with the `dynamic_rnn` API. diff --git a/tensorflow/contrib/recurrent/python/kernel_tests/functional_rnn_test.py b/tensorflow/contrib/recurrent/python/kernel_tests/functional_rnn_test.py new file mode 100644 index 0000000000000000000000000000000000000000..0f19ac7dbe0cee2eb6c780ec5ea6266bc847abd7 --- /dev/null +++ b/tensorflow/contrib/recurrent/python/kernel_tests/functional_rnn_test.py @@ -0,0 +1,163 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for Functional RNN.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + + +from tensorflow.contrib.recurrent.python.ops import functional_rnn +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.framework import test_util +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gradients_impl +from tensorflow.python.ops import rnn as rnn_lib +from tensorflow.python.ops import rnn_cell_impl +from tensorflow.python.ops import variables +import tensorflow.python.ops.nn_grad # pylint: disable=unused-import +import tensorflow.python.ops.tensor_array_grad # pylint: disable=unused-import +from tensorflow.python.platform import test as test_lib +from tensorflow.python.platform import tf_logging as logging + + +def _CreateStackedLstmCell(*cell_sizes): + subcells = [rnn_cell_impl.LSTMCell(cell_size) for cell_size in cell_sizes] + return rnn_cell_impl.MultiRNNCell(subcells) + + +class FunctionalRnnTest(test_util.TensorFlowTestCase): + + _BATCH_SIZE = 3 + _TOTAL_TIME = 5 + _INPUT_SIZE = 11 + _NUM_UNITS = 7 + + # Set this to some output if you want to use it. + _LSTM_GRAPH_DEF_FILEPATH = None + + _CELLDEFS = { + 'gru': (rnn_cell_impl.GRUCell, [_NUM_UNITS]), + 'lstm': (rnn_cell_impl.LSTMCell, [_NUM_UNITS]), + 'stacked_lstm': (_CreateStackedLstmCell, [_NUM_UNITS] * 3) + } + + def _CreateCell(self, celldef_name): + func, args = self._CELLDEFS[celldef_name] + return func(*args) + + def _CreateInputs(self): + inputs = np.random.random([FunctionalRnnTest._BATCH_SIZE, + FunctionalRnnTest._TOTAL_TIME, + FunctionalRnnTest._INPUT_SIZE]) + # Always leave one time slot empty, to check max_length behavior. 
+ sequence_length = np.random.randint( + 0, high=FunctionalRnnTest._TOTAL_TIME - 1, + size=FunctionalRnnTest._BATCH_SIZE, + dtype=np.int) + return (inputs, sequence_length) + + def _CreateRnnGraph(self, create_rnn_computation_func, cell, tf_inputs, + tf_sequence_length, initial_state=None, + time_major=None, scope=None): + tf_result = create_rnn_computation_func(cell=cell, inputs=tf_inputs, + sequence_length=tf_sequence_length, + initial_state=initial_state, + dtype=dtypes.float32, + time_major=time_major, + scope=scope) + grad = gradients_impl.gradients(tf_result, variables.trainable_variables()) + return {'inference': tf_result, 'grad': grad} + + def _MaybeResetVariables(self, variable_cache, sess, var_list): + """Possibly resets the variables to a previously seen value.""" + reset_ops = [] + fetches = [] + for var in var_list: + if var.name in variable_cache: + reset_ops += [var.assign(variable_cache[var.name])] + else: + fetches += [(var.name, var)] + if reset_ops: + sess.run(reset_ops) + if fetches: + val = sess.run(dict(fetches)) + for n, v in val.items(): + assert n not in variable_cache + variable_cache[n] = v + + def _RunRnn(self, numpy_inputs, numpy_slen, cell_name, variable_cache, + is_dynamic): + with ops.Graph().as_default() as graph: + tf_inputs = array_ops.placeholder( + dtypes.float32, shape=numpy_inputs.shape) + tf_slen = array_ops.placeholder(dtypes.int32) + feeds = {tf_inputs: numpy_inputs, tf_slen: numpy_slen} + cell = self._CreateCell(cell_name) + fn = rnn_lib.dynamic_rnn if is_dynamic else functional_rnn.functional_rnn + fetches = self._CreateRnnGraph(fn, cell, tf_inputs, tf_slen) + with self.test_session(graph=graph) as sess: + sess.run(variables.global_variables_initializer()) + # Note that cell.trainable_variables it not always set. 
+ self._MaybeResetVariables(variable_cache, sess, + variables.trainable_variables()) + val = sess.run(fetches, feed_dict=feeds) + graph_def = graph.as_graph_def() + return graph_def, val + + def testRunLstm(self): + """Runs a simple LSTM. Does not check output.""" + np_inputs, np_slen = self._CreateInputs() + var_cache = {} + graphdef, _ = self._RunRnn(np_inputs, np_slen, 'lstm', var_cache, False) + logging.info('graphdef: %s', graphdef) + if self._LSTM_GRAPH_DEF_FILEPATH: + with open(self._LSTM_GRAPH_DEF_FILEPATH, 'w') as f: + f.write(str(graphdef)) + + def testLstm(self): + """Checks an LSTM against the reference implementation.""" + np_inputs, np_slen = self._CreateInputs() + var_cache = {} + _, func_rnn = self._RunRnn(np_inputs, np_slen, 'lstm', var_cache, False) + _, dyn_rnn = self._RunRnn(np_inputs, np_slen, 'lstm', var_cache, True) + self.assertAllClose(dyn_rnn['inference'], func_rnn['inference']) + self.assertAllClose(dyn_rnn['grad'], func_rnn['grad']) + + def testGru(self): + """Checks a GRU cell against the reference implementation.""" + np_inputs, np_slen = self._CreateInputs() + var_cache = {} + _, func_rnn = self._RunRnn(np_inputs, np_slen, 'gru', var_cache, False) + _, dyn_rnn = self._RunRnn(np_inputs, np_slen, 'gru', var_cache, True) + self.assertAllClose(dyn_rnn['inference'], func_rnn['inference']) + self.assertAllClose(dyn_rnn['grad'], func_rnn['grad']) + + def testStackedLstm(self): + """Checks a stacked LSTM cell against the reference implementation.""" + np_inputs, np_slen = self._CreateInputs() + var_cache = {} + args = [np_inputs, np_slen, 'stacked_lstm', var_cache] + _, func_rnn = self._RunRnn(*(args + [False])) + _, dyn_rnn = self._RunRnn(*(args + [True])) + self.assertAllClose(dyn_rnn['inference'], func_rnn['inference']) + self.assertAllClose(dyn_rnn['grad'], func_rnn['grad']) + + +if __name__ == '__main__': + test_lib.main() diff --git a/tensorflow/contrib/recurrent/python/kernel_tests/recurrent_test.py 
b/tensorflow/contrib/recurrent/python/kernel_tests/recurrent_test.py new file mode 100644 index 0000000000000000000000000000000000000000..00fbd4fbb8205ceb649616050314e400be1785a5 --- /dev/null +++ b/tensorflow/contrib/recurrent/python/kernel_tests/recurrent_test.py @@ -0,0 +1,192 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for Recurrent ops.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections + +from six.moves import xrange # pylint: disable=redefined-builtin + +from tensorflow.contrib.recurrent.python.ops import recurrent +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import function +from tensorflow.python.framework import random_seed +from tensorflow.python.framework import test_util +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gradients_impl +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import random_ops +from tensorflow.python.platform import test as test_lib +from tensorflow.python.platform import tf_logging as logging + + +_ElmanState = collections.namedtuple('ElmanState', ('h')) +_ElmanTheta = collections.namedtuple('ElmanTheta', ('w', 'b')) +_ElmanInputs = collections.namedtuple('ElmanInputs', ('x')) + + +# TODO(drpng): 
add test for max length computation. +class RecurrentTest(test_util.TensorFlowTestCase): + + def testBasic(self): + # pylint:disable=invalid-name + _PolyState = collections.namedtuple('PolyState', ('value', 'x_power')) + _PolyTheta = collections.namedtuple('PolyTheta', ('x')) + _PolyInputs = collections.namedtuple('PolyInputs', ('coeff')) + # pylint:enable=invalid-name + + def Poly(theta, state, inputs): + next_state = _PolyState( + value=state.value + inputs.coeff * state.x_power, + x_power=state.x_power * theta.x) + return next_state, [] + + with self.test_session() as sess: + theta = _PolyTheta(x=array_ops.constant(2.0)) + state = _PolyState( + value=array_ops.constant(0.0), + x_power=array_ops.constant(1.0)) + inputs = _PolyInputs(coeff=array_ops.constant([1., 2., 3.])) + + # x = 2 + # 1 + 2*x + 3*x^2 + ret = recurrent.Recurrent(theta, state, inputs, Poly) + + acc, state = sess.run(ret) + self.assertAllClose(acc.value, [1., 5., 17.]) + self.assertAllClose(acc.x_power, [2., 4., 8.]) + self.assertAllClose(state.value, 17.) + self.assertAllClose(state.x_power, 8.) + + y = ret[1].value + dx, d_coeff = gradients_impl.gradients(ys=[y], xs=[theta.x, inputs.coeff]) + dx_val, d_coeff_val = sess.run([dx, d_coeff]) + + # 2 + 6*x + self.assertAllClose(dx_val, 14.) + self.assertAllClose(d_coeff_val, [1., 2., 4.]) + + # acc = [1, 1+2x, 1+2x+3x^2] + # sum(acc) = 3 + 4x + 3x^2 + acc = ret[0].value + dx, d_coeff = gradients_impl.gradients( + ys=[math_ops.reduce_sum(acc)], xs=[theta.x, inputs.coeff]) + dx_val, d_coeff_val = sess.run([dx, d_coeff]) + # 4 + 6*x + self.assertAllClose(dx_val, 16.) 
+ self.assertAllClose(d_coeff_val, [3., 4., 4.]) + + @staticmethod + def Rand(shape): + return random_ops.random_uniform( + shape, minval=-0.2, maxval=0.2, dtype=dtypes.float64) + + @staticmethod + def Elman(theta, state0, inputs): + h0, w, b, x = state0.h, theta.w, theta.b, inputs.x + xw = math_ops.matmul(array_ops.concat([x, h0], axis=1), w) + h1 = math_ops.sigmoid(xw + b) + state1 = _ElmanState(h=h1) + return (state1, state1) + + @staticmethod + def ElmanGrad(theta, state0, inputs, extras, dstate1): + + @function.Defun() + def Grad(h0, w, b, x, h1, dh1): + del b + # We hand-roll the gradient for the 2nd half of the cell as a demo. + dxwb = (dh1 * (1 - h1) * h1) + dxw, db = dxwb, math_ops.reduce_sum(dxwb, axis=0) + + # Uses tf.gradient for the 1nd half of the cell as a demo. + xw = math_ops.matmul(array_ops.concat([x, h0], axis=1), w) + dh0, dx, dw = gradients_impl.gradients( + ys=[xw], xs=[h0, x, w], grad_ys=[dxw]) + + return dh0, dx, dw, db + + dh0, dx, dw, db = Grad(state0.h, theta.w, theta.b, inputs.x, + extras.h, dstate1.h) + dstate0 = _ElmanState(h=dh0) + dinputs = _ElmanInputs(x=dx) + return (_ElmanTheta(w=dw, b=db), dstate0, dinputs) + + @staticmethod + def ElmanOut(state1): + return _ElmanState(x=state1.h) + + @staticmethod + def ElmanOutGrad(dout): + return _ElmanState(h=dout.x) + + def testElman(self): + for seqlen, use_grad in [(1, False), (1, True), (7, False), (7, True)]: + logging.info('== Elman: seqlen=%s, use_grad=%s', seqlen, use_grad) + self._ParameterizedTestElman(seqlen, use_grad) + + def _ParameterizedTestElman(self, seqlen, use_grad): + + with self.test_session() as sess: + random_seed.set_random_seed(342462) + + batch = 3 + dims = 4 + theta = _ElmanTheta(w=RecurrentTest.Rand([2 * dims, dims]), + b=RecurrentTest.Rand([dims])) + state0 = _ElmanState(h=RecurrentTest.Rand([batch, dims])) + inputs = _ElmanInputs(x=RecurrentTest.Rand([seqlen, batch, dims])) + + # Statically unrolled. 
+ s = state0 + out = [] + for i in xrange(seqlen): + inp = _ElmanInputs(x=inputs.x[i, :]) + s, _ = RecurrentTest.Elman(theta, s, inp) + out += [s.h] + acc0, final0 = array_ops.stack(out), s.h + loss0 = math_ops.reduce_sum(acc0) + math_ops.reduce_sum(final0) + (dw0, db0, dh0, di0) = gradients_impl.gradients( + loss0, [theta.w, theta.b, state0.h, inputs.x]) + + acc1, final1 = recurrent.Recurrent( + theta=theta, + state0=state0, + inputs=inputs, + cell_fn=RecurrentTest.Elman, + cell_grad=RecurrentTest.ElmanGrad if use_grad else None) + assert isinstance(acc1, _ElmanState) + assert isinstance(final1, _ElmanState) + acc1, final1 = acc1.h, final1.h + loss1 = math_ops.reduce_sum(acc1) + math_ops.reduce_sum(final1) + (dw1, db1, dh1, di1) = gradients_impl.gradients( + loss1, [theta.w, theta.b, state0.h, inputs.x]) + + # Fetches a few values and compare them. + (acc0, acc1, final0, final1, dw0, dw1, db0, db1, dh0, dh1, di0, + di1) = sess.run( + [acc0, acc1, final0, final1, dw0, dw1, db0, db1, dh0, dh1, di0, di1]) + self.assertAllClose(acc0, acc1) + self.assertAllClose(final0, final1) + self.assertAllClose(dw0, dw1) + self.assertAllClose(db0, db1) + self.assertAllClose(dh0, dh1) + self.assertAllClose(di0, di1) + +if __name__ == '__main__': + test_lib.main() diff --git a/tensorflow/contrib/recurrent/python/ops/functional_rnn.py b/tensorflow/contrib/recurrent/python/ops/functional_rnn.py new file mode 100644 index 0000000000000000000000000000000000000000..a085474c1bf6117ba5663139c78d8f08f71392d3 --- /dev/null +++ b/tensorflow/contrib/recurrent/python/ops/functional_rnn.py @@ -0,0 +1,396 @@ +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""A tf.nn.dynamic_rnn variant, built on the Recurrent class. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import copy + +from tensorflow.contrib.recurrent.python.ops import recurrent +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import function +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import variable_scope +from tensorflow.python.util import nest + + +def _GetDTypesFromStructure(struct): + dtypes_list = [] + for x in nest.flatten(struct): + x = ops.convert_to_tensor(x) + dtypes_list.append(x.dtype) + return dtypes_list + + +def _SetShapeFromTemplate(struct, struct_template): + as_list = nest.flatten(struct) + template_as_list = nest.flatten(struct_template) + for element, template in zip(as_list, template_as_list): + element.set_shape(template.shape) + + +class _FunctionalRnnCell(object): + """Wrapper around RNNCell which separates state from computation. + + This class accomplishes the following: + * Turn the cell's `__call__` function into a pure function. The global + side effects are separated as `theta`. They are the variables created + for the weights of the computation. + * Unless the output is aliased as part of the state, extend the state to + contain the output so that we store the history in `Recurrent`. 
+ * Set static shapes as required. + """ + + def __init__(self, rnn_cell, seq_inputs, initial_state): + assert initial_state is not None + + # TODO(drpng): Dtype needs to be configurable. + input_dtypes = [dtypes.float32] + _GetDTypesFromStructure(initial_state) + # See _index. + like_inputs_t = nest.map_structure( + lambda x: array_ops.stop_gradient(array_ops.gather(x, 0)), seq_inputs) + input_structure = (like_inputs_t, initial_state) + + @function.Defun(*input_dtypes) + def FlatCellStep(*flat_inputs): + """The flattened version of `rnn_cell`.""" + inputs_t, state0 = nest.pack_sequence_as(input_structure, flat_inputs) + _SetShapeFromTemplate(state0, initial_state) + _SetShapeFromTemplate(inputs_t, like_inputs_t) + outputs_t, state1 = rnn_cell(inputs_t, state0) + state_list = nest.flatten(state1) + self._output_shape = outputs_t.shape + + if outputs_t in state_list: + output_index_in_state = state_list.index(outputs_t) + else: + output_index_in_state = None + + if output_index_in_state is None: + self._prepend_output = True + self._output_state_idx = 0 + return [outputs_t] + state_list + else: + self._output_state_idx = output_index_in_state + self._prepend_output = False + # To save memory, we don't store return the output separately + # from the state list, since we know it's the same. + return state_list + + def _ToPureFunction(func): + # NOTE: This forces the creating of the function. + if func.captured_inputs: + pure_func = copy.copy(func) + # pylint: disable=protected-access + pure_func._extra_inputs = [] + return pure_func + return func + + pure_flat_cell_step = _ToPureFunction(FlatCellStep) + + def CellStep(theta, extended_state0, inputs_t): + """Performs one time steps on structured inputs. + + The purpose of this function is to turn the parameters into flattened + versions, and to resolve the parameter order difference between + `Recurrent` and `RNNCell`. 
+ + In the event the cell returns a transformed output that is not aliased + within its state, the `extended_state0` also contains the output as its + first element. + + Args: + theta: Weights required for the computation. A structure of tensors. + extended_state0: the state0, and possibly the output at the previous + time step. A structure of tensors. + inputs_t: the inputs at time t. + + Returns: + A pair of the next state (inclusive of the output), and an empty list + (unused `extras`). + The next state is congruent to state0. + """ + extended_state0_flat = nest.flatten(extended_state0) + state0_flat = self.MaybeRemoveOutputFromState(extended_state0_flat) + full_inputs = [inputs_t] + state0_flat + theta + # Note that the thetas are additional inputs appeneded as extra + # parameters. + cell_out = pure_flat_cell_step(*full_inputs) + return cell_out, [] + + self._cell_step = CellStep + self._theta = FlatCellStep.captured_inputs + self._zero_state = rnn_cell.zero_state + self._state_template = initial_state + self._output_size = rnn_cell.output_size + + @property + def extended_initial_state(self): + if self._prepend_output: + return [array_ops.zeros(self._output_shape), self._state_template] + else: + # The base case, where the output is just the hidden state. + return self._state_template + + @property + def cell_step(self): + return self._cell_step + + @property + def theta(self): + return self._theta + + @property + def state_template(self): + return self._state_template + + @property + def output_shape(self): + return self._output_shape + + def GetOutputFromState(self, state): + return nest.flatten(state)[self._output_state_idx] + + def MaybeRemoveOutputFromState(self, flat_state): + if self._prepend_output: + return flat_state[1:] + return flat_state + + +def _ApplyLengthsToBatch(sequence_lengths, tf_output): + # TODO(drpng): just use Update so that we don't carry over the gradients? 
+ """Sets the output to be zero at the end of the sequence.""" + # output is batch major. + batch_size, max_time, vector_size = tf_output.shape + output_time = array_ops.tile(math_ops.range(0, max_time), [batch_size]) + output_time = array_ops.reshape(output_time, [batch_size, max_time]) + lengths = array_ops.tile( + array_ops.reshape(sequence_lengths, [-1, 1]), [1, max_time]) + is_less = math_ops.cast( + math_ops.less(output_time, lengths), dtype=dtypes.float32) + keep_mask = array_ops.tile( + array_ops.expand_dims(is_less, -1), + [1, 1, vector_size]) + final_output = keep_mask * tf_output + return final_output + + +def _PickFinalStateFromHistory(acc_state, sequence_length): + """Implements acc_state[sequence_length - 1].""" + # This will work on all platforms, unlike the regular slice. + last_value = [] + for state_var in nest.flatten(acc_state): + # We compute the following with matrix operations: + # last_var = state_var[sequence_length - 1] + shape = array_ops.shape(state_var) + max_time, batch_size = shape[0], shape[1] + output_time = array_ops.tile(math_ops.range(0, max_time), [batch_size]) + output_time = array_ops.reshape(output_time, [batch_size, max_time]) + lengths = array_ops.tile(array_ops.reshape(sequence_length, + [-1, 1]), [1, max_time]) + last_idx = math_ops.cast(math_ops.equal(output_time, lengths - 1), + dtype=dtypes.float32) + last_idx = array_ops.transpose(last_idx) + last_idx_for_bcast = array_ops.expand_dims(last_idx, -1) + sliced = math_ops.multiply(last_idx_for_bcast, state_var) + last_var = math_ops.reduce_sum(sliced, 0) + last_value += [last_var] + return nest.pack_sequence_as(acc_state, last_value) + + +def _PostProcessOutput(extended_acc_state, extended_final_state, func_cell, + total_time, inputs_lengths): + """Post-process output of recurrent. + + This function takes the accumulated extended state and extracts the requested + state and output. 
+ + When `inputs_lengths` has been set, it extracts the output from the + accumulated state. It also sets outputs past. + + It also sets the static shape information. + + Args: + extended_acc_state: A structure containing the accumulated state at each + time. It may contain the output at each time as well. + extended_final_state: A structure containing the final state. It may + contain the output at the final time. + func_cell: The functional wrapper around the cell. + total_time: A scalar integer tensor. + inputs_lengths: An integer tensor with one entry per input. + + Returns: + A tuple with the outputs at each time, and the final state. + """ + if inputs_lengths is None: + flat_final_state = func_cell.MaybeRemoveOutputFromState( + nest.flatten(extended_final_state)) + tf_state = nest.pack_sequence_as(func_cell.state_template, flat_final_state) + else: + # The accumulated state is over the entire sequence, so we pick it + # out from the acc_state sequence. + flat_acc_state = func_cell.MaybeRemoveOutputFromState( + nest.flatten(extended_acc_state)) + acc_state = nest.pack_sequence_as( + func_cell.state_template, flat_acc_state) + tf_state = _PickFinalStateFromHistory(acc_state, inputs_lengths) + + output_from_state = func_cell.GetOutputFromState(extended_acc_state) + tf_output = array_ops.transpose(output_from_state, [1, 0, 2]) + tf_output.set_shape( + [func_cell.output_shape[0], total_time, func_cell.output_shape[1]]) + if inputs_lengths is not None: + # Need set the outputs to zero. 
+ tf_output = _ApplyLengthsToBatch(inputs_lengths, tf_output) + # tf_output = array_ops.zeros([4, 3, 5]) + _SetShapeFromTemplate(tf_state, func_cell.state_template) + return tf_output, tf_state + + +# pylint: disable=invalid-name +def functional_rnn(cell, inputs, sequence_length=None, + initial_state=None, dtype=None, time_major=False, + scope=None, use_tpu=False): + """Same interface as `tf.nn.dynamic_rnn`.""" + with variable_scope.variable_scope(scope or 'rnn'): + if not time_major: + inputs = nest.map_structure( + lambda t: array_ops.transpose(t, [1, 0, 2]), inputs) + inputs_flat = nest.flatten(inputs) + batch_size = array_ops.shape(inputs_flat[0])[1] + if initial_state is None: + initial_state = cell.zero_state(batch_size, dtype) + func_cell = _FunctionalRnnCell(cell, inputs, initial_state) + extended_acc_state, extended_final_state = recurrent.Recurrent( + theta=func_cell.theta, + state0=func_cell.extended_initial_state, + inputs=inputs, + cell_fn=func_cell.cell_step, + use_tpu=use_tpu) + return _PostProcessOutput(extended_acc_state, extended_final_state, + func_cell, inputs_flat[0].shape[0], sequence_length) + + +def bidirectional_functional_rnn( + cell_fw, + cell_bw, + inputs, + initial_state_fw=None, + initial_state_bw=None, + dtype=None, + sequence_length=None, + time_major=False, + use_tpu=False, + scope=None): + """Creates a bidirectional recurrent neural network. + + Performs fully dynamic unrolling of inputs in both directions. Built to be API + compatible with `tf.nn.bidirectional_dynamic_rnn`, but implemented with + functional control flow for TPU compatibility. + + Args: + cell_fw: An instance of `tf.contrib.rnn.RNNCell`. + cell_bw: An instance of `tf.contrib.rnn.RNNCell`. + inputs: The RNN inputs. If time_major == False (default), this must be a + Tensor (or hierarchical structure of Tensors) of shape + [batch_size, max_time, ...]. 
If time_major == True, this must be a Tensor + (or hierarchical structure of Tensors) of shape: + [max_time, batch_size, ...]. The first two dimensions must match across + all the inputs, but otherwise the ranks and other shape components may + differ. + initial_state_fw: An optional initial state for `cell_fw`. Should match + `cell_fw.zero_state` in structure and type. + initial_state_bw: An optional initial state for `cell_bw`. Should match + `cell_bw.zero_state` in structure and type. + dtype: (optional) The data type for the initial state and expected output. + Required if initial_states are not provided or RNN state has a + heterogeneous dtype. + sequence_length: An optional int32/int64 vector sized [batch_size]. Used to + copy-through state and zero-out outputs when past a batch element's + sequence length. So it's more for correctness than performance. + time_major: Whether the `inputs` tensor is in "time major" format. + use_tpu: Whether to enable TPU-compatible operation. If True, does not truly + reverse `inputs` in the backwards RNN. Once b/69305369 is fixed, we can + remove this flag. + scope: An optional scope name for the dynamic RNN. + + Returns: + outputs: A tuple of `(output_fw, output_bw)`. The output of the forward and + backward RNN. If time_major == False (default), these will + be Tensors shaped: [batch_size, max_time, cell.output_size]. If + time_major == True, these will be Tensors shaped: + [max_time, batch_size, cell.output_size]. Note, if cell.output_size is a + (possibly nested) tuple of integers or TensorShape objects, then the + output for that direction will be a tuple having the same structure as + cell.output_size, containing Tensors having shapes corresponding to the + shape data in cell.output_size. + final_states: A tuple of `(final_state_fw, final_state_bw)`. A Tensor or + hierarchical structure of Tensors indicating the final cell state in each + direction. Must have the same structure and shape as cell.zero_state. 
+ + Raises: + ValueError: If `initial_state_fw` is None or `initial_state_bw` is None and + `dtype` is not provided. + """ + # Keep this code in sync with tf.nn.dynamic_rnn for compatibility. + with variable_scope.variable_scope(scope or 'bidirectional_rnn'): + # Forward direction + with variable_scope.variable_scope('fw') as fw_scope: + output_fw, output_state_fw = functional_rnn( + cell=cell_fw, inputs=inputs, sequence_length=sequence_length, + initial_state=initial_state_fw, dtype=dtype, + time_major=time_major, scope=fw_scope, use_tpu=use_tpu) + # Backward direction + if not time_major: + time_dim = 1 + batch_dim = 0 + else: + time_dim = 0 + batch_dim = 1 + + def _reverse(input_, seq_lengths, seq_dim, batch_dim): + if seq_lengths is not None: + return array_ops.reverse_sequence( + input=input_, seq_lengths=seq_lengths, + seq_dim=seq_dim, batch_dim=batch_dim) + else: + # See b/69305369. + assert not use_tpu, ( + 'Bidirectional with variable sequence lengths unsupported on TPU') + return array_ops.reverse(input_, axis=[seq_dim]) + + with variable_scope.variable_scope('bw') as bw_scope: + inputs_reverse = _reverse( + inputs, seq_lengths=sequence_length, + seq_dim=time_dim, batch_dim=batch_dim) + tmp, output_state_bw = functional_rnn( + cell=cell_bw, inputs=inputs_reverse, sequence_length=sequence_length, + initial_state=initial_state_bw, dtype=dtype, + time_major=time_major, scope=bw_scope, use_tpu=use_tpu) + + output_bw = _reverse( + tmp, seq_lengths=sequence_length, + seq_dim=time_dim, batch_dim=batch_dim) + + outputs = (output_fw, output_bw) + output_states = (output_state_fw, output_state_bw) + + return (outputs, output_states) +# pylint: enable=invalid-name diff --git a/tensorflow/contrib/recurrent/python/ops/recurrent.py b/tensorflow/contrib/recurrent/python/ops/recurrent.py new file mode 100644 index 0000000000000000000000000000000000000000..fa16b82ab62f27d034c3ca7584e7e1ca14be6f9b --- /dev/null +++ b/tensorflow/contrib/recurrent/python/ops/recurrent.py @@ 
-0,0 +1,720 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Recurrent computation. + +The main interface of this module is Recurrent(). +A recurrent computation describes an auto-regressive process, where outputs +of one time step are fed to the input of the next time step. + +This module uses: + theta: the "weights" each RNN uses. + state0: the initial state of each RNN. + cell_fn: A python function describing RNN cell. It must have the following + signature: + cell_fn: (theta, state0, inputs) -> (state1, extras) + state1 is the next RNN state, extras are computed by cell_fn + and the library forwards extras to cell_fn's gradient function. + cell_grad: A python function describing the backprop gradient function + for the RNN cell. It must have the following signature: + cell_grad: (theta, state0, inputs, extras, dstate1) -> ( + dtheta, dstate0, dinputs) + dstate1 is what the backprop algorithm provides representing + gradients of state1 w.r.t. the final loss. + +In this module, we handle structures of tensors for theta, state0, inputs, +and extras. The structure is an arbitrarily nested python structure, such +as a dictionary of named tuples. + +Because the computation is a left-to-right chain, a single in-place accumulator +can be used rather than a stack.
Thus a special gradient was written to reduce +unnecessary memory usage. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import function +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import functional_ops +from tensorflow.python.ops import gradients_impl +from tensorflow.python.ops import inplace_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops.inplace_ops import alias_inplace_update +from tensorflow.python.util import nest + + +def _AssertIsCompatible(a, b): + """Checks that `a` and `b` are nested structures of the same type.""" + # TODO(drpng): implement. + del a + del b + + +def _Index(struct, index): + """Returns a structure with `x[index]` for each tensor `x` in the structure. + + Args: + struct: A structure of tensors. + index: A scalar integer tensor. Performance is better if `index` is + on the host memory. + + Returns: + A structure of tensors congruent to `struct`. + For each key in `ret`, `rets[key] = struct[key][index]`. + """ + index = ops.convert_to_tensor(index) + index.get_shape().assert_has_rank(0) + return nest.map_structure(lambda x: x[index], struct) + + +def _Update(struct_acc, struct_x, t): + """Updates t-th row in accumulators. + + Args: + struct_acc: The accumulators. A structure of tensors. + struct_x: The new values. A structure of tensors congruent to `struct_acc`. + t: A scalar integer. Performance is better if `t` is on the device + memory. + + Returns: + A structure of tensors. Say, ret is a returned dictionary. Then, for + each key, we have: + ret[key] = struct_acc[key]; + ret[key][t, :] = struct_x[key] + """ + to_skip_update = set() + acc_lst = nest.flatten(struct_acc) + x_lst = nest.flatten(struct_x) + t = math_ops.to_int32([t]) # tf.to_int32 casts on-device tensors. 
+ lst = [] + for acc, x in zip(acc_lst, x_lst): + if acc in to_skip_update: + # Until b/62105730 is fixed, we need to avoid inplace update for tensors + # of rank 1. could reshape to handle it, but we don't really need the + # values applied to these, so just skip their modification. + lst += [acc] + else: + lst += [alias_inplace_update(acc, t, array_ops.expand_dims(x, 0))] + return nest.pack_sequence_as(struct_acc, lst) + + +def _SeqLenDim(struct): + """Returns the 0-th dim size of tensors in a structure of tensors. + + This is the max sequence length according to the shape of the inputs. + + Args: + struct: A structure of tensors. Every tensor's 0-th dim has the same size. + + Returns: + A scalar tensor which is the size of 0-th dim of every tensors in struct. + """ + xs = nest.flatten(struct) + assert xs + dim0 = array_ops.shape(xs[0])[0] + return dim0 + + +def _Flatten(struct): + """Flattens a structure.""" + return nest.flatten(struct) + + +def _Pack(elements, struct_template): + """Packs the list of tensors according to the structure. + + In the event that `elements` should be a scalar, `struct_template` must + contain exactly one non-trivial element (for instance, `[[], {'x':elt}]`). + + Args: + elements: Elements to be packed. A list of tensor, or a single tensor. + struct_template: The container structure in which to pack them. + Returns: + A python structure of the same type as `struct_template`, containing + `elements` as its contained elements. + """ + if not nest.is_sequence(elements): + return nest.pack_sequence_as(struct_template, [elements]) + return nest.pack_sequence_as(struct_template, elements) + + +def _EmptyAcc(slen, struct_template): + """Creates a set of accumulators for tensors in structure. + + Args: + slen: The sequence length. A scalar tensor. + struct_template: A structure of tensors. + + Returns: + A structure congruent to `struct_template`. Say ret is a returned + dictionary. 
Then, `ret.key`, a tensor, has the same dtype as + `struct_template.key`. The tensor's shape has 1 more dimension + than the tensor `struct_template.key`. The extra 0-th dimension is of size + `slen`. E.g., if `slen=10` and `struct_template.key`'s shape is `[3, 5]`, + then, `ret.key`'s shape is `[10, 3, 5]`. + """ + + def _EmptyAccForTensor(tensor): + return inplace_ops.empty( + array_ops.concat([[slen], array_ops.shape(tensor)], axis=0), + tensor.dtype, + init=True) + + return nest.map_structure(_EmptyAccForTensor, struct_template) + + +def _EmptyLike(struct): + """Creates a set of empty initialized tensors. + + Args: + struct: A structure of tensors. + + Returns: + A struct of tensors. Each tensor has the same shape and dtype as + its corresponding tensor in `struct`. And each tensor is initialized. + """ + return nest.map_structure( + lambda x: inplace_ops.empty_like(x, init=True), struct) + + +def _Add(struct_x, struct_y): + """Adds tensors in `struct_x` with respective tensors in `struct_y`. + + Args: + struct_x: A struct of tensors. + struct_y: A struct of tensors congruent to `struct_x`. + + Returns: + A struct of tensors. Each element of the returned value + equals `x + y`, with corresponding values in `struct_x` and `struct_y`. + """ + list_x = nest.flatten(struct_x) + list_y = nest.flatten(struct_y) + z = [] + for x, y in zip(list_x, list_y): + z += [math_ops.add(x, y)] + return nest.pack_sequence_as(struct_x, z) + + +def _Dtypes(struct): + """Returns all tensors' data types in a list.""" + return [x.dtype for x in nest.flatten(struct)] + + +def _ConvertNoneGradientToZeros(xs, dxs): + """Sanitize dxs so that None becomes zeros appropriately. + + Args: + xs: A list of tensors. + dxs: A list of tensors. dxs[i] corresponds to xs[i]'s gradient. + + Returns: + A structure same as `dxs` with `None` replaced by a zero tensor. + """ + list_xs = nest.flatten(xs) + list_dxs = nest.flatten(dxs) + + # If x does not get any backprop-ed gradient, propagate zeros. 
+ rets = [] + for (x, dx) in zip(list_xs, list_dxs): + if dx is None: + rets.append(array_ops.zeros_like(x)) + else: + rets.append(dx) + + return nest.pack_sequence_as(dxs, rets) + + +# All structures are flattened for use internally. This is for simplicity +# and also to use the Defun construct. +# In the forward pass (inference), the computation is structured as follows. +# Forward: [gradient = _Recurrent.Grad] +# Flatten structures, create accumulators. +# for t = 0..max_input_length: +# Defun ForwardLoopBody: +# Defun Fwd: flatten/pack around cell_fn +# state1 = Fwd(inputs[t], state0) +# acc_state += [state1] +# Pack structures. +# During the backward pass (backpropping the gradient from the last time +# step to the first, through the structure), the computation is structured +# as follows. +# Grad: +# Flatten structures. +# Defun Backward: +# Create accumulated derivatives: d_theta, d_inputs, d_acc_state. +# Regarding the note at the top of the file, there is only one accumulator +# for d_theta accumulated over the whole sequence. +# for t = max_input_length -1..0: +# Defun BackwardLoopBody: +# Retrieve acc_state[t] computed in the forward pass. +# Defun Bak: flatten/pack around cell_grad. +# d_state1 is d_state0 from previous step (ie next time). +# d_acc_state[dev_t] += d_state1 +# d_theta_t, d_state0, d_inputs_t, = Bak() +# d_inputs[dev_t] += d_inputs +# d_theta += d_theta_t +# d_acc_state[t] += d_state1 +# Pack structures and return. +class _Recurrent(object): + """A helper class to construct a recurrent neural net.""" + + def __init__(self, cell_fn, cell_grad, theta, state0, inputs, + max_input_length, extras, use_tpu): + """RNN helper class. + + Args: + cell_fn: A python function, which computes: + state1, extras = cell_fn(theta, state0, inputs[t, :]) + cell_grad: A python function which computes: + dtheta, dstate0, dinputs[t, :] = cell_grad( + theta, state0, inputs[t, :], extras, dstate1) + theta: weights. A structure of tensors.
+ state0: initial state. A structure of tensors. + inputs: inputs. A structure of tensors. + max_input_length: None, or the maximum effective length of the input over + all batches. A scalar tensor. + extras: A structure of tensors. The 2nd return value of every + invocation of cell_fn is a structure of tensors with matching keys + and shapes of this `extras`. + use_tpu: A boolean indicating whether the computation is mean to + run on a TPU. + """ + self._theta = theta + self._state = state0 + self._inputs = inputs + self._max_input_length = self._MaybeComputeMaxInputLength( + inputs, max_input_length) + self._cell_fn = cell_fn + self._cell_grad = cell_grad + self._extras = extras + + # pylint: disable=unbalanced-tuple-unpacking + + # NOTE: TF Function (Fwd, Bak, ForwardLoopBody, BackwardLoopBody, + # Forward and Backward defined below) simply takes a list of + # Tensors and returns a list of Tensors. When we pass in a + # structure (a list of structures of Tensors), we use _Flatten to + # convert the structure into a list of tensor. Conversely, the + # following code often uses _Pack to formulate a structure from a + # list of tensors based on a "template". + + # Wraps cell_fn in a TF Function: + # state1 = cell_fn(theta, state0, inputs) + fwd_sig = [self._theta, self._state, self._inputs] + + compiled = use_tpu + noinline = not compiled + dev_t_type = dtypes.int32 if use_tpu else dtypes.int64 + + @function.Defun(*_Dtypes(fwd_sig)) + def Fwd(*args): + (theta, state0, inputs) = _Pack(args, fwd_sig) + state1, extras = self._cell_fn(theta, state0, inputs) + assert not function.get_extra_args(), ( + 'cell_fn is not pure with extra args: %s.' % + (function.get_extra_args())) + _AssertIsCompatible(state1, self._state) + _AssertIsCompatible(extras, self._extras) + return _Flatten([state1, extras]) + + # Wraps cell_fn in a TF Function as a for-loop's body. + # + # The loop state is composed of: + # t: The loop variable. Timestep id. 
+ # dev_t: The loop variable mirrored on the device. + # theta: the recurrent net's weights. + # state0: the previous recurrent state. + # inputs: inputs to the recurrent net. inputs[t, :] are for the timestep t. + # acc_state: Each timestep's computed new state is also stashed into + # acc_state. + # acc_extras: Each timestep's computed extras is stashed into acc_extras + fwdloop_sig = [ + self._theta, self._state, self._inputs, self._state, self._extras + ] + + @function.Defun(dtypes.int32, dev_t_type, *_Dtypes(fwdloop_sig)) + def ForwardLoopBody(*args): + """The body of forward loop.""" + t, dev_t = args[0], args[1] + (theta, state0, inputs, acc_state, acc_extras) = _Pack( + args[2:], fwdloop_sig) + inputs_t = _Index(inputs, t) # external input at time step t. + fwd = Fwd(*_Flatten([theta, state0, inputs_t])) + state1, extras = _Pack(fwd, [self._state, self._extras]) + # Saves state1 and extras in their accumulators. + acc_state = _Update(acc_state, state1, dev_t) + acc_extras = _Update(acc_extras, extras, dev_t) + + return [math_ops.add(dev_t, 1)] + _Flatten( + [theta, state1, inputs, acc_state, acc_extras]) + + def Grad(op, *args): + """The python grad function for the Forward function.""" + + # NOTE: tf.gradient backprops None for int32/int64 while zeros + # for float32/float64. For consistency, we always backprop + # zeros. + args = list(args) + for i, dy in enumerate(args): + if dy is None: + args[i] = array_ops.zeros_like(op.outputs[i]) + # TODO(drpng): getting the extra state here? + op_inputs = [x for x in op.inputs] + op_struct = [ + self._theta, self._state, self._inputs, self._max_input_length, + self._extras + ] + (theta, state0, inputs, max_input_length, _) = _Pack(op_inputs, op_struct) + # acc_state and acc_extras are computed by the Forward pass and + # needed by the Backward pass. 
+ acc_state, _, acc_extras = _Pack([x for x in op.outputs], + [self._state, self._state, self._extras]) + + # Forward computes acc_state, the final state and + # acc_extras. tf.gradients gives us their gradients w.r.t. the + # final loss. Because acc_extras are not exposed by Compute(), + # it has no gradients w.r.t. the final loss (i.e., by + # construction, it must be zeros). + d_acc_state, d_state1, _ = _Pack(args, + [self._state, self._state, self._extras]) + return Backward(*_Flatten([ + theta, state0, inputs, max_input_length, acc_state, acc_extras, + d_acc_state, d_state1 + ])) + + # Forward calls ForwardLoopBody n times. Each time computes one + # time step of the recurrent net. + forward_sig = [ + self._theta, self._state, self._inputs, self._max_input_length, + self._extras + ] + + @function.Defun( + *_Dtypes(forward_sig), python_grad_func=Grad, noinline=noinline) + def Forward(*args): + """Forward pass of the recurrent net.""" + theta, state0, inputs, max_input_length, extras = _Pack(args, forward_sig) + + slen_dim = _SeqLenDim(inputs) + + # Creates accumulators for state0 and extras. + acc_state = _EmptyAcc(slen_dim, state0) + acc_extras = _EmptyAcc(slen_dim, extras) + + dev_t = array_ops.constant(0, dtype=dev_t_type) + run = functional_ops.For( + start=0, + limit=max_input_length, + delta=1, + inputs=[dev_t] + _Flatten( + [theta, state0, inputs, acc_state, acc_extras]), + body=ForwardLoopBody, + rewrite_with_while=compiled) + _, state1, _, acc_state, acc_extras = _Pack( + run[1:], + [self._theta, self._state, self._inputs, self._state, self._extras]) + + return _Flatten([acc_state, state1, acc_extras]) + + # The per-step backward computes: + # d_theta, d_state0, d_inputs = cell_grad( + # theta, state0, inputs, extras, d_state1) + # where d_state1 is the backprop-ed gradient for state1, and + # extras is the computed by the forward step to facilitate the + # backward step. 
+ bak_sig = [ + self._theta, self._state, self._inputs, self._extras, self._state + ] + + @function.Defun(*_Dtypes(bak_sig)) + def Bak(*args): + """Backward step.""" + (theta, state0, inputs, extras, d_state1) = _Pack(args, bak_sig) + (dtheta, dstate0, dinputs) = self._cell_grad(theta, state0, inputs, + extras, d_state1) + assert not function.get_extra_args(), ( + 'cell_grad is not pure with extra args: %s.' % + (function.get_extra_args())) + _AssertIsCompatible(dtheta, self._theta) + _AssertIsCompatible(dstate0, self._state) + _AssertIsCompatible(dinputs, self._inputs) + return _Flatten( + _ConvertNoneGradientToZeros([theta, state0, inputs], + [dtheta, dstate0, dinputs])) + + # Define defuns used by a functional_ops.If in BackwardLoopBody. + state_if_sig = [self._state, self._state] + + @function.Defun(*_Dtypes(state_if_sig)) + def ReturnOrigState0(*args): + """Returns original state0 from inputs.""" + (_, orig_state0) = _Pack(args, state_if_sig) + return nest.flatten(orig_state0) + + @function.Defun(*_Dtypes(state_if_sig)) + def ReturnAccState(*args): + """Returns acc_state[t-1] from inputs.""" + (acc_state, _) = _Pack(args, state_if_sig) + return nest.flatten(acc_state) + + # Wraps cell_grad gradient function in a TF Function as a + # for-loop's body for the Backward pass. + # + # The loop state is composed of: + # t: The loop variable. Timestep id. + # state0: the initial state for the entire backward loop. + # dev_t: The loop variable mirrored on the device. + # theta: the recurrent net's weights. + # inputs: inputs to the recurrent net. inputs[t, :] are for the timestep t. + # acc_state: Each timestep's computed new state was stashed into + # acc_state by the Forward pass. + # acc_extras: Each timestep's computed extras was stashed into + # acc_extras by the Forward pass. + # d_theta: All timestep's gradient for theta is accumulated (added) into + # d_theta. + # d_state1: The backprop-ed gradient for the new stated computed by + # timestep t. 
+ # d_inputs: d_inputs[t, :] is populated by the backward time step t. + # d_acc_state: The backprop-ed gradient for acc_state. + bakloop_sig = [ + self._theta, self._state, self._inputs, self._state, self._extras, + self._theta, self._state, self._inputs, self._state + ] + + @function.Defun(dtypes.int32, dev_t_type, *_Dtypes(bakloop_sig)) + def BackwardLoopBody(*args): + """Backward loop body function.""" + t, dev_t = args[0], args[1] + (theta, orig_state0, inputs, acc_state, acc_extras, d_theta, d_state1, + d_inputs, d_acc_state) = _Pack(args[2:], bakloop_sig) + + # The input recurrent state for time step t is previous time step's + # output, or the original state0 when on time step 0. + state_from_acc = _Index(acc_state, math_ops.maximum(0, t - 1)) + state0 = functional_ops.If( + math_ops.equal(t, array_ops.constant(0, dtypes.int32)), + _Flatten([state_from_acc, orig_state0]), ReturnOrigState0, + ReturnAccState) + state0 = nest.pack_sequence_as(orig_state0, state0) + + # The external inputs for time step t. + inputs_t = _Index(inputs, t) + # The extras for time step t. + extras_t = _Index(acc_extras, t) + + d_state1 = _Add(_Index(d_acc_state, t), d_state1) + (d_theta_t, d_state0, d_inputs_t) = _Pack( + Bak(*_Flatten([theta, state0, inputs_t, extras_t, d_state1])), + [self._theta, self._state, self._inputs]) + d_theta = _Add(d_theta, d_theta_t) + d_inputs = _Update(d_inputs, d_inputs_t, dev_t) + return [math_ops.subtract(dev_t, 1)] + _Flatten([ + theta, orig_state0, inputs, acc_state, acc_extras, d_theta, d_state0, + d_inputs, d_acc_state + ]) + + # Backward calls BackwardLoopBody n times. Each time computes the backprop + # for one time step of the recurrent net. 
+ backward_sig = [ + self._theta, self._state, self._inputs, self._max_input_length, + self._state, self._extras, self._state, self._state + ] + + @function.Defun(*_Dtypes(backward_sig), noinline=noinline) + def Backward(*args): + """Backward pass for the recurrent net.""" + # theta, state0, inputs are Forward's inputs. + # acc_state is the accumulated 1st output of Forward. + # acc_extras is the accumulated 2nd output of Forward. + # d_acc_state is the gradient for acc_state. + # d_state1 is the gradient for the final state computed by Forward. + (theta, state0, inputs, max_input_length, acc_state, acc_extras, + d_acc_state, d_state1) = _Pack(args, backward_sig) + + # Accumulators for gradients. + d_theta = _EmptyLike(theta) + d_inputs = _EmptyLike(inputs) + + # Loop backwards. Note the loop's limit is open-ended, so goes through + # t=0. + t = max_input_length - 1 + dev_t = math_ops.to_int32(t) if use_tpu else math_ops.to_int64(t) + run = functional_ops.For( + start=t, + limit=-1, + delta=-1, + inputs=[dev_t] + _Flatten([ + theta, state0, inputs, acc_state, acc_extras, d_theta, d_state1, + d_inputs, d_acc_state + ]), + body=BackwardLoopBody, + rewrite_with_while=compiled) + + (theta, state0, inputs, acc_state, acc_extras, d_theta, d_state0, + d_inputs, d_acc_state) = _Pack(run[1:], bakloop_sig) + + d_max_input_length = array_ops.constant(0, dtype=max_input_length.dtype) + return _Flatten( + [d_theta, d_state0, d_inputs, d_max_input_length, acc_extras]) + + self._forward = Forward + + def _MaybeComputeMaxInputLength(self, inputs, max_input_length): + if max_input_length is not None: + return max_input_length + return math_ops.reduce_max(array_ops.shape(nest.flatten(inputs)[0])[0]) + + def Compute(self): + return _Pack( + self._forward(*_Flatten([ + self._theta, self._state, self._inputs, self._max_input_length, + self._extras + ])), [self._state, self._state, self._extras])[:2] + + +def _GetCellGrad(cell_fn, cell_grad): + """Returns the gradient function for 
cell_fn. + + Args: + cell_fn: The recurrent neural net's cell function. + cell_grad: If not None, cell_fn's gradient function. + + Returns: + Returns cell_grad if not None. Otherwise, assume cell_fn is a python + function representing the recurrent neural net's cell function, i.e., + cell_fn: (theta, state0, inputs) -> (state1, extra) + returns its default gradient python function, i.e., + cell_grad: (theta, state0, inputs, extras, dstate1) -> ( + dtheta, dstate0, dinputs) + """ + + if cell_grad: + return cell_grad + + def CellGrad(theta, state0, inputs, extras, dstate1): + """Default gradient function for cell_fn.""" + # NOTE: The default grad function recomputes the forward + # function and does not take advantage of 'extras' returned by + # the forward function. + del extras + state1, extras = cell_fn(theta, state0, inputs) + ys = _Flatten([state1]) + xs = _Flatten([theta, state0, inputs]) + grad_ys = _Flatten([dstate1]) + grads = gradients_impl.gradients(ys=ys, xs=xs, grad_ys=grad_ys) + return _ConvertNoneGradientToZeros([theta, state0, inputs], + _Pack(grads, [theta, state0, inputs])) + + return CellGrad + + +def _IsSingleTimeStep(inputs, max_input_length): + """Returns True only if the time dimension of inputs is 1.""" + if not isinstance(max_input_length, ops.Tensor): + return max_input_length == 1 + for x in nest.flatten(inputs): + if x.shape.dims is None or x.shape[0].value != 1: + return False + return True + + +def Recurrent(theta, + state0, + inputs, + cell_fn, + cell_grad=None, + extras=None, + max_input_length=None, + use_tpu=False): + """Compute a recurrent neural net. + + Roughly, Recurrent() computes the following: + state = state0 + for t in inputs' sequence length: + state = cell_fn(theta, state, inputs[t, :]) + accumulate_state[t, :] = state + return accumulate_state, state + + theta, state, inputs are all structures of tensors. + + inputs[t, :] means taking a slice out from every tensor in the inputs. 
+ + accumulate_state[t, :] = state means that we stash every tensor in + 'state' into a slice of the corresponding tensor in + accumulate_state. + + cell_fn is a python callable computing (building up a TensorFlow + graph) the recurrent neural network's one forward step. Two calls of + cell_fn must describe two identical computations. + + By construction, Recurrent()'s backward computation does not access + any intermediate values computed by cell_fn during forward + computation. We may extend Recurrent() to support that by taking a + customized backward function of cell_fn. + + Args: + theta: weights. A structure of tensors. + state0: initial state. A structure of tensors. + inputs: inputs. A structure of tensors. + cell_fn: A python function, which computes: + state1, extras = cell_fn(theta, state0, inputs[t, :]) + cell_grad: A python function which computes: + dtheta, dstate0, dinputs[t, :] = cell_grad( + theta, state0, inputs[t, :], extras, dstate1) + extras: A structure of tensors. The 2nd return value of every + invocation of cell_fn is a structure of tensors with matching keys + and shapes of this `extras`. + max_input_length: maximum length of effective input. This is used to + truncate the computation if the inputs have been allocated to a + larger size. A scalar tensor. + use_tpu: whether or not we are on TPU. + + Returns: + accumulate_state and the final state. + """ + if cell_grad is None and _IsSingleTimeStep(inputs, max_input_length): + # The seqlen length is staticly known as 1. Hence, we just need to + # call cell_fn once without putting it into a loop. + inputs = nest.map_structure(lambda x: array_ops.squeeze(x, axis=0), inputs) + state1, _ = cell_fn(theta, state0, inputs) + acc_state = nest.map_structure(lambda x: array_ops.expand_dims(x, axis=0), + state1) + return acc_state, state1 + + # If cell_grad is not given, derives the gradient function from + # cell_fn. 
+ cell_grad = _GetCellGrad(cell_fn, cell_grad) + + if extras is None: + # Derives 'extras' so that we can allocate extras' accumulator. + _, extras = cell_fn(theta, state0, _Index(inputs, 0)) + extras = nest.map_structure(array_ops.zeros_like, extras) + else: + _, actual = cell_fn(theta, state0, _Index(inputs, 0)) + _AssertIsCompatible(extras, actual) + + return _Recurrent( + cell_fn=cell_fn, + cell_grad=cell_grad, + theta=theta, + state0=state0, + inputs=inputs, + max_input_length=max_input_length, + extras=extras, + use_tpu=use_tpu).Compute() diff --git a/tensorflow/experimental_api.py b/tensorflow/contrib/recurrent/python/recurrent_api.py similarity index 53% rename from tensorflow/experimental_api.py rename to tensorflow/contrib/recurrent/python/recurrent_api.py index 63a8aa9cb1dc130a7999c3b248815633998c4cd0..ffe1dcf7dc49554db56ee8e8fabedf976310a554 100644 --- a/tensorflow/experimental_api.py +++ b/tensorflow/contrib/recurrent/python/recurrent_api.py @@ -1,4 +1,4 @@ -# Copyright 2015 The TensorFlow Authors. All Rights Reserved. +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,26 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== - -# Bring in all of the public TensorFlow interface into this -# module. 
+"""Recurrent computations library.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function -# pylint: disable=g-bad-import-order -from tensorflow.python import pywrap_tensorflow # pylint: disable=unused-import -# pylint: disable=wildcard-import -from tensorflow.tools.api.generator.api import * # pylint: disable=redefined-builtin -# pylint: enable=wildcard-import - -from tensorflow.python.util.lazy_loader import LazyLoader -contrib = LazyLoader('contrib', globals(), 'tensorflow.contrib') -del LazyLoader - -from tensorflow.python.platform import flags # pylint: disable=g-import-not-at-top -app.flags = flags # pylint: disable=undefined-variable +# pylint: disable=unused-import +from tensorflow.contrib.recurrent.python.ops import functional_bidirectional_rnn +from tensorflow.contrib.recurrent.python.ops import functional_rnn +from tensorflow.contrib.recurrent.python.ops import Recurrent +# pylint: enable=unused-import del absolute_import del division diff --git a/tensorflow/contrib/remote_fused_graph/pylib/BUILD b/tensorflow/contrib/remote_fused_graph/pylib/BUILD index 996b55f9b88e76a56e3ebd3785608a433d541bb7..3aa8a14f44f38de51ed61f0b894cfd77ea9329f8 100644 --- a/tensorflow/contrib/remote_fused_graph/pylib/BUILD +++ b/tensorflow/contrib/remote_fused_graph/pylib/BUILD @@ -38,7 +38,6 @@ py_test( size = "small", srcs = ["python/ops/remote_fused_graph_ops_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], deps = [ ":remote_fused_graph_ops_py", "//tensorflow/core:protos_all_py", diff --git a/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py b/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py index 63fdd91d368d97007280871f3886e5649e6b2e86..c7d85862f65674f60c9f63fd5c649afa75b95cc0 100644 --- a/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py +++ b/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py @@ -842,12 +842,12 @@ class RNNCellTest(test.TestCase): batch_size = 3 
input_size = 4 expected_state_c = np.array( - [[6.450831e-04, 4.697885e-04], [9.862894e-05, 7.212213e-04], - [4.401947e-04, 9.143004e-04]], + [[0.00072015, 0.00036633], [0.00083481, 0.00047266], + [0.00085111, 0.00053054]], dtype=np.float32) expected_state_h = np.array( - [[4.621217e-04, 3.365449e-04], [7.438179e-05, 5.439147e-04], - [3.347936e-04, 6.953785e-04]], + [[0.0005159, 0.00026243], [0.00062958, 0.00035646], + [0.00064732, 0.00040351]], dtype=np.float32) with variable_scope.variable_scope( "root", initializer=init_ops.constant_initializer(0.5)): diff --git a/tensorflow/contrib/rnn/python/ops/rnn_cell.py b/tensorflow/contrib/rnn/python/ops/rnn_cell.py index 2f6ae9f3678e58dae67bf777991641b10e42ef94..b12e2cd5eddc3f8abdba62781692673a40e41d9b 100644 --- a/tensorflow/contrib/rnn/python/ops/rnn_cell.py +++ b/tensorflow/contrib/rnn/python/ops/rnn_cell.py @@ -2891,7 +2891,7 @@ class WeightNormLSTMCell(rnn_cell_impl.RNNCell): output_size = weight.get_shape().as_list()[1] g = vs.get_variable(name, [output_size], dtype=weight.dtype) - return nn_impl.l2_normalize(weight, dim=0) * g + return nn_impl.l2_normalize(weight, axis=0) * g def _linear(self, args, diff --git a/tensorflow/contrib/rpc/BUILD b/tensorflow/contrib/rpc/BUILD new file mode 100644 index 0000000000000000000000000000000000000000..dbd311a276bed2db9422cd8f1841d642616cac93 --- /dev/null +++ b/tensorflow/contrib/rpc/BUILD @@ -0,0 +1,29 @@ +package(default_visibility = ["//visibility:public"]) + +licenses(["notice"]) # Apache 2.0 + +exports_files(["LICENSE"]) + +load("//tensorflow/core:platform/default/build_config_root.bzl", "if_static") + +py_library( + name = "rpc", + srcs = [ + "__init__.py", + ], + deps = ["//tensorflow/contrib/rpc/python/ops:rpc_op_py"], +) + +py_library( + name = "rpc_pip", + data = if_static( + [], + otherwise = ["//tensorflow/contrib/rpc/python/kernel_tests:libtestexample.so"], + ), + deps = [ + ":rpc", + "//tensorflow/contrib/rpc/python/kernel_tests:py_test_deps", + 
"//tensorflow/contrib/rpc/python/kernel_tests:rpc_op_test_base", + "//tensorflow/contrib/rpc/python/kernel_tests:rpc_op_test_servicer", + ], +) diff --git a/tensorflow/contrib/rpc/__init__.py b/tensorflow/contrib/rpc/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..c65c1a05def92b91ddd75e7aecdb4e4d9b8abe8a --- /dev/null +++ b/tensorflow/contrib/rpc/__init__.py @@ -0,0 +1,28 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Ops and modules related to RPC. + +@@rpc +@@try_rpc +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.rpc.python.ops.rpc_op import rpc +from tensorflow.contrib.rpc.python.ops.rpc_op import try_rpc + +from tensorflow.python.util.all_util import remove_undocumented +remove_undocumented(__name__) diff --git a/tensorflow/contrib/rpc/python/kernel_tests/BUILD b/tensorflow/contrib/rpc/python/kernel_tests/BUILD new file mode 100644 index 0000000000000000000000000000000000000000..f3e6731213fd0aa8f14845a7b1562001b134c7ac --- /dev/null +++ b/tensorflow/contrib/rpc/python/kernel_tests/BUILD @@ -0,0 +1,81 @@ +# TODO(b/76425722): Port everything in here to OS (currently excluded). 
+ +package(default_visibility = ["//visibility:public"]) + +licenses(["notice"]) # Apache 2.0 + +exports_files(["LICENSE"]) + +load("//tensorflow:tensorflow.bzl", "tf_py_test") +load("//tensorflow:tensorflow.bzl", "tf_cc_shared_object") +load("//tensorflow/core:platform/default/build_config_root.bzl", "if_static") +load("//tensorflow/core:platform/default/build_config.bzl", "tf_proto_library") +# Placeholder for loading internal BUILD rule. + +tf_proto_library( + name = "test_example_proto", + srcs = ["test_example.proto"], + has_services = 1, + cc_api_version = 2, + protodeps = ["//tensorflow/core:protos_all"], +) + +py_library( + name = "py_test_deps", + deps = [":test_example_proto_py"], +) + +py_library( + name = "rpc_op_test_base", + srcs = ["rpc_op_test_base.py"], + tags = ["notsan"], + deps = [ + ":test_example_proto_py", + "//tensorflow/contrib/proto", + "//tensorflow/contrib/rpc", + "//tensorflow/core:protos_all_py", + "//tensorflow/python:dtypes", + "//tensorflow/python:errors", + "//third_party/py/numpy", + ], +) + +py_library( + name = "rpc_op_test_servicer", + srcs = ["rpc_op_test_servicer.py"], + deps = [ + ":py_test_deps", + ":rpc_op_test_base", + "//tensorflow/core:protos_all_py", + "//third_party/py/numpy", + ], +) + +tf_cc_shared_object( + name = "libtestexample.so", + linkstatic = 1, + deps = [ + ":test_example_proto_cc", + ], +) + +tf_py_test( + name = "rpc_op_test", + size = "small", + srcs = ["rpc_op_test.py"], + additional_deps = [ + ":py_test_deps", + ":rpc_op_test_base", + ":rpc_op_test_servicer", + "//tensorflow/core:protos_all_py", + "//tensorflow/python:client_testlib", + ], + data = if_static( + [], + otherwise = [":libtestexample.so"], + ), + tags = [ + "no_pip", # TODO(b/78026780) + "no_windows", # TODO(b/78028010) + ], +) diff --git a/tensorflow/contrib/rpc/python/kernel_tests/rpc_op_test.py b/tensorflow/contrib/rpc/python/kernel_tests/rpc_op_test.py new file mode 100644 index 
0000000000000000000000000000000000000000..e2e0dbc7a22efb2d8bf1e03e325eb8b6a4734993 --- /dev/null +++ b/tensorflow/contrib/rpc/python/kernel_tests/rpc_op_test.py @@ -0,0 +1,71 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +"""Tests for RpcOp.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import ctypes as ct +import os + +import grpc +from grpc.framework.foundation import logging_pool +import portpicker + +from tensorflow.contrib.rpc.python.kernel_tests import rpc_op_test_base +from tensorflow.contrib.rpc.python.kernel_tests import rpc_op_test_servicer +from tensorflow.contrib.rpc.python.kernel_tests import test_example_pb2_grpc +from tensorflow.python.platform import test + + +class RpcOpTest(test.TestCase, rpc_op_test_base.RpcOpTestBase): + _protocol = 'grpc' + + invalid_method_string = 'Method not found' + + def __init__(self, methodName='runTest'): # pylint: disable=invalid-name + super(RpcOpTest, self).__init__(methodName) + lib = os.path.join(os.path.dirname(__file__), 'libtestexample.so') + if os.path.isfile(lib): + ct.cdll.LoadLibrary(lib) + + def get_method_name(self, suffix): + return '/tensorflow.contrib.rpc.TestCaseService/%s' % suffix + + def setUp(self): + super(RpcOpTest, self).setUp() + + service_port = portpicker.pick_unused_port() + + 
server = grpc.server(logging_pool.pool(max_workers=25)) + servicer = rpc_op_test_servicer.RpcOpTestServicer() + test_example_pb2_grpc.add_TestCaseServiceServicer_to_server( + servicer, server) + self._address = 'localhost:%d' % service_port + server.add_insecure_port(self._address) + server.start() + self._server = server + + def tearDown(self): + # TODO(ebrevdo): Figure out why this sometimes times out. + # self._service.ExitLoop() + # self._service_thread.join() + # self._server.stop() + super(RpcOpTest, self).tearDown() + + +if __name__ == '__main__': + test.main() diff --git a/tensorflow/contrib/rpc/python/kernel_tests/rpc_op_test_base.py b/tensorflow/contrib/rpc/python/kernel_tests/rpc_op_test_base.py new file mode 100644 index 0000000000000000000000000000000000000000..89f3ee1a1c52e4e292ef850ed375ad49b79fa1f5 --- /dev/null +++ b/tensorflow/contrib/rpc/python/kernel_tests/rpc_op_test_base.py @@ -0,0 +1,336 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================= + +"""Base class for RpcOp tests.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import itertools + +import numpy as np + +from tensorflow.contrib.proto.python.ops import decode_proto_op +from tensorflow.contrib.proto.python.ops import encode_proto_op +from tensorflow.contrib.rpc.python.kernel_tests import test_example_pb2 +from tensorflow.contrib.rpc.python.ops import rpc_op +from tensorflow.core.protobuf import config_pb2 +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors + +__all__ = ['I_WARNED_YOU', 'RpcOpTestBase'] + +I_WARNED_YOU = 'I warned you!' + + +class RpcOpTestBase(object): + # pylint: disable=missing-docstring,invalid-name + """Base class for RpcOp tests.""" + + def get_method_name(self, suffix): + raise NotImplementedError + + def rpc(self, *args, **kwargs): + return rpc_op.rpc(*args, protocol=self._protocol, **kwargs) + + def try_rpc(self, *args, **kwargs): + return rpc_op.try_rpc(*args, protocol=self._protocol, **kwargs) + + def testScalarHostPortRpc(self): + with self.test_session() as sess: + request_tensors = ( + test_example_pb2.TestCase(shape=[1, 2, 3]).SerializeToString()) + response_tensors = self.rpc( + method=self.get_method_name('IncrementTestShapes'), + address=self._address, + request=request_tensors) + self.assertEqual(response_tensors.shape, ()) + response_values = sess.run(response_tensors) + response_message = test_example_pb2.TestCase() + self.assertTrue(response_message.ParseFromString(response_values)) + self.assertAllEqual([2, 3, 4], response_message.shape) + + def testScalarHostPortTryRpc(self): + with self.test_session() as sess: + request_tensors = ( + test_example_pb2.TestCase(shape=[1, 2, 3]).SerializeToString()) + response_tensors, status_code, status_message = self.try_rpc( + method=self.get_method_name('IncrementTestShapes'), + 
address=self._address, + request=request_tensors) + self.assertEqual(status_code.shape, ()) + self.assertEqual(status_message.shape, ()) + self.assertEqual(response_tensors.shape, ()) + response_values, status_code_values, status_message_values = ( + sess.run((response_tensors, status_code, status_message))) + response_message = test_example_pb2.TestCase() + self.assertTrue(response_message.ParseFromString(response_values)) + self.assertAllEqual([2, 3, 4], response_message.shape) + # For the base Rpc op, don't expect to get error status back. + self.assertEqual(errors.OK, status_code_values) + self.assertEqual(b'', status_message_values) + + def testEmptyHostPortRpc(self): + with self.test_session() as sess: + request_tensors = [] + response_tensors = self.rpc( + method=self.get_method_name('IncrementTestShapes'), + address=self._address, + request=request_tensors) + self.assertAllEqual(response_tensors.shape, [0]) + response_values = sess.run(response_tensors) + self.assertAllEqual(response_values.shape, [0]) + + def testInvalidAddresses(self): + with self.test_session() as sess: + with self.assertRaisesOpError(self.invalid_method_string): + sess.run( + self.rpc( + method='/InvalidService.IncrementTestShapes', + address=self._address, + request='')) + + with self.assertRaisesOpError(self.invalid_method_string): + sess.run( + self.rpc( + method=self.get_method_name('InvalidMethodName'), + address=self._address, + request='')) + + # This also covers the case of address='' + # and address='localhost:293874293874' + with self.assertRaises(errors.UnavailableError): + sess.run( + self.rpc( + method=self.get_method_name('IncrementTestShapes'), + address='unix:/tmp/this_unix_socket_doesnt_exist_97820348!!@', + request='')) + + # Test invalid method with the TryRpc op + _, status_code_value, status_message_value = sess.run( + self.try_rpc( + method=self.get_method_name('InvalidMethodName'), + address=self._address, + request='')) + self.assertEqual(errors.UNIMPLEMENTED, 
status_code_value) + self.assertTrue( + self.invalid_method_string in status_message_value.decode('ascii')) + + def testAlwaysFailingMethod(self): + with self.test_session() as sess: + response_tensors = self.rpc( + method=self.get_method_name('AlwaysFailWithInvalidArgument'), + address=self._address, + request='') + self.assertEqual(response_tensors.shape, ()) + with self.assertRaisesOpError(I_WARNED_YOU): + sess.run(response_tensors) + + def testSometimesFailingMethodWithManyRequests(self): + with self.test_session() as sess: + # Fail hard by default. + response_tensors = self.rpc( + method=self.get_method_name('SometimesFailWithInvalidArgument'), + address=self._address, + request=[''] * 20) + self.assertEqual(response_tensors.shape, (20,)) + with self.assertRaisesOpError(I_WARNED_YOU): + sess.run(response_tensors) + + # Don't fail hard, use TryRpc - return the failing status instead. + response_tensors, status_code, status_message = self.try_rpc( + method=self.get_method_name('SometimesFailWithInvalidArgument'), + address=self._address, + request=[''] * 20) + self.assertEqual(response_tensors.shape, (20,)) + self.assertEqual(status_code.shape, (20,)) + self.assertEqual(status_message.shape, (20,)) + status_code_values, status_message_values = sess.run((status_code, + status_message)) + self.assertTrue([ + x in (errors.OK, errors.INVALID_ARGUMENT) for x in status_code_values + ]) + expected_message_values = np.where( + status_code_values == errors.INVALID_ARGUMENT, + I_WARNED_YOU.encode('ascii'), b'') + self.assertAllEqual(expected_message_values, status_message_values) + + def testVecHostPortRpc(self): + with self.test_session() as sess: + request_tensors = [ + test_example_pb2.TestCase( + shape=[i, i + 1, i + 2]).SerializeToString() for i in range(20) + ] + response_tensors = self.rpc( + method=self.get_method_name('IncrementTestShapes'), + address=self._address, + request=request_tensors) + self.assertEqual(response_tensors.shape, (20,)) + response_values = 
sess.run(response_tensors) + self.assertEqual(response_values.shape, (20,)) + for i in range(20): + response_message = test_example_pb2.TestCase() + self.assertTrue(response_message.ParseFromString(response_values[i])) + self.assertAllEqual([i + 1, i + 2, i + 3], response_message.shape) + + def testVecHostPortManyParallelRpcs(self): + with self.test_session() as sess: + request_tensors = [ + test_example_pb2.TestCase( + shape=[i, i + 1, i + 2]).SerializeToString() for i in range(20) + ] + many_response_tensors = [ + self.rpc( + method=self.get_method_name('IncrementTestShapes'), + address=self._address, + request=request_tensors) for _ in range(10) + ] + # Launch parallel 10 calls to the RpcOp, each containing + # 20 rpc requests. + many_response_values = sess.run(many_response_tensors) + self.assertEqual(10, len(many_response_values)) + for response_values in many_response_values: + self.assertEqual(response_values.shape, (20,)) + for i in range(20): + response_message = test_example_pb2.TestCase() + self.assertTrue(response_message.ParseFromString(response_values[i])) + self.assertAllEqual([i + 1, i + 2, i + 3], response_message.shape) + + def testVecHostPortRpcUsingEncodeAndDecodeProto(self): + with self.test_session() as sess: + request_tensors = encode_proto_op.encode_proto( + message_type='tensorflow.contrib.rpc.TestCase', + field_names=['shape'], + sizes=[[3]] * 20, + values=[ + [[i, i + 1, i + 2] for i in range(20)], + ]) + response_tensor_strings = self.rpc( + method=self.get_method_name('IncrementTestShapes'), + address=self._address, + request=request_tensors) + _, (response_shape,) = decode_proto_op.decode_proto( + bytes=response_tensor_strings, + message_type='tensorflow.contrib.rpc.TestCase', + field_names=['shape'], + output_types=[dtypes.int32]) + response_shape_values = sess.run(response_shape) + self.assertAllEqual([[i + 1, i + 2, i + 3] + for i in range(20)], response_shape_values) + + def 
testVecHostPortRpcCancelsUponSessionTimeOutWhenSleepingForever(self): + with self.test_session() as sess: + request_tensors = [''] * 25 # This will launch 25 RPC requests. + response_tensors = self.rpc( + method=self.get_method_name('SleepForever'), + address=self._address, + request=request_tensors) + for timeout_ms in [1, 500, 1000]: + options = config_pb2.RunOptions(timeout_in_ms=timeout_ms) + with self.assertRaises((errors.UnavailableError, + errors.DeadlineExceededError)): + sess.run(response_tensors, options=options) + + def testVecHostPortRpcCancelsUponConfiguredTimeOutWhenSleepingForever(self): + with self.test_session() as sess: + request_tensors = [''] * 25 # This will launch 25 RPC requests. + response_tensors = self.rpc( + method=self.get_method_name('SleepForever'), + address=self._address, + timeout_in_ms=1000, + request=request_tensors) + with self.assertRaises(errors.DeadlineExceededError): + sess.run(response_tensors) + + def testTryRpcPropagatesDeadlineErrorWithSometimesTimingOutRequests(self): + with self.test_session() as sess: + response_tensors, status_code, status_message = self.try_rpc( + method=self.get_method_name('SometimesSleepForever'), + timeout_in_ms=1000, + address=self._address, + request=[''] * 20) + self.assertEqual(response_tensors.shape, (20,)) + self.assertEqual(status_code.shape, (20,)) + self.assertEqual(status_message.shape, (20,)) + status_code_values = sess.run(status_code) + self.assertTrue([ + x in (errors.OK, errors.DEADLINE_EXCEEDED) for x in status_code_values + ]) + + def testTryRpcWithMultipleAddressesSingleRequest(self): + flatten = lambda x: list(itertools.chain.from_iterable(x)) + with self.test_session() as sess: + addresses = flatten([[ + self._address, 'unix:/tmp/this_unix_socket_doesnt_exist_97820348!!@' + ] for _ in range(10)]) + request = test_example_pb2.TestCase(shape=[0, 1, 2]).SerializeToString() + response_tensors, status_code, _ = self.try_rpc( + method=self.get_method_name('IncrementTestShapes'), + 
address=addresses, + request=request) + response_tensors_values, status_code_values = sess.run((response_tensors, + status_code)) + self.assertAllEqual( + flatten([errors.OK, errors.UNAVAILABLE] for _ in range(10)), + status_code_values) + for i in range(10): + self.assertTrue(response_tensors_values[2 * i]) + self.assertFalse(response_tensors_values[2 * i + 1]) + + def testTryRpcWithMultipleMethodsSingleRequest(self): + flatten = lambda x: list(itertools.chain.from_iterable(x)) + with self.test_session() as sess: + methods = flatten( + [[self.get_method_name('IncrementTestShapes'), 'InvalidMethodName'] + for _ in range(10)]) + request = test_example_pb2.TestCase(shape=[0, 1, 2]).SerializeToString() + response_tensors, status_code, _ = self.try_rpc( + method=methods, address=self._address, request=request) + response_tensors_values, status_code_values = sess.run((response_tensors, + status_code)) + self.assertAllEqual( + flatten([errors.OK, errors.UNIMPLEMENTED] for _ in range(10)), + status_code_values) + for i in range(10): + self.assertTrue(response_tensors_values[2 * i]) + self.assertFalse(response_tensors_values[2 * i + 1]) + + def testTryRpcWithMultipleAddressesAndRequests(self): + flatten = lambda x: list(itertools.chain.from_iterable(x)) + with self.test_session() as sess: + addresses = flatten([[ + self._address, 'unix:/tmp/this_unix_socket_doesnt_exist_97820348!!@' + ] for _ in range(10)]) + requests = [ + test_example_pb2.TestCase( + shape=[i, i + 1, i + 2]).SerializeToString() for i in range(20) + ] + response_tensors, status_code, _ = self.try_rpc( + method=self.get_method_name('IncrementTestShapes'), + address=addresses, + request=requests) + response_tensors_values, status_code_values = sess.run((response_tensors, + status_code)) + self.assertAllEqual( + flatten([errors.OK, errors.UNAVAILABLE] for _ in range(10)), + status_code_values) + for i in range(20): + if i % 2 == 1: + self.assertFalse(response_tensors_values[i]) + else: + response_message = 
test_example_pb2.TestCase() + self.assertTrue( + response_message.ParseFromString(response_tensors_values[i])) + self.assertAllEqual([i + 1, i + 2, i + 3], response_message.shape) diff --git a/tensorflow/contrib/rpc/python/kernel_tests/rpc_op_test_servicer.py b/tensorflow/contrib/rpc/python/kernel_tests/rpc_op_test_servicer.py new file mode 100644 index 0000000000000000000000000000000000000000..7cbd636cb16e3befc9ae27cb231696634e859a22 --- /dev/null +++ b/tensorflow/contrib/rpc/python/kernel_tests/rpc_op_test_servicer.py @@ -0,0 +1,101 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +"""Test servicer for RpcOp tests.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import random +import time + +import grpc + +from tensorflow.contrib.rpc.python.kernel_tests import rpc_op_test_base +from tensorflow.contrib.rpc.python.kernel_tests import test_example_pb2_grpc + + +class RpcOpTestServicer(test_example_pb2_grpc.TestCaseServiceServicer): + """Test servicer for RpcOp tests.""" + + def IncrementTestShapes(self, request, context): + """Increment the entries in the shape attribute of request. + + Args: + request: input TestCase. + context: the rpc context. + + Returns: + output TestCase. 
+ """ + for i in range(len(request.shape)): + request.shape[i] += 1 + return request + + def AlwaysFailWithInvalidArgument(self, request, context): + """Always fails with an InvalidArgument status. + + Args: + request: input TestCase. + context: the rpc context. + + Returns: + output TestCase. + """ + del request + context.set_code(grpc.StatusCode.INVALID_ARGUMENT) + context.set_details(rpc_op_test_base.I_WARNED_YOU) + + def SometimesFailWithInvalidArgument(self, request, context): + """Sometimes fails with an InvalidArgument status. + + Args: + request: input TestCase. + context: the rpc context. + + Returns: + output TestCase. + """ + if random.randint(0, 1) == 1: + context.set_code(grpc.StatusCode.INVALID_ARGUMENT) + context.set_details(rpc_op_test_base.I_WARNED_YOU) + return request + + def SleepForever(self, request, context): + """Sleeps forever. + + Args: + request: input TestCase. + context: the rpc context. + + Returns: + output TestCase. + """ + # TODO(ebrevdo): Make this async wait like the stubby version. + time.sleep(5) + + def SometimesSleepForever(self, request, context): + """Sometimes sleeps forever. + + Args: + request: input TestCase. + context: the rpc context. + + Returns: + output TestCase. + """ + if random.randint(0, 1) == 1: + time.sleep(5) + return request diff --git a/tensorflow/contrib/rpc/python/kernel_tests/test_example.proto b/tensorflow/contrib/rpc/python/kernel_tests/test_example.proto new file mode 100644 index 0000000000000000000000000000000000000000..96f4550f62bc17e713abe1f3843ec0964f57b046 --- /dev/null +++ b/tensorflow/contrib/rpc/python/kernel_tests/test_example.proto @@ -0,0 +1,171 @@ +// Test description and protos to work with it. +// +// Many of the protos in this file are for unit tests that haven't been written yet. + +syntax = "proto2"; + +import "tensorflow/core/framework/types.proto"; + +package tensorflow.contrib.rpc; + +// A TestCase holds a proto and a bunch of assertions +// about how it should decode. 
+message TestCase { + // A batch of primitives to be serialized and decoded. + repeated RepeatedPrimitiveValue primitive = 1; + // The shape of the batch. + repeated int32 shape = 2; + // Expected sizes for each field. + repeated int32 sizes = 3; + // Expected values for each field. + repeated FieldSpec field = 4; +}; + +service TestCaseService { + // Copy input, and increment each entry in 'shape' by 1. + rpc IncrementTestShapes(TestCase) returns (TestCase) { + } + + // Sleep forever. + rpc SleepForever(TestCase) returns (TestCase) { + } + + // Sleep forever 50% of the time, return immediately the other 50%. + rpc SometimesSleepForever(TestCase) returns (TestCase) { + } + + // Always fails with InvalidArgument. + rpc AlwaysFailWithInvalidArgument(TestCase) returns (TestCase) { + } + + // Fails with InvalidArgument 50% of the time. + rpc SometimesFailWithInvalidArgument(TestCase) returns (TestCase) { + } +}; + +// FieldSpec describes the expected output for a single field. +message FieldSpec { + optional string name = 1; + optional tensorflow.DataType dtype = 2; + optional RepeatedPrimitiveValue expected = 3; +}; + +message TestValue { + optional PrimitiveValue primitive_value = 1; + optional EnumValue enum_value = 2; + optional MessageValue message_value = 3; + optional RepeatedMessageValue repeated_message_value = 4; + optional RepeatedPrimitiveValue repeated_primitive_value = 6; +} + +message PrimitiveValue { + optional double double_value = 1; + optional float float_value = 2; + optional int64 int64_value = 3; + optional uint64 uint64_value = 4; + optional int32 int32_value = 5; + optional fixed64 fixed64_value = 6; + optional fixed32 fixed32_value = 7; + optional bool bool_value = 8; + optional string string_value = 9; + optional bytes bytes_value = 12; + optional uint32 uint32_value = 13; + optional sfixed32 sfixed32_value = 15; + optional sfixed64 sfixed64_value = 16; + optional sint32 sint32_value = 17; + optional sint64 sint64_value = 18; +} + +// NOTE: 
This definition must be kept in sync with PackedPrimitiveValue. +message RepeatedPrimitiveValue { + repeated double double_value = 1; + repeated float float_value = 2; + repeated int64 int64_value = 3; + repeated uint64 uint64_value = 4; + repeated int32 int32_value = 5; + repeated fixed64 fixed64_value = 6; + repeated fixed32 fixed32_value = 7; + repeated bool bool_value = 8; + repeated string string_value = 9; + repeated bytes bytes_value = 12; + repeated uint32 uint32_value = 13; + repeated sfixed32 sfixed32_value = 15; + repeated sfixed64 sfixed64_value = 16; + repeated sint32 sint32_value = 17; + repeated sint64 sint64_value = 18; + repeated PrimitiveValue message_value = 19; +} + +// A PackedPrimitiveValue looks exactly the same as a RepeatedPrimitiveValue +// in the text format, but the binary serialization is different. +// We test the packed representations by loading the same test cases +// using this definition instead of RepeatedPrimitiveValue. +// NOTE: This definition must be kept in sync with RepeatedPrimitiveValue +// in every way except the packed=true declaration. 
+message PackedPrimitiveValue { + repeated double double_value = 1 [packed = true]; + repeated float float_value = 2 [packed = true]; + repeated int64 int64_value = 3 [packed = true]; + repeated uint64 uint64_value = 4 [packed = true]; + repeated int32 int32_value = 5 [packed = true]; + repeated fixed64 fixed64_value = 6 [packed = true]; + repeated fixed32 fixed32_value = 7 [packed = true]; + repeated bool bool_value = 8 [packed = true]; + repeated string string_value = 9; + repeated bytes bytes_value = 12; + repeated uint32 uint32_value = 13 [packed = true]; + repeated sfixed32 sfixed32_value = 15 [packed = true]; + repeated sfixed64 sfixed64_value = 16 [packed = true]; + repeated sint32 sint32_value = 17 [packed = true]; + repeated sint64 sint64_value = 18 [packed = true]; + repeated PrimitiveValue message_value = 19; +} + +message EnumValue { + enum Color { + RED = 0; + ORANGE = 1; + YELLOW = 2; + GREEN = 3; + BLUE = 4; + INDIGO = 5; + VIOLET = 6; + }; + optional Color enum_value = 14; + repeated Color repeated_enum_value = 15; +} + + +message InnerMessageValue { + optional float float_value = 2; + repeated bytes bytes_values = 8; +} + +message MiddleMessageValue { + repeated int32 int32_values = 5; + optional InnerMessageValue message_value = 11; + optional uint32 uint32_value = 13; +} + +message MessageValue { + optional double double_value = 1; + optional MiddleMessageValue message_value = 11; +} + +message RepeatedMessageValue { + message NestedMessageValue { + optional float float_value = 2; + repeated bytes bytes_values = 8; + } + + repeated NestedMessageValue message_values = 11; +} + +// Message containing fields with field numbers higher than any field above. An +// instance of this message is prepended to each binary message in the test to +// exercise the code path that handles fields encoded out of order of field +// number. 
+message ExtraFields { + optional string string_value = 1776; + optional bool bool_value = 1777; +} diff --git a/tensorflow/contrib/rpc/python/ops/BUILD b/tensorflow/contrib/rpc/python/ops/BUILD new file mode 100644 index 0000000000000000000000000000000000000000..84d2a1832f14b61ec313e7a1a00b0672bc410cfb --- /dev/null +++ b/tensorflow/contrib/rpc/python/ops/BUILD @@ -0,0 +1,24 @@ +package(default_visibility = ["//visibility:public"]) + +licenses(["notice"]) # Apache 2.0 + +exports_files(["LICENSE"]) + +load("//tensorflow:tensorflow.bzl", "tf_gen_op_wrapper_py") + +py_library( + name = "rpc_op_py", + srcs = ["rpc_op.py"], + deps = [ + ":gen_rpc_op_py", + "//tensorflow/python:framework_ops", + ], +) + +tf_gen_op_wrapper_py( + name = "gen_rpc_op_py", + out = "gen_rpc_op.py", + deps = [ + "//tensorflow/core:rpc_ops_op_lib", + ], +) diff --git a/tensorflow/contrib/rpc/python/ops/rpc_op.py b/tensorflow/contrib/rpc/python/ops/rpc_op.py new file mode 100644 index 0000000000000000000000000000000000000000..e1b6c41137828950e73757579dca1eba4adf2ae4 --- /dev/null +++ b/tensorflow/contrib/rpc/python/ops/rpc_op.py @@ -0,0 +1,26 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================= + +# pylint: disable=wildcard-import,unused-import +"""RPC communication.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.rpc.python.ops.gen_rpc_op import rpc +from tensorflow.contrib.rpc.python.ops.gen_rpc_op import try_rpc +from tensorflow.python.framework import ops +ops.NotDifferentiable("Rpc") +ops.NotDifferentiable("TryRpc") diff --git a/tensorflow/contrib/saved_model/BUILD b/tensorflow/contrib/saved_model/BUILD index faad40d33508727711529ab789e6cdb701df165c..26fd4e2023806765ea4088f4c13a780ca7338bff 100644 --- a/tensorflow/contrib/saved_model/BUILD +++ b/tensorflow/contrib/saved_model/BUILD @@ -48,15 +48,14 @@ py_library( ], ) -py_test( - name = "reader_test", - size = "small", - srcs = ["python/saved_model/reader_test.py"], +py_library( + name = "reader", + srcs = ["python/saved_model/reader.py"], srcs_version = "PY2AND3", - visibility = ["//visibility:private"], + tags = ["no_windows"], # TODO: needs investigation on Windows + visibility = ["//visibility:public"], deps = [ ":saved_model_py", - "//tensorflow/python:client_testlib", "//tensorflow/python:framework_for_generated_wrappers", "//tensorflow/python:lib", "//tensorflow/python:variables", @@ -65,6 +64,19 @@ py_test( ], ) +py_test( + name = "reader_test", + size = "small", + srcs = ["python/saved_model/reader_test.py"], + srcs_version = "PY2AND3", + tags = ["no_windows"], # TODO: needs investigation on Windows + visibility = ["//visibility:private"], + deps = [ + ":reader", + "//tensorflow/python:client_testlib", + ], +) + py_test( name = "signature_def_utils_test", size = "small", diff --git a/tensorflow/contrib/seq2seq/BUILD b/tensorflow/contrib/seq2seq/BUILD index a62069a252155a8bd1c6251d9dd3a4564a81c295..1a1591d798f6f904e23987d9d7a60193c124c20e 100644 --- a/tensorflow/contrib/seq2seq/BUILD +++ 
b/tensorflow/contrib/seq2seq/BUILD @@ -3,9 +3,12 @@ licenses(["notice"]) # Apache 2.0 -exports_files(["LICENSE"]) +package(default_visibility = [ + "//learning/brain/google/xla/tests:__subpackages__", + "//tensorflow:__subpackages__", +]) -package(default_visibility = ["//tensorflow:__subpackages__"]) +exports_files(["LICENSE"]) load("//tensorflow:tensorflow.bzl", "cuda_py_test") load("//tensorflow:tensorflow.bzl", "tf_custom_op_py_library") @@ -38,6 +41,7 @@ tf_custom_op_py_library( "//tensorflow/python:check_ops", "//tensorflow/python:clip_ops", "//tensorflow/python:control_flow_ops", + "//tensorflow/python:control_flow_util", "//tensorflow/python:embedding_ops", "//tensorflow/python:framework_for_generated_wrappers", "//tensorflow/python:functional_ops", diff --git a/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_test.py b/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_test.py index d508cf3f9db81aa7c3a1174ed13f2310b0595b04..cd162bae25aa1c1b6718b8e5b0b8687e5b80eab3 100644 --- a/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_test.py +++ b/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_test.py @@ -30,6 +30,7 @@ from tensorflow.contrib.seq2seq.python.ops import helper as helper_py from tensorflow.contrib.seq2seq.python.ops import basic_decoder from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.layers import core as layers_core from tensorflow.python.ops import array_ops from tensorflow.python.ops import init_ops from tensorflow.python.ops import math_ops @@ -110,7 +111,12 @@ class AttentionWrapperTest(test.TestCase): alignment_history=False, expected_final_alignment_history=None, attention_layer_size=6, + attention_layer=None, name=''): + attention_layer_sizes = ( + [attention_layer_size] if attention_layer_size is not None else None) + attention_layers = ( + [attention_layer] if attention_layer is not None else None) 
self._testWithMaybeMultiAttention( is_multi=False, create_attention_mechanisms=[create_attention_mechanism], @@ -119,7 +125,8 @@ class AttentionWrapperTest(test.TestCase): attention_mechanism_depths=[attention_mechanism_depth], alignment_history=alignment_history, expected_final_alignment_history=expected_final_alignment_history, - attention_layer_sizes=[attention_layer_size], + attention_layer_sizes=attention_layer_sizes, + attention_layers=attention_layers, name=name) def _testWithMaybeMultiAttention(self, @@ -131,6 +138,7 @@ class AttentionWrapperTest(test.TestCase): alignment_history=False, expected_final_alignment_history=None, attention_layer_sizes=None, + attention_layers=None, name=''): # Allow is_multi to be True with a single mechanism to enable test for # passing in a single mechanism in a list. @@ -144,12 +152,18 @@ class AttentionWrapperTest(test.TestCase): encoder_output_depth = 10 cell_depth = 9 - if attention_layer_sizes is None: - attention_depth = encoder_output_depth * len(create_attention_mechanisms) - else: + if attention_layer_sizes is not None: # Compute sum of attention_layer_sizes. Use encoder_output_depth if None. attention_depth = sum([attention_layer_size or encoder_output_depth for attention_layer_size in attention_layer_sizes]) + elif attention_layers is not None: + # Compute sum of attention_layers output depth. 
+ attention_depth = sum( + attention_layer.compute_output_shape( + [batch_size, cell_depth + encoder_output_depth])[-1].value + for attention_layer in attention_layers) + else: + attention_depth = encoder_output_depth * len(create_attention_mechanisms) decoder_inputs = array_ops.placeholder_with_default( np.random.randn(batch_size, decoder_max_time, @@ -171,13 +185,20 @@ class AttentionWrapperTest(test.TestCase): with vs.variable_scope( 'root', initializer=init_ops.random_normal_initializer(stddev=0.01, seed=3)): + attention_layer_size = attention_layer_sizes + attention_layer = attention_layers + if not is_multi: + if attention_layer_size is not None: + attention_layer_size = attention_layer_size[0] + if attention_layer is not None: + attention_layer = attention_layer[0] cell = rnn_cell.LSTMCell(cell_depth) cell = wrapper.AttentionWrapper( cell, attention_mechanisms if is_multi else attention_mechanisms[0], - attention_layer_size=(attention_layer_sizes if is_multi - else attention_layer_sizes[0]), - alignment_history=alignment_history) + attention_layer_size=attention_layer_size, + alignment_history=alignment_history, + attention_layer=attention_layer) helper = helper_py.TrainingHelper(decoder_inputs, decoder_sequence_length) my_decoder = basic_decoder.BasicDecoder( @@ -260,6 +281,41 @@ class AttentionWrapperTest(test.TestCase): expected_final_alignment_history, final_alignment_history_info) + def testBahdanauNormalizedDType(self): + for dtype in [np.float16, np.float32, np.float64]: + num_units = 128 + encoder_outputs = array_ops.placeholder(dtype, shape=[64, None, 256]) + encoder_sequence_length = array_ops.placeholder(dtypes.int32, shape=[64]) + decoder_inputs = array_ops.placeholder(dtype, shape=[64, None, 128]) + decoder_sequence_length = array_ops.placeholder(dtypes.int32, shape=[64]) + batch_size = 64 + attention_mechanism = wrapper.BahdanauAttention( + num_units=num_units, + memory=encoder_outputs, + memory_sequence_length=encoder_sequence_length, + 
normalize=True, + dtype=dtype, + ) + cell = rnn_cell.LSTMCell(num_units) + cell = wrapper.AttentionWrapper(cell, attention_mechanism) + + helper = helper_py.TrainingHelper(decoder_inputs, + decoder_sequence_length) + my_decoder = basic_decoder.BasicDecoder( + cell=cell, + helper=helper, + initial_state=cell.zero_state( + dtype=dtype, batch_size=batch_size)) + + final_outputs, final_state, _ = decoder.dynamic_decode(my_decoder) + self.assertTrue( + isinstance(final_outputs, basic_decoder.BasicDecoderOutput)) + self.assertEqual(final_outputs.rnn_output.dtype, dtype) + self.assertTrue( + isinstance(final_state, wrapper.AttentionWrapperState)) + self.assertTrue( + isinstance(final_state.cell_state, rnn_cell.LSTMStateTuple)) + def testBahdanauNotNormalized(self): create_attention_mechanism = wrapper.BahdanauAttention @@ -355,11 +411,11 @@ class AttentionWrapperTest(test.TestCase): def testLuongScaledDType(self): # Test case for GitHub issue 18099 - for dtype in [np.float16, np.float32, np.float64]: + for dt in [np.float16, np.float32, np.float64]: num_units = 128 - encoder_outputs = array_ops.placeholder(dtype, shape=[64, None, 256]) + encoder_outputs = array_ops.placeholder(dt, shape=[64, None, 256]) encoder_sequence_length = array_ops.placeholder(dtypes.int32, shape=[64]) - decoder_inputs = array_ops.placeholder(dtype, shape=[64, None, 128]) + decoder_inputs = array_ops.placeholder(dt, shape=[64, None, 128]) decoder_sequence_length = array_ops.placeholder(dtypes.int32, shape=[64]) batch_size = 64 attention_mechanism = wrapper.LuongAttention( @@ -367,7 +423,7 @@ class AttentionWrapperTest(test.TestCase): memory=encoder_outputs, memory_sequence_length=encoder_sequence_length, scale=True, - dtype=dtype, + dtype=dt, ) cell = rnn_cell.LSTMCell(num_units) cell = wrapper.AttentionWrapper(cell, attention_mechanism) @@ -378,12 +434,12 @@ class AttentionWrapperTest(test.TestCase): cell=cell, helper=helper, initial_state=cell.zero_state( - dtype=dtype, batch_size=batch_size)) + 
dtype=dt, batch_size=batch_size)) final_outputs, final_state, _ = decoder.dynamic_decode(my_decoder) self.assertTrue( isinstance(final_outputs, basic_decoder.BasicDecoderOutput)) - self.assertEqual(final_outputs.rnn_output.dtype, dtype) + self.assertEqual(final_outputs.rnn_output.dtype, dt) self.assertTrue( isinstance(final_state, wrapper.AttentionWrapperState)) self.assertTrue( @@ -797,6 +853,48 @@ class AttentionWrapperTest(test.TestCase): expected_final_alignment_history=expected_final_alignment_history, name='testMultiAttention') + def testMultiAttentionWithLayerInstances(self): + create_attention_mechanisms = ( + wrapper.BahdanauAttention, wrapper.LuongAttention) + + expected_final_output = BasicDecoderOutput( + rnn_output=ResultSummary( + shape=(5, 3, 7), dtype=dtype('float32'), mean=0.0011709079), + sample_id=ResultSummary( + shape=(5, 3), dtype=dtype('int32'), mean=3.2000000000000002)) + expected_final_state = AttentionWrapperState( + cell_state=LSTMStateTuple( + c=ResultSummary( + shape=(5, 9), dtype=dtype('float32'), mean=-0.0038725811), + h=ResultSummary( + shape=(5, 9), dtype=dtype('float32'), mean=-0.0019329828)), + attention=ResultSummary( + shape=(5, 7), dtype=dtype('float32'), mean=0.001174294), + time=3, + alignments=( + ResultSummary(shape=(5, 8), dtype=dtype('float32'), mean=0.125), + ResultSummary(shape=(5, 8), dtype=dtype('float32'), mean=0.125)), + attention_state=( + ResultSummary(shape=(5, 8), dtype=dtype('float32'), mean=0.125), + ResultSummary(shape=(5, 8), dtype=dtype('float32'), mean=0.125)), + alignment_history=()) + + expected_final_alignment_history = ( + ResultSummary(shape=(3, 5, 8), dtype=dtype('float32'), mean=0.125), + ResultSummary(shape=(3, 5, 8), dtype=dtype('float32'), mean=0.125)) + + self._testWithMaybeMultiAttention( + True, + create_attention_mechanisms, + expected_final_output, + expected_final_state, + attention_mechanism_depths=[9, 9], + attention_layers=[layers_core.Dense(3, use_bias=False), + layers_core.Dense(4, 
use_bias=False)], + alignment_history=True, + expected_final_alignment_history=expected_final_alignment_history, + name='testMultiAttention') + def testLuongMonotonicHard(self): # Run attention mechanism with mode='hard', make sure probabilities are hard b, t, u, d = 10, 20, 30, 40 diff --git a/tensorflow/contrib/seq2seq/python/kernel_tests/decoder_test.py b/tensorflow/contrib/seq2seq/python/kernel_tests/decoder_test.py index ac830ae98e5f9878f0972fb851fd5edd1c1a8789..b549cbf568f254cbf18456145af751a8245dd379 100644 --- a/tensorflow/contrib/seq2seq/python/kernel_tests/decoder_test.py +++ b/tensorflow/contrib/seq2seq/python/kernel_tests/decoder_test.py @@ -92,14 +92,18 @@ class DynamicDecodeRNNTest(test.TestCase): # Mostly a smoke test time_steps = max_out + expected_length = sequence_length if maximum_iterations is not None: time_steps = min(max_out, maximum_iterations) + expected_length = [min(x, maximum_iterations) for x in expected_length] self.assertEqual( _t((batch_size, time_steps, cell_depth)), sess_results["final_outputs"].rnn_output.shape) self.assertEqual( _t((batch_size, time_steps)), sess_results["final_outputs"].sample_id.shape) + self.assertItemsEqual(expected_length, + sess_results["final_sequence_length"]) def testDynamicDecodeRNNBatchMajor(self): self._testDynamicDecodeRNN(time_major=False) diff --git a/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py b/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py index 9e0d69593f8727ad682b05141e9333cbeabef702..1c9d179e3c55ad07fcf709f66028c91c20e8eea0 100644 --- a/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py +++ b/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py @@ -472,7 +472,8 @@ def _bahdanau_score(processed_query, keys, normalize): # Scalar used in weight normalization g = variable_scope.get_variable( "attention_g", dtype=dtype, - initializer=math.sqrt((1. / num_units))) + initializer=init_ops.constant_initializer(math.sqrt((1. 
/ num_units))), + shape=()) # Bias added prior to the nonlinearity b = variable_scope.get_variable( "attention_b", [num_units], dtype=dtype, @@ -610,8 +611,8 @@ def monotonic_attention(p_choose_i, previous_attention, mode): addition, once an input sequence element is attended to at a given output timestep, elements occurring before it cannot be attended to at subsequent output timesteps. This function generates attention distributions according - to these assumptions. For more information, see ``Online and Linear-Time - Attention by Enforcing Monotonic Alignments''. + to these assumptions. For more information, see `Online and Linear-Time + Attention by Enforcing Monotonic Alignments`. Args: p_choose_i: Probability of choosing input sequence/memory element i. Should @@ -654,7 +655,7 @@ def monotonic_attention(p_choose_i, previous_attention, mode): shifted_1mp_choose_i = array_ops.concat( [array_ops.ones((batch_size, 1)), 1 - p_choose_i[:, :-1]], 1) # Compute attention distribution recursively as - # q[i] = (1 - p_choose_i[i])*q[i - 1] + previous_attention[i] + # q[i] = (1 - p_choose_i[i - 1])*q[i - 1] + previous_attention[i] # attention[i] = p_choose_i[i]*q[i] attention = p_choose_i*array_ops.transpose(functional_ops.scan( # Need to use reshape to remind TF of the shape between loop iterations @@ -1082,7 +1083,8 @@ class AttentionWrapper(rnn_cell_impl.RNNCell): cell_input_fn=None, output_attention=True, initial_cell_state=None, - name=None): + name=None, + attention_layer=None): """Construct the `AttentionWrapper`. **NOTE** If you are using the `BeamSearchDecoder` with a cell wrapped in @@ -1125,7 +1127,8 @@ class AttentionWrapper(rnn_cell_impl.RNNCell): (default), use the context as attention at each time step. Otherwise, feed the context and cell output into the attention layer to generate attention at each time step. If attention_mechanism is a list, - attention_layer_size must be a list of the same length. 
+ attention_layer_size must be a list of the same length. If + attention_layer is set, this must be None. alignment_history: Python boolean, whether to store alignment history from all time steps in the final output state (currently stored as a time major `TensorArray` on which you must call `stack()`). @@ -1145,12 +1148,19 @@ class AttentionWrapper(rnn_cell_impl.RNNCell): does not match the batch size of `initial_cell_state`, proper behavior is not guaranteed. name: Name to use when creating ops. + attention_layer: A list of `tf.layers.Layer` instances or a + single `tf.layers.Layer` instance taking the context and cell output as + inputs to generate attention at each time step. If None (default), use + the context as attention at each time step. If attention_mechanism is a + list, attention_layer must be a list of the same length. If + attention_layer_size is set, this must be None. Raises: TypeError: `attention_layer_size` is not None and (`attention_mechanism` is a list but `attention_layer_size` is not; or vice versa). ValueError: if `attention_layer_size` is not None, `attention_mechanism` - is a list, and its length does not match that of `attention_layer_size`. + is a list, and its length does not match that of `attention_layer_size`; + if `attention_layer_size` and `attention_layer` are set simultaneously. 
""" super(AttentionWrapper, self).__init__(name=name) rnn_cell_impl.assert_like_rnncell("cell", cell) @@ -1181,6 +1191,10 @@ class AttentionWrapper(rnn_cell_impl.RNNCell): "cell_input_fn must be callable, saw type: %s" % type(cell_input_fn).__name__) + if attention_layer_size is not None and attention_layer is not None: + raise ValueError("Only one of attention_layer_size and attention_layer " + "should be set") + if attention_layer_size is not None: attention_layer_sizes = tuple( attention_layer_size @@ -1199,6 +1213,22 @@ class AttentionWrapper(rnn_cell_impl.RNNCell): dtype=attention_mechanisms[i].dtype) for i, attention_layer_size in enumerate(attention_layer_sizes)) self._attention_layer_size = sum(attention_layer_sizes) + elif attention_layer is not None: + self._attention_layers = tuple( + attention_layer + if isinstance(attention_layer, (list, tuple)) + else (attention_layer,)) + if len(self._attention_layers) != len(attention_mechanisms): + raise ValueError( + "If provided, attention_layer must contain exactly one " + "layer per attention_mechanism, saw: %d vs %d" + % (len(self._attention_layers), len(attention_mechanisms))) + self._attention_layer_size = sum( + layer.compute_output_shape( + [None, + cell.output_size + mechanism.values.shape[-1].value])[-1].value + for layer, mechanism in zip( + self._attention_layers, attention_mechanisms)) else: self._attention_layers = None self._attention_layer_size = sum( diff --git a/tensorflow/contrib/seq2seq/python/ops/decoder.py b/tensorflow/contrib/seq2seq/python/ops/decoder.py index 898493662d7594f9996400a9636378db3c6b4cd1..e69725ff8ab1ba4de880c914a6f5fdad5e54566d 100644 --- a/tensorflow/contrib/seq2seq/python/ops/decoder.py +++ b/tensorflow/contrib/seq2seq/python/ops/decoder.py @@ -28,6 +28,7 @@ from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import tensor_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops +from 
tensorflow.python.ops import control_flow_util from tensorflow.python.ops import math_ops from tensorflow.python.ops import rnn from tensorflow.python.ops import rnn_cell_impl @@ -181,6 +182,15 @@ def dynamic_decode(decoder, raise TypeError("Expected decoder to be type Decoder, but saw: %s" % type(decoder)) + def _is_xla_tensor(tensor): + try: + op = tensor.op + except AttributeError: + return False + if control_flow_util.IsInXLAContext(op): + return True + return False + with variable_scope.variable_scope(scope, "decoder") as varscope: # Properly cache variable values inside the while_loop if varscope.caching_device is None: @@ -198,6 +208,11 @@ def dynamic_decode(decoder, decoder.output_dtype, decoder.batch_size) + is_xla = False + if any([_is_xla_tensor(i) for i in nest.flatten(initial_inputs)]): + is_xla = True + if is_xla and maximum_iterations is None: + raise ValueError("maximum_iterations is required for XLA compilation.") if maximum_iterations is not None: initial_finished = math_ops.logical_or( initial_finished, 0 >= maximum_iterations) @@ -215,11 +230,13 @@ def dynamic_decode(decoder, batch_size, name="batch_size")) return tensor_shape.TensorShape([batch_size]).concatenate(from_shape) + dynamic_size = maximum_iterations is None or not is_xla + def _create_ta(s, d): return tensor_array_ops.TensorArray( dtype=d, - size=0, - dynamic_size=True, + size=0 if dynamic_size else maximum_iterations, + dynamic_size=dynamic_size, element_shape=_shape(decoder.batch_size, s)) initial_outputs_ta = nest.map_structure(_create_ta, decoder.output_size, @@ -251,11 +268,8 @@ def dynamic_decode(decoder, next_finished = decoder_finished else: next_finished = math_ops.logical_or(decoder_finished, finished) - if maximum_iterations is not None: - next_finished = math_ops.logical_or( - next_finished, time + 1 >= maximum_iterations) next_sequence_lengths = array_ops.where( - math_ops.logical_and(math_ops.logical_not(finished), next_finished), + math_ops.logical_not(finished), 
array_ops.fill(array_ops.shape(sequence_lengths), time + 1), sequence_lengths) @@ -296,11 +310,16 @@ def dynamic_decode(decoder, res = control_flow_ops.while_loop( condition, body, - loop_vars=[ - initial_time, initial_outputs_ta, initial_state, initial_inputs, - initial_finished, initial_sequence_lengths, - ], + loop_vars=( + initial_time, + initial_outputs_ta, + initial_state, + initial_inputs, + initial_finished, + initial_sequence_lengths, + ), parallel_iterations=parallel_iterations, + maximum_iterations=maximum_iterations, swap_memory=swap_memory) final_outputs_ta = res[1] diff --git a/tensorflow/contrib/session_bundle/BUILD b/tensorflow/contrib/session_bundle/BUILD index 31717305e741894473a57973bf34be9a224d0db1..9c0885918071c25ab65cb4044bc19ea22c55442a 100644 --- a/tensorflow/contrib/session_bundle/BUILD +++ b/tensorflow/contrib/session_bundle/BUILD @@ -151,6 +151,7 @@ py_test( name = "gc_test", srcs = ["gc_test.py"], srcs_version = "PY2AND3", + tags = ["no_windows"], # TODO: needs investigation on Windows visibility = ["//visibility:private"], deps = [ ":gc", diff --git a/tensorflow/contrib/signal/python/kernel_tests/mel_ops_test.py b/tensorflow/contrib/signal/python/kernel_tests/mel_ops_test.py index 35c4b5bec172858b39dd4628a37e164efe87bdbf..345eb6cfaa67fd4cda6e7e3f01a1243bbf3c9fa1 100644 --- a/tensorflow/contrib/signal/python/kernel_tests/mel_ops_test.py +++ b/tensorflow/contrib/signal/python/kernel_tests/mel_ops_test.py @@ -24,6 +24,7 @@ from tensorflow.contrib.signal.python.kernel_tests import test_util from tensorflow.contrib.signal.python.ops import mel_ops from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops from tensorflow.python.platform import test # mel spectrum constants and functions. 
@@ -173,6 +174,18 @@ class LinearToMelTest(test.TestCase): rewritten_graph = test_util.grappler_optimize(g, [mel_matrix]) self.assertEqual(1, len(rewritten_graph.node)) + def test_num_spectrogram_bins_dynamic(self): + with self.test_session(use_gpu=True): + num_spectrogram_bins = array_ops.placeholder(shape=(), + dtype=dtypes.int32) + mel_matrix_np = spectrogram_to_mel_matrix( + 20, 129, 8000.0, 125.0, 3800.0) + mel_matrix = mel_ops.linear_to_mel_weight_matrix( + 20, num_spectrogram_bins, 8000.0, 125.0, 3800.0) + self.assertAllClose( + mel_matrix_np, + mel_matrix.eval(feed_dict={num_spectrogram_bins: 129}), atol=3e-6) + if __name__ == "__main__": test.main() diff --git a/tensorflow/contrib/signal/python/ops/mel_ops.py b/tensorflow/contrib/signal/python/ops/mel_ops.py index d1a36548d95cf44d2bf7e6108141aeb00853db04..1e84006116daa3f28c760037cb9eeafd53eaafb8 100644 --- a/tensorflow/contrib/signal/python/ops/mel_ops.py +++ b/tensorflow/contrib/signal/python/ops/mel_ops.py @@ -64,14 +64,11 @@ def _hertz_to_mel(frequencies_hertz, name=None): 1.0 + (frequencies_hertz / _MEL_BREAK_FREQUENCY_HERTZ)) -def _validate_arguments(num_mel_bins, num_spectrogram_bins, sample_rate, +def _validate_arguments(num_mel_bins, sample_rate, lower_edge_hertz, upper_edge_hertz, dtype): """Checks the inputs to linear_to_mel_weight_matrix.""" if num_mel_bins <= 0: raise ValueError('num_mel_bins must be positive. Got: %s' % num_mel_bins) - if num_spectrogram_bins <= 0: - raise ValueError('num_spectrogram_bins must be positive. Got: %s' % - num_spectrogram_bins) if sample_rate <= 0.0: raise ValueError('sample_rate must be positive. Got: %s' % sample_rate) if lower_edge_hertz < 0.0: @@ -122,9 +119,9 @@ def linear_to_mel_weight_matrix(num_mel_bins=20, Args: num_mel_bins: Python int. How many bands in the resulting mel spectrum. - num_spectrogram_bins: Python int. How many bins there are in the source - spectrogram data, which is understood to be `fft_size // 2 + 1`, i.e. 
the - spectrogram only contains the nonredundant FFT bins. + num_spectrogram_bins: An integer `Tensor`. How many bins there are in the + source spectrogram data, which is understood to be `fft_size // 2 + 1`, + i.e. the spectrogram only contains the nonredundant FFT bins. sample_rate: Python float. Samples per second of the input signal used to create the spectrogram. We need this to figure out the actual frequencies for each spectrogram bin, which dictates how they are mapped into the mel @@ -148,7 +145,10 @@ def linear_to_mel_weight_matrix(num_mel_bins=20, [mel]: https://en.wikipedia.org/wiki/Mel_scale """ with ops.name_scope(name, 'linear_to_mel_weight_matrix') as name: - _validate_arguments(num_mel_bins, num_spectrogram_bins, sample_rate, + # Note: As num_spectrogram_bins is passed to `math_ops.linspace` + # and the validation is already done in linspace (both in shape function + # and in kernel), there is no need to validate num_spectrogram_bins here. + _validate_arguments(num_mel_bins, sample_rate, lower_edge_hertz, upper_edge_hertz, dtype) # To preserve accuracy, we compute the matrix at float64 precision and then diff --git a/tensorflow/contrib/slim/README.md b/tensorflow/contrib/slim/README.md index 40f484fd78302163ba36142dec057478fe899189..746b95564237617359afe1791484809369c4a894 100644 --- a/tensorflow/contrib/slim/README.md +++ b/tensorflow/contrib/slim/README.md @@ -290,9 +290,9 @@ slim.stack(x, slim.conv2d, [(32, [3, 3]), (32, [1, 1]), (64, [3, 3]), (64, [1, 1 In addition to the types of scope mechanisms in TensorFlow ([name_scope](https://www.tensorflow.org/api_docs/python/tf/name_scope), -[variable_scope](https://www.tensorflow.org/api_docs/python/tf/variable_scope), +[variable_scope](https://www.tensorflow.org/api_docs/python/tf/variable_scope)), TF-Slim adds a new scoping mechanism called -[arg_scope](https://www.tensorflow.org/api_docs/python/tf/contrib/framework/arg_scope), 
+[arg_scope](https://www.tensorflow.org/api_docs/python/tf/contrib/framework/arg_scope). This new scope allows a user to specify one or more operations and a set of arguments which will be passed to each of the operations defined in the `arg_scope`. This functionality is best illustrated by example. Consider the @@ -761,8 +761,8 @@ parts: 3. Finalization: (optionally) perform any final operation to compute metric values. For example, computing means, mins, maxes, etc. -For example, to compute `mean_absolute_error`, two variables, a `count` and -`total` variable are *initialized* to zero. During *aggregation*, we observed +For example, to compute `mean_absolute_error`, two variables (`count` and +`total`) are *initialized* to zero. During *aggregation*, we observed some set of predictions and labels, compute their absolute differences and add the total to `total`. Each time we observe another value, `count` is incremented. Finally, during *finalization*, `total` is divided diff --git a/tensorflow/contrib/slim/python/slim/data/BUILD b/tensorflow/contrib/slim/python/slim/data/BUILD index dc12e67fc6d727cf13e2162bdaebe9ef725464da..eef043e83276dcdffe491ee9b981c8de0894f592 100644 --- a/tensorflow/contrib/slim/python/slim/data/BUILD +++ b/tensorflow/contrib/slim/python/slim/data/BUILD @@ -61,6 +61,7 @@ py_test( name = "dataset_data_provider_test", srcs = ["dataset_data_provider_test.py"], srcs_version = "PY2AND3", + tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":dataset", ":dataset_data_provider", diff --git a/tensorflow/contrib/slim/python/slim/evaluation.py b/tensorflow/contrib/slim/python/slim/evaluation.py index 3caf4e02da3aa2d7e586c4e76807a11f84585ea6..5cfd5ee82e2a0fce33311a8783d2d4ceb031544d 100644 --- a/tensorflow/contrib/slim/python/slim/evaluation.py +++ b/tensorflow/contrib/slim/python/slim/evaluation.py @@ -230,6 +230,7 @@ def evaluation_loop(master, max_number_of_evaluations=None, session_config=None, timeout=None, + timeout_fn=None, 
hooks=None): """Runs TF-Slim's Evaluation Loop. @@ -261,6 +262,9 @@ def evaluation_loop(master, configure the `Session`. If left as `None`, the default will be used. timeout: The maximum amount of time to wait between checkpoints. If left as `None`, then the process will wait indefinitely. + timeout_fn: Optional function to call after a timeout. If the function + returns True, then it means that no new checkpoints will be generated and + the iterator will exit. The function is called with no arguments. hooks: A list of additional `SessionRunHook` objects to pass during repeated evaluations. @@ -298,4 +302,5 @@ def evaluation_loop(master, hooks=all_hooks, config=session_config, max_number_of_evaluations=max_number_of_evaluations, - timeout=timeout) + timeout=timeout, + timeout_fn=timeout_fn) diff --git a/tensorflow/contrib/slim/python/slim/evaluation_test.py b/tensorflow/contrib/slim/python/slim/evaluation_test.py index c24bd048512daaae116e732ac437f7c9b6f6d7fc..94fc12ca814721acf62f16b72ffa50473043cc8b 100644 --- a/tensorflow/contrib/slim/python/slim/evaluation_test.py +++ b/tensorflow/contrib/slim/python/slim/evaluation_test.py @@ -177,6 +177,17 @@ class EvaluationTest(test.TestCase): # The timeout kicked in. self.assertLess(end, start + 1.1) + def testTimeoutFnOnEvaluationLoop(self): + # We require a mutable object (e.g. list but not an int) to maintain state + # across calls of a nested function. + timeout_fn_calls = [0] + def _TimeoutFn(): + timeout_fn_calls[0] += 1 + return timeout_fn_calls[0] >= 3 + # Need not do any evaluation, but should just call timeout_fn repeatedly. 
+ evaluation.evaluation_loop('', '', '', timeout=0, timeout_fn=_TimeoutFn) + self.assertEqual(timeout_fn_calls[0], 3) + def testMonitorCheckpointsLoopTimeout(self): ret = list( evaluation_lib.checkpoints_iterator( diff --git a/tensorflow/contrib/slim/python/slim/learning.py b/tensorflow/contrib/slim/python/slim/learning.py index 6a200de1ea172b4ccb38c0f5d889566ccaeef893..8a2c74742a8ebbfdca702943ee2f631531c7b2ca 100644 --- a/tensorflow/contrib/slim/python/slim/learning.py +++ b/tensorflow/contrib/slim/python/slim/learning.py @@ -389,7 +389,7 @@ def create_train_op(total_loss, total_loss: A `Tensor` representing the total loss. optimizer: A tf.Optimizer to use for computing the gradients. global_step: A `Tensor` representing the global step variable. If left as - `_USE_GLOBAL_STEP`, then slim.variables.global_step() is used. + `_USE_GLOBAL_STEP`, then tf.contrib.framework.global_step() is used. update_ops: An optional list of updates to execute. If `update_ops` is `None`, then the update ops are set to the contents of the `tf.GraphKeys.UPDATE_OPS` collection. If `update_ops` is not `None`, but @@ -578,7 +578,8 @@ def train(train_op, is_chief: Specifies whether or not the training is being run by the primary replica during replica training. global_step: The `Tensor` representing the global step. If left as `None`, - then slim.variables.get_or_create_global_step() is used. + then training_util.get_or_create_global_step(), that is, + tf.contrib.framework.global_step() is used. number_of_steps: The max number of gradient steps to take during training, as measured by 'global_step': training will stop if global_step is greater than 'number_of_steps'. 
If the value is left as None, training diff --git a/tensorflow/contrib/slim/python/slim/nets/resnet_v1.py b/tensorflow/contrib/slim/python/slim/nets/resnet_v1.py index 235a595de49f956e1df740fd821936c80eefaa55..11c4214176a8e3d69065066bb5ac4d668da10574 100644 --- a/tensorflow/contrib/slim/python/slim/nets/resnet_v1.py +++ b/tensorflow/contrib/slim/python/slim/nets/resnet_v1.py @@ -207,7 +207,7 @@ def resnet_v1(inputs, net = resnet_utils.stack_blocks_dense(net, blocks, output_stride) if global_pool: # Global average pooling. - net = math_ops.reduce_mean(net, [1, 2], name='pool5', keep_dims=True) + net = math_ops.reduce_mean(net, [1, 2], name='pool5', keepdims=True) if num_classes is not None: net = layers.conv2d( net, diff --git a/tensorflow/contrib/slim/python/slim/nets/resnet_v2.py b/tensorflow/contrib/slim/python/slim/nets/resnet_v2.py index 61665c9c8ba7817377a16bf3f2673447cab0518e..19e0538dd1e272b2bcaada3c83944e9c9c1f9eef 100644 --- a/tensorflow/contrib/slim/python/slim/nets/resnet_v2.py +++ b/tensorflow/contrib/slim/python/slim/nets/resnet_v2.py @@ -221,7 +221,7 @@ def resnet_v2(inputs, net, activation_fn=nn_ops.relu, scope='postnorm') if global_pool: # Global average pooling. - net = math_ops.reduce_mean(net, [1, 2], name='pool5', keep_dims=True) + net = math_ops.reduce_mean(net, [1, 2], name='pool5', keepdims=True) if num_classes is not None: net = layers_lib.conv2d( net, diff --git a/tensorflow/contrib/slim/python/slim/summaries.py b/tensorflow/contrib/slim/python/slim/summaries.py index 358359d6ebeea209fe83f7282f47db8be63747ac..a7dc3f6723a0d1a55dd3e8dce006e6acce083e6f 100644 --- a/tensorflow/contrib/slim/python/slim/summaries.py +++ b/tensorflow/contrib/slim/python/slim/summaries.py @@ -144,7 +144,7 @@ def add_zero_fraction_summary(tensor, name=None, prefix=None, A scalar `Tensor` of type `string` whose contents are the serialized `Summary` protocol buffer. 
""" - name = _get_summary_name(tensor, name, prefix, 'Fraction of Zero Values') + name = _get_summary_name(tensor, name, prefix, 'Fraction_of_Zero_Values') tensor = nn.zero_fraction(tensor) return add_scalar_summary(tensor, name, print_summary=print_summary) diff --git a/tensorflow/contrib/sparsemax/__init__.py b/tensorflow/contrib/sparsemax/__init__.py index 19d213fb3e8f615190d67862b1928205f31146b4..7bc726f4a84d683517b73814193429220f864735 100644 --- a/tensorflow/contrib/sparsemax/__init__.py +++ b/tensorflow/contrib/sparsemax/__init__.py @@ -14,7 +14,7 @@ # ============================================================================== """Module that implements sparsemax and sparsemax loss, see [1]. -[1] https://arxiv.org/abs/1602.02068 +[1]: https://arxiv.org/abs/1602.02068 ## Sparsemax diff --git a/tensorflow/contrib/sparsemax/python/ops/sparsemax.py b/tensorflow/contrib/sparsemax/python/ops/sparsemax.py index 890ca20f4cabd65146e803e54e554a5c97e72427..e617af2ff1b731eddb5b72469a1cd67e7cfd163f 100644 --- a/tensorflow/contrib/sparsemax/python/ops/sparsemax.py +++ b/tensorflow/contrib/sparsemax/python/ops/sparsemax.py @@ -31,7 +31,7 @@ def sparsemax(logits, name=None): """Computes sparsemax activations [1]. 
For each batch `i` and class `j` we have - sparsemax[i, j] = max(logits[i, j] - tau(logits[i, :]), 0) + $$sparsemax[i, j] = max(logits[i, j] - tau(logits[i, :]), 0)$$ [1]: https://arxiv.org/abs/1602.02068 diff --git a/tensorflow/contrib/stat_summarizer/BUILD b/tensorflow/contrib/stat_summarizer/BUILD index d4096751c44416e08bdba65819e5523999cc26cb..30be14c10cd8576ded75b8489cc89d439a9cc282 100644 --- a/tensorflow/contrib/stat_summarizer/BUILD +++ b/tensorflow/contrib/stat_summarizer/BUILD @@ -31,4 +31,5 @@ tf_py_test( "//tensorflow/python:math_ops", "//tensorflow/python:variables", ], + tags = ["no_windows"], ) diff --git a/tensorflow/contrib/stateless/__init__.py b/tensorflow/contrib/stateless/__init__.py index ca937546f50df46b7e5b1144dcbdc380cb04ca9b..0cca40f071c889773736ce009b32ba17728041ce 100644 --- a/tensorflow/contrib/stateless/__init__.py +++ b/tensorflow/contrib/stateless/__init__.py @@ -22,6 +22,7 @@ WARNING: These ops are in contrib, and are not stable. They should be consistent across multiple runs on the same hardware, but only for the same version of the code. 
+@@stateless_multinomial @@stateless_random_uniform @@stateless_random_normal @@stateless_truncated_normal @@ -37,6 +38,7 @@ from tensorflow.contrib.stateless.gen_stateless_random_ops import * from tensorflow.python.framework import ops from tensorflow.python.util.all_util import remove_undocumented +ops.NotDifferentiable("StatelessMultinomial") ops.NotDifferentiable("StatelessRandomNormal") ops.NotDifferentiable("StatelessRandomUniform") ops.NotDifferentiable("StatelessTruncatedNormal") diff --git a/tensorflow/contrib/stateless/python/kernel_tests/stateless_random_ops_test.py b/tensorflow/contrib/stateless/python/kernel_tests/stateless_random_ops_test.py index bea6341cfdcf7d56f255bec275b7861228e44e12..d724a5c014d2f9f5f6e3a6704341bcb8c429ae06 100644 --- a/tensorflow/contrib/stateless/python/kernel_tests/stateless_random_ops_test.py +++ b/tensorflow/contrib/stateless/python/kernel_tests/stateless_random_ops_test.py @@ -96,6 +96,52 @@ class StatelessOpsTest(test.TestCase): for s1, v1 in values: self.assertEqual(s0 == s1, np.all(v0 == v1)) + def testMatchStatefulMultinomial(self): + # Stateless ops should be the same as stateful ops on the first call + # after seed scrambling. 
+ key = 0x3ec8f720, 0x02461e29 + num_samples = 4 + for logits_dtype in np.float16, np.float32, np.float64: + for output_dtype in dtypes.int32, dtypes.int64: + for seed in (7, 17), (11, 5), (2, 3): + preseed = invert_philox(key, + (seed[0], 0, seed[1], 0)).astype(np.uint64) + preseed = preseed[::2] | preseed[1::2] << 32 + random_seed.set_random_seed(seed[0]) + with self.test_session(use_gpu=True): + for logits in ([[0.1, 0.25, 0.5, 0.15]], [[0.5, 0.5], [0.8, 0.2], + [0.25, 0.75]]): + logits_t = constant_op.constant(logits, dtype=logits_dtype) + stateful = random_ops.multinomial( + logits_t, + num_samples, + seed=seed[1], + output_dtype=output_dtype) + pure = stateless.stateless_multinomial( + logits_t, + num_samples, + seed=preseed, + output_dtype=output_dtype) + self.assertAllEqual(stateful.eval(), pure.eval()) + + def testDeterminismMultinomial(self): + # Stateless values should be equal iff the seeds are equal (roughly) + num_samples = 10 + with self.test_session(use_gpu=True): + for seed_type in [dtypes.int32, dtypes.int64]: + seed_t = array_ops.placeholder(seed_type, shape=[2]) + seeds = [(x, y) for x in range(5) for y in range(5)] * 3 + for logits in ([[0.1, 0.25, 0.5, 0.15]], [[0.5, 0.5], [0.8, 0.2], + [0.25, 0.75]]): + pure = stateless.stateless_multinomial( + logits, num_samples, seed=seed_t) + values = [ + (seed, pure.eval(feed_dict={seed_t: seed})) for seed in seeds + ] + for s0, v0 in values: + for s1, v1 in values: + self.assertEqual(s0 == s1, np.all(v0 == v1)) + if __name__ == '__main__': test.main() diff --git a/tensorflow/contrib/summary/BUILD b/tensorflow/contrib/summary/BUILD index fda1367b156c86f385f31cc41c5fca747cf8668d..f88b03ec4c2b1f250091594ea12d7d1862029fa2 100644 --- a/tensorflow/contrib/summary/BUILD +++ b/tensorflow/contrib/summary/BUILD @@ -15,7 +15,6 @@ py_test( srcs = ["summary_ops_test.py"], srcs_version = "PY2AND3", deps = [ - ":summary_ops", ":summary_test_util", "//tensorflow/python:array_ops", "//tensorflow/python:errors", @@ -23,6 
+22,7 @@ py_test( "//tensorflow/python:framework_test_lib", "//tensorflow/python:platform", "//tensorflow/python:state_ops", + "//tensorflow/python:summary_ops_v2", "//tensorflow/python:training", "//tensorflow/python/eager:function", "//tensorflow/python/eager:test", @@ -35,7 +35,6 @@ py_test( srcs = ["summary_ops_graph_test.py"], srcs_version = "PY2AND3", deps = [ - ":summary_ops", ":summary_test_util", "//tensorflow/core:protos_all_py", "//tensorflow/python:array_ops", @@ -44,31 +43,9 @@ py_test( "//tensorflow/python:control_flow_ops", "//tensorflow/python:dtypes", "//tensorflow/python:framework_ops", + "//tensorflow/python:summary_ops_v2", "//tensorflow/python:training", - "@six_archive//:six", - ], -) - -py_library( - name = "summary_ops", - srcs = ["summary_ops.py"], - srcs_version = "PY2AND3", - visibility = ["//tensorflow:internal"], - deps = [ - "//tensorflow/core:protos_all_py", - "//tensorflow/python:array_ops", - "//tensorflow/python:constant_op", - "//tensorflow/python:control_flow_ops", - "//tensorflow/python:dtypes", - "//tensorflow/python:framework_ops", - "//tensorflow/python:layers_base", - "//tensorflow/python:math_ops", - "//tensorflow/python:resource_variable_ops", - "//tensorflow/python:summary_op_util", - "//tensorflow/python:summary_ops_gen", - "//tensorflow/python:training", - "//tensorflow/python:util", - "//tensorflow/python/eager:context", + "//tensorflow/python:variables", "@six_archive//:six", ], ) @@ -79,7 +56,7 @@ py_library( srcs_version = "PY2AND3", visibility = ["//tensorflow:internal"], deps = [ - ":summary_ops", + "//tensorflow/python:summary_ops_v2", ], ) @@ -92,8 +69,10 @@ py_library( visibility = ["//tensorflow:internal"], deps = [ "//tensorflow/core:protos_all_py", + "//tensorflow/python:framework_test_lib", "//tensorflow/python:lib", "//tensorflow/python:platform", + "//tensorflow/python:summary_ops_v2", "@org_sqlite//:python", ], ) diff --git a/tensorflow/contrib/summary/summary.py b/tensorflow/contrib/summary/summary.py 
index 2d6d7ea6a3eff2562ba8def4117e3aa6f818b6fd..99ced53e1167ec5486d0b75cff81ffbf857c2be7 100644 --- a/tensorflow/contrib/summary/summary.py +++ b/tensorflow/contrib/summary/summary.py @@ -61,23 +61,23 @@ from __future__ import division from __future__ import print_function # pylint: disable=unused-import -from tensorflow.contrib.summary.summary_ops import all_summary_ops -from tensorflow.contrib.summary.summary_ops import always_record_summaries -from tensorflow.contrib.summary.summary_ops import audio -from tensorflow.contrib.summary.summary_ops import create_db_writer -from tensorflow.contrib.summary.summary_ops import create_file_writer -from tensorflow.contrib.summary.summary_ops import create_summary_file_writer -from tensorflow.contrib.summary.summary_ops import eval_dir -from tensorflow.contrib.summary.summary_ops import flush -from tensorflow.contrib.summary.summary_ops import generic -from tensorflow.contrib.summary.summary_ops import graph -from tensorflow.contrib.summary.summary_ops import histogram -from tensorflow.contrib.summary.summary_ops import image -from tensorflow.contrib.summary.summary_ops import import_event -from tensorflow.contrib.summary.summary_ops import initialize -from tensorflow.contrib.summary.summary_ops import never_record_summaries -from tensorflow.contrib.summary.summary_ops import record_summaries_every_n_global_steps -from tensorflow.contrib.summary.summary_ops import scalar -from tensorflow.contrib.summary.summary_ops import should_record_summaries -from tensorflow.contrib.summary.summary_ops import summary_writer_initializer_op -from tensorflow.contrib.summary.summary_ops import SummaryWriter +from tensorflow.python.ops.summary_ops_v2 import all_summary_ops +from tensorflow.python.ops.summary_ops_v2 import always_record_summaries +from tensorflow.python.ops.summary_ops_v2 import audio +from tensorflow.python.ops.summary_ops_v2 import create_db_writer +from tensorflow.python.ops.summary_ops_v2 import create_file_writer +from 
tensorflow.python.ops.summary_ops_v2 import create_summary_file_writer +from tensorflow.python.ops.summary_ops_v2 import eval_dir +from tensorflow.python.ops.summary_ops_v2 import flush +from tensorflow.python.ops.summary_ops_v2 import generic +from tensorflow.python.ops.summary_ops_v2 import graph +from tensorflow.python.ops.summary_ops_v2 import histogram +from tensorflow.python.ops.summary_ops_v2 import image +from tensorflow.python.ops.summary_ops_v2 import import_event +from tensorflow.python.ops.summary_ops_v2 import initialize +from tensorflow.python.ops.summary_ops_v2 import never_record_summaries +from tensorflow.python.ops.summary_ops_v2 import record_summaries_every_n_global_steps +from tensorflow.python.ops.summary_ops_v2 import scalar +from tensorflow.python.ops.summary_ops_v2 import should_record_summaries +from tensorflow.python.ops.summary_ops_v2 import summary_writer_initializer_op +from tensorflow.python.ops.summary_ops_v2 import SummaryWriter diff --git a/tensorflow/contrib/summary/summary_ops_graph_test.py b/tensorflow/contrib/summary/summary_ops_graph_test.py index 3aba04540eba12092d884cca10e23546eb91c91d..ae8336daaf8ea9113716b90b6ea9be9de7303596 100644 --- a/tensorflow/contrib/summary/summary_ops_graph_test.py +++ b/tensorflow/contrib/summary/summary_ops_graph_test.py @@ -16,27 +16,220 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import os import tempfile +import time import six -from tensorflow.contrib.summary import summary_ops from tensorflow.contrib.summary import summary_test_util from tensorflow.core.framework import graph_pb2 from tensorflow.core.framework import node_def_pb2 from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops +from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from 
tensorflow.python.ops import state_ops +from tensorflow.python.ops import summary_ops_v2 as summary_ops +from tensorflow.python.ops import variables +from tensorflow.python.platform import gfile from tensorflow.python.platform import test from tensorflow.python.training import training_util get_all = summary_test_util.get_all -class DbTest(summary_test_util.SummaryDbTest): +class GraphFileTest(test_util.TensorFlowTestCase): + + def testSummaryOps(self): + logdir = self.get_temp_dir() + writer = summary_ops.create_file_writer(logdir, max_queue=0) + with writer.as_default(), summary_ops.always_record_summaries(): + summary_ops.generic('tensor', 1, step=1) + summary_ops.scalar('scalar', 2.0, step=1) + summary_ops.histogram('histogram', [1.0], step=1) + summary_ops.image('image', [[[[1.0]]]], step=1) + summary_ops.audio('audio', [[1.0]], 1.0, 1, step=1) + with self.test_session() as sess: + sess.run(summary_ops.summary_writer_initializer_op()) + sess.run(summary_ops.all_summary_ops()) + # The working condition of the ops is tested in the C++ test so we just + # test here that we're calling them correctly. 
+ self.assertTrue(gfile.Exists(logdir)) + + def testSummaryName(self): + logdir = self.get_temp_dir() + writer = summary_ops.create_file_writer(logdir, max_queue=0) + with writer.as_default(), summary_ops.always_record_summaries(): + summary_ops.scalar('scalar', 2.0, step=1) + with self.test_session() as sess: + sess.run(summary_ops.summary_writer_initializer_op()) + sess.run(summary_ops.all_summary_ops()) + events = summary_test_util.events_from_logdir(logdir) + self.assertEqual(2, len(events)) + self.assertEqual('scalar', events[1].summary.value[0].tag) + + def testSummaryNameScope(self): + logdir = self.get_temp_dir() + writer = summary_ops.create_file_writer(logdir, max_queue=0) + with writer.as_default(), summary_ops.always_record_summaries(): + with ops.name_scope('scope'): + summary_ops.scalar('scalar', 2.0, step=1) + with self.test_session() as sess: + sess.run(summary_ops.summary_writer_initializer_op()) + sess.run(summary_ops.all_summary_ops()) + events = summary_test_util.events_from_logdir(logdir) + self.assertEqual(2, len(events)) + self.assertEqual('scope/scalar', events[1].summary.value[0].tag) + + def testSummaryGlobalStep(self): + training_util.get_or_create_global_step() + logdir = self.get_temp_dir() + writer = summary_ops.create_file_writer(logdir, max_queue=0) + with writer.as_default(), summary_ops.always_record_summaries(): + summary_ops.scalar('scalar', 2.0) + with self.test_session() as sess: + sess.run(variables.global_variables_initializer()) + sess.run(summary_ops.summary_writer_initializer_op()) + step, _ = sess.run( + [training_util.get_global_step(), summary_ops.all_summary_ops()]) + events = summary_test_util.events_from_logdir(logdir) + self.assertEqual(2, len(events)) + self.assertEqual(step, events[1].step) + + def testMaxQueue(self): + logdir = self.get_temp_dir() + writer = summary_ops.create_file_writer( + logdir, max_queue=1, flush_millis=999999) + with writer.as_default(), summary_ops.always_record_summaries(): + 
summary_ops.scalar('scalar', 2.0, step=1) + with self.test_session() as sess: + sess.run(summary_ops.summary_writer_initializer_op()) + get_total = lambda: len(summary_test_util.events_from_logdir(logdir)) + # Note: First tf.Event is always file_version. + self.assertEqual(1, get_total()) + sess.run(summary_ops.all_summary_ops()) + self.assertEqual(1, get_total()) + # Should flush after second summary since max_queue = 1 + sess.run(summary_ops.all_summary_ops()) + self.assertEqual(3, get_total()) + + def testFlushFunction(self): + logdir = self.get_temp_dir() + writer = summary_ops.create_file_writer( + logdir, max_queue=999999, flush_millis=999999) + with writer.as_default(), summary_ops.always_record_summaries(): + summary_ops.scalar('scalar', 2.0, step=1) + flush_op = summary_ops.flush() + with self.test_session() as sess: + sess.run(summary_ops.summary_writer_initializer_op()) + get_total = lambda: len(summary_test_util.events_from_logdir(logdir)) + # Note: First tf.Event is always file_version. 
+ self.assertEqual(1, get_total()) + sess.run(summary_ops.all_summary_ops()) + self.assertEqual(1, get_total()) + sess.run(flush_op) + self.assertEqual(2, get_total()) + # Test "writer" parameter + sess.run(summary_ops.all_summary_ops()) + sess.run(summary_ops.flush(writer=writer)) + self.assertEqual(3, get_total()) + sess.run(summary_ops.all_summary_ops()) + sess.run(summary_ops.flush(writer=writer._resource)) # pylint:disable=protected-access + self.assertEqual(4, get_total()) + + def testSharedName(self): + logdir = self.get_temp_dir() + with summary_ops.always_record_summaries(): + # Create with default shared name (should match logdir) + writer1 = summary_ops.create_file_writer(logdir) + with writer1.as_default(): + summary_ops.scalar('one', 1.0, step=1) + # Create with explicit logdir shared name (should be same resource/file) + shared_name = 'logdir:' + logdir + writer2 = summary_ops.create_file_writer(logdir, name=shared_name) + with writer2.as_default(): + summary_ops.scalar('two', 2.0, step=2) + # Create with different shared name (should be separate resource/file) + writer3 = summary_ops.create_file_writer(logdir, name='other') + with writer3.as_default(): + summary_ops.scalar('three', 3.0, step=3) + + with self.test_session() as sess: + # Run init ops across writers sequentially to avoid race condition. 
+ # TODO(nickfelt): fix race condition in resource manager lookup or create + sess.run(writer1.init()) + sess.run(writer2.init()) + time.sleep(1.1) # Ensure filename has a different timestamp + sess.run(writer3.init()) + sess.run(summary_ops.all_summary_ops()) + sess.run([writer1.flush(), writer2.flush(), writer3.flush()]) + + event_files = iter(sorted(gfile.Glob(os.path.join(logdir, '*tfevents*')))) + + # First file has tags "one" and "two" + events = summary_test_util.events_from_file(next(event_files)) + self.assertEqual('brain.Event:2', events[0].file_version) + tags = [e.summary.value[0].tag for e in events[1:]] + self.assertItemsEqual(['one', 'two'], tags) + + # Second file has tag "three" + events = summary_test_util.events_from_file(next(event_files)) + self.assertEqual('brain.Event:2', events[0].file_version) + tags = [e.summary.value[0].tag for e in events[1:]] + self.assertItemsEqual(['three'], tags) + + # No more files + self.assertRaises(StopIteration, lambda: next(event_files)) + + def testWriterInitAndClose(self): + logdir = self.get_temp_dir() + with summary_ops.always_record_summaries(): + writer = summary_ops.create_file_writer( + logdir, max_queue=100, flush_millis=1000000) + with writer.as_default(): + summary_ops.scalar('one', 1.0, step=1) + with self.test_session() as sess: + sess.run(summary_ops.summary_writer_initializer_op()) + get_total = lambda: len(summary_test_util.events_from_logdir(logdir)) + self.assertEqual(1, get_total()) # file_version Event + # Running init() again while writer is open has no effect + sess.run(writer.init()) + self.assertEqual(1, get_total()) + sess.run(summary_ops.all_summary_ops()) + self.assertEqual(1, get_total()) + # Running close() should do an implicit flush + sess.run(writer.close()) + self.assertEqual(2, get_total()) + # Running init() on a closed writer should start a new file + time.sleep(1.1) # Ensure filename has a different timestamp + sess.run(writer.init()) + 
sess.run(summary_ops.all_summary_ops()) + sess.run(writer.close()) + files = sorted(gfile.Glob(os.path.join(logdir, '*tfevents*'))) + self.assertEqual(2, len(files)) + self.assertEqual(2, len(summary_test_util.events_from_file(files[1]))) + + def testWriterFlush(self): + logdir = self.get_temp_dir() + with summary_ops.always_record_summaries(): + writer = summary_ops.create_file_writer( + logdir, max_queue=100, flush_millis=1000000) + with writer.as_default(): + summary_ops.scalar('one', 1.0, step=1) + with self.test_session() as sess: + sess.run(summary_ops.summary_writer_initializer_op()) + get_total = lambda: len(summary_test_util.events_from_logdir(logdir)) + self.assertEqual(1, get_total()) # file_version Event + sess.run(summary_ops.all_summary_ops()) + self.assertEqual(1, get_total()) + sess.run(writer.flush()) + self.assertEqual(2, get_total()) + + +class GraphDbTest(summary_test_util.SummaryDbTest): def testGraphPassedToGraph_isForbiddenForThineOwnSafety(self): with self.assertRaises(TypeError): diff --git a/tensorflow/contrib/summary/summary_ops_test.py b/tensorflow/contrib/summary/summary_ops_test.py index c756f8b27055f9cf86a311e485d97745a3c7a95b..f1ef218e74bbd225071324a8269fdfeb5de0e038 100644 --- a/tensorflow/contrib/summary/summary_ops_test.py +++ b/tensorflow/contrib/summary/summary_ops_test.py @@ -16,12 +16,13 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import os import tempfile +import time import numpy as np import six -from tensorflow.contrib.summary import summary_ops from tensorflow.contrib.summary import summary_test_util from tensorflow.core.framework import graph_pb2 from tensorflow.core.framework import node_def_pb2 @@ -33,6 +34,7 @@ from tensorflow.python.framework import ops from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import state_ops +from tensorflow.python.ops import summary_ops_v2 as 
summary_ops from tensorflow.python.platform import gfile from tensorflow.python.training import training_util @@ -57,7 +59,7 @@ _NUMPY_NUMERIC_TYPES = { } -class TargetTest(test_util.TensorFlowTestCase): +class EagerFileTest(test_util.TensorFlowTestCase): def testShouldRecordSummary(self): self.assertFalse(summary_ops.should_record_summaries()) @@ -138,21 +140,22 @@ class TargetTest(test_util.TensorFlowTestCase): def testMaxQueue(self): logs = tempfile.mkdtemp() with summary_ops.create_file_writer( - logs, max_queue=2, flush_millis=999999, + logs, max_queue=1, flush_millis=999999, name='lol').as_default(), summary_ops.always_record_summaries(): get_total = lambda: len(summary_test_util.events_from_logdir(logs)) # Note: First tf.Event is always file_version. self.assertEqual(1, get_total()) summary_ops.scalar('scalar', 2.0, step=1) self.assertEqual(1, get_total()) + # Should flush after second summary since max_queue = 1 summary_ops.scalar('scalar', 2.0, step=2) self.assertEqual(3, get_total()) - def testFlush(self): + def testFlushFunction(self): logs = tempfile.mkdtemp() - with summary_ops.create_file_writer( - logs, max_queue=999999, flush_millis=999999, - name='lol').as_default(), summary_ops.always_record_summaries(): + writer = summary_ops.create_file_writer( + logs, max_queue=999999, flush_millis=999999, name='lol') + with writer.as_default(), summary_ops.always_record_summaries(): get_total = lambda: len(summary_test_util.events_from_logdir(logs)) # Note: First tf.Event is always file_version. 
self.assertEqual(1, get_total()) @@ -161,9 +164,103 @@ class TargetTest(test_util.TensorFlowTestCase): self.assertEqual(1, get_total()) summary_ops.flush() self.assertEqual(3, get_total()) + # Test "writer" parameter + summary_ops.scalar('scalar', 2.0, step=3) + summary_ops.flush(writer=writer) + self.assertEqual(4, get_total()) + summary_ops.scalar('scalar', 2.0, step=4) + summary_ops.flush(writer=writer._resource) # pylint:disable=protected-access + self.assertEqual(5, get_total()) + + def testSharedName(self): + logdir = self.get_temp_dir() + with summary_ops.always_record_summaries(): + # Create with default shared name (should match logdir) + writer1 = summary_ops.create_file_writer(logdir) + with writer1.as_default(): + summary_ops.scalar('one', 1.0, step=1) + summary_ops.flush() + # Create with explicit logdir shared name (should be same resource/file) + shared_name = 'logdir:' + logdir + writer2 = summary_ops.create_file_writer(logdir, name=shared_name) + with writer2.as_default(): + summary_ops.scalar('two', 2.0, step=2) + summary_ops.flush() + # Create with different shared name (should be separate resource/file) + time.sleep(1.1) # Ensure filename has a different timestamp + writer3 = summary_ops.create_file_writer(logdir, name='other') + with writer3.as_default(): + summary_ops.scalar('three', 3.0, step=3) + summary_ops.flush() + + event_files = iter(sorted(gfile.Glob(os.path.join(logdir, '*tfevents*')))) + + # First file has tags "one" and "two" + events = iter(summary_test_util.events_from_file(next(event_files))) + self.assertEqual('brain.Event:2', next(events).file_version) + self.assertEqual('one', next(events).summary.value[0].tag) + self.assertEqual('two', next(events).summary.value[0].tag) + self.assertRaises(StopIteration, lambda: next(events)) + + # Second file has tag "three" + events = iter(summary_test_util.events_from_file(next(event_files))) + self.assertEqual('brain.Event:2', next(events).file_version) + self.assertEqual('three', 
next(events).summary.value[0].tag) + self.assertRaises(StopIteration, lambda: next(events)) + + # No more files + self.assertRaises(StopIteration, lambda: next(event_files)) + + def testWriterInitAndClose(self): + logdir = self.get_temp_dir() + get_total = lambda: len(summary_test_util.events_from_logdir(logdir)) + with summary_ops.always_record_summaries(): + writer = summary_ops.create_file_writer( + logdir, max_queue=100, flush_millis=1000000) + self.assertEqual(1, get_total()) # file_version Event + # Calling init() again while writer is open has no effect + writer.init() + self.assertEqual(1, get_total()) + try: + # Not using .as_default() to avoid implicit flush when exiting + writer.set_as_default() + summary_ops.scalar('one', 1.0, step=1) + self.assertEqual(1, get_total()) + # Calling .close() should do an implicit flush + writer.close() + self.assertEqual(2, get_total()) + # Calling init() on a closed writer should start a new file + time.sleep(1.1) # Ensure filename has a different timestamp + writer.init() + files = sorted(gfile.Glob(os.path.join(logdir, '*tfevents*'))) + self.assertEqual(2, len(files)) + get_total = lambda: len(summary_test_util.events_from_file(files[1])) + self.assertEqual(1, get_total()) # file_version Event + summary_ops.scalar('two', 2.0, step=2) + writer.close() + self.assertEqual(2, get_total()) + finally: + # Clean up by resetting default writer + summary_ops.create_file_writer(None).set_as_default() + + def testWriterFlush(self): + logdir = self.get_temp_dir() + get_total = lambda: len(summary_test_util.events_from_logdir(logdir)) + with summary_ops.always_record_summaries(): + writer = summary_ops.create_file_writer( + logdir, max_queue=100, flush_millis=1000000) + self.assertEqual(1, get_total()) # file_version Event + with writer.as_default(): + summary_ops.scalar('one', 1.0, step=1) + self.assertEqual(1, get_total()) + writer.flush() + self.assertEqual(2, get_total()) + summary_ops.scalar('two', 2.0, step=2) + # Exiting the 
"as_default()" should do an implicit flush of the "two" tag + self.assertEqual(3, get_total()) -class DbTest(summary_test_util.SummaryDbTest): +class EagerDbTest(summary_test_util.SummaryDbTest): def testIntegerSummaries(self): step = training_util.create_global_step() diff --git a/tensorflow/contrib/summary/summary_test_internal.py b/tensorflow/contrib/summary/summary_test_internal.py deleted file mode 100644 index d0d3384735fb1eb1a048c7aa6da0037ee9fc6936..0000000000000000000000000000000000000000 --- a/tensorflow/contrib/summary/summary_test_internal.py +++ /dev/null @@ -1,60 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Internal helpers for tests in this directory.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import functools -import os - -import sqlite3 - -from tensorflow.contrib.summary import summary_ops -from tensorflow.python.framework import test_util - - -class SummaryDbTest(test_util.TensorFlowTestCase): - """Helper for summary database testing.""" - - def setUp(self): - super(SummaryDbTest, self).setUp() - self.db_path = os.path.join(self.get_temp_dir(), 'DbTest.sqlite') - if os.path.exists(self.db_path): - os.unlink(self.db_path) - self.db = sqlite3.connect(self.db_path) - self.create_db_writer = functools.partial( - summary_ops.create_db_writer, - db_uri=self.db_path, - experiment_name='experiment', - run_name='run', - user_name='user') - - def tearDown(self): - self.db.close() - super(SummaryDbTest, self).tearDown() - - -def get_one(db, q, *p): - return db.execute(q, p).fetchone()[0] - - -def get_all(db, q, *p): - return unroll(db.execute(q, p).fetchall()) - - -def unroll(list_of_tuples): - return sum(list_of_tuples, ()) diff --git a/tensorflow/contrib/summary/summary_test_util.py b/tensorflow/contrib/summary/summary_test_util.py index 8506c4be9c4ca8305b62da17c7246e6e18313bd3..b4ae43302cb22ad17c04050eb84433c470757bf1 100644 --- a/tensorflow/contrib/summary/summary_test_util.py +++ b/tensorflow/contrib/summary/summary_test_util.py @@ -24,10 +24,10 @@ import os import sqlite3 -from tensorflow.contrib.summary import summary_ops from tensorflow.core.util import event_pb2 from tensorflow.python.framework import test_util from tensorflow.python.lib.io import tf_record +from tensorflow.python.ops import summary_ops_v2 as summary_ops from tensorflow.python.platform import gfile diff --git a/tensorflow/contrib/tensor_forest/BUILD b/tensorflow/contrib/tensor_forest/BUILD index 
11a59ec22be99f8b7b26d4a1e98ec1449ce2cd94..136856c0156c41046f9af61cdd6e3d5f8213309e 100644 --- a/tensorflow/contrib/tensor_forest/BUILD +++ b/tensorflow/contrib/tensor_forest/BUILD @@ -539,7 +539,6 @@ py_test( srcs = ["client/random_forest_test.py"], srcs_version = "PY2AND3", tags = [ - "no_windows", "nomac", # b/63258195 "notsan", ], diff --git a/tensorflow/contrib/tensor_forest/client/eval_metrics.py b/tensorflow/contrib/tensor_forest/client/eval_metrics.py index 90033015ebc5e44ea70fbf2bc9735d0aeb4ec27d..e893e1d1c836cc7feef15757dde79d0db362cbaf 100644 --- a/tensorflow/contrib/tensor_forest/client/eval_metrics.py +++ b/tensorflow/contrib/tensor_forest/client/eval_metrics.py @@ -37,7 +37,7 @@ def _top_k_generator(k): def _top_k(probabilities, targets): targets = math_ops.to_int32(targets) if targets.get_shape().ndims > 1: - targets = array_ops.squeeze(targets, squeeze_dims=[1]) + targets = array_ops.squeeze(targets, axis=[1]) return metric_ops.streaming_mean(nn.in_top_k(probabilities, targets, k)) return _top_k @@ -57,7 +57,7 @@ def _r2(probabilities, targets, weights=None): def _squeeze_and_onehot(targets, depth): - targets = array_ops.squeeze(targets, squeeze_dims=[1]) + targets = array_ops.squeeze(targets, axis=[1]) return array_ops.one_hot(math_ops.to_int32(targets), depth) diff --git a/tensorflow/contrib/tensor_forest/client/random_forest.py b/tensorflow/contrib/tensor_forest/client/random_forest.py index 4abcc20ed334e706c8ae59e2127dfd6f4e152361..35e8c92aba325d9115c7ee566363a1625e6e76fc 100644 --- a/tensorflow/contrib/tensor_forest/client/random_forest.py +++ b/tensorflow/contrib/tensor_forest/client/random_forest.py @@ -399,7 +399,7 @@ def get_combined_model_fn(model_fns): training ops: tf.group them. loss: average them. 
predictions: concat probabilities such that predictions[*][0-C1] are the - probablities for output 1 (where C1 is the number of classes in output 1), + probabilities for output 1 (where C1 is the number of classes in output 1), predictions[*][C1-(C1+C2)] are the probabilities for output 2 (where C2 is the number of classes in output 2), etc. Also stack predictions such that predictions[i][j] is the class prediction for example i and output j. diff --git a/tensorflow/contrib/tensor_forest/hybrid/core/ops/hard_routing_function_op.cc b/tensorflow/contrib/tensor_forest/hybrid/core/ops/hard_routing_function_op.cc index cf0db788a419f64ed891df8aa097fa8826f6de91..06bfe871fdff29fcdac1a3a50500b94dbd2998d7 100644 --- a/tensorflow/contrib/tensor_forest/hybrid/core/ops/hard_routing_function_op.cc +++ b/tensorflow/contrib/tensor_forest/hybrid/core/ops/hard_routing_function_op.cc @@ -80,7 +80,7 @@ REGISTER_OP("HardRoutingFunction") regression model that translates from node features to probabilities. - path_probility: `path_probability[i]` gives the probability of reaching each + path_probability: `path_probability[i]` gives the probability of reaching each node in `path[i]`. path: `path[i][j]` gives the jth node in the path taken by the ith data instance. diff --git a/tensorflow/contrib/tensor_forest/hybrid/core/ops/stochastic_hard_routing_function_op.cc b/tensorflow/contrib/tensor_forest/hybrid/core/ops/stochastic_hard_routing_function_op.cc index c9df09bfda44e665ed013da383e1e9a2c665c454..1a055756c084016b3862ad131937e4d400f26d55 100644 --- a/tensorflow/contrib/tensor_forest/hybrid/core/ops/stochastic_hard_routing_function_op.cc +++ b/tensorflow/contrib/tensor_forest/hybrid/core/ops/stochastic_hard_routing_function_op.cc @@ -85,7 +85,7 @@ REGISTER_OP("StochasticHardRoutingFunction") regression model that translates from node features to probabilities. 
- path_probility: `path_probability[i]` gives the probability of reaching each + path_probability: `path_probability[i]` gives the probability of reaching each node in `path[i]`. path: `path[i][j]` gives the jth node in the path taken by the ith data instance. diff --git a/tensorflow/contrib/tensor_forest/hybrid/core/ops/stochastic_hard_routing_gradient_op.cc b/tensorflow/contrib/tensor_forest/hybrid/core/ops/stochastic_hard_routing_gradient_op.cc index b0d8b832b5437db7a4b3026e80ae99d0391d7f7a..7d092bbc24d5c6611fed431c04200b2f950887f7 100644 --- a/tensorflow/contrib/tensor_forest/hybrid/core/ops/stochastic_hard_routing_gradient_op.cc +++ b/tensorflow/contrib/tensor_forest/hybrid/core/ops/stochastic_hard_routing_gradient_op.cc @@ -81,7 +81,7 @@ REGISTER_OP("StochasticHardRoutingGradient") tree_biases: `tree_biases[i]` gives the bias of the logistic regression model that translates from node features to probabilities. - path_probility: `path_probability[i]` gives the probability of reaching each + path_probability: `path_probability[i]` gives the probability of reaching each node in `path[i]`. path: `path[i][j]` gives the jth node in the path taken by the ith data instance. diff --git a/tensorflow/contrib/tensor_forest/hybrid/python/layers/fully_connected.py b/tensorflow/contrib/tensor_forest/hybrid/python/layers/fully_connected.py index ff3ab21eaa9a4aa823f2ae7d3dd39674abea3d2a..745a5b1caf2fe348f1b276ccc245aa2ef350a62e 100644 --- a/tensorflow/contrib/tensor_forest/hybrid/python/layers/fully_connected.py +++ b/tensorflow/contrib/tensor_forest/hybrid/python/layers/fully_connected.py @@ -55,7 +55,7 @@ class ManyToOneLayer(hybrid_layer.HybridLayer): # There is always one activation per instance by definition, so squeeze # away the extra dimension. 
- return array_ops.squeeze(nn_activations, squeeze_dims=[1]) + return array_ops.squeeze(nn_activations, axis=[1]) class FlattenedFullyConnectedLayer(hybrid_layer.HybridLayer): diff --git a/tensorflow/contrib/tensor_forest/kernels/tree_utils.cc b/tensorflow/contrib/tensor_forest/kernels/tree_utils.cc index 44997ec5d6d5fdb9aab52ab7a50f46a731bfda66..cefcc960510293c15569391091e5c1b4611c8835 100644 --- a/tensorflow/contrib/tensor_forest/kernels/tree_utils.cc +++ b/tensorflow/contrib/tensor_forest/kernels/tree_utils.cc @@ -421,7 +421,7 @@ double getChebyshevEpsilon(const std::vector& mu1, const std::vector& mu2) { // Math time!! // We are trying to minimize d = |mu1 - x|^2 + |mu2 - y|^2 over the surface. - // Using Langrange multipliers, we get + // Using Lagrange multipliers, we get // partial d / partial x = -2 mu1 + 2 x = lambda_1 1 + 2 lambda_3 x // partial d / partial y = -2 mu2 + 2 y = lambda_2 1 - 2 lambda_3 y // or @@ -485,7 +485,7 @@ double getChebyshevEpsilon(const std::vector& mu1, } double sdiscrim = sqrt(discrim); - // TODO(thomaswc): Analyze whetever one of these is always closer. + // TODO(thomaswc): Analyze whether one of these is always closer.
double v1 = (-b + sdiscrim) / (2 * a); double v2 = (-b - sdiscrim) / (2 * a); double dist1 = getDistanceFromLambda3(v1, mu1, mu2); diff --git a/tensorflow/contrib/tensor_forest/kernels/tree_utils.h b/tensorflow/contrib/tensor_forest/kernels/tree_utils.h index edbac6700677633cbd4d41f7040b4859ca599c4a..03aab1b61ee58a647edb24f6b97e517a411e996c 100644 --- a/tensorflow/contrib/tensor_forest/kernels/tree_utils.h +++ b/tensorflow/contrib/tensor_forest/kernels/tree_utils.h @@ -123,7 +123,7 @@ bool BestSplitDominatesRegression(const Tensor& total_sums, const Tensor& split_squares, int32 accumulator); -// Performs booststrap_samples bootstrap samples of the best split's class +// Performs bootstrap_samples bootstrap samples of the best split's class // counts and the second best splits's class counts, and returns true if at // least dominate_fraction of the time, the former has a better (lower) // Gini impurity. Does not take over ownership of *rand. diff --git a/tensorflow/contrib/tensor_forest/kernels/v4/decision-tree-resource.h b/tensorflow/contrib/tensor_forest/kernels/v4/decision-tree-resource.h index 328af28725af016e90b30ae2d303ffba15c81c1f..d3edb43733761a906c6e5bf8b65f76e3e1ae56fc 100644 --- a/tensorflow/contrib/tensor_forest/kernels/v4/decision-tree-resource.h +++ b/tensorflow/contrib/tensor_forest/kernels/v4/decision-tree-resource.h @@ -60,7 +60,7 @@ class DecisionTreeResource : public ResourceBase { mutex* get_mutex() { return &mu_; } // Return the TreeNode for the leaf that the example ends up at according - // to decsion_tree_. Also fill in that leaf's depth if it isn't nullptr. + // to decision_tree_. Also fill in that leaf's depth if it isn't nullptr. 
int32 TraverseTree(const std::unique_ptr& input_data, int example, int32* depth, TreePath* path) const; diff --git a/tensorflow/contrib/tensor_forest/kernels/v4/decision_node_evaluator.h b/tensorflow/contrib/tensor_forest/kernels/v4/decision_node_evaluator.h index bf2b2aaa3c8f433ab4fc145217857112f7a0a579..3db351c328c73beb94d6994aa503e3e2c4c06390 100644 --- a/tensorflow/contrib/tensor_forest/kernels/v4/decision_node_evaluator.h +++ b/tensorflow/contrib/tensor_forest/kernels/v4/decision_node_evaluator.h @@ -60,7 +60,7 @@ class InequalityDecisionNodeEvaluator : public BinaryDecisionNodeEvaluator { bool include_equals_; }; -// Evalutor for splits with multiple weighted features. +// Evaluator for splits with multiple weighted features. class ObliqueInequalityDecisionNodeEvaluator : public BinaryDecisionNodeEvaluator { public: diff --git a/tensorflow/contrib/tensor_forest/ops/model_ops.cc b/tensorflow/contrib/tensor_forest/ops/model_ops.cc index 3099cccdf8b1b7b1d49d12ec454f9a0dc7325e9b..98124d519c761907ed0e62ee2a396ef84716a911 100644 --- a/tensorflow/contrib/tensor_forest/ops/model_ops.cc +++ b/tensorflow/contrib/tensor_forest/ops/model_ops.cc @@ -165,7 +165,7 @@ tree_handle: The handle to the tree. leaf_ids: `leaf_ids[i]` is the leaf id for input i. input_labels: The training batch's labels as a 1 or 2-d tensor. 'input_labels[i][j]' gives the j-th label/target for the i-th input. -input_weights: The training batch's eample weights as a 1-d tensor. +input_weights: The training batch's weights as a 1-d tensor. 'input_weights[i]' gives the weight for the i-th input. 
)doc"); diff --git a/tensorflow/contrib/tensor_forest/ops/stats_ops.cc b/tensorflow/contrib/tensor_forest/ops/stats_ops.cc index e8b5c5d8a6efae83b9e408ed3d05ac72d848ef7f..5be581aaec4cab342ef3fd49fa0294e5e702ba1c 100644 --- a/tensorflow/contrib/tensor_forest/ops/stats_ops.cc +++ b/tensorflow/contrib/tensor_forest/ops/stats_ops.cc @@ -75,7 +75,7 @@ REGISTER_OP("GrowTreeV4") .Attr("params: string") .Input("tree_handle: resource") .Input("stats_handle: resource") - .Input("finshed_nodes: int32") + .Input("finished_nodes: int32") .SetShapeFn(tensorflow::shape_inference::NoOutputs) .Doc(R"doc( Grows the tree for finished nodes and allocates waiting nodes. @@ -83,7 +83,7 @@ Grows the tree for finished nodes and allocates waiting nodes. params: A serialized TensorForestParams proto. tree_handle: The handle to the tree. stats_handle: The handle to the stats. -finshed_nodes: A 1-d Tensor of finished node ids from ProcessInput. +finished_nodes: A 1-d Tensor of finished node ids from ProcessInput. )doc"); REGISTER_OP("ProcessInputV4") @@ -119,7 +119,7 @@ sparse_input_values: The values tensor from the SparseTensor input. sparse_input_shape: The shape tensor from the SparseTensor input. input_labels: The training batch's labels as a 1 or 2-d tensor. 'input_labels[i][j]' gives the j-th label/target for the i-th input. -input_weights: The training batch's eample weights as a 1-d tensor. +input_weights: The training batch's weights as a 1-d tensor. 'input_weights[i]' gives the weight for the i-th input. finished_nodes: A 1-d tensor of node ids that have finished and are ready to grow. 
diff --git a/tensorflow/contrib/tensor_forest/python/tensor_forest.py b/tensorflow/contrib/tensor_forest/python/tensor_forest.py index 3650b5d52fe8a1b87a239d41ecfa3de677fffc72..7a35a70bbe3112e0649cefd8116cc50565978da5 100644 --- a/tensorflow/contrib/tensor_forest/python/tensor_forest.py +++ b/tensorflow/contrib/tensor_forest/python/tensor_forest.py @@ -212,7 +212,7 @@ class ForestHParams(object): self.regression = getattr(self, 'regression', False) # Num_outputs is the actual number of outputs (a single prediction for - # classification, a N-dimenensional point for regression). + # classification, a N-dimensional point for regression). self.num_outputs = self.num_classes if self.regression else 1 # Add an extra column to classes for storing counts, which is needed for @@ -445,7 +445,7 @@ class RandomForestGraphs(object): mask = math_ops.less( r, array_ops.ones_like(r) * self.params.bagging_fraction) gather_indices = array_ops.squeeze( - array_ops.where(mask), squeeze_dims=[1]) + array_ops.where(mask), axis=[1]) # TODO(thomaswc): Calculate out-of-bag data and labels, and store # them for use in calculating statistics later. 
tree_data = array_ops.gather(processed_dense_features, gather_indices) diff --git a/tensorflow/contrib/tensorboard/BUILD b/tensorflow/contrib/tensorboard/BUILD index f4efd9717da9e4d5a37eb6b09efcc353a7d0b1ea..2b6a2b2f3c711f48812063e98e05735e2d9b4141 100644 --- a/tensorflow/contrib/tensorboard/BUILD +++ b/tensorflow/contrib/tensorboard/BUILD @@ -9,6 +9,7 @@ exports_files(["LICENSE"]) # For platform specific build config load("//tensorflow/core:platform/default/build_config.bzl", "tf_proto_library") +load("//tensorflow:tensorflow.bzl", "py_test") tf_proto_library( name = "protos_all", @@ -81,6 +82,7 @@ py_test( size = "small", srcs = ["plugins/trace/trace_test.py"], srcs_version = "PY2AND3", + tags = ["no_windows"], deps = [ ":trace", "//tensorflow/python:client_testlib", diff --git a/tensorflow/contrib/tensorboard/db/summary_file_writer.cc b/tensorflow/contrib/tensorboard/db/summary_file_writer.cc index 85b3e7231bcb433e9510522597c03c5f764f06cf..3f24f58f03aac2ba6d368d7eccf8731f611a81b4 100644 --- a/tensorflow/contrib/tensorboard/db/summary_file_writer.cc +++ b/tensorflow/contrib/tensorboard/db/summary_file_writer.cc @@ -132,7 +132,7 @@ class SummaryFileWriter : public SummaryWriterInterface { Status WriteEvent(std::unique_ptr event) override { mutex_lock ml(mu_); queue_.emplace_back(std::move(event)); - if (queue_.size() >= max_queue_ || + if (queue_.size() > max_queue_ || env_->NowMicros() - last_flush_ > 1000 * flush_millis_) { return InternalFlush(); } diff --git a/tensorflow/contrib/tensorboard/db/summary_file_writer_test.cc b/tensorflow/contrib/tensorboard/db/summary_file_writer_test.cc index c61b4655961664a6c9c22a5f6d6f26a55c34bfcd..cd3f712256f2293ed725745f8cbe48109856ef86 100644 --- a/tensorflow/contrib/tensorboard/db/summary_file_writer_test.cc +++ b/tensorflow/contrib/tensorboard/db/summary_file_writer_test.cc @@ -19,6 +19,7 @@ limitations under the License. 
#include "tensorflow/core/lib/core/refcount.h" #include "tensorflow/core/lib/io/path.h" #include "tensorflow/core/lib/io/record_reader.h" +#include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/platform/env.h" #include "tensorflow/core/platform/test.h" #include "tensorflow/core/util/event.pb.h" @@ -58,7 +59,7 @@ class SummaryFileWriterTest : public ::testing::Test { TF_CHECK_OK(env_.GetChildren(testing::TmpDir(), &files)); bool found = false; for (const string& f : files) { - if (StringPiece(f).contains(test_name)) { + if (str_util::StrContains(f, test_name)) { if (found) { return errors::Unknown("Found more than one file for ", test_name); } diff --git a/tensorflow/contrib/tensorrt/BUILD b/tensorflow/contrib/tensorrt/BUILD index 2f316767b35e190c7e438a253a7395b0c5c2ee16..f80b4f1b112dcfb44244cff56d6915af9f455caa 100644 --- a/tensorflow/contrib/tensorrt/BUILD +++ b/tensorflow/contrib/tensorrt/BUILD @@ -11,6 +11,7 @@ exports_files(["LICENSE"]) load( "//tensorflow:tensorflow.bzl", + "py_test", "tf_cc_test", "tf_copts", "tf_cuda_library", @@ -52,7 +53,6 @@ tf_custom_op_library( "ops/trt_engine_op.cc", ], deps = [ - ":trt_engine_op_kernel", ":trt_shape_function", "//tensorflow/core:lib_proto_parsing", ] + if_tensorrt([ @@ -140,6 +140,7 @@ tf_custom_op_py_library( ]), srcs_version = "PY2AND3", deps = [ + "//tensorflow/contrib/util:util_py", "//tensorflow/python:framework_for_generated_wrappers", "//tensorflow/python:resources", ], @@ -174,6 +175,7 @@ py_library( srcs_version = "PY2AND3", deps = [ ":wrap_conversion", + "//tensorflow/python:tf_optimizer", ], ) @@ -183,6 +185,7 @@ tf_py_wrap_cc( copts = tf_copts(), deps = [ ":trt_conversion", + ":trt_engine_op_kernel", "//tensorflow/core:framework_lite", "//util/python:python_headers", ], @@ -272,3 +275,19 @@ tf_cc_test( "//tensorflow/core:test_main", ], ) + +py_test( + name = "tf_trt_integration_test", + srcs = ["test/tf_trt_integration_test.py"], + main = "test/tf_trt_integration_test.py", + 
srcs_version = "PY2AND3", + tags = [ + "manual", + "notap", + ], + deps = [ + ":init_py", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework_test_lib", + ], +) diff --git a/tensorflow/contrib/tensorrt/README.md b/tensorflow/contrib/tensorrt/README.md index 6eafc1754ca5102c8adf04f00e33dc2f8ff970f6..687dee07e1327d50fabc4e14c25a357ae6c959e7 100644 --- a/tensorflow/contrib/tensorrt/README.md +++ b/tensorflow/contrib/tensorrt/README.md @@ -1,59 +1,29 @@ # Using TensorRT in TensorFlow - -This module provides necessary bindings and introduces TRT_engine_op -operator that wraps a subgraph in TensorRT. This is still a work in progress -but should be useable with most common graphs. +This module provides necessary bindings and introduces TRT_engine_op operator +that wraps a subgraph in TensorRT. This is still a work in progress but should +be useable with most common graphs. ## Compilation - -In order to compile the module, you need to have a local TensorRT -installation ( libnvinfer.so and respective include files ). During the -configuration step, TensorRT should be enabled and installation path -should be set. If installed through package managers (deb,rpm), -configure script should find the necessary components from the system -automatically. If installed from tar packages, user has to set path to -location where the library is installed during configuration. +In order to compile the module, you need to have a local TensorRT installation +(libnvinfer.so and respective include files). During the configuration step, +TensorRT should be enabled and installation path should be set. If installed +through package managers (deb,rpm), configure script should find the necessary +components from the system automatically. If installed from tar packages, user +has to set path to location where the library is installed during configuration. 
```shell bazel build --config=cuda --config=opt //tensorflow/tools/pip_package:build_pip_package bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/ ``` -After the installation of tensorflow package, TensorRT transformation -will be available. An example use can be found in test/test_tftrt.py script +After the installation of tensorflow package, TensorRT transformation will be +available. An example use can be found in test/test_tftrt.py script ## Installing TensorRT 3.0.4 -In order to make use of TensorRT integration, you will need a local installation of TensorRT 3.0.4 from the [NVIDIA Developer website](https://developer.nvidia.com/tensorrt). Due to compiler compatibility, you will need to download and install the TensorRT 3.0.4 tarball for _Ubuntu 14.04_, i.e., **_TensorRT-3.0.4.Ubuntu-14.04.5.x86_64.cuda-9.0.cudnn7.0-tar.gz_**, even if you are using Ubuntu 16.04 or later. - -### Preparing TensorRT installation - -Once you have downloaded TensorRT-3.0.4.Ubuntu-14.04.5.x86_64.cuda-9.0.cudnn7.0-tar.gz, you will need to unpack it to an installation directory, which will be referred to as . Please replace with the full path of actual installation directory you choose in commands below. - -```shell -cd && tar -zxf /path/to/TensorRT-3.0.4.Ubuntu-14.04.5.x86_64.cuda-9.0.cudnn7.0-tar.gz -``` - -After unpacking the binaries, you have several options to use them: - -#### To run TensorFlow as a user without superuser privileges - -For a regular user without any sudo rights, you should add TensorRT to your `$LD_LIBRARY_PATH`: - - ```shell - export LD_LIBRARY_PATH=/TensorRT-3.0.4/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}} - ``` - -Then you are ready to use TensorFlow-TensorRT integration. `$LD_LIBRARY_PATH` must contain the path to TensorRT installation for TensorFlow-TensorRT integration to work. If you are using a VirtualEnv-like setup, you can add the command above to your `bin/activate` script or to your `.bashrc` script. 
- -#### To run TensorFlow as a superuser - - When running as a superuser, such as in a container or via sudo, the `$LD_LIBRARY_PATH` approach above may not work. The following is preferred when the user has superuser privileges: - - ```shell - echo "/TensorRT-3.0.4/lib" | sudo tee /etc/ld.so.conf.d/tensorrt304.conf && sudo ldconfig - ``` - - Please ensure that any existing deb package installation of TensorRT is removed before following these instructions to avoid package conflicts. \ No newline at end of file +In order to make use of TensorRT integration, you will need a local installation +of TensorRT 3.0.4 from the [NVIDIA Developer website](https://developer.nvidia.com/tensorrt). +Installation instructions for compatibility with TensorFlow are provided on the +[TensorFlow Installation page](https://www.tensorflow.org/install/install_linux#nvidia_requirements_to_run_tensorflow_with_gpu_support). diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc index ff8cc6374d40dc0b49721a784e25015c76541d03..b412b296e02751427b80e7c1596f2530942519c6 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc @@ -405,7 +405,13 @@ tensorflow::Status ConvertGraphDefToTensorRT( max_mem_per_engine, static_graph_properties, &output_edge_map, precision_mode); if (precision_mode == INT8MODE) { - TF_RETURN_IF_ERROR(GetCalibNode(&p)); + tensorflow::Status status = GetCalibNode(&p); + if (status != tensorflow::Status::OK()) { + LOG(WARNING) << "subgraph conversion error for subgraph_index:" << count + << " due to: \"" << status.ToString() + << "\" SKIPPING......( " << subgraph_node_names.size() + << " nodes)"; + } } else { tensorflow::Status status = ConvertSubGraphToTensorRT(&p); if (status != tensorflow::Status::OK()) { @@ -414,8 +420,8 @@ tensorflow::Status ConvertGraphDefToTensorRT( << "\" SKIPPING......( " << subgraph_node_names.size() << " nodes)"; } 
- count++; } + count++; } graph.ToGraphDef(new_graph_def); return tensorflow::Status::OK(); diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index e920a797fe428620ef62a2b67c07f35d85ef5211..b81ae9dc3eeed6f7b7c6eeac0186700bdd692245 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -443,7 +443,9 @@ class Converter { * 2) Control dependency inputs contain caret at the beginning and we * remove this and annotate the edge as a control dependency. ************************************************************************/ - string name = input_name[0] == '^' ? input_name.substr(1) : input_name; + // skip control nodes + if (input_name[0] == '^') continue; + string name = input_name; auto first = name.find_first_of(':'); if (first != string::npos && first + 2 == name.size() && name[first + 1] == '0') @@ -2262,6 +2264,7 @@ tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { auto ws = new tensorflow::tensorrt::TRTWeightStore(); TF_CHECK_OK(weight_rmgr->Create(calib_op_name, calib_op_name, ws)); Converter converter(op_res->network_, ws, s.precision_mode == FP16MODE); + std::vector input_names; std::vector input_dtypes; for (const std::pair& input : s.input_inds) { @@ -2270,20 +2273,41 @@ tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { int output_idx = input.second; tensorflow::Node* node = s.graph.FindNodeId(node_id); auto node_name = node->name(); - input_names.push_back(node_name); // insert original node name without port - // TODO(jie): alternative :) - if (!s.graph_properties.HasOutputProperties(node_name)) + // input_names should use the node name in the graph + // here it should be the input tensor name -> matching the binding + // insert original node name without port + auto tensor_name = node_name; + if (output_idx != 0) { + tensor_name = StrCat(tensor_name, 
":", output_idx); + } + + VLOG(2) << "input name: " << node_name << " tensor_name: " << tensor_name + << " idx: " << output_idx; + + auto shape_inference_node_name = node_name; + auto shape_inference_output_idx = output_idx; + // rewire the shape inference to original node in the graph + if (s.output_edge_map->count(tensor_name)) { + shape_inference_node_name = s.output_edge_map->at(tensor_name).second; + shape_inference_output_idx = s.output_edge_map->at(tensor_name).first; + } + if (shape_inference_output_idx < 0) continue; + VLOG(2) << "shapeinference name: " << shape_inference_node_name + << " idx: " << shape_inference_output_idx; + + if (!s.graph_properties.HasOutputProperties(shape_inference_node_name)) return tensorflow::errors::Internal("failed to find input node: " + - node_name); + shape_inference_node_name); - auto op_info_vec = s.graph_properties.GetOutputProperties(node_name); - if (static_cast(op_info_vec.size()) < output_idx) + auto op_info_vec = + s.graph_properties.GetOutputProperties(shape_inference_node_name); + if (static_cast(op_info_vec.size()) <= shape_inference_output_idx) return tensorflow::errors::Internal( - "accessing output index of: ", output_idx, ", at node: ", node_name, - "with output entry from shape_map: ", op_info_vec.size()); - - auto op_info = op_info_vec.at(output_idx); + "accessing output index of: ", shape_inference_output_idx, + ", at node: ", shape_inference_node_name, + " with output entry from shape_map: ", op_info_vec.size()); + auto op_info = op_info_vec.at(shape_inference_output_idx); tensorflow::DataType tf_dtype = op_info.dtype(); input_dtypes.push_back(tf_dtype); @@ -2294,16 +2318,23 @@ tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { << "' failed"; return type_status; } - TF_CHECK_OK(ConvertDType(tf_dtype, &dtype)); VLOG(2) << "accessing output index of: " << output_idx << ", at node: " << node_name << "with output entry from shape_map: " << op_info_vec.size(); - // TODO(ben,jie): 
update TRT input format/dimension nvinfer1::DimsCHW input_dim_psuedo_chw; for (int i = 0; i < 3; i++) input_dim_psuedo_chw.d[i] = 1; + // TODO(jie): TRT 3.x only support 4 dimensional input tensor. + // update the code once TRT 4.0 comes out. + if (op_info.shape().dim_size() != 4) { + string err_str = "Require 4 dimensional input."; + StrAppend(&err_str, " Got ", op_info.shape().dim_size(), " ", + shape_inference_node_name); + return tensorflow::errors::Unimplemented(err_str); + } + for (int i = 1; i < op_info.shape().dim_size(); i++) { VLOG(2) << "dimension: " << i << " , size: " << op_info.shape().dim(i).size(); @@ -2312,8 +2343,11 @@ tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { // TODO(ben,jie): proper way to restore input tensor name? auto input_tensor_name = node_name; - if (output_idx != 0) input_tensor_name = StrCat(node_name, ":", output_idx); + if (output_idx != 0) { + input_tensor_name = StrCat(node_name, ":", output_idx); + } + input_names.push_back(input_tensor_name); nvinfer1::ITensor* input_tensor = converter.network()->addInput( input_tensor_name.c_str(), dtype, input_dim_psuedo_chw); @@ -2377,11 +2411,13 @@ tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { tensor->setType(trt_dtype); } - VLOG(2) << "finished output"; + VLOG(2) << "Finished processing outputs"; // Build the engine op_res->builder_->setMaxBatchSize(s.max_batch_size); op_res->builder_->setMaxWorkspaceSize(s.max_workspace_size_bytes); + VLOG(0) << "Max batch size= " << s.max_batch_size + << " max workspace size= " << s.max_workspace_size_bytes; // Build the TRT op // TODO(sami,ben,jie): proper naming! @@ -2475,7 +2511,7 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( std::vector input_names; std::vector input_dtypes; for (const std::pair& input : s.input_inds) { - VLOG(2) << "parsing input!!!!!"; + VLOG(2) << "parsing input. 
Node id= " << input.first; int node_id = input.first; int output_idx = input.second; tensorflow::Node* node = s.graph.FindNodeId(node_id); diff --git a/tensorflow/contrib/tensorrt/resources/trt_resource_manager.cc b/tensorflow/contrib/tensorrt/resources/trt_resource_manager.cc index e663eed4dd6704e2f41bde1dfabd411e86669ecd..9c3698e5d1cc5d6d8d31a8fcaf03d103f1e1915d 100644 --- a/tensorflow/contrib/tensorrt/resources/trt_resource_manager.cc +++ b/tensorflow/contrib/tensorrt/resources/trt_resource_manager.cc @@ -19,6 +19,12 @@ limitations under the License. namespace tensorflow { namespace tensorrt { +std::shared_ptr +tensorflow::tensorrt::TRTResourceManager::instance() { + static std::shared_ptr instance_(new TRTResourceManager); + return instance_; +} + std::shared_ptr tensorflow::tensorrt::TRTResourceManager::getManager(const string& op_name) { // mutex is held for lookup only. Most instantiations where mutex will be held diff --git a/tensorflow/contrib/tensorrt/resources/trt_resource_manager.h b/tensorflow/contrib/tensorrt/resources/trt_resource_manager.h index 5f8ad491d3c13e8911b0b95c3e95e19afe4d59c0..bc15b51e05ef743d0aa260bbd9bd21302a752ec0 100644 --- a/tensorflow/contrib/tensorrt/resources/trt_resource_manager.h +++ b/tensorflow/contrib/tensorrt/resources/trt_resource_manager.h @@ -29,11 +29,7 @@ class TRTResourceManager { TRTResourceManager() = default; public: - static std::shared_ptr instance() { - static std::shared_ptr instance_( - new TRTResourceManager); - return instance_; - } + static std::shared_ptr instance(); // returns a manager for given op, if it doesn't exists it creates one std::shared_ptr getManager(const string& op_name); diff --git a/tensorflow/contrib/tensorrt/test/tf_trt_integration_test.py b/tensorflow/contrib/tensorrt/test/tf_trt_integration_test.py new file mode 100644 index 0000000000000000000000000000000000000000..7a4732876286a9484bc607242ae19a31941313db --- /dev/null +++ b/tensorflow/contrib/tensorrt/test/tf_trt_integration_test.py @@ 
-0,0 +1,156 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Script to test TF-TensorRT integration.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import warnings +import numpy as np + +from tensorflow.contrib import tensorrt as trt +from tensorflow.core.protobuf import config_pb2 as cpb2 +from tensorflow.python.framework import constant_op as cop +from tensorflow.python.framework import dtypes as dtypes +from tensorflow.python.framework import importer as importer +from tensorflow.python.framework import ops as ops +from tensorflow.python.framework import test_util +from tensorflow.python.ops import array_ops as aops +from tensorflow.python.ops import nn as nn +from tensorflow.python.ops import nn_ops as nn_ops +from tensorflow.python.platform import googletest + + +@test_util.with_c_api +class IntegrationTest(test_util.TensorFlowTestCase): + """Class to test Tensorflow-TensorRT integration.""" + + def setUp(self): + """Setup method.""" + super(IntegrationTest, self).setUp() + warnings.simplefilter("always") + inp_dims = (100, 24, 24, 2) + self._input = np.random.random_sample(inp_dims) + self._original_graph = self.get_simple_graph_def() + self._gpu_options = cpb2.GPUOptions( + per_process_gpu_memory_fraction=0.50) + self._config = 
cpb2.ConfigProto(gpu_options=self._gpu_options) + self._reference = self.run_graph(self._original_graph, self._input) + + def get_simple_graph_def(self): + """Create a simple graph and return its graph_def.""" + g = ops.Graph() + with g.as_default(): + a = aops.placeholder( + dtype=dtypes.float32, shape=(None, 24, 24, 2), name="input") + e = cop.constant( + [[[[1., 0.5, 4., 6., 0.5, 1.], [1., 0.5, 1., 1., 0.5, 1.]]]], + name="weights", + dtype=dtypes.float32) + conv = nn.conv2d( + input=a, + filter=e, + strides=[1, 2, 2, 1], + padding="SAME", + name="conv") + b = cop.constant( + [4., 1.5, 2., 3., 5., 7.], name="bias", dtype=dtypes.float32) + t = nn.bias_add(conv, b, name="biasAdd") + relu = nn.relu(t, "relu") + idty = aops.identity(relu, "ID") + v = nn_ops.max_pool( + idty, [1, 2, 2, 1], [1, 2, 2, 1], "VALID", name="max_pool") + aops.squeeze(v, name="output") + return g.as_graph_def() + + def run_graph(self, gdef, dumm_inp): + """Run given graphdef once.""" + ops.reset_default_graph() + g = ops.Graph() + with g.as_default(): + inp, out = importer.import_graph_def( + graph_def=gdef, return_elements=["input", "output"]) + inp = inp.outputs[0] + out = out.outputs[0] + with self.test_session( + graph=g, config=self._config, use_gpu=True, + force_gpu=True) as sess: + val = sess.run(out, {inp: dumm_inp}) + return val + + # Use real data that is representative of the inference dataset + # for calibration. For this test script it is random data. + def run_calibration(self, gdef, dumm_inp): + """Run given calibration graph multiple times.""" + ops.reset_default_graph() + g = ops.Graph() + with g.as_default(): + inp, out = importer.import_graph_def( + graph_def=gdef, return_elements=["input", "output"]) + inp = inp.outputs[0] + out = out.outputs[0] + # run over real calibration data here, we are mimicking a calibration + # set of 30 different batches. 
Use as much calibration data as you want + with self.test_session( + graph=g, config=self._config, use_gpu=True, + force_gpu=True) as sess: + for _ in range(30): + val = sess.run(out, {inp: dumm_inp}) + return val + + def get_trt_graph(self, mode): + """Return trt converted graph.""" + if mode in ["FP32", "FP16", "INT8"]: + return trt.create_inference_graph( + input_graph_def=self._original_graph, + outputs=["output"], + max_batch_size=self._input.shape[0], + max_workspace_size_bytes=1 << 25, + precision_mode=mode, # TRT Engine precision "FP32","FP16" or "INT8" + minimum_segment_size=2 # minimum number of nodes in an engine + ) + return None + + def testFP32(self): + """Test FP32 conversion. Results should be identical to native case.""" + trt_graph = self.get_trt_graph("FP32") + result = self.run_graph(trt_graph, self._input) + self.assertAllEqual(self._reference, result) + result1 = self.run_graph(trt_graph, self._input) + self.assertAllEqual(result1, result) + + def testFP16(self): + """Test FP16 conversion. Results may be different from native case.""" + trt_graph = self.get_trt_graph("FP16") + result = self.run_graph(trt_graph, self._input) + self.assertAllClose(self._reference, result, rtol=1.e-03) + result1 = self.run_graph(trt_graph, self._input) + self.assertAllEqual(result1, result) + + def testINT8(self): + """Test INT8 conversion. 
Results may be different from native case.""" + calib_graph = self.get_trt_graph("INT8") + result = self.run_calibration(calib_graph, self._input) + self.assertAllEqual(self._reference, result) + int8_graph = trt.calib_graph_to_infer_graph(calib_graph) + result = self.run_graph(int8_graph, self._input) + self.assertAllClose(self._reference, result, rtol=1.e-03) + result1 = self.run_graph(int8_graph, self._input) + self.assertAllEqual(result1, result) + + +if __name__ == "__main__": + googletest.main() diff --git a/tensorflow/contrib/testing/python/framework/fake_summary_writer.py b/tensorflow/contrib/testing/python/framework/fake_summary_writer.py index 15a415df303df5be44e89c00005cb253ae2af286..eac34afc4adb268e909076ec9c5fb379cc95537b 100644 --- a/tensorflow/contrib/testing/python/framework/fake_summary_writer.py +++ b/tensorflow/contrib/testing/python/framework/fake_summary_writer.py @@ -52,6 +52,7 @@ class FakeSummaryWriter(object): self._added_graphs = [] self._added_meta_graphs = [] self._added_session_logs = [] + self._added_run_metadata = {} @property def summaries(self): @@ -127,6 +128,11 @@ class FakeSummaryWriter(object): # pylint: disable=unused-argument self._added_session_logs.append(session_log) + def add_run_metadata(self, run_metadata, tag, global_step=None): + if (global_step is not None) and (global_step < 0): + raise ValueError('Invalid global_step %s.' 
% global_step) + self._added_run_metadata[tag] = run_metadata + def flush(self): pass diff --git a/tensorflow/contrib/timeseries/examples/BUILD b/tensorflow/contrib/timeseries/examples/BUILD index 40cf9147b37ec7ed56d2cafdf6adaa4ce12bcbd3..355303acf6ddf866ecf18815b394fcea8488d67d 100644 --- a/tensorflow/contrib/timeseries/examples/BUILD +++ b/tensorflow/contrib/timeseries/examples/BUILD @@ -8,14 +8,22 @@ licenses(["notice"]) # Apache 2.0 exports_files(["LICENSE"]) +config_setting( + name = "empty_condition", + values = {"define": "UNUSED=unused"}, +) + py_binary( name = "predict", srcs = ["predict.py"], srcs_version = "PY2AND3", tags = ["no_pip"], - deps = [ - "//tensorflow:tensorflow_py", + deps = select({ + ":empty_condition": [], + "//conditions:default": [], + }) + [ "//third_party/py/numpy", + "//tensorflow:tensorflow_py", ], ) @@ -25,7 +33,10 @@ py_test( srcs = ["predict_test.py"], data = ["data/period_trend.csv"], srcs_version = "PY2AND3", - tags = ["notsan"], # b/67513579 + tags = [ + "no_windows", # TODO: needs investigation on Windows + "notsan", # b/67513579 + ], deps = [ ":predict", "//tensorflow/python:client_testlib", @@ -38,9 +49,12 @@ py_binary( data = ["data/changepoints.csv"], srcs_version = "PY2AND3", tags = ["no_pip"], - deps = [ - "//tensorflow:tensorflow_py", + deps = select({ + ":empty_condition": [], + "//conditions:default": [], + }) + [ "//third_party/py/numpy", + "//tensorflow:tensorflow_py", ], ) @@ -61,9 +75,12 @@ py_binary( data = ["data/multivariate_level.csv"], srcs_version = "PY2AND3", tags = ["no_pip"], - deps = [ - "//tensorflow:tensorflow_py", + deps = select({ + ":empty_condition": [], + "//conditions:default": [], + }) + [ "//third_party/py/numpy", + "//tensorflow:tensorflow_py", ], ) @@ -86,11 +103,14 @@ py_binary( data = ["data/multivariate_periods.csv"], srcs_version = "PY2AND3", tags = ["no_pip"], - deps = [ + deps = select({ + ":empty_condition": [], + "//conditions:default": [], + }) + [ + "//third_party/py/numpy", 
"//tensorflow:tensorflow_py", "//tensorflow/contrib/timeseries/python/timeseries:estimators", "//tensorflow/contrib/timeseries/python/timeseries:model", - "//third_party/py/numpy", ], ) diff --git a/tensorflow/contrib/timeseries/examples/lstm_test.py b/tensorflow/contrib/timeseries/examples/lstm_test.py index ca56e38ca079f71b38cf29605a295a50929945e8..c58e24e6d9748868791d21b0ff4ec28ca2f646c3 100644 --- a/tensorflow/contrib/timeseries/examples/lstm_test.py +++ b/tensorflow/contrib/timeseries/examples/lstm_test.py @@ -36,17 +36,14 @@ class LSTMExampleTest(test.TestCase): def test_periodicity_learned(self): (observed_times, observed_values, all_times, predicted_values) = lstm.train_and_predict( - training_steps=100, estimator_config=_SeedRunConfig(), + training_steps=2, estimator_config=_SeedRunConfig(), export_directory=self.get_temp_dir()) self.assertAllEqual([100], observed_times.shape) self.assertAllEqual([100, 5], observed_values.shape) self.assertAllEqual([200], all_times.shape) self.assertAllEqual([200, 5], predicted_values.shape) - self.assertGreater( - predicted_values[100, 4] - - predicted_values[115, 4], # Amplitude of fifth component - 0.2) - + # TODO(allenl): Make the model deterministic so you can check something + # substantive. 
if __name__ == "__main__": test.main() diff --git a/tensorflow/contrib/timeseries/python/timeseries/BUILD b/tensorflow/contrib/timeseries/python/timeseries/BUILD index 55a25e39fe38b01cf4d9679f2e30af38383a2b6d..d2746032a04946cdfab4b5ac968ea3add5f6b51d 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/BUILD +++ b/tensorflow/contrib/timeseries/python/timeseries/BUILD @@ -88,10 +88,14 @@ py_library( "//tensorflow/python:array_ops", "//tensorflow/python:dtypes", "//tensorflow/python:framework_ops", + "//tensorflow/python:parsing_ops", "//tensorflow/python:tensor_shape", + "//tensorflow/python:tensor_util", "//tensorflow/python:training", + "//tensorflow/python:util", "//tensorflow/python/estimator:estimator_py", "//tensorflow/python/estimator:export", + "//tensorflow/python/feature_column", ], ) @@ -132,7 +136,6 @@ py_library( srcs_version = "PY2AND3", deps = [ ":feature_keys", - "//tensorflow/contrib/framework:framework_py", "//tensorflow/contrib/layers:layers_py", "//tensorflow/python:array_ops", "//tensorflow/python:control_flow_ops", @@ -141,6 +144,7 @@ py_library( "//tensorflow/python:math_ops", "//tensorflow/python:state_ops", "//tensorflow/python:summary", + "//tensorflow/python:training", "//tensorflow/python:util", "//tensorflow/python:variable_scope", "//tensorflow/python/estimator:estimator_py", @@ -156,23 +160,30 @@ py_test( "head_test.py", ], srcs_version = "PY2AND3", - tags = [ - "no_pip_gpu", # b/63391119 - ], + tags = ["no_pip_gpu"], # b/63391119 deps = [ + ":estimators", ":feature_keys", ":head", + ":input_pipeline", ":model", ":state_management", + "//tensorflow/contrib/timeseries/examples:lstm", "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", "//tensorflow/python:dtypes", "//tensorflow/python:framework_ops", "//tensorflow/python:math_ops", "//tensorflow/python:metrics", + "//tensorflow/python:session", "//tensorflow/python:training", "//tensorflow/python:variables", "//tensorflow/python/estimator:estimator_py", + 
"//tensorflow/python/feature_column", + "//tensorflow/python/saved_model:loader", + "//tensorflow/python/saved_model:tag_constants", + "//third_party/py/numpy", + "@six_archive//:six", ], ) @@ -235,6 +246,7 @@ py_test( ], srcs_version = "PY2AND3", tags = [ + "no_oss", "no_pip", # b/64527635 "no_pip_gpu", # b/63391119 ], @@ -427,6 +439,7 @@ py_test( srcs_version = "PY2AND3", tags = [ "no_pip_gpu", # b/63391119 + "no_windows", # TODO: needs investigation on Windows ], deps = [ ":feature_keys", diff --git a/tensorflow/contrib/timeseries/python/timeseries/estimators.py b/tensorflow/contrib/timeseries/python/timeseries/estimators.py index 469cea4fd2fca65373eef85b1931a267e6e60238..886e1846e2a4f75503a47a3ff92adf97f814053f 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/estimators.py +++ b/tensorflow/contrib/timeseries/python/timeseries/estimators.py @@ -44,7 +44,7 @@ class TimeSeriesRegressor(estimator_lib.Estimator): """An Estimator to fit and evaluate a time series model.""" def __init__(self, model, state_manager=None, optimizer=None, model_dir=None, - config=None): + config=None, head_type=ts_head_lib.TimeSeriesRegressionHead): """Initialize the Estimator. Args: @@ -55,6 +55,8 @@ class TimeSeriesRegressor(estimator_lib.Estimator): from tf.train.Optimizer. Defaults to Adam with step size 0.02. model_dir: See `Estimator`. config: See `Estimator`. + head_type: The kind of head to use for the model (inheriting from + `TimeSeriesRegressionHead`). 
""" input_statistics_generator = math_utils.InputStatisticsFromMiniBatch( dtype=model.dtype, num_features=model.num_features) @@ -63,8 +65,8 @@ class TimeSeriesRegressor(estimator_lib.Estimator): if optimizer is None: optimizer = train.AdamOptimizer(0.02) self._model = model - ts_regression_head = ts_head_lib.time_series_regression_head( - model, state_manager, optimizer, + ts_regression_head = head_type( + model=model, state_manager=state_manager, optimizer=optimizer, input_statistics_generator=input_statistics_generator) model_fn = ts_regression_head.create_estimator_spec super(TimeSeriesRegressor, self).__init__( diff --git a/tensorflow/contrib/timeseries/python/timeseries/estimators_test.py b/tensorflow/contrib/timeseries/python/timeseries/estimators_test.py index 51d0c0ca3f6d380d2c3bde088f73ae9aa1298a28..9f161c1695f415ad28c41ad0c00bc0b056399b96 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/estimators_test.py +++ b/tensorflow/contrib/timeseries/python/timeseries/estimators_test.py @@ -19,6 +19,7 @@ from __future__ import print_function import tempfile import numpy +import six from tensorflow.contrib.timeseries.python.timeseries import ar_model from tensorflow.contrib.timeseries.python.timeseries import estimators @@ -127,6 +128,12 @@ class TimeSeriesRegressorTest(test.TestCase): session=sess) # Test cold starting + six.assertCountEqual( + self, + [feature_keys.FilteringFeatures.TIMES, + feature_keys.FilteringFeatures.VALUES], + signatures.signature_def[ + feature_keys.SavedModelLabels.COLD_START_FILTER].inputs.keys()) batch_numpy_times = numpy.tile( numpy.arange(30, dtype=numpy.int64)[None, :], (10, 1)) batch_numpy_values = numpy.ones([10, 30, 1]) diff --git a/tensorflow/contrib/timeseries/python/timeseries/head.py b/tensorflow/contrib/timeseries/python/timeseries/head.py index 4cf6bbcfd469d7a39332c47b0f9aafc1bf27d4f9..a28a5872b850b51630240bdeb3ff22f372613523 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/head.py +++ 
b/tensorflow/contrib/timeseries/python/timeseries/head.py @@ -39,27 +39,18 @@ from tensorflow.python.util import nest from tensorflow.python.summary import summary -def time_series_regression_head(model, - state_manager, - optimizer, - input_statistics_generator=None): - """Creates a `_Head` for time series regression. +class _NoStatePredictOutput(export_lib.PredictOutput): - Args: - model: A model for time series regression. - state_manager: A state manager. - optimizer: An optimizer. - input_statistics_generator: A input statistics generator. - - Returns: - An instance of `_Head` for time series regression. - """ - return _TimeSeriesRegressionHead(model, state_manager, optimizer, - input_statistics_generator) + def as_signature_def(self, receiver_tensors): + no_state_receiver_tensors = { + key: value for key, value in receiver_tensors.items() + if not key.startswith(feature_keys.State.STATE_PREFIX)} + return super(_NoStatePredictOutput, self).as_signature_def( + receiver_tensors=no_state_receiver_tensors) -class _TimeSeriesRegressionHead(head_lib._Head): # pylint:disable=protected-access - """See `time_series_regression_head`.""" +class TimeSeriesRegressionHead(head_lib._Head): # pylint:disable=protected-access + """Determines input and output signatures for a time series model.""" def __init__(self, model, @@ -67,6 +58,15 @@ class _TimeSeriesRegressionHead(head_lib._Head): # pylint:disable=protected-acc optimizer, input_statistics_generator=None, name=None): + """Creates a `_Head` for time series regression. + + Args: + model: A model for time series regression. + state_manager: A state manager. + optimizer: An optimizer. + input_statistics_generator: A input statistics generator. + name: An optional name for the model. 
+ """ self.model = model self.state_manager = state_manager self.optimizer = optimizer @@ -167,7 +167,7 @@ class _TimeSeriesRegressionHead(head_lib._Head): # pylint:disable=protected-acc export_lib.PredictOutput( state_to_dictionary(filtering_outputs.end_state)), feature_keys.SavedModelLabels.COLD_START_FILTER: - export_lib.PredictOutput( + _NoStatePredictOutput( state_to_dictionary(cold_filtering_outputs.end_state)) }, # Likely unused, but it is necessary to return `predictions` to satisfy @@ -255,6 +255,58 @@ class _TimeSeriesRegressionHead(head_lib._Head): # pylint:disable=protected-acc return self._serving_ops(features) +class OneShotPredictionHead(TimeSeriesRegressionHead): + """A time series head which exports a single stateless serving signature. + + The serving default signature exported by this head expects `times`, `values`, + and any exogenous features, but no state. `values` has shape `[batch_size, + filter_length, num_features]` and `times` has shape `[batch_size, + total_length]`, where `total_length > filter_length`. Any exogenous features + must have their shapes prefixed by the shape of the `times` feature. + + When serving, first performs filtering on the series up to `filter_length` + starting from the default start state for the model, then computes predictions + on the remainder of the series, returning them. + + Model state is neither accepted nor returned, so filtering must be performed + each time predictions are requested when using this head. + """ + + def _serving_ops(self, features): + """Add ops for serving to the graph.""" + with variable_scope.variable_scope("model", use_resource=True): + filtering_features = {} + prediction_features = {} + values_length = array_ops.shape( + features[feature_keys.FilteringFeatures.VALUES])[1] + for key, value in features.items(): + if key == feature_keys.State.STATE_TUPLE: + # Ignore state input. The model's default start state is replicated + # across the batch. 
+ continue + if key == feature_keys.FilteringFeatures.VALUES: + filtering_features[key] = value + else: + filtering_features[key] = value[:, :values_length] + prediction_features[key] = value[:, values_length:] + cold_filtering_outputs = self.model.define_loss( + features=filtering_features, mode=estimator_lib.ModeKeys.EVAL) + prediction_features[feature_keys.State.STATE_TUPLE] = ( + cold_filtering_outputs.end_state) + with variable_scope.variable_scope("model", reuse=True): + prediction_outputs = self.model.predict( + features=prediction_features) + return estimator_lib.EstimatorSpec( + mode=estimator_lib.ModeKeys.PREDICT, + export_outputs={ + feature_keys.SavedModelLabels.PREDICT: + _NoStatePredictOutput(prediction_outputs), + }, + # Likely unused, but it is necessary to return `predictions` to satisfy + # the Estimator's error checking. + predictions={}) + + def _check_feature_shapes_compatible_with(features, compatible_with_name, compatible_with_value, diff --git a/tensorflow/contrib/timeseries/python/timeseries/head_test.py b/tensorflow/contrib/timeseries/python/timeseries/head_test.py index 3415061cfd87358cccaf36dcb301fb36986bbde6..c606db76a668235ab6a837159b9dec072b5fd801 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/head_test.py +++ b/tensorflow/contrib/timeseries/python/timeseries/head_test.py @@ -18,12 +18,20 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import numpy +import six + +from tensorflow.contrib.timeseries.examples import lstm as lstm_example +from tensorflow.contrib.timeseries.python.timeseries import estimators as ts_estimators from tensorflow.contrib.timeseries.python.timeseries import feature_keys from tensorflow.contrib.timeseries.python.timeseries import head as ts_head_lib +from tensorflow.contrib.timeseries.python.timeseries import input_pipeline from tensorflow.contrib.timeseries.python.timeseries import model from 
tensorflow.contrib.timeseries.python.timeseries import state_management +from tensorflow.python.client import session as session_lib from tensorflow.python.estimator import estimator_lib +from tensorflow.python.feature_column import feature_column from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops @@ -31,6 +39,9 @@ from tensorflow.python.ops import math_ops from tensorflow.python.ops import metrics from tensorflow.python.ops import variables from tensorflow.python.platform import test +from tensorflow.python.saved_model import loader +from tensorflow.python.saved_model import tag_constants +from tensorflow.python.training import adam from tensorflow.python.training import coordinator as coordinator_lib from tensorflow.python.training import queue_runner_impl from tensorflow.python.training import training as train @@ -90,7 +101,7 @@ class EvaluationMetricsTests(test.TestCase): .count_up_to(10), dtype=dtypes.float32), (1, 1, 1)) } - model_fn = ts_head_lib.time_series_regression_head( + model_fn = ts_head_lib.TimeSeriesRegressionHead( model=_TickerModel(), state_manager=state_management.PassthroughStateManager(), optimizer=train.GradientDescentOptimizer(0.001)).create_estimator_spec @@ -127,7 +138,7 @@ class _StubModel(object): def _stub_model_fn(): - return ts_head_lib.time_series_regression_head( + return ts_head_lib.TimeSeriesRegressionHead( model=_StubModel(), state_manager=state_management.PassthroughStateManager(), optimizer=train.AdamOptimizer(0.001)).create_estimator_spec @@ -263,5 +274,76 @@ class PredictFeatureCheckingTests(test.TestCase): mode=estimator_lib.ModeKeys.PREDICT) +class OneShotTests(test.TestCase): + + def test_one_shot_prediction_head_export(self): + model_dir = self.get_temp_dir() + categorical_column = feature_column.categorical_column_with_hash_bucket( + key="categorical_exogenous_feature", hash_bucket_size=16) + exogenous_feature_columns = [ + 
feature_column.numeric_column( + "2d_exogenous_feature", shape=(2,)), + feature_column.embedding_column( + categorical_column=categorical_column, dimension=10)] + estimator = ts_estimators.TimeSeriesRegressor( + model=lstm_example._LSTMModel( + num_features=5, num_units=128, + exogenous_feature_columns=exogenous_feature_columns), + optimizer=adam.AdamOptimizer(0.001), + config=estimator_lib.RunConfig(tf_random_seed=4), + state_manager=state_management.ChainingStateManager(), + head_type=ts_head_lib.OneShotPredictionHead, + model_dir=model_dir) + train_features = { + feature_keys.TrainEvalFeatures.TIMES: numpy.arange( + 20, dtype=numpy.int64), + feature_keys.TrainEvalFeatures.VALUES: numpy.tile(numpy.arange( + 20, dtype=numpy.float32)[:, None], [1, 5]), + "2d_exogenous_feature": numpy.ones([20, 2]), + "categorical_exogenous_feature": numpy.array( + ["strkey"] * 20)[:, None] + } + train_input_fn = input_pipeline.RandomWindowInputFn( + input_pipeline.NumpyReader(train_features), shuffle_seed=2, + num_threads=1, batch_size=16, window_size=16) + estimator.train(input_fn=train_input_fn, steps=5) + input_receiver_fn = estimator.build_raw_serving_input_receiver_fn() + export_location = estimator.export_savedmodel(self.get_temp_dir(), + input_receiver_fn) + graph = ops.Graph() + with graph.as_default(): + with session_lib.Session() as session: + signatures = loader.load( + session, [tag_constants.SERVING], export_location) + self.assertEqual([feature_keys.SavedModelLabels.PREDICT], + list(signatures.signature_def.keys())) + predict_signature = signatures.signature_def[ + feature_keys.SavedModelLabels.PREDICT] + six.assertCountEqual( + self, + [feature_keys.FilteringFeatures.TIMES, + feature_keys.FilteringFeatures.VALUES, + "2d_exogenous_feature", + "categorical_exogenous_feature"], + predict_signature.inputs.keys()) + features = { + feature_keys.TrainEvalFeatures.TIMES: numpy.tile( + numpy.arange(35, dtype=numpy.int64)[None, :], [2, 1]), + 
feature_keys.TrainEvalFeatures.VALUES: numpy.tile(numpy.arange( + 20, dtype=numpy.float32)[None, :, None], [2, 1, 5]), + "2d_exogenous_feature": numpy.ones([2, 35, 2]), + "categorical_exogenous_feature": numpy.tile(numpy.array( + ["strkey"] * 35)[None, :, None], [2, 1, 1]) + } + feeds = { + graph.as_graph_element(input_value.name): features[input_key] + for input_key, input_value in predict_signature.inputs.items()} + fetches = {output_key: graph.as_graph_element(output_value.name) + for output_key, output_value + in predict_signature.outputs.items()} + output = session.run(fetches, feed_dict=feeds) + self.assertAllEqual((2, 15, 5), output["mean"].shape) + + if __name__ == "__main__": test.main() diff --git a/tensorflow/contrib/timeseries/python/timeseries/math_utils.py b/tensorflow/contrib/timeseries/python/timeseries/math_utils.py index 26793c80bfbb3c9394e81a5bbfae360deb95ca58..9b593fecbb3fbc3b8b57848462c85dff4c3b7577 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/math_utils.py +++ b/tensorflow/contrib/timeseries/python/timeseries/math_utils.py @@ -60,7 +60,7 @@ def clip_covariance( # TODO(allenl): Smarter scaling here so that correlations are preserved when # fiddling with diagonal elements. 
diagonal = array_ops.matrix_diag_part(covariance_matrix) - maximum = math_ops.reduce_max(diagonal, axis=-1, keep_dims=True) + maximum = math_ops.reduce_max(diagonal, axis=-1, keepdims=True) new_diagonal = gen_math_ops.maximum( diagonal, maximum / maximum_variance_ratio) return array_ops.matrix_set_diag( diff --git a/tensorflow/contrib/timeseries/python/timeseries/state_management_test.py b/tensorflow/contrib/timeseries/python/timeseries/state_management_test.py index d5dce30fda0353bd70f44ec567ac91acce1e9394..5f7e3da2db6da26f50aad9d500959238063a3e3c 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/state_management_test.py +++ b/tensorflow/contrib/timeseries/python/timeseries/state_management_test.py @@ -78,7 +78,7 @@ class StubTimeSeriesModel(model.TimeSeriesModel): batch_end_values = array_ops.squeeze( array_ops.slice(values, [0, array_ops.shape(times)[1] - 1, 0], [-1, 1, -1]), - squeeze_dims=[1, 2]) + axis=[1, 2]) # A pretty odd but easy to think about loss: L1 loss on the batch end # values. 
loss = math_ops.reduce_sum( diff --git a/tensorflow/contrib/timeseries/python/timeseries/state_space_models/BUILD b/tensorflow/contrib/timeseries/python/timeseries/state_space_models/BUILD index ca25ccd2b87580347452a282cc810b293a592196..5d33e23a427bd54fd02b0eb7489f84d189e05e35 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/state_space_models/BUILD +++ b/tensorflow/contrib/timeseries/python/timeseries/state_space_models/BUILD @@ -40,6 +40,7 @@ py_test( timeout = "long", # Moderate but for asan srcs = ["state_space_model_test.py"], srcs_version = "PY2AND3", + tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":state_space_model", "//tensorflow/contrib/layers:layers_py", diff --git a/tensorflow/contrib/timeseries/python/timeseries/state_space_models/kalman_filter.py b/tensorflow/contrib/timeseries/python/timeseries/state_space_models/kalman_filter.py index 1fcd3e391b63c2362d6187da9556e2c71836dbaa..a614386121e000961bf8b32625a28e1251654320 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/state_space_models/kalman_filter.py +++ b/tensorflow/contrib/timeseries/python/timeseries/state_space_models/kalman_filter.py @@ -170,7 +170,7 @@ class KalmanFilter(object): math_ops.matmul( transition_matrices, prior_state[..., None]), - squeeze_dims=[-1]) + axis=[-1]) return advanced_state def predict_state_var( @@ -254,7 +254,7 @@ class KalmanFilter(object): kalman_gain_transposed, array_ops.expand_dims(residual, -1), adjoint_a=True), - squeeze_dims=[-1]) + axis=[-1]) gain_obs = math_ops.matmul( kalman_gain_transposed, observation_model, adjoint_a=True) identity_extradim = linalg_ops.eye( @@ -332,7 +332,7 @@ class KalmanFilter(object): array_ops.expand_dims(state_mean, 1), observation_model, adjoint_b=True), - squeeze_dims=[1]) + axis=[1]) observed_var = math_ops.matmul( math_ops.matmul(observation_model, state_var), observation_model, diff --git a/tensorflow/contrib/tpu/BUILD b/tensorflow/contrib/tpu/BUILD index 
3e32a7a85cc04b7df041bb646dd3d6a7b3cccd28..9646d15486ef618f206936ce55a5eb6ca0387e41 100644 --- a/tensorflow/contrib/tpu/BUILD +++ b/tensorflow/contrib/tpu/BUILD @@ -46,7 +46,7 @@ py_library( deps = [ ":tpu_lib", ":tpu_py", - "//tensorflow/contrib/summary:summary_ops", + "//tensorflow/contrib/training:training_py", "//tensorflow/core:protos_all_py", "//tensorflow/python:array_ops", "//tensorflow/python:control_flow_ops", @@ -56,6 +56,7 @@ py_library( "//tensorflow/python:platform", "//tensorflow/python:state_ops", "//tensorflow/python:summary", + "//tensorflow/python:summary_ops_v2", "//tensorflow/python:training", "//tensorflow/python:variable_scope", "//tensorflow/python:variables", @@ -159,6 +160,7 @@ py_library( name = "tpu_lib", srcs = [ "python/tpu/__init__.py", + "python/tpu/bfloat16.py", "python/tpu/device_assignment.py", "python/tpu/topology.py", "python/tpu/tpu.py", @@ -196,7 +198,8 @@ py_library( ], srcs_version = "PY2AND3", deps = [ - "//tensorflow/contrib/data/python/ops:transformation_ops", + "//tensorflow/contrib/data/python/ops:batching", + "//tensorflow/contrib/data/python/ops:interleave_ops", "//tensorflow/python:dtypes", "//tensorflow/python:function", "//tensorflow/python:functional_ops", @@ -214,6 +217,7 @@ tf_py_test( ":datasets", ], grpc_enabled = True, + tags = ["no_windows"], ) tf_py_test( @@ -227,6 +231,7 @@ tf_py_test( "//tensorflow/python:framework", "//tensorflow/python:layers", ], + tags = ["no_windows"], # TODO: needs investigation on Windows ) tf_py_test( @@ -240,6 +245,17 @@ tf_py_test( ], ) +tf_py_test( + name = "bfloat16_test", + size = "small", + srcs = ["python/tpu/bfloat16_test.py"], + additional_deps = [ + ":tpu", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework", + ], +) + tf_py_test( name = "tpu_infeed_test", size = "small", diff --git a/tensorflow/contrib/tpu/__init__.py b/tensorflow/contrib/tpu/__init__.py index ea6e874f2d952b03e8cdabeee00ccfe1b076a0d0..dc9066855990f372c28dc481959117daa4c2da97 100644 
--- a/tensorflow/contrib/tpu/__init__.py +++ b/tensorflow/contrib/tpu/__init__.py @@ -43,6 +43,7 @@ @@TPUEstimator @@TPUEstimatorSpec @@RunConfig +@@InputPipelineConfig @@TPUConfig """ @@ -53,6 +54,7 @@ from __future__ import print_function # pylint: disable=wildcard-import,unused-import from tensorflow.contrib.tpu.python import profiler from tensorflow.contrib.tpu.python.ops.tpu_ops import * +from tensorflow.contrib.tpu.python.tpu.bfloat16 import * from tensorflow.contrib.tpu.python.tpu.device_assignment import * from tensorflow.contrib.tpu.python.tpu.topology import * from tensorflow.contrib.tpu.python.tpu.tpu import * diff --git a/tensorflow/contrib/tpu/ops/outfeed_ops.cc b/tensorflow/contrib/tpu/ops/outfeed_ops.cc index 5900c61a38726551391c212f92b9b9eacd4a465b..b05c76ca64fbaedc205ab06cc31616787ccc84b8 100644 --- a/tensorflow/contrib/tpu/ops/outfeed_ops.cc +++ b/tensorflow/contrib/tpu/ops/outfeed_ops.cc @@ -26,6 +26,7 @@ REGISTER_OP("OutfeedEnqueue") .Input("input: dtype") .Attr("dtype: type") .SetIsStateful() + .SetShapeFn(shape_inference::NoOutputs) .Doc(R"doc( An op which emits a single Tensor value from an XLA computation. @@ -36,6 +37,7 @@ REGISTER_OP("OutfeedEnqueueTuple") .Input("inputs: dtypes") .Attr("dtypes: list(type)") .SetIsStateful() + .SetShapeFn(shape_inference::NoOutputs) .Doc(R"doc( An op which emits multiple Tensor values from an XLA computation. 
diff --git a/tensorflow/contrib/tpu/profiler/BUILD b/tensorflow/contrib/tpu/profiler/BUILD index 1c32993e8e546a17b8b3c289a306ad8f8388c345..dbf1ab6bbf0ddc7429d8e19279451eb862981e0c 100644 --- a/tensorflow/contrib/tpu/profiler/BUILD +++ b/tensorflow/contrib/tpu/profiler/BUILD @@ -46,6 +46,7 @@ tf_cc_binary( visibility = ["//visibility:public"], deps = [ ":dump_tpu_profile", + ":tpu_profiler_analysis_proto_cc", ":tpu_profiler_proto_cc", ":version", "//tensorflow/core:framework_internal", diff --git a/tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc b/tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc index f2003e04dd061a57353951fb601ebca2f5bccb74..816897499b7a49365060c026d60b977990f3ecdc 100644 --- a/tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc +++ b/tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc @@ -26,6 +26,7 @@ limitations under the License. #include "tensorflow/contrib/tpu/profiler/dump_tpu_profile.h" #include "tensorflow/contrib/tpu/profiler/tpu_profiler.grpc.pb.h" +#include "tensorflow/contrib/tpu/profiler/tpu_profiler_analysis.grpc.pb.h" #include "tensorflow/contrib/tpu/profiler/version.h" #include "tensorflow/core/distributed_runtime/rpc/grpc_util.h" #include "tensorflow/core/lib/core/errors.h" @@ -40,6 +41,7 @@ namespace tensorflow { namespace tpu { namespace { +using ::tensorflow::TPUProfileAnalysis; using ::tensorflow::TPUProfiler; constexpr uint64 kMaxEvents = 1000000; @@ -64,9 +66,10 @@ Status ValidateHostPortPair(const string& host_port) { return Status::OK(); } -ProfileResponse Profile(const string& service_addr, int duration_ms, - const string& repository_root, const string& session_id, - const ProfileOptions& opts) { +ProfileRequest PopulateProfileRequest(int duration_ms, + const string& repository_root, + const string& session_id, + const ProfileOptions& opts) { ProfileRequest request; request.set_duration_ms(duration_ms); request.set_max_events(kMaxEvents); @@ -81,6 +84,17 @@ ProfileResponse Profile(const string& 
service_addr, int duration_ms, *request.mutable_opts() = opts; std::cout << "Limiting the number of trace events to " << kMaxEvents << std::endl; + return request; +} + +// Returns whether the returned trace is empty. +// Failures are handled by CHECK, i.e. abort() +bool Profile(const string& service_addr, const string& logdir, int duration_ms, + const string& repository_root, const string& session_id, + const ProfileOptions& opts) { + ProfileRequest request = + PopulateProfileRequest(duration_ms, repository_root, session_id, opts); + ::grpc::ClientContext context; ::grpc::ChannelArguments channel_args; // TODO(ioeric): use `SetMaxReceiveMessageSize` instead once it's available. @@ -94,7 +108,60 @@ ProfileResponse Profile(const string& service_addr, int duration_ms, channel_args)); ProfileResponse response; TF_QCHECK_OK(FromGrpcStatus(stub->Profile(&context, request, &response))); - return response; + + if (!response.encoded_trace().empty()) { + TF_CHECK_OK(tensorflow::tpu::WriteTensorboardTPUProfile( + logdir, session_id, "", response, &std::cout)); + // Print this at the end so that it's not buried in irrelevant LOG messages. + std::cout + << "NOTE: using the trace duration " << duration_ms << "ms." + << std::endl + << "Set an appropriate duration (with --duration_ms) if you " + "don't see a full step in your trace or the captured trace is too " + "large." + << std::endl; + } + + return response.encoded_trace().empty(); +} + +// Start a new profiling session that includes all the hosts listed in +// hostnames, for the time interval of duration_ms. Possibly save the profiling +// result in the directory specified by repository_root and session_id.
+bool NewSession(const string& service_addr, + const std::vector& hostnames, + int duration_ms, const string& repository_root, + const string& session_id, const ProfileOptions& opts) { + NewProfileSessionRequest new_session_request; + *new_session_request.mutable_request() = + PopulateProfileRequest(duration_ms, repository_root, session_id, opts); + new_session_request.set_repository_root(repository_root); + new_session_request.set_session_id(session_id); + for (const auto& hostname : hostnames) { + new_session_request.add_hosts(hostname); + } + + ::grpc::ClientContext context; + ::grpc::ChannelArguments channel_args; + // TODO(qiuminxu): use `NewHostPortGrpcChannel` instead once their + // `ValidateHostPortPair` checks for empty host string case. + channel_args.SetMaxReceiveMessageSize(std::numeric_limits::max()); + // TODO(jiesun): GRPC support following relevant naming scheme: + // 1. dns:///host:port + // 2. ipv4:host:port or ipv6:[host]:port + // We might need to change the prefix which depends on what TPU name resolver + // will give us. 
+ std::unique_ptr stub = + TPUProfileAnalysis::NewStub(::grpc::CreateCustomChannel( + "dns:///" + service_addr, ::grpc::InsecureChannelCredentials(), + channel_args)); + NewProfileSessionResponse new_session_response; + TF_QCHECK_OK(FromGrpcStatus( + stub->NewSession(&context, new_session_request, &new_session_response))); + + std::cout << "Profile session succeed for host(s):" + << str_util::Join(hostnames, ",") << std::endl; + return new_session_response.empty_trace(); } } // namespace @@ -104,12 +171,16 @@ ProfileResponse Profile(const string& service_addr, int duration_ms, int main(int argc, char** argv) { tensorflow::string FLAGS_service_addr; tensorflow::string FLAGS_logdir; + tensorflow::string FLAGS_workers_list; int FLAGS_duration_ms = 2000; int FLAGS_num_tracing_attempts = 3; bool FLAGS_include_dataset_ops = true; std::vector flag_list = { tensorflow::Flag("service_addr", &FLAGS_service_addr, "Address of TPU profiler service e.g. localhost:8466"), + tensorflow::Flag("workers_list", &FLAGS_workers_list, + "The list of worker TPUs that we are about to profile " + "in the current session."), tensorflow::Flag("logdir", &FLAGS_logdir, "Path of TensorBoard log directory e.g. /tmp/tb_log, " "gs://tb_bucket"), @@ -153,18 +224,30 @@ int main(int argc, char** argv) { constexpr char kProfilePluginDirectory[] = "plugins/profile/"; tensorflow::string repository_root = ::tensorflow::io::JoinPath(FLAGS_logdir, kProfilePluginDirectory); + std::vector hostnames = + tensorflow::str_util::Split(FLAGS_workers_list, ","); + + bool empty_trace = false; while (true) { std::cout << "Starting to profile TPU traces for " << duration_ms << " ms. 
" << "Remaining attempt(s): " << remaining_attempts-- << std::endl; - response = tensorflow::tpu::Profile(FLAGS_service_addr, duration_ms, - repository_root, session_id, opts); - if (remaining_attempts <= 0 || !response.encoded_trace().empty()) break; + if (hostnames.empty()) { + empty_trace = tensorflow::tpu::Profile(FLAGS_service_addr, FLAGS_logdir, + duration_ms, repository_root, + session_id, opts); + } else { + tensorflow::string tpu_master = FLAGS_service_addr; + empty_trace = + tensorflow::tpu::NewSession(tpu_master, hostnames, duration_ms, + repository_root, session_id, opts); + } + if (remaining_attempts <= 0 || !empty_trace) break; std::cout << "No trace event is collected. Automatically retrying." << std::endl << std::endl; } - if (response.encoded_trace().empty()) { + if (empty_trace) { std::cout << "No trace event is collected after " << FLAGS_num_tracing_attempts << " attempt(s). " << "Perhaps, you want to try again (with more attempts?)." @@ -175,13 +258,5 @@ int main(int argc, char** argv) { return 0; } - TF_CHECK_OK(tensorflow::tpu::WriteTensorboardTPUProfile( - FLAGS_logdir, session_id, response, &std::cout)); - // Print this at the end so that it's not buried in irrelevant LOG messages. - std::cout - << "NOTE: using the trace duration " << duration_ms << "ms." << std::endl - << "Set an appropriate duration (with --duration_ms) if you " - "don't see a full step in your trace or the captured trace is too " - "large." 
- << std::endl; + return 0; } diff --git a/tensorflow/contrib/tpu/profiler/dump_tpu_profile.cc b/tensorflow/contrib/tpu/profiler/dump_tpu_profile.cc index ebd6185faad28ae7a22eb33f6b358eb2344c9c22..5e85a967ad4ea373e213fa90c3640e9ab1f92d25 100644 --- a/tensorflow/contrib/tpu/profiler/dump_tpu_profile.cc +++ b/tensorflow/contrib/tpu/profiler/dump_tpu_profile.cc @@ -41,6 +41,7 @@ namespace { using ::tensorflow::io::JoinPath; using ::tensorflow::protobuf::util::JsonOptions; using ::tensorflow::protobuf::util::MessageToJsonString; +using ::tensorflow::strings::StrCat; constexpr char kGraphRunPrefix[] = "tpu_profiler.hlo_graph."; constexpr char kJsonOpProfileFileName[] = "op_profile.json"; @@ -61,28 +62,34 @@ Status WriteGzippedDataToFile(const string& filename, const string& data) { return Status::OK(); } -Status DumpTraceToLogDirectory(StringPiece run_dir, const string& encoded_trace, - std::ostream* os) { - string proto_path = JoinPath(run_dir, kProtoTraceFileName); +Status DumpTraceToLogDirectory(StringPiece run_dir, const string& host_prefix, + const string& encoded_trace, std::ostream* os) { + string proto_path = + JoinPath(run_dir, StrCat(host_prefix, kProtoTraceFileName)); TF_RETURN_IF_ERROR( WriteStringToFile(Env::Default(), proto_path, encoded_trace)); LOG(INFO) << "Dumped raw-proto trace data to " << proto_path; - string json_path = JoinPath(run_dir, kJsonTraceFileName); + string json_path = JoinPath(run_dir, StrCat(host_prefix, kJsonTraceFileName)); Trace trace; trace.ParseFromString(encoded_trace); - *os << "Trace contains " << trace.trace_events_size() << " events." - << std::endl; + if (os) { + *os << "Trace contains " << trace.trace_events_size() << " events." 
+ << std::endl; + } TF_RETURN_IF_ERROR( WriteGzippedDataToFile(json_path, TraceEventsToJson(trace))); - *os << "Dumped JSON trace data to " << json_path << std::endl; + if (os) { + *os << "Dumped JSON trace data to " << json_path << std::endl; + } return Status::OK(); } Status DumpOpProfileToLogDirectory(StringPiece run_dir, + const string& host_prefix, const tpu::op_profile::Profile& profile, std::ostream* os) { - string path = JoinPath(run_dir, kJsonOpProfileFileName); + string path = JoinPath(run_dir, StrCat(host_prefix, kJsonOpProfileFileName)); string json; JsonOptions options; options.always_print_primitive_fields = true; @@ -93,49 +100,20 @@ Status DumpOpProfileToLogDirectory(StringPiece run_dir, string(status.error_message())); } TF_RETURN_IF_ERROR(WriteStringToFile(Env::Default(), path, json)); - *os << "Dumped json op profile data to " << path << std::endl; + if (os) { + *os << "Dumped json op profile data to " << path << std::endl; + } return Status::OK(); } Status DumpToolDataToLogDirectory(StringPiece run_dir, + const string& host_prefix, const tensorflow::ProfileToolData& tool, std::ostream* os) { - string path = JoinPath(run_dir, tool.name()); + string path = JoinPath(run_dir, StrCat(host_prefix, tool.name())); TF_RETURN_IF_ERROR(WriteStringToFile(Env::Default(), path, tool.data())); - *os << "Dumped tool data for " << tool.name() << " to " << path << std::endl; - return Status::OK(); -} - -Status DumpGraphEvents(const string& logdir, const string& run, - const ProfileResponse& response, std::ostream* os) { - int num_graphs = response.computation_graph_size(); - if (response.computation_graph_size() == 0) return Status::OK(); - // The server might generates multiple graphs for one program; we simply - // pick the first one. - if (num_graphs > 1) { - *os << num_graphs - << " TPU program variants observed over the profiling period. " - << "One computation graph will be chosen arbitrarily." 
<< std::endl; - } - // The graph plugin expects the graph in //. - string run_dir = JoinPath(logdir, strings::StrCat(kGraphRunPrefix, run)); - TF_RETURN_IF_ERROR(Env::Default()->RecursivelyCreateDir(run_dir)); - EventsWriter event_writer(JoinPath(run_dir, "events")); - Event event; - // Add the computation graph. - event.set_graph_def(response.computation_graph(0).SerializeAsString()); - event_writer.WriteEvent(event); - *os << "Wrote a HLO graph to " << event_writer.FileName() << std::endl; - - if (response.has_hlo_metadata()) { - tensorflow::TaggedRunMetadata tagged_run_metadata; - tagged_run_metadata.set_tag(run); - tagged_run_metadata.set_run_metadata( - response.hlo_metadata().SerializeAsString()); - tensorflow::Event meta_event; - *meta_event.mutable_tagged_run_metadata() = tagged_run_metadata; - event_writer.WriteEvent(meta_event); - *os << "Wrote HLO ops run metadata to " << event_writer.FileName() + if (os) { + *os << "Dumped tool data for " << tool.name() << " to " << path << std::endl; } return Status::OK(); @@ -144,27 +122,30 @@ Status DumpGraphEvents(const string& logdir, const string& run, } // namespace Status WriteTensorboardTPUProfile(const string& logdir, const string& run, + const string& host, const ProfileResponse& response, std::ostream* os) { // Dumps profile data to /plugins/profile//. + string host_prefix = host.empty() ? "" : StrCat(host, "."); string profile_run_dir = JoinPath(logdir, kProfilePluginDirectory, run); + *os << "Creating directory: " << profile_run_dir; TF_RETURN_IF_ERROR(Env::Default()->RecursivelyCreateDir(profile_run_dir)); // Ignore computation_graph for now. 
if (!response.encoded_trace().empty()) { LOG(INFO) << "Converting trace events to TraceViewer JSON."; - TF_RETURN_IF_ERROR( - DumpTraceToLogDirectory(profile_run_dir, response.encoded_trace(), os)); + TF_RETURN_IF_ERROR(DumpTraceToLogDirectory(profile_run_dir, host_prefix, + response.encoded_trace(), os)); } if (response.has_op_profile() && (response.op_profile().has_by_program_structure() || response.op_profile().has_by_category())) { - TF_RETURN_IF_ERROR(DumpOpProfileToLogDirectory(profile_run_dir, + TF_RETURN_IF_ERROR(DumpOpProfileToLogDirectory(profile_run_dir, host_prefix, response.op_profile(), os)); } for (const auto& tool_data : response.tool_data()) { - TF_RETURN_IF_ERROR( - DumpToolDataToLogDirectory(profile_run_dir, tool_data, os)); + TF_RETURN_IF_ERROR(DumpToolDataToLogDirectory(profile_run_dir, host_prefix, + tool_data, os)); } return Status::OK(); diff --git a/tensorflow/contrib/tpu/profiler/dump_tpu_profile.h b/tensorflow/contrib/tpu/profiler/dump_tpu_profile.h index 29ef977bacfd61e163be49558c5b94277ed479c1..ecf21b1de2219e8896d5e8b79325a193de0b0fa1 100644 --- a/tensorflow/contrib/tpu/profiler/dump_tpu_profile.h +++ b/tensorflow/contrib/tpu/profiler/dump_tpu_profile.h @@ -32,6 +32,7 @@ namespace tpu { // Note: this function creates a directory even when all fields in // ProfileResponse are unset/empty. 
Status WriteTensorboardTPUProfile(const string& logdir, const string& run, + const string& host, const ProfileResponse& response, std::ostream* os); diff --git a/tensorflow/contrib/tpu/profiler/pip_package/cloud_tpu_profiler/main.py b/tensorflow/contrib/tpu/profiler/pip_package/cloud_tpu_profiler/main.py index 0b78cf8695091daf797bcb80586397e7ab1c6284..508c7a842fb82ec080082d7e7f02f8d2f2a79447 100644 --- a/tensorflow/contrib/tpu/profiler/pip_package/cloud_tpu_profiler/main.py +++ b/tensorflow/contrib/tpu/profiler/pip_package/cloud_tpu_profiler/main.py @@ -37,12 +37,17 @@ flags.DEFINE_string( 'will attempt to automatically detect the GCE project from metadata.') flags.DEFINE_string('tpu_name', None, 'Name of the Cloud TPU for Cluster Resolvers. You must ' - 'specify either this flag or --master.') + 'specify either this flag or --service_addr.') # Tool specific parameters flags.DEFINE_string( 'service_addr', None, 'Address of TPU profiler service e.g. ' 'localhost:8466, you must specify either this flag or --tpu_name.') +flags.DEFINE_string( + 'workers_list', None, 'The list of worker TPUs that we are about to profile' + ' e.g. 10.0.1.2, 10.0.1.3. You can specify this flag with --tpu_name or ' + '--service_addr to profile a subset of tpu nodes. You can also use only' + '--tpu_name and leave this flag unspecified to profile all the tpus.') flags.DEFINE_string('logdir', None, 'Path of TensorBoard log directory e.g. 
/tmp/tb_log, ' 'gs://tb_bucket') @@ -56,18 +61,25 @@ flags.DEFINE_boolean('include_dataset_ops', True, FLAGS = flags.FLAGS EXECUTABLE = 'data/capture_tpu_profile' +JOB_NAME = 'worker' +def get_workers_list(cluster_resolver): + cluster_spec = cluster_resolver.cluster_spec() + task_indices = cluster_spec.task_indices(JOB_NAME) + workers_list = [cluster_spec.task_address(JOB_NAME, i).split(':')[0] + for i in task_indices] + return ','.join(workers_list) def run_main(): tf.app.run(main) - def main(unused_argv=None): tf.logging.set_verbosity(tf.logging.INFO) if FLAGS.service_addr is None and FLAGS.tpu_name is None: sys.exit('You must specify either --service_addr or --tpu_name.') + tpu_cluster_resolver = None if FLAGS.service_addr is not None: if FLAGS.tpu_name is not None: tf.logging.warn('Both --service_addr and --tpu_name are set. Ignoring ' @@ -82,6 +94,12 @@ def main(unused_argv=None): service_addr = tpu_cluster_resolver.get_master() service_addr = service_addr.replace('grpc://', '').replace(':8470', ':8466') + workers_list = "" + if FLAGS.workers_list is not None: + workers_list = FLAGS.workers_list + elif tpu_cluster_resolver is not None: + workers_list = get_workers_list(tpu_cluster_resolver) + if not FLAGS.logdir: sys.exit('logdir must be provided.') executable_path = os.path.join(os.path.dirname(__file__), EXECUTABLE) @@ -89,6 +107,7 @@ def main(unused_argv=None): cmd = [executable_path] cmd.append('--logdir=' + logdir) cmd.append('--service_addr=' + service_addr) + cmd.append('--workers_list=' + workers_list) cmd.append('--duration_ms=' + str(FLAGS.duration_ms)) cmd.append('--num_tracing_attempts=' + str(FLAGS.num_tracing_attempts)) cmd.append('--include_dataset_ops=' + str(FLAGS.include_dataset_ops).lower()) diff --git a/tensorflow/contrib/tpu/profiler/pip_package/setup.py b/tensorflow/contrib/tpu/profiler/pip_package/setup.py index 8d99835b64152629c66607e6792495eb36319eb8..ebd478fd02295108b9d2454963eb06165828b523 100644 --- 
a/tensorflow/contrib/tpu/profiler/pip_package/setup.py +++ b/tensorflow/contrib/tpu/profiler/pip_package/setup.py @@ -20,7 +20,7 @@ from __future__ import print_function from setuptools import setup -_VERSION = '1.6.0-rc1' +_VERSION = '1.6.0' CONSOLE_SCRIPTS = [ 'capture_tpu_profile=cloud_tpu_profiler.main:run_main', diff --git a/tensorflow/contrib/tpu/profiler/tf_op_stats.proto b/tensorflow/contrib/tpu/profiler/tf_op_stats.proto index 590db2c376c251280b05be483bc41a8fd7056fc2..63955d18068fc9d3b3ca1a657a3fd526edf10e6f 100644 --- a/tensorflow/contrib/tpu/profiler/tf_op_stats.proto +++ b/tensorflow/contrib/tpu/profiler/tf_op_stats.proto @@ -66,6 +66,10 @@ message OpMetricsDbResult { // The total of the difference between the start times of two // consecutive infeed-enqueues (per host) in picoseconds. optional uint64 total_host_infeed_enq_start_timestamp_ps_diff = 3; + // The total device time in microseconds. + optional double total_device_time_in_us = 4; + // The total host time in microseconds. + optional double total_host_time_in_us = 5; } // Result proto for StepInfo. @@ -79,6 +83,10 @@ message StepInfoResult { optional uint64 infeed_duration_ps = 3; // The start time of this step in picoseconds. optional uint64 begin_ps = 4; + // The waiting time within this step in picoseconds. + optional uint64 wait_duration_ps = 5; + // The time spent on cross-replica-sum in picoseconds. + optional uint64 crs_duration_ps = 6; } // Result proto for a sequence of steps. diff --git a/tensorflow/contrib/tpu/profiler/tpu_profiler.proto b/tensorflow/contrib/tpu/profiler/tpu_profiler.proto index cddc3cd1b41d6e00409222170e69c429fe6f91f8..7be694e866729c58efae4ccf7932dd929c03ed91 100644 --- a/tensorflow/contrib/tpu/profiler/tpu_profiler.proto +++ b/tensorflow/contrib/tpu/profiler/tpu_profiler.proto @@ -21,6 +21,17 @@ message ProfileOptions { // next-field: 2 } +message ToolRequestOptions { + // Required formats for the tool, it should be one of "json", "proto", "raw" + // etc. 
If not specified (backward compatible), use default format, i.e. most + // tools use json format. + string output_formats = 2; + + // Whether to save the result directly to repository or pass it back to caller. + // Defaults to false for backward compatibility. + bool save_to_repo = 3; +} + message ProfileRequest { // In future, the caller will be able to customize when profiling starts and // stops. For now, it collects `duration_ms` milliseconds worth of data. @@ -30,9 +41,12 @@ message ProfileRequest { // events. uint64 max_events = 2; - // required profiling tools name such as "input_pipeline_analyzer" etc + // Required profiling tools name such as "input_pipeline_analyzer" etc repeated string tools = 3; + // Specifies the requirement for each tool. + map tool_options = 8; + // Optional profiling options that control how a TF session will be profiled. ProfileOptions opts = 4; @@ -43,10 +57,14 @@ message ProfileRequest { // The user provided profile session identifier. string session_id = 6; + // The hostname of system where the profile should happen. + // We use it as identifier in part of our output filename. + string host_name = 7; + // In future, the caller will indicate which TF session is being profiled, and // only data relating to that program will be returned. For now, we assume // all activity during the profiling period is relevant. - // next-field: 7 + // next-field: 9 } message ProfileToolData { @@ -78,5 +96,10 @@ message ProfileResponse { // Data payload for each required tools. repeated ProfileToolData tool_data = 6; - // next-field: 7 + + // When we write profiling data directly to repository directory, we need a + // way to figure out whether the captured trace is empty (due to idle TPU).
+ bool empty_trace = 7; + + // next-field: 8 } diff --git a/tensorflow/contrib/tpu/profiler/tpu_profiler_analysis.proto b/tensorflow/contrib/tpu/profiler/tpu_profiler_analysis.proto index a4fc8d4e879eb85522f35663c9c628ecd5ef562c..8b0bbde98e6a1dee8ade789328f3ba0624049562 100644 --- a/tensorflow/contrib/tpu/profiler/tpu_profiler_analysis.proto +++ b/tensorflow/contrib/tpu/profiler/tpu_profiler_analysis.proto @@ -7,13 +7,15 @@ message NewProfileSessionRequest { ProfileRequest request = 1; string repository_root = 2; repeated string hosts = 3; + string session_id = 4; } message NewProfileSessionResponse { // Auxiliary error_message. string error_message = 1; - // If success, return session identifier for future reference. - string session_id = 2; + + // Whether all hosts returned an empty trace. + bool empty_trace = 2; } message EnumProfileSessionsAndToolsRequest { diff --git a/tensorflow/contrib/tpu/profiler/version.h b/tensorflow/contrib/tpu/profiler/version.h index dc6a934891138018d32d511750120453bdf290cf..618479e1a6ccf26a4103ea1f182b662d7d9998da 100644 --- a/tensorflow/contrib/tpu/profiler/version.h +++ b/tensorflow/contrib/tpu/profiler/version.h @@ -16,6 +16,6 @@ limitations under the License. #ifndef TENSORFLOW_CONTRIB_TPU_PROFILER_VERSION_H_ #define TENSORFLOW_CONTRIB_TPU_PROFILER_VERSION_H_ -#define TPU_PROFILER_VERSION "1.5.0" +#define TPU_PROFILER_VERSION "1.6.0" #endif // TENSORFLOW_CONTRIB_TPU_PROFILER_VERSION_H_ diff --git a/tensorflow/contrib/tpu/python/tpu/bfloat16.py b/tensorflow/contrib/tpu/python/tpu/bfloat16.py new file mode 100644 index 0000000000000000000000000000000000000000..5e49af6408e8aaf2d6bd56335a60724853ac14c2 --- /dev/null +++ b/tensorflow/contrib/tpu/python/tpu/bfloat16.py @@ -0,0 +1,77 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +"""Helper context for running models with bfloat16.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.framework import dtypes +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import variable_scope +from tensorflow.python.util import tf_contextlib + + +def _get_custom_getter(): + """Returns a custom getter that this class's methods must be called under. + + All methods of this class must be called under a variable scope that was + passed this custom getter. Example: + + ```python + network = ConvNetBuilder(...) + with tf.variable_scope('cg', custom_getter=network.get_custom_getter()): + network.conv(...) + # Call more methods of network here + ``` + + Currently, this custom getter only does anything if self.use_tf_layers is + True. In that case, it causes variables to be stored as dtype + self.variable_type, then casted to the requested dtype, instead of directly + storing the variable as the requested dtype. + """ + + def inner_custom_getter(getter, *args, **kwargs): + """Custom getter that forces variables to have type self.variable_type.""" + cast_to_bfloat16 = False + requested_dtype = kwargs['dtype'] + if requested_dtype == dtypes.bfloat16: + # Only change the variable dtype if doing so does not decrease variable + # precision. 
+ kwargs['dtype'] = dtypes.float32 + cast_to_bfloat16 = True + var = getter(*args, **kwargs) + # This if statement is needed to guard the cast, because batch norm + # assigns directly to the return value of this custom getter. The cast + # makes the return value not a variable so it cannot be assigned. Batch + # norm variables are always in fp32 so this if statement is never + # triggered for them. + if cast_to_bfloat16: + var = math_ops.cast(var, dtypes.bfloat16) + return var + + return inner_custom_getter + + +@tf_contextlib.contextmanager +def bfloat16_scope(): + """Scope class for bfloat16 variables so that the model uses custom getter. + + This enables variables to be read as bfloat16 type when using get_variable. + """ + with variable_scope.variable_scope( + 'bfloat16', custom_getter=_get_custom_getter()) as varscope: + yield varscope diff --git a/tensorflow/contrib/tpu/python/tpu/bfloat16_test.py b/tensorflow/contrib/tpu/python/tpu/bfloat16_test.py new file mode 100644 index 0000000000000000000000000000000000000000..48a01c7308fbf14d2fb3bd29382d98a6ade1d810 --- /dev/null +++ b/tensorflow/contrib/tpu/python/tpu/bfloat16_test.py @@ -0,0 +1,50 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================= + +"""Tests for bfloat16 helper.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.tpu.python.tpu import bfloat16 +from tensorflow.python.framework import dtypes +from tensorflow.python.ops import variable_scope + +from tensorflow.python.platform import test + + +class BFloat16ScopeTest(test.TestCase): + + def testScopeName(self): + """Test if name for the variable scope is propagated correctly. + """ + with bfloat16.bfloat16_scope() as bf: + self.assertEqual(bf.name, "bfloat16") + + def testRequestedDType(self): + """Test if requested dtype is honored in the getter. + """ + with bfloat16.bfloat16_scope() as scope: + v1 = variable_scope.get_variable("v1", []) + self.assertEqual(v1.dtype.base_dtype, dtypes.float32) + v2 = variable_scope.get_variable("v2", [], dtype=dtypes.bfloat16) + self.assertEqual(v2.dtype.base_dtype, dtypes.bfloat16) + self.assertEqual([dtypes.float32, dtypes.float32], + [v.dtype.base_dtype for v in scope.global_variables()]) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/tpu/python/tpu/datasets.py b/tensorflow/contrib/tpu/python/tpu/datasets.py index 465c668fd8b42f150892f8e4b52de76c6fe13fa9..2e472a2805f98b15505f56af403aa6223e28c667 100644 --- a/tensorflow/contrib/tpu/python/tpu/datasets.py +++ b/tensorflow/contrib/tpu/python/tpu/datasets.py @@ -170,7 +170,7 @@ def StreamingFilesDataset(files, args=[source_handle], Tout=[dtypes.string], f=LoadingFunc, - target='/job:%s/replica:0/task:0/cpu:0' % file_reader_job) + target='/job:%s/replica:0/task:0/cpu:0' % file_reader_job)[0] with ops.device('/job:%s' % worker_job): output_dataset = dataset_ops.Dataset.range(2).repeat().map( diff --git a/tensorflow/contrib/tpu/python/tpu/tpu.py b/tensorflow/contrib/tpu/python/tpu/tpu.py index
3f2db548ace9e10df7844d8fb461670d27234670..a1690dadffe5770af9416a7c5ad3a7e336f6bc18 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu.py @@ -25,6 +25,8 @@ from tensorflow.contrib.tpu.python.ops import tpu_ops from tensorflow.contrib.tpu.python.tpu import tpu_function from tensorflow.core.framework import attr_value_pb2 +from tensorflow.python.framework import device as pydev +from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops @@ -56,6 +58,7 @@ _NOT_IMPLEMENTED_OPS = set([ _MAX_WARNING_LINES = 5 _TPU_REPLICATE_ATTR = "_tpu_replicate" +_OUTSIDE_COMPILATION_ATTR = "_xla_outside_compilation" def _tpu_system_device_name(job): @@ -121,8 +124,16 @@ class TPUReplicateContext(control_flow_ops.XLAControlFlowContext): outside the replicated computation. """ - def __init__(self, name): + def __init__(self, name, num_replicas): super(TPUReplicateContext, self).__init__() + self._num_replicas = num_replicas + self._outer_device_function_stack = None + self._oc_dev_fn_stack = None + self._outside_compilation_cluster = None + self._outside_compilation_counter = 0 + self._in_gradient_colocation = None + self._gradient_colocation_stack = [] + self._host_compute_core = [] self._name = name self._unsupported_ops = [] @@ -136,6 +147,143 @@ class TPUReplicateContext(control_flow_ops.XLAControlFlowContext): logging.warning("... 
and %d more" % (len(self._unsupported_ops) - _MAX_WARNING_LINES)) + def EnterGradientColocation(self, op, gradient_uid): + if op is not None: + self._gradient_colocation_stack.append(op) + if not self._outside_compilation_cluster: + try: + outside_attr = op.get_attr(_OUTSIDE_COMPILATION_ATTR) + if self._in_gradient_colocation: + raise NotImplementedError( + "Cannot nest gradient colocation operations outside compilation" + ) + if gradient_uid == "__unsupported__": + raise NotImplementedError( + "No gradient_uid calling gradient within outside_compilation") + # When we take the gradient of an op X in an + # outside_compilation cluster C in a forward computation we + # would like to put the ops corresponding to the gradient of + # X into a new outside_compilation cluster C'. However, if + # we take the gradient of X twice, the second one should get + # yet another new outside_compilation cluster C''. + # + # The mechanism we adopt is to use a 'root_cluster' which is + # the cluster that X was in before we took gradients, and a + # 'gradient_uid' which is different for every invocation of + # gradients, and put the gradient of X in cluster + # 'root_cluster.gradient_uid'. + # + # When the gradient code adds multiple Ops, it asks them to + # be colocated either with the original Op X, or with one of + # the preceding Ops that was added to the gradient. In other + # words, we want to detect the case where we are colocating + # with an Op that is in cluster root_cluster.gradient_uid + # and put the new Op in that same cluster if the + # gradient_uid is the same (the case that we are in the same + # invocation of gradients, and just adding new Ops to the + # cluster); and in a different cluster if the gradient_uids + # are different (the case that we are in a new invocation of + # gradients, taking the gradient of a previously-computed + # gradient). 
+ self._in_gradient_colocation = op + parts = outside_attr.split(".") + if len(parts) > 1: + uid = parts[-1] + if uid == gradient_uid: + # Keep using the same cluster + cluster = outside_attr + else: + # We're taking the gradient of a gradient so make a new + # cluster attr, adding a new '.uid' on the end to + # preserve the invariant that the gradient_uid is the + # suffix after the last '.' in the attr. + cluster = outside_attr + "." + gradient_uid + else: + # We're taking the gradient of an Op in the forward pass, so + # make a new cluster combining the Op's cluster and the + # gradient id. + cluster = outside_attr + "." + gradient_uid + self._EnterOutsideCompilationScope(cluster=cluster) + except ValueError: + # The attr was not present: do nothing. + pass + + def ExitGradientColocation(self, op, gradient_uid): + if op is not None: + if not self._gradient_colocation_stack: + raise errors.InternalError( + op.node_def, op, + "Badly nested gradient colocation: empty stack when popping Op " + + op.name) + last_op = self._gradient_colocation_stack.pop() + if op is last_op: + if op is self._in_gradient_colocation: + self._in_gradient_colocation = None + self._ExitOutsideCompilationScope() + else: + raise errors.InternalError( + op.node_def, op, "Badly nested gradient colocation, expected " + + last_op.name + ", got " + op.name) + + def _EnterOutsideCompilationScope(self, cluster=None): + + class FakeOp(object): + """A helper class to determine the current device. + + Supports only the device set/get methods needed to run the + graph's _apply_device_function method.
+ """ + + def __init__(self): + self._device = "" + + @property + def device(self): + return self._device + + def _set_device(self, device): + self._device = device.to_string() + + if self._outside_compilation_cluster: + raise NotImplementedError("Cannot nest outside_compilation clusters") + if cluster: + self._outside_compilation_cluster = cluster + else: + self._outside_compilation_cluster = str(self._outside_compilation_counter) + self._outside_compilation_counter += 1 + graph = ops.get_default_graph() + fake_op = FakeOp() + graph._apply_device_functions(fake_op) # pylint: disable=protected-access + device = pydev.DeviceSpec.from_string(fake_op.device) + if (device.device_type == "TPU_REPLICATED_CORE" and + device.device_index is not None): + self._host_compute_core.append(self._outside_compilation_cluster + ":" + + str(device.device_index)) + self._oc_dev_fn_stack = graph._device_function_stack # pylint: disable=protected-access + graph._device_function_stack = self._outer_device_function_stack # pylint: disable=protected-access + + def _ExitOutsideCompilationScope(self): + if not self._outside_compilation_cluster: + raise NotImplementedError( + "Attempted to exit outside_compilation scope when not in scope") + self._outside_compilation_cluster = None + graph = ops.get_default_graph() + graph._device_function_stack = self._oc_dev_fn_stack # pylint: disable=protected-access + + def Enter(self): + if not self._outer_device_function_stack: + # Capture the device function stack at the time of first entry + # since that is the stack that will be used outside_compilation. 
+ graph = ops.get_default_graph() + self._outer_device_function_stack = list(graph._device_function_stack) # pylint: disable=protected-access + super(TPUReplicateContext, self).Enter() + + def Exit(self): + super(TPUReplicateContext, self).Exit() + + def HostComputeCore(self): + return self._host_compute_core + def AddOp(self, op): self._AddOpInternal(op) @@ -157,9 +305,16 @@ class TPUReplicateContext(control_flow_ops.XLAControlFlowContext): raise ValueError("TPU computations cannot be nested") op._set_attr(_TPU_REPLICATE_ATTR, attr_value_pb2.AttrValue(s=compat.as_bytes(self._name))) - # pylint: enable=protected-access - op.graph.prevent_feeding(op) - op.graph.prevent_fetching(op) + if self._outside_compilation_cluster: + op._set_attr( + _OUTSIDE_COMPILATION_ATTR, + attr_value_pb2.AttrValue( + s=compat.as_bytes(self._outside_compilation_cluster))) + if self._num_replicas > 1 or not self._outside_compilation_cluster: + # Prevent feeding or fetching anything that is being compiled, + # and any replicated outside_compilation Op. + op.graph.prevent_feeding(op) + op.graph.prevent_fetching(op) def AddValue(self, val): result = val @@ -181,6 +336,45 @@ class TPUReplicateContext(control_flow_ops.XLAControlFlowContext): return None +def outside_compilation(computation, args=None): + """Builds part of a computation outside any current TPU replicate scope. + + Args: + computation: A Python function that builds the computation to + place on the host. + args: Inputs to pass to computation; None (the default) passes no inputs. + Returns: + The Tensors returned by computation.
+ """ + graph = ops.get_default_graph() + + # If we are in a TPUReplicateContext, signal that we are now + # outside_compilation + initial_context = graph._get_control_flow_context() # pylint: disable=protected-access + context = initial_context + while context: + if isinstance(context, TPUReplicateContext): + context._EnterOutsideCompilationScope() # pylint: disable=protected-access + context = context.outer_context + + retval = computation(*(() if args is None else args)) + + # If we are in a TPUReplicateContext, signal that we are no longer + # outside_compilation + final_context = graph._get_control_flow_context() # pylint: disable=protected-access + if initial_context is not final_context: + raise NotImplementedError( + "Control-flow context cannot be different at start and end of an " + "outside_compilation scope") + context = initial_context + while context: + if isinstance(context, TPUReplicateContext): + context._ExitOutsideCompilationScope() # pylint: disable=protected-access + context = context.outer_context + + return retval + + def replicate(computation, inputs=None, infeed_queue=None, @@ -280,7 +474,8 @@ def replicate(computation, computation_inputs.append( tpu_ops.tpu_replicated_input(replicas, name="input{}".format(i))) - context = TPUReplicateContext(name=graph.unique_name("cluster")) + context = TPUReplicateContext( + name=graph.unique_name("cluster"), num_replicas=num_replicas) try: context.Enter() @@ -361,6 +556,12 @@ def replicate(computation, finally: context.report_unsupported_operations() context.Exit() + host_compute_core = context.HostComputeCore() + + if host_compute_core: + attr_value = attr_value_pb2.AttrValue() + attr_value.list.s.extend([compat.as_bytes(x) for x in host_compute_core]) + metadata._set_attr("host_compute_core", attr_value) # pylint: disable=protected-access # Fan-out: Builds a TPUReplicatedOutput node for each output.
outputs = [tpu_ops.tpu_replicated_output(output_tensors[i], num_replicas, diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_config.py b/tensorflow/contrib/tpu/python/tpu/tpu_config.py index cc1a7fd801506e3f0b758c4848205f1c375403d2..6d7331e3c79ade9c12c15de79f550cf3973c4e6c 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_config.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_config.py @@ -210,8 +210,9 @@ class RunConfig(run_config_lib.RunConfig): raise ValueError( 'You cannot provide a ClusterResolver and ' 'session_config.cluster_def.') - self._session_config.cluster_def.CopyFrom( - self._cluster_spec.as_cluster_def()) + if self._cluster_spec: + self._session_config.cluster_def.CopyFrom( + self._cluster_spec.as_cluster_def()) @property def evaluation_master(self): diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py index fa56708f44722378f2733963a9a6864b92f33443..98eb0e240f0666b2d4a1b5135faef383e49b7468 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py @@ -30,7 +30,6 @@ import six from six.moves import queue as Queue # pylint: disable=redefined-builtin from six.moves import xrange # pylint: disable=redefined-builtin -from tensorflow.contrib.summary import summary_ops as contrib_summary from tensorflow.contrib.tpu.python.ops import tpu_ops from tensorflow.contrib.tpu.python.tpu import tpu from tensorflow.contrib.tpu.python.tpu import tpu_config @@ -38,6 +37,8 @@ from tensorflow.contrib.tpu.python.tpu import tpu_context from tensorflow.contrib.tpu.python.tpu import tpu_feed from tensorflow.contrib.tpu.python.tpu import training_loop from tensorflow.contrib.tpu.python.tpu import util as util_lib +from tensorflow.contrib.training.python.training import hparam +from tensorflow.core.framework import variable_pb2 from tensorflow.core.framework.summary_pb2 import Summary from tensorflow.core.protobuf import config_pb2 from 
tensorflow.python.data.ops import dataset_ops @@ -53,7 +54,9 @@ from tensorflow.python.ops import check_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import init_ops from tensorflow.python.ops import math_ops +from tensorflow.python.ops import resource_variable_ops from tensorflow.python.ops import state_ops +from tensorflow.python.ops import summary_ops_v2 as contrib_summary from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables from tensorflow.python.platform import tf_logging as logging @@ -73,6 +76,8 @@ _ITERATIONS_PER_LOOP_VAR = 'iterations_per_loop' _BATCH_SIZE_KEY = 'batch_size' _CROSS_REPLICA_SUM_OP = 'CrossReplicaSum' _ONE_GIGABYTE = 1024 * 1024 * 1024 +_TPU_ENQUEUE_OPS = '_tpu_enqueue_ops' +_TPU_TRAIN_OP = '_tpu_train_op' _RESERVED_PARAMS_KEYS = [_BATCH_SIZE_KEY] @@ -85,6 +90,13 @@ _RESERVED_PARAMS_KEYS = [_BATCH_SIZE_KEY] _WRAP_INPUT_FN_INTO_WHILE_LOOP = False +ops.register_proto_function( + '{}_{}'.format(_TPU_ESTIMATOR, _ITERATIONS_PER_LOOP_VAR), + proto_type=variable_pb2.VariableDef, + to_proto=resource_variable_ops._to_proto_fn, # pylint: disable=protected-access + from_proto=resource_variable_ops._from_proto_fn) # pylint: disable=protected-access + + def _create_global_step(graph): graph = graph or ops.get_default_graph() if training.get_global_step(graph) is not None: @@ -1297,7 +1309,10 @@ class _ModelFnWrapper(object): batch_size_for_model_fn = self._ctx.batch_size_for_model_fn if batch_size_for_model_fn is not None: - params[_BATCH_SIZE_KEY] = batch_size_for_model_fn + if isinstance(params, hparam.HParams): + params.add_hparam(_BATCH_SIZE_KEY, batch_size_for_model_fn) + else: + params[_BATCH_SIZE_KEY] = batch_size_for_model_fn estimator_spec = self._model_fn(features=features, **kwargs) if (self._ctx.is_running_on_cpu(is_export_mode) and @@ -1936,7 +1951,10 @@ class TPUEstimator(estimator_lib.Estimator): # input_fn for use_tpu=True/False. 
batch_size_for_input_fn = ctx.batch_size_for_input_fn if batch_size_for_input_fn is not None: - kwargs['params'][_BATCH_SIZE_KEY] = batch_size_for_input_fn + if isinstance(kwargs['params'], hparam.HParams): + kwargs['params'].add_hparam(_BATCH_SIZE_KEY, batch_size_for_input_fn) + else: + kwargs['params'][_BATCH_SIZE_KEY] = batch_size_for_input_fn # For export_savedmodel, input_fn is never passed to Estimator. So, # `is_export_mode` must be False. @@ -2006,6 +2024,13 @@ class TPUEstimator(estimator_lib.Estimator): enqueue_ops, dequeue_fn, input_hooks, run_infeed_loop_on_coordinator = ( input_holders.generate_infeed_enqueue_ops_and_dequeue_fn()) + graph = ops.get_default_graph() + for enqueue_op in enqueue_ops: + if isinstance(enqueue_op, list): + graph.get_collection_ref(_TPU_ENQUEUE_OPS).extend(enqueue_op) + else: + graph.add_to_collection(_TPU_ENQUEUE_OPS, enqueue_op) + if mode == model_fn_lib.ModeKeys.TRAIN: loss, host_call, scaffold = ( _train_on_tpu_system(ctx, model_fn_wrapper, dequeue_fn)) @@ -2019,7 +2044,8 @@ class TPUEstimator(estimator_lib.Estimator): host_ops, run_infeed_loop_on_coordinator=( run_infeed_loop_on_coordinator)), - ExamplesPerSecondHook(ctx.global_batch_size), + ExamplesPerSecondHook(ctx.global_batch_size, + output_dir=self.model_dir), InstallSignalHandlerHook(), training.LoggingTensorHook( { @@ -2028,6 +2054,16 @@ class TPUEstimator(estimator_lib.Estimator): }, every_n_secs=30) ] + input_hooks + chief_hooks = [] + if (self._config.save_checkpoints_secs or + self._config.save_checkpoints_steps): + chief_hooks.append( + training.CheckpointSaverHook( + self.model_dir, + save_secs=self._config.save_checkpoints_secs, + save_steps=self._config.save_checkpoints_steps, + steps_per_run=self._config.tpu_config.iterations_per_loop, + scaffold=scaffold)) summary.scalar(model_fn_lib.LOSS_METRIC_KEY, loss) with ops.control_dependencies([loss]): update_ops = _sync_variables_ops() @@ -2035,11 +2071,15 @@ class TPUEstimator(estimator_lib.Estimator): # 
Validate the TPU training graph to catch basic errors _validate_tpu_training_graph() + train_op = control_flow_ops.group(*update_ops) + graph.add_to_collection(_TPU_TRAIN_OP, train_op) + return model_fn_lib.EstimatorSpec( mode, loss=loss, + training_chief_hooks=chief_hooks, training_hooks=hooks, - train_op=control_flow_ops.group(*update_ops), + train_op=train_op, scaffold=scaffold) if mode == model_fn_lib.ModeKeys.EVAL: diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_system_metadata.py b/tensorflow/contrib/tpu/python/tpu/tpu_system_metadata.py index eea57ed336731339476c08f3890e93ac8fcad7d1..3ae350c7bb345cabdb74783c3233354d67394d3a 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_system_metadata.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_system_metadata.py @@ -120,7 +120,8 @@ def _query_tpu_system_metadata(master_address, run_config, logging.info('*** Num TPU Workers: %d', metadata.num_hosts) logging.info('*** Num TPU Cores Per Worker: %d', metadata.num_of_cores_per_host) - logging.info('*** Available Devices: %s', metadata.devices) + for device in metadata.devices: + logging.info('*** Available Device: %s', device) else: logging.info('Failed to find TPU: %s', metadata) return metadata diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_test.py b/tensorflow/contrib/tpu/python/tpu/tpu_test.py index 336d8260c3c8a5c30efa603e3faeabcc0944b8d0..c3882b8a27bc835f906c47dc5219f280c53800b8 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_test.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_test.py @@ -37,7 +37,7 @@ class TPUContextTest(test.TestCase): def testIsInContext(self): """Test that control_flow_util can check that we're in a TPU context.""" z1 = array_ops.identity(1) - context = tpu.TPUReplicateContext(b"context") + context = tpu.TPUReplicateContext(b"context", 1) context.Enter() z2 = array_ops.identity(1) context.Exit() diff --git a/tensorflow/contrib/training/BUILD b/tensorflow/contrib/training/BUILD index 
4d2bfd3e434e60b3fac408931688e8e486b7e494..5de55b5f7f2a41ac6edd27e5a102e565f33df12c 100644 --- a/tensorflow/contrib/training/BUILD +++ b/tensorflow/contrib/training/BUILD @@ -60,6 +60,7 @@ py_library( "//tensorflow/python:util", "//tensorflow/python:variable_scope", "//tensorflow/python:variables", + "//tensorflow/python/data", "//tensorflow/python/estimator:inputs_queues", "//third_party/py/numpy", "@six_archive//:six", diff --git a/tensorflow/contrib/training/__init__.py b/tensorflow/contrib/training/__init__.py index da2de3e421b841937e4125168ea1ecea066ff841..edd71fb2502cf6c965a97485e074d20f876fd504 100644 --- a/tensorflow/contrib/training/__init__.py +++ b/tensorflow/contrib/training/__init__.py @@ -57,6 +57,8 @@ from tensorflow.contrib.training.python.training.hparam import * from tensorflow.contrib.training.python.training.resample import * from tensorflow.contrib.training.python.training.sampling_ops import * from tensorflow.contrib.training.python.training.sequence_queueing_state_saver import * +from tensorflow.contrib.training.python.training.tensor_queue_dataset import enqueue_in_queue_dataset +from tensorflow.contrib.training.python.training.tensor_queue_dataset import prepend_from_queue_and_padded_batch_dataset from tensorflow.contrib.training.python.training.training import add_gradients_summaries from tensorflow.contrib.training.python.training.training import clip_gradient_norms from tensorflow.contrib.training.python.training.training import clip_gradient_norms_fn @@ -75,6 +77,7 @@ _allowed_symbols = [ 'FeedingQueueRunner', 'get_or_create_eval_step', 'StopAfterNEvalsHook', 'SummaryAtEndHook', 'wait_for_new_checkpoint', 'add_gradients_summaries', 'clip_gradient_norms', 'clip_gradient_norms_fn', 'create_train_op', - 'multiply_gradients', 'train'] + 'multiply_gradients', 'enqueue_in_queue_dataset', + 'prepend_from_queue_and_padded_batch_dataset', 'train'] remove_undocumented(__name__, _allowed_symbols) diff --git 
a/tensorflow/contrib/training/python/training/batch_sequences_with_states_test.py b/tensorflow/contrib/training/python/training/batch_sequences_with_states_test.py index dbdbb08a8252c799924812c83fff7f0631424761..f305197c190b67355338c407a7895a0507941ddb 100644 --- a/tensorflow/contrib/training/python/training/batch_sequences_with_states_test.py +++ b/tensorflow/contrib/training/python/training/batch_sequences_with_states_test.py @@ -27,6 +27,7 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors_impl from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor +from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import random_ops @@ -517,6 +518,7 @@ class BatchSequencesWithStatesTestWithCApi(BatchSequencesWithStatesTest): ops._USE_C_API = self._prev_value +@test_util.with_c_api class PaddingTest(test.TestCase): def testPaddingInvalidLengths(self): diff --git a/tensorflow/contrib/training/python/training/evaluation.py b/tensorflow/contrib/training/python/training/evaluation.py index 4bb53e867811b27dc95857cfdfe936dd2e3b5c6e..f7fd66d33fc0c329db7daaf87373385156d84217 100644 --- a/tensorflow/contrib/training/python/training/evaluation.py +++ b/tensorflow/contrib/training/python/training/evaluation.py @@ -138,7 +138,6 @@ from __future__ import print_function import time -from tensorflow.contrib.framework.python.ops import variables from tensorflow.python.ops import state_ops from tensorflow.python.platform import tf_logging as logging from tensorflow.python.summary import summary @@ -298,7 +297,7 @@ class SummaryAtEndHook(session_run_hook.SessionRunHook): def begin(self): if self._replace_summary_op: self._summary_op = summary.merge_all() - self._global_step = variables.get_or_create_global_step() + self._global_step = training_util.get_or_create_global_step() def 
after_create_session(self, session, coord): if self._summary_writer is None and self._log_dir: diff --git a/tensorflow/contrib/training/python/training/hparam.py b/tensorflow/contrib/training/python/training/hparam.py index 95e051e3b5bb9f8075e66891a45c64a27bca68d1..6c59b68053cfc6c1aebfca149bfba583d645a1e7 100644 --- a/tensorflow/contrib/training/python/training/hparam.py +++ b/tensorflow/contrib/training/python/training/hparam.py @@ -315,7 +315,7 @@ class HParams(object): Hyperparameters have type, which is inferred from the type of their value passed at construction type. The currently supported types are: integer, - float, string, and list of integer, float, or string. + float, boolean, string, and list of integer, float, boolean, or string. You can override hyperparameter values by calling the [`parse()`](#HParams.parse) method, passing a string of comma separated @@ -630,6 +630,9 @@ class HParams(object): def __str__(self): return str(sorted(self.values().items())) + def __repr__(self): + return '%s(%s)' % (type(self).__name__, self.__str__()) + @staticmethod def _get_kind_name(param_type, is_list): """Returns the field name given parameter type and is_list. diff --git a/tensorflow/contrib/training/python/training/resample.py b/tensorflow/contrib/training/python/training/resample.py index b16159bc16bb5f3ccd9b5cc7f4af64d9c24b22af..7b8332b1d672dfdfb233e24c0e4f760e49382bc0 100644 --- a/tensorflow/contrib/training/python/training/resample.py +++ b/tensorflow/contrib/training/python/training/resample.py @@ -77,7 +77,7 @@ def resample_at_rate(inputs, rates, scope=None, seed=None, back_prop=False): Args: inputs: A list of tensors, each of which has a shape of `[batch_size, ...]` - rates: A tensor of shape `[batch_size]` contiaining the resampling rates + rates: A tensor of shape `[batch_size]` containing the resampling rates for each input. scope: Scope for the op. seed: Random seed to use. 
diff --git a/tensorflow/contrib/training/python/training/sampling_ops.py b/tensorflow/contrib/training/python/training/sampling_ops.py index ba888f87dc8c12788c1160f7199c0b762be186c1..7140f2a46d57f0f3b76ff4f1ea9d0d73808405c8 100644 --- a/tensorflow/contrib/training/python/training/sampling_ops.py +++ b/tensorflow/contrib/training/python/training/sampling_ops.py @@ -123,7 +123,7 @@ def rejection_sample(tensors, batch_size=batch_size, num_threads=queue_threads) - # Queues return a single tensor if the list of enqued tensors is one. Since + # Queues return a single tensor if the list of enqueued tensors is one. Since # we want the type to always be the same, always return a list. if isinstance(minibatch, ops.Tensor): minibatch = [minibatch] @@ -312,7 +312,7 @@ def _verify_input(tensor_list, labels, probs_list): """Verify that batched inputs are well-formed.""" checked_probs_list = [] for probs in probs_list: - # Since number of classes shouldn't change at runtime, probalities shape + # Since number of classes shouldn't change at runtime, probabilities shape # should be fully defined. probs.get_shape().assert_is_fully_defined() @@ -407,7 +407,7 @@ def _calculate_acceptance_probabilities(init_probs, target_probs): ``` - A solution for a_i in terms of the other variabes is the following: + A solution for a_i in terms of the other variables is the following: ```a_i = (t_i / p_i) / max_i[t_i / p_i]``` """ # Make list of t_i / p_i. 
diff --git a/tensorflow/contrib/training/python/training/sequence_queueing_state_saver.py b/tensorflow/contrib/training/python/training/sequence_queueing_state_saver.py index 72231948856b38edd3d022a99a62e6d4c8c5649e..39d75a080604e3a7ae93391652d4c03be9857218 100644 --- a/tensorflow/contrib/training/python/training/sequence_queueing_state_saver.py +++ b/tensorflow/contrib/training/python/training/sequence_queueing_state_saver.py @@ -876,7 +876,7 @@ class SequenceQueueingStateSaver(object): ]): self._length = array_ops.identity(self._length) - # Only create barrier; enqueu and dequeue operations happen when you + # Only create barrier; enqueue and dequeue operations happen when you # access prefetch_op and next_batch. self._create_barrier() self._scope = scope @@ -1574,8 +1574,9 @@ def _padding(sequences, num_unroll): if not sequences: return 0, {} - sequences_dict = {} - for key, value in sequences.items(): + # Sort 'sequences_dict' so 'length' will have a predictable value below. + sequences_dict = collections.OrderedDict() + for key, value in sorted(sequences.items()): if not (isinstance(value, sparse_tensor.SparseTensor) or isinstance(value, sparse_tensor.SparseTensorValue)): sequences_dict[key] = ops.convert_to_tensor(value) @@ -1636,7 +1637,7 @@ def _move_sparse_tensor_out_context(input_context, input_sequences, num_unroll): For `key, value` pairs in `input_context` with `SparseTensor` `value` removes them from `input_context` and transforms the `value` into a sequence and - then adding `key`, transformed `value` into `input_seuqences`. + then adding `key`, transformed `value` into `input_sequences`. The transformation is done by adding a new first dimension of `value_length` equal to that of the other values in input_sequences` and tiling the `value` every `num_unroll` steps. 
diff --git a/tensorflow/contrib/util/loader.py b/tensorflow/contrib/util/loader.py index f4283cd9ed6eb185ec56bdfef45f6ddc5253d128..dca01d26f4c6291074d855322c8215027cfd2ace 100644 --- a/tensorflow/contrib/util/loader.py +++ b/tensorflow/contrib/util/loader.py @@ -42,9 +42,10 @@ def load_op_library(path): plugin. """ if os.name == 'nt': - # To avoid makeing every user_ops aware of windows, re-write - # the file extension from .so to .dll. - path = re.sub(r'\.so$', '.dll', path) + # To avoid making every user_ops aware of windows, re-write + # the file extension from .so to .dll if .so file doesn't exist. + if not os.path.exists(path): + path = re.sub(r'\.so$', '.dll', path) # Currently we have only some user_ops as dlls on windows - don't try # to load them if the dll is not found. diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 21f7866abdddf28b421302a6989a0685398efdeb..24ea732877576b3faaa1da713f6a9bf617301511 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -149,6 +149,7 @@ load( "//third_party/mkl:build_defs.bzl", "if_mkl", ) +load("@io_bazel_rules_closure//closure:defs.bzl", "closure_proto_library") exports_files(["ops/ops.pbtxt"]) @@ -159,6 +160,7 @@ exports_files(["ops/ops.pbtxt"]) # # Note that some protos are in neither additional_core_proto_srcs nor this # filegroup; e.g. ones with individual proto_library targets. +# LINT.IfChange CORE_PROTO_SRCS = [ "example/example.proto", "example/feature.proto", @@ -200,6 +202,7 @@ CORE_PROTO_SRCS = [ "util/memmapped_file_system.proto", "util/saved_tensor_slice.proto", ] +# LINT.ThenChange(//tensorflow/core/android_proto_config.asciipb) # Protos which are not needed on mobile builds, but should be included in # protos_all. @@ -208,6 +211,7 @@ CORE_PROTO_SRCS = [ # ones with individual proto_library targets. 
ADDITIONAL_CORE_PROTO_SRCS = [ "example/example_parser_configuration.proto", + "protobuf/checkpointable_object_graph.proto", "protobuf/control_flow.proto", # TODO(ebrevdo): Re-enable once CriticalSection is in core. # "protobuf/critical_section.proto", @@ -244,6 +248,21 @@ tf_nano_proto_library( deps = [":protos_all_cc"], ) +proto_library( + name = "example_protos", + srcs = [ + "example/example.proto", + "example/feature.proto", + ], + visibility = ["//visibility:public"], +) + +closure_proto_library( + name = "example_protos_closure", + visibility = ["//visibility:public"], + deps = [":example_protos"], +) + exports_files([ "framework/types.proto", ]) @@ -256,7 +275,7 @@ tf_proto_library( visibility = ["//visibility:public"], ) -# Minimal lib to detect plafrom +# Minimal lib to detect platform cc_library( name = "lib_platform", hdrs = [ @@ -264,6 +283,55 @@ cc_library( ], ) +PLATFORM_BASE_HDRS = [ + "platform/logging.h", + "platform/macros.h", + "platform/types.h", + "platform/cpu_info.h", +] + +PLATFORM_OTHER_HDRS = [ + "platform/abi.h", + "platform/stacktrace.h", + "platform/stacktrace_handler.h", + "platform/context.h", + "platform/cpu_feature_guard.h", + "platform/dynamic_annotations.h", + "platform/env.h", + "platform/env_time.h", + "platform/file_system.h", + "platform/file_system_helper.h", + "platform/fingerprint.h", + "platform/init_main.h", + "platform/mem.h", + "platform/mutex.h", + "platform/net.h", + "platform/notification.h", + "platform/null_file_system.h", + "platform/prefetch.h", + "platform/profile_utils/clock_cycle_profiler.h", + "platform/profile_utils/cpu_utils.h", + "platform/protobuf.h", + "platform/strong_hash.h", + "platform/subprocess.h", + "platform/thread_annotations.h", +] + +# Smaller platform libraries that don't depend on "lib" or "lib_internal". 
+cc_library( + name = "platform_base", + srcs = glob([ + "platform/*/integral_types.h", + "platform/*/logging.h", + "platform/*/cpu_info.h", + ]), + hdrs = PLATFORM_BASE_HDRS, + deps = [ + ":lib_platform", + "//tensorflow/core/platform/default/build_config:base", + ], +) + # Minimal lib so that tools used for mobile compilation # don't have to depend on lib/platformlib. cc_library( @@ -285,7 +353,9 @@ cc_library( "lib/bfloat16/bfloat16.h", ] + tf_additional_proto_hdrs() + glob(tf_env_time_hdrs()), copts = tf_copts(), - deps = tf_lib_proto_parsing_deps(), + deps = tf_lib_proto_parsing_deps() + [ + "@double_conversion//:double-conversion", + ], ) # This build rule (along with :lib_internal, :framework, and @@ -294,7 +364,8 @@ cc_library( # tf_cc_test and tf_cc_binary will include the necessary symbols. cc_library( name = "lib", - hdrs = [ + hdrs = PLATFORM_BASE_HDRS + + PLATFORM_OTHER_HDRS + [ "lib/bfloat16/bfloat16.h", "lib/core/arena.h", "lib/core/bitmap.h", @@ -341,33 +412,6 @@ cc_library( "lib/strings/str_util.h", "lib/strings/strcat.h", "lib/strings/stringprintf.h", - "platform/abi.h", - "platform/context.h", - "platform/cpu_feature_guard.h", - "platform/cpu_info.h", - "platform/dynamic_annotations.h", - "platform/env.h", - "platform/env_time.h", - "platform/file_system.h", - "platform/fingerprint.h", - "platform/init_main.h", - "platform/logging.h", - "platform/macros.h", - "platform/mem.h", - "platform/mutex.h", - "platform/net.h", - "platform/notification.h", - "platform/null_file_system.h", - "platform/prefetch.h", - "platform/profile_utils/clock_cycle_profiler.h", - "platform/profile_utils/cpu_utils.h", - "platform/protobuf.h", - "platform/stacktrace.h", - "platform/strong_hash.h", - "platform/subprocess.h", - "platform/thread_annotations.h", - "platform/types.h", - "platform/windows/cpu_info.h", ], visibility = ["//visibility:public"], deps = [ @@ -414,6 +458,17 @@ cc_library( ], ) +# Libraries that will eventually be moved into lib/core +# Note that 
stringpiece_test can't be placed here yet, because we are +# required to use tf_cc_test, and that rule will change / into _ +cc_library( + name = "core_stringpiece", + srcs = ["lib/core/stringpiece.cc"], + hdrs = ["lib/core/stringpiece.h"], + copts = tf_copts(), + deps = [":platform_base"], +) + # Test support library needed for all tests # This is currently public, but may be made internal in the # future. Try to avoid depending on it. @@ -441,6 +496,27 @@ cc_library( ] + tf_additional_test_deps(), ) +# Testing libraries - lite versions that don't depend on all of "lib" or +# "lib_internal". Instead, they only need a much smaller set of support +# libraries such as ":platform_base" and ":core_stringpiece". +cc_library( + name = "test_lite", + testonly = 1, + srcs = [ + "platform/test.cc", + ], + hdrs = [ + "platform/test.h", + "platform/test_benchmark.h", + ], + copts = tf_copts(), + deps = [ + ":lib_platform", + ":platform_base", + "//tensorflow/core/platform/default/build_config:gtest", + ], +) + # This build rule (along with :framework_internal, :lib, and :lib_internal) # purposefully omits the definitions of many declared symbols, which are # included in //tensorflow:libtensorflow_framework.so. 
Using tf_cc_test and tf_cc_binary @@ -489,6 +565,7 @@ tf_cuda_library( "framework/selective_registration.h", "framework/session_state.h", "framework/shape_inference.h", + "framework/stats_aggregator.h", "framework/tensor.h", "framework/tensor_shape.h", "framework/tensor_slice.h", @@ -498,7 +575,6 @@ tf_cuda_library( "framework/type_index.h", "framework/type_traits.h", "framework/types.h", - "framework/visitable_allocator.h", "public/version.h", "util/activation_mode.h", "util/bcast.h", @@ -632,10 +708,13 @@ tf_gen_op_libs( "boosted_trees_ops", "candidate_sampling_ops", "checkpoint_ops", + "collective_ops", "control_flow_ops", "ctc_ops", "data_flow_ops", "dataset_ops", + "decode_proto_ops", + "encode_proto_ops", "function_ops", "functional_ops", "image_ops", @@ -652,6 +731,7 @@ tf_gen_op_libs( "random_ops", "remote_fused_graph_ops", "resource_variable_ops", + "rpc_ops", "scoped_allocator_ops", "sdca_ops", "set_ops", @@ -745,11 +825,14 @@ cc_library( ":boosted_trees_ops_op_lib", ":candidate_sampling_ops_op_lib", ":checkpoint_ops_op_lib", + ":collective_ops_op_lib", ":control_flow_ops_op_lib", ":ctc_ops_op_lib", ":cudnn_rnn_ops_op_lib", ":data_flow_ops_op_lib", ":dataset_ops_op_lib", + ":decode_proto_ops_op_lib", + ":encode_proto_ops_op_lib", ":function_ops_op_lib", ":functional_ops_op_lib", ":image_ops_op_lib", @@ -766,6 +849,7 @@ cc_library( ":random_ops_op_lib", ":remote_fused_graph_ops_op_lib", ":resource_variable_ops_op_lib", + ":rpc_ops_op_lib", ":scoped_allocator_ops_op_lib", ":script_ops_op_lib", ":sdca_ops_op_lib", @@ -887,11 +971,14 @@ cc_library( "//tensorflow/core/kernels:boosted_trees_ops", "//tensorflow/core/kernels:candidate_sampler_ops", "//tensorflow/core/kernels:checkpoint_ops", + "//tensorflow/core/kernels:collective_ops", "//tensorflow/core/kernels:control_flow_ops", "//tensorflow/core/kernels:ctc_ops", "//tensorflow/core/kernels:cudnn_rnn_kernels", "//tensorflow/core/kernels:data_flow", "//tensorflow/core/kernels:dataset_ops", + 
"//tensorflow/core/kernels:decode_proto_op", + "//tensorflow/core/kernels:encode_proto_op", "//tensorflow/core/kernels:fake_quant_ops", "//tensorflow/core/kernels:function_ops", "//tensorflow/core/kernels:functional_ops", @@ -913,6 +1000,7 @@ cc_library( "//tensorflow/core/kernels:remote_fused_graph_ops", "//tensorflow/core/kernels:required", "//tensorflow/core/kernels:resource_variable_ops", + "//tensorflow/core/kernels:rpc_op", "//tensorflow/core/kernels:scoped_allocator_ops", "//tensorflow/core/kernels:sdca_ops", "//tensorflow/core/kernels:set_kernels", @@ -986,6 +1074,7 @@ cc_library( hdrs = [ "common_runtime/function_testlib.h", "common_runtime/kernel_benchmark_testlib.h", + "common_runtime/test_collective_executor_mgr.h", "framework/fake_input.h", "framework/function_testlib.h", "framework/shape_inference_testutil.h", @@ -1160,6 +1249,7 @@ cc_library( deps = [ ":protos_all_cc_impl", "//third_party/eigen3", + "@double_conversion//:double-conversion", "@nsync//:nsync_cpp", "@protobuf_archive//:protobuf", ], @@ -1199,6 +1289,7 @@ cc_library( deps = [ ":protos_all_cc_impl", "//third_party/eigen3", + "@double_conversion//:double-conversion", "@nsync//:nsync_cpp", "@protobuf_archive//:protobuf", ], @@ -1262,6 +1353,7 @@ cc_library( deps = [ ":protos_all_cc_impl", "//third_party/eigen3", + "@double_conversion//:double-conversion", "@nsync//:nsync_cpp", "@protobuf_archive//:protobuf", ], @@ -1284,6 +1376,7 @@ cc_library( deps = [ ":protos_all_cc_impl", "//third_party/eigen3", + "@double_conversion//:double-conversion", "@nsync//:nsync_cpp", "@protobuf_archive//:protobuf", ], @@ -1640,6 +1733,7 @@ cc_library( exclude = [ "**/*test*", "framework/variant.cc", + "lib/core/stringpiece.cc", "lib/hash/crc32c_accelerate.cc", "lib/gif/**/*", "lib/jpeg/**/*", @@ -1653,6 +1747,7 @@ cc_library( ) + tf_additional_lib_srcs( exclude = [ "**/*test*", + "lib/core/stringpiece.cc", "platform/**/cuda.h", "platform/**/cuda_libdevice_path.cc", "platform/**/stream_executor.h", @@ -1673,10 
+1768,12 @@ cc_library( ":lib_hash_crc32c_accelerate_internal", ":lib_proto_parsing", ":abi", + ":core_stringpiece", "//third_party/eigen3", "//tensorflow/core/platform/default/build_config:platformlib", "@snappy", "@zlib_archive//:zlib", + "@double_conversion//:double-conversion", "@protobuf_archive//:protobuf", ] + tf_protos_all_impl() + tf_protos_grappler_impl(), ) @@ -1904,7 +2001,6 @@ FRAMEWORK_INTERNAL_PUBLIC_HEADERS = [ "framework/tracking_allocator.h", # only needed for tests "framework/unique_tensor_references.h", "framework/variant.h", - "framework/visitable_allocator.h", "platform/variant_coding.h", "util/command_line_flags.h", "util/env_var.h", @@ -2181,18 +2277,20 @@ tf_cuda_library( CORE_CPU_LIB_HEADERS = CORE_CPU_BASE_HDRS + [ "common_runtime/allocator_retry.h", + "common_runtime/base_collective_executor.h", "common_runtime/bfc_allocator.h", + "common_runtime/broadcaster.h", + "common_runtime/buf_rendezvous.h", + "common_runtime/build_graph_options.h", "common_runtime/collective_executor_mgr.h", "common_runtime/collective_param_resolver_local.h", "common_runtime/collective_rma_local.h", - "common_runtime/device_resolver_local.h", - "common_runtime/buf_rendezvous.h", - "common_runtime/build_graph_options.h", "common_runtime/constant_folding.h", "common_runtime/copy_tensor.h", "common_runtime/costmodel_manager.h", "common_runtime/debugger_state_interface.h", "common_runtime/device_factory.h", + "common_runtime/device_resolver_local.h", "common_runtime/device_set.h", "common_runtime/dma_helper.h", "common_runtime/eigen_thread_pool.h", @@ -2203,18 +2301,21 @@ CORE_CPU_LIB_HEADERS = CORE_CPU_BASE_HDRS + [ "common_runtime/mkl_cpu_allocator.h", "common_runtime/optimization_registry.h", "common_runtime/pending_counts.h", + "common_runtime/placer.h", "common_runtime/process_util.h", "common_runtime/profile_handler.h", "common_runtime/renamed_device.h", "common_runtime/rendezvous_mgr.h", "common_runtime/rendezvous_util.h", + "common_runtime/ring_reducer.h", 
"common_runtime/scoped_allocator.h", "common_runtime/scoped_allocator_mgr.h", "common_runtime/session_factory.h", - "common_runtime/placer.h", + "common_runtime/single_threaded_cpu_device.h", "common_runtime/stats_publisher_interface.h", "common_runtime/step_stats_collector.h", "common_runtime/threadpool_device.h", + "common_runtime/visitable_allocator.h", "graph/gradients.h", "graph/quantize_training.h", ] + if_mkl(["graph/mkl_graph_util.h"]) @@ -2224,7 +2325,9 @@ tf_cuda_library( srcs = [ "common_runtime/accumulate_n_optimizer.cc", "common_runtime/allocator_retry.cc", + "common_runtime/base_collective_executor.cc", "common_runtime/bfc_allocator.cc", + "common_runtime/broadcaster.cc", "common_runtime/buf_rendezvous.cc", "common_runtime/build_graph_options.cc", "common_runtime/collective_executor_mgr.cc", @@ -2254,6 +2357,7 @@ tf_cuda_library( "common_runtime/renamed_device.cc", "common_runtime/rendezvous_mgr.cc", "common_runtime/rendezvous_util.cc", + "common_runtime/ring_reducer.cc", "common_runtime/scoped_allocator.cc", "common_runtime/scoped_allocator_mgr.cc", "common_runtime/session.cc", @@ -2616,6 +2720,23 @@ cc_library( alwayslink = 1, ) +# This is the lite version of a main() for tests. It does not include any +# support for reporting benchmark results when running on TPUs. 
+cc_library( + name = "test_lite_main", + testonly = 1, + srcs = ["platform/test_main.cc"], + copts = tf_copts(), + deps = [ + ":core_stringpiece", + ":lib_platform", + ":stacktrace_handler", + ":test_lite", + "//tensorflow/core/platform/default/build_config:test_lite_main", + ], + alwayslink = 1, +) + tf_cc_tests( name = "low_level_library_tests", size = "small", @@ -2664,7 +2785,6 @@ tf_cc_tests( "lib/monitoring/sampler_test.cc", "lib/random/distribution_sampler_test.cc", "lib/random/philox_random_test.cc", - "lib/random/random_distributions_test.cc", "lib/random/random_test.cc", "lib/random/simple_philox_test.cc", "lib/strings/base64_test.cc", @@ -2694,6 +2814,21 @@ tf_cc_tests( ], ) +tf_cc_test( + name = "lib_random_random_distributions_test", + srcs = ["lib/random/random_distributions_test.cc"], + tags = ["optonly"], + deps = [ + ":lib", + ":lib_internal", + ":lib_test_internal", + ":protos_all_cc", + ":test", + ":test_main", + "//third_party/eigen3", + ], +) + tf_cc_test( name = "platform_env_test", size = "small", @@ -3002,6 +3137,62 @@ tf_cc_test( ], ) +tf_cc_tests_gpu( + name = "ring_reducer_test", + size = "medium", + srcs = [ + "common_runtime/ring_reducer_test.cc", + ], + linkstatic = tf_kernel_tests_linkstatic(), + tags = tf_cuda_tests_tags(), + deps = [ + ":all_kernels", + ":core", + ":core_cpu", + ":core_cpu_internal", + ":direct_session_internal", + ":framework", + ":framework_internal", + ":gpu_runtime", + ":lib", + ":lib_internal", + ":ops", + ":protos_all_cc", + ":protos_test_cc", + ":test", + ":test_main", + ":testlib", + ], +) + +tf_cc_tests_gpu( + name = "broadcaster_test", + size = "small", + srcs = [ + "common_runtime/broadcaster_test.cc", + ], + linkstatic = tf_kernel_tests_linkstatic(), + tags = tf_cuda_tests_tags(), + deps = [ + ":all_kernels", + ":core", + ":core_cpu", + ":core_cpu_internal", + ":direct_session_internal", + ":framework", + ":framework_internal", + ":gpu_runtime", + ":lib", + ":lib_internal", + ":ops", + ":protos_all_cc", 
+ ":protos_test_cc", + ":test", + ":test_main", + ":testlib", + ], +) + tf_cc_test_mkl( name = "mkl_runtime_tests", size = "small", diff --git a/tensorflow/core/api_def/base_api/api_def_ApplyAdaMax.pbtxt b/tensorflow/core/api_def/base_api/api_def_ApplyAdaMax.pbtxt new file mode 100644 index 0000000000000000000000000000000000000000..145d05de59a16ed67af4b978e72593ae88da3d05 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_ApplyAdaMax.pbtxt @@ -0,0 +1,78 @@ +op { + graph_op_name: "ApplyAdaMax" + visibility: HIDDEN + in_arg { + name: "var" + description: <>> x = tf.constant([1, 2, 3]) +>>> y = tf.broadcast_to(x, [3, 3]) +>>> sess.run(y) +array([[1, 2, 3], + [1, 2, 3], + [1, 2, 3]], dtype=int32) +``` +In the above example, the input Tensor with the shape of `[1, 3]` +is broadcasted to output Tensor with shape of `[3, 3]`. +END +} diff --git a/tensorflow/core/api_def/base_api/api_def_ClipByValue.pbtxt b/tensorflow/core/api_def/base_api/api_def_ClipByValue.pbtxt new file mode 100644 index 0000000000000000000000000000000000000000..803d8970ab78de347936a8dbbd2f39d8d9915f1e --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_ClipByValue.pbtxt @@ -0,0 +1,36 @@ +op { + graph_op_name: "ClipByValue" + in_arg { + name: "t" + description: < ## Validate your installation @@ -630,14 +681,14 @@ This section documents the relevant values for Linux installations. CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.7.0rc1-cp27-none-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0rc1-cp27-none-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.7.0rc1-cp27-none-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0rc1-cp27-none-linux_x86_64.whl
 
Note that GPU support requires the NVIDIA hardware and software described in @@ -649,14 +700,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.7.0rc1-cp34-cp34m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0rc1-cp34-cp34m-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.7.0rc1-cp34-cp34m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0rc1-cp34-cp34m-linux_x86_64.whl
 
Note that GPU support requires the NVIDIA hardware and software described in @@ -668,14 +719,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.7.0rc1-cp35-cp35m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0rc1-cp35-cp35m-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.7.0rc1-cp35-cp35m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0rc1-cp35-cp35m-linux_x86_64.whl
 
@@ -687,14 +738,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.7.0rc1-cp36-cp36m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0rc1-cp36-cp36m-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.7.0rc1-cp36-cp36m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0rc1-cp36-cp36m-linux_x86_64.whl
 
diff --git a/tensorflow/docs_src/install/install_mac.md b/tensorflow/docs_src/install/install_mac.md index 7060ef43da3e978a87250cacf916b4a792274a47..ff6c2f5e447873f479570429d1f19b859ab6c56d 100644 --- a/tensorflow/docs_src/install/install_mac.md +++ b/tensorflow/docs_src/install/install_mac.md @@ -119,7 +119,7 @@ Take the following steps to install TensorFlow with Virtualenv: TensorFlow in the active Virtualenv is as follows:
 $ pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.7.0rc1-py3-none-any.whl
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0rc1-py3-none-any.whl If you encounter installation problems, see [Common Installation Problems](#common-installation-problems). @@ -242,7 +242,7 @@ take the following steps: issue the following command:
 $ sudo pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.7.0rc1-py3-none-any.whl 
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0rc1-py3-none-any.whl If the preceding command fails, see [installation problems](#common-installation-problems). @@ -350,7 +350,7 @@ Take the following steps to install TensorFlow in an Anaconda environment: TensorFlow for Python 2.7:
 (targetDirectory)$ pip install --ignore-installed --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.7.0rc1-py2-none-any.whl
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0rc1-py2-none-any.whl @@ -524,7 +524,7 @@ The value you specify depends on your Python version.
-https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.7.0rc1-py2-none-any.whl
+https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0rc1-py2-none-any.whl
 
@@ -532,5 +532,5 @@ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.7.0rc1-py2-none-a
-https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.7.0rc1-py3-none-any.whl
+https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0rc1-py3-none-any.whl
 
diff --git a/tensorflow/docs_src/install/install_sources.md b/tensorflow/docs_src/install/install_sources.md index 148f80efe25f12cfaef9df8a8edfaa700782dacd..a5b05491affda50e32abc723bcc64cc6f1de989d 100644 --- a/tensorflow/docs_src/install/install_sources.md +++ b/tensorflow/docs_src/install/install_sources.md @@ -241,12 +241,12 @@ One of the questions that `configure` will ask is as follows: Please specify optimization flags to use during compilation when bazel option "--config=opt" is specified [Default is -march=native] -This question refers to a later phase in which you'll use bazel to -[build the pip package](#build-the-pip-package). We recommend -accepting the default (`-march=native`), which will -optimize the generated code for your local machine's CPU type. However, -if you are building TensorFlow on one CPU type but will run TensorFlow on -a different CPU type, then consider specifying a more specific optimization +This question refers to a later phase in which you'll use bazel to [build the +pip package](#build-the-pip-package) or the [C/Java libraries](#BuildCorJava). +We recommend accepting the default (`-march=native`), which will optimize the +generated code for your local machine's CPU type. However, if you are building +TensorFlow on one CPU type but will run TensorFlow on a different CPU type, then +consider specifying a more specific optimization flag as described in [the gcc documentation](https://gcc.gnu.org/onlinedocs/gcc-4.5.3/gcc/i386-and-x86_002d64-Options.html). @@ -311,6 +311,10 @@ Note the following: ## Build the pip package +Note: If you're only interested in building the libraries for the TensorFlow C +or Java APIs, see [Build the C or Java libraries](#BuildCorJava), you do not +need to build the pip package in that case. + To build a pip package for TensorFlow with CPU-only support, you would typically invoke the following command: @@ -350,10 +354,10 @@ Invoke `pip install` to install that pip package. 
The filename of the `.whl` file depends on your platform. For example, the following command will install the pip package -for TensorFlow 1.7.0rc1 on Linux: +for TensorFlow 1.8.0rc1 on Linux:
-$ sudo pip install /tmp/tensorflow_pkg/tensorflow-1.7.0rc1-py2-none-any.whl
+$ sudo pip install /tmp/tensorflow_pkg/tensorflow-1.8.0rc1-py2-none-any.whl
 
## Validate your installation @@ -450,8 +454,10 @@ Stack Overflow and specify the `tensorflow` tag. **Linux** - - + + + + @@ -471,7 +477,8 @@ Stack Overflow and specify the `tensorflow` tag. **Mac**
Version:CPU/GPU:Python Version:Compiler:Build Tools:cuDNN:CUDA:
tensorflow-1.7.0rc1CPU2.7, 3.3-3.6GCC 4.8Bazel 0.10.0N/AN/A
tensorflow_gpu-1.7.0rc1GPU2.7, 3.3-3.6GCC 4.8Bazel 0.9.079
tensorflow-1.8.0CPU2.7, 3.3-3.6GCC 4.8Bazel 0.10.0N/AN/A
tensorflow_gpu-1.8.0GPU2.7, 3.3-3.6GCC 4.8Bazel 0.9.079
tensorflow-1.7.0CPU2.7, 3.3-3.6GCC 4.8Bazel 0.10.0N/AN/A
tensorflow_gpu-1.7.0GPU2.7, 3.3-3.6GCC 4.8Bazel 0.9.079
tensorflow-1.6.0CPU2.7, 3.3-3.6GCC 4.8Bazel 0.9.0N/AN/A
tensorflow_gpu-1.6.0GPU2.7, 3.3-3.6GCC 4.8Bazel 0.9.079
tensorflow-1.5.0CPU2.7, 3.3-3.6GCC 4.8Bazel 0.8.0N/AN/A
- + + @@ -486,8 +493,10 @@ Stack Overflow and specify the `tensorflow` tag. **Windows**
Version:CPU/GPU:Python Version:Compiler:Build Tools:cuDNN:CUDA:
tensorflow-1.7.0rc1CPU2.7, 3.3-3.6Clang from xcodeBazel 0.10.1N/AN/A
tensorflow-1.8.0CPU2.7, 3.3-3.6Clang from xcodeBazel 0.10.1N/AN/A
tensorflow-1.7.0CPU2.7, 3.3-3.6Clang from xcodeBazel 0.10.1N/AN/A
tensorflow-1.6.0CPU2.7, 3.3-3.6Clang from xcodeBazel 0.8.1N/AN/A
tensorflow-1.5.0CPU2.7, 3.3-3.6Clang from xcodeBazel 0.8.1N/AN/A
tensorflow-1.4.0CPU2.7, 3.3-3.6Clang from xcodeBazel 0.5.4N/AN/A
- - + + + + @@ -503,3 +512,20 @@ Stack Overflow and specify the `tensorflow` tag.
Version:CPU/GPU:Python Version:Compiler:Build Tools:cuDNN:CUDA:
tensorflow-1.7.0rc1CPU3.5-3.6MSVC 2015 update 3Cmake v3.6.3N/AN/A
tensorflow_gpu-1.7.0rc1GPU3.5-3.6MSVC 2015 update 3Cmake v3.6.379
tensorflow-1.8.0CPU3.5-3.6MSVC 2015 update 3Cmake v3.6.3N/AN/A
tensorflow_gpu-1.8.0GPU3.5-3.6MSVC 2015 update 3Cmake v3.6.379
tensorflow-1.7.0CPU3.5-3.6MSVC 2015 update 3Cmake v3.6.3N/AN/A
tensorflow_gpu-1.7.0GPU3.5-3.6MSVC 2015 update 3Cmake v3.6.379
tensorflow-1.6.0CPU3.5-3.6MSVC 2015 update 3Cmake v3.6.3N/AN/A
tensorflow_gpu-1.6.0GPU3.5-3.6MSVC 2015 update 3Cmake v3.6.379
tensorflow-1.5.0CPU3.5-3.6MSVC 2015 update 3Cmake v3.6.3N/AN/A
tensorflow-1.0.0CPU3.5MSVC 2015 update 3Cmake v3.6.3N/AN/A
tensorflow_gpu-1.0.0GPU3.5MSVC 2015 update 3Cmake v3.6.35.18
+ + +## Build the C or Java libraries + +The instructions above are tailored to building the TensorFlow Python packages. + +If you're interested in building the libraries for the TensorFlow C API, do the +following: + +1. Follow the steps up to [Configure the installation](#ConfigureInstallation) +2. Build the C libraries following instructions in the [README](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/tools/lib_package/README.md). + +If you're interested in building the libraries for the TensorFlow Java API, +do the following: + +1. Follow the steps up to [Configure the installation](#ConfigureInstallation) +2. Build the Java library following instructions in the [README](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/tools/lib_package/README.md). diff --git a/tensorflow/docs_src/mobile/android_build.md b/tensorflow/docs_src/mobile/android_build.md index 08a5fbe41c87c88399682208c38bf7a892d8fc1a..c35530061dcaf2a4a894dcdf54fd794907d98162 100644 --- a/tensorflow/docs_src/mobile/android_build.md +++ b/tensorflow/docs_src/mobile/android_build.md @@ -51,7 +51,8 @@ If you haven't already, do the following two things: // set to 'bazel', 'cmake', 'makefile', 'none' def nativeBuildSystem = 'none' -4. Click the Run button (the green arrow) or use **Run -> Run 'android'** from the top menu. +4. Click the *Run* button (the green arrow) or select *Run > Run 'android'* from the + top menu. You may need to rebuild the project using *Build > Rebuild Project*. If it asks you to use Instant Run, click **Proceed Without Instant Run**. diff --git a/tensorflow/docs_src/mobile/tflite/devguide.md b/tensorflow/docs_src/mobile/tflite/devguide.md index 5b521dca7bfdae1cd9a0cbb2e1330b9724b747f5..4133bc172a1924f0ce8bb515d66fc03d716923c8 100644 --- a/tensorflow/docs_src/mobile/tflite/devguide.md +++ b/tensorflow/docs_src/mobile/tflite/devguide.md @@ -88,7 +88,7 @@ Tensorflow Lite format. 
This process uses several model formats: extracted from a `SavedModel`. * *TensorFlow Lite model* (.tflite) —A serialized [FlatBuffer](https://google.github.io/flatbuffers/) that contains TensorFlow - Lite operators and tensors for the TensorFlow Lite interpreter, similiar to a + Lite operators and tensors for the TensorFlow Lite interpreter, similar to a `FrozenGraphDef`. ### Freeze Graph @@ -190,7 +190,7 @@ graph visualization. ## 3. Use the TensorFlow Lite model for inference in a mobile app -After completing the prior steps, you should now have a .tflite model file. +After completing the prior steps, you should now have a `.tflite` model file. ### Android @@ -222,3 +222,10 @@ trained Tensorflow models to the [CoreML](https://developer.apple.com/machine-learning/) format for use on Apple devices. To use the converter, refer to the [Tensorflow-CoreML converter documentation](https://github.com/tf-coreml/tf-coreml). + +### Raspberry Pi + +Compile TensorFlow Lite for a Raspberry Pi by following the +[RPi build instructions](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/lite/g3doc/rpi.md). +This compiles a static library file (`.a`) used to build your app. There are +plans for Python bindings and a demo app. 
diff --git a/tensorflow/docs_src/performance/leftnav_files b/tensorflow/docs_src/performance/leftnav_files index d11a7e5d07c3e6cfa092e7ac11189ce6c272c1ad..1f894c39fe4554261cd37ebc8cd48af6b36eef43 100644 --- a/tensorflow/docs_src/performance/leftnav_files +++ b/tensorflow/docs_src/performance/leftnav_files @@ -1,3 +1,4 @@ +index.md performance_guide.md datasets_performance.md performance_models.md diff --git a/tensorflow/docs_src/performance/performance_guide.md b/tensorflow/docs_src/performance/performance_guide.md index 580a899ac4e4f5c3d97ce023f25083168fe00d01..b1796cf9b2d0bf7459e70ab542b6e6fcb203667a 100644 --- a/tensorflow/docs_src/performance/performance_guide.md +++ b/tensorflow/docs_src/performance/performance_guide.md @@ -475,7 +475,7 @@ optimizations. ### TensorFlow with Intel® MKL DNN Intel® has added optimizations to TensorFlow for Intel® Xeon® and Intel® Xeon -Phi™ though the use of Intel® Math Kernel Library for Deep Neural Networks +Phi™ through the use of the Intel® Math Kernel Library for Deep Neural Networks (Intel® MKL-DNN) optimized primitives. The optimizations also provide speedups for the consumer line of processors, e.g. i5 and i7 Intel processors. The Intel published paper @@ -581,9 +581,9 @@ Each variable that impacts performance is discussed below. for optimal settings. * **intra_op_parallelism_threads**: Setting this equal to the number of - physical cores is recommended. Setting the value to 0, which is the default - and will result in the value being set to the number of logical cores, is an - option to try for some architectures. This value and `OMP_NUM_THREADS` + physical cores is recommended. Setting the value to 0, which is the default, + results in the value being set to the number of logical cores - this is an + alternate option to try for some architectures. This value and `OMP_NUM_THREADS` should be equal. 
* **inter_op_parallelism_threads**: Setting this equal to the number of diff --git a/tensorflow/docs_src/performance/quantization.md b/tensorflow/docs_src/performance/quantization.md index 411889cb1c616130f809e6228cc692ba3f951d48..2fea02d861d314cc61f2ba20475bf08ebea8fb5f 100644 --- a/tensorflow/docs_src/performance/quantization.md +++ b/tensorflow/docs_src/performance/quantization.md @@ -110,7 +110,7 @@ we've added a separate rewrite for the *eval graph*: ``` # Build eval model -logits = tf.nn.softmax_cross_entropy_with_logits(...) +logits = tf.nn.softmax_cross_entropy_with_logits_v2(...) # Call the eval rewrite which rewrites the graph in-place with # FakeQuantization nodes and fold batchnorm for eval. diff --git a/tensorflow/docs_src/performance/xla/operation_semantics.md b/tensorflow/docs_src/performance/xla/operation_semantics.md index 217ab596b72bc263ae5dda377a8faab8a39b0a3c..8373a1219dab0cfe57b3b01ad0d4cb00ef72093e 100644 --- a/tensorflow/docs_src/performance/xla/operation_semantics.md +++ b/tensorflow/docs_src/performance/xla/operation_semantics.md @@ -854,12 +854,13 @@ calculation of 'start_indices') is currently implementation-defined. | `operand` | `ComputationDataHandle` | N dimensional array of type T | | `update` | `ComputationDataHandle` | N dimensional array of type T | : : : containing the slice update. : -: : : Each dimension of update shape : +: : : Each dimension of update shape : : : : must be strictly greater than : : : : zero, and start + update must be : -: : : less than operand size for each : -: : : dimension to avoid generating : -: : : out-of-bounds update indices. : +: : : less than or equal to the operand: +: : : size for each dimension to avoid : +: : : generating out-of-bounds update : +: : : indices. : | `start_indices` | `ComputationDataHandle` | Rank 1 array of N integers | : : : containing the starting indices : : : : of the slice for each dimension. : @@ -1416,12 +1417,12 @@ Applies a reduction function to an array. 
| `dimensions` | `int64` array | unordered array of dimensions to | : : : reduce : -Conceptually, this operation reduces one or more dimensions in the input array -into scalars. The rank of the result array is `rank(operand) - len(dimensions)`. -`init_value` is the initial value used for every reduction and may also be -inserted anywhere during computation if the back-end chooses to do so. So in -most cases `init_value` should be an identity of the reduction function (for -example, 0 for addition). +This operation reduces one or more dimensions of the input array into scalars. +The rank of the returned array is `rank(operand) - len(dimensions)`. +`init_value` is the initial value used for every reduction and may be inserted +anywhere during computation by the back-end. In most cases, `init_value` is an +identity of the reduction function (for example, 0 for addition). The applied +`computation` is always passed the `init_value` on the left-hand side. The evaluation order of the reduction function is arbitrary and may be non-deterministic. Therefore, the reduction function should not be overly @@ -1441,8 +1442,7 @@ could be computed as but there are also many other possibilities, e.g. -`f(init_value, f(f(10, f(init_value, 11)), f(f(init_value, 12), f(13, -init_value))))` +`f(init_value, f(f(10, f(init_value, 11)), f(f(init_value, 12), f(init_value, 13))))` The following is a rough pseudo-code example of how reduction could be implemented, using summation as the reduction computation with an initial value @@ -1560,7 +1560,9 @@ See also Applies a reduction function to all elements in each window of the input multi-dimensional array, producing an output multi-dimensional array with the same number of elements as the number of valid positions of the window. A -pooling layer can be expressed as a `ReduceWindow`. +pooling layer can be expressed as a `ReduceWindow`. 
Similar to +[`Reduce`](#reduce), the applied `computation` is always passed the `init_value` +on the left-hand side. `ReduceWindow(operand, init_value, computation, window_dimensions, window_strides, padding)` diff --git a/tensorflow/docs_src/performance/xla/tfcompile.md b/tensorflow/docs_src/performance/xla/tfcompile.md index f57ca3948dd52d6e300f5f498b0e047fa63a3aff..8521d7eacb4a7fec7d187bdd1c4f452b644dc8b2 100644 --- a/tensorflow/docs_src/performance/xla/tfcompile.md +++ b/tensorflow/docs_src/performance/xla/tfcompile.md @@ -86,7 +86,7 @@ code. `tf_library` utilizes `tfcompile` to compile the TensorFlow graph into executable code. ```build -load("//third_party/tensorflow/compiler/aot:tfcompile.bzl", "tf_library") +load("//tensorflow/compiler/aot:tfcompile.bzl", "tf_library") # Use the tf_library macro to compile your graph into executable code. tf_library( @@ -258,8 +258,8 @@ file. ```build # Example of linking your binary -# Also see //third_party/tensorflow/compiler/aot/tests/BUILD -load("//third_party/tensorflow/compiler/aot:tfcompile.bzl", "tf_library") +# Also see //tensorflow/compiler/aot/tests/BUILD +load("//tensorflow/compiler/aot:tfcompile.bzl", "tf_library") # The same tf_library call from step 2 above. tf_library( diff --git a/tensorflow/docs_src/programmers_guide/datasets.md b/tensorflow/docs_src/programmers_guide/datasets.md index 9ccdbde627e6b2415835f7c0771eca1afa04f7f8..67be41b1a68bc72913d859f83ccd74b529350a4c 100644 --- a/tensorflow/docs_src/programmers_guide/datasets.md +++ b/tensorflow/docs_src/programmers_guide/datasets.md @@ -540,7 +540,7 @@ batched into a fixed size. # to a fixed shape. 
def _parse_function(filename, label): image_string = tf.read_file(filename) - image_decoded = tf.image.decode_image(image_string) + image_decoded = tf.image.decode_jpeg(image_string) image_resized = tf.image.resize_images(image_decoded, [28, 28]) return image_resized, label diff --git a/tensorflow/docs_src/programmers_guide/debugger.md b/tensorflow/docs_src/programmers_guide/debugger.md index d1cd7e7c06e525abd9fadf24d5e706780bb316fc..f7817b06d4c8bd8fe6ff8c3ffae2db856611cb8e 100644 --- a/tensorflow/docs_src/programmers_guide/debugger.md +++ b/tensorflow/docs_src/programmers_guide/debugger.md @@ -4,29 +4,28 @@ [TOC] -TensorFlow debugger (**tfdbg**) is a specialized debugger for TensorFlow. It -lets you view the internal structure and states of running TensorFlow graphs -during training and inference, which is difficult to debug with general-purpose -debuggers such as Python's `pdb` due to TensorFlow's computation-graph paradigm. - -> NOTE: TensorFlow debugger uses a -> [curses](https://en.wikipedia.org/wiki/Curses_\(programming_library\))-based -> text user interface. On Mac OS X, the `ncurses` library is required and can -> be installed with `brew install homebrew/dupes/ncurses`. On Windows, curses -> isn't as well supported, so a -> [readline](https://en.wikipedia.org/wiki/GNU_Readline)-based interface can -> be used with tfdbg by installing `pyreadline` with pip. -> If you use Anaconda3, you can install it with a command -> such as `"C:\Program Files\Anaconda3\Scripts\pip.exe" install pyreadline`. -> Unofficial Windows curses packages can be downloaded -> [here](https://www.lfd.uci.edu/~gohlke/pythonlibs/#curses), then subsequently -> installed using `pip install .whl`, however curses on Windows -> may not work as reliably as curses on Linux or Mac. - -> NOTE: This guide focuses on the command-line interface (CLI) of tfdbg. 
For -> guide on how to use the graphical user interface (GUI) of tfdbg, i.e., the -> **TensorBoard Debugger Plugin**, please visit -> [its README](https://github.com/tensorflow/tensorboard/blob/master/tensorboard/plugins/debugger/README.md). +`tfdbg` is a specialized debugger for TensorFlow. It lets you view the internal +structure and states of running TensorFlow graphs during training and inference, +which is difficult to debug with general-purpose debuggers such as Python's `pdb` +due to TensorFlow's computation-graph paradigm. + +This guide focuses on the command-line interface (CLI) of `tfdbg`. For a guide on +how to use the graphical user interface (GUI) of tfdbg, i.e., the +**TensorBoard Debugger Plugin**, please visit +[its README](https://github.com/tensorflow/tensorboard/blob/master/tensorboard/plugins/debugger/README.md). + +Note: The TensorFlow debugger uses a +[curses](https://en.wikipedia.org/wiki/Curses_\(programming_library\))-based text +user interface. On Mac OS X, the `ncurses` library is required and can be +installed with `brew install homebrew/dupes/ncurses`. On Windows, curses isn't as +well supported, so a [readline](https://en.wikipedia.org/wiki/GNU_Readline)-based +interface can be used with tfdbg by installing `pyreadline` with `pip`. If you +use Anaconda3, you can install it with a command such as +`"C:\Program Files\Anaconda3\Scripts\pip.exe" install pyreadline`. Unofficial +Windows curses packages can be downloaded +[here](https://www.lfd.uci.edu/~gohlke/pythonlibs/#curses), then subsequently +installed using `pip install .whl`, however curses on Windows may +not work as reliably as curses on Linux or Mac. 
This tutorial demonstrates how to use the **tfdbg** CLI to debug the appearance of [`nan`s](https://en.wikipedia.org/wiki/NaN) @@ -401,7 +400,7 @@ diff = -(y_ * tf.log(y)) to the built-in, numerically-stable implementation of softmax cross-entropy: ```python -diff = tf.losses.sparse_softmax_cross_entropy(labels=y_, logits=logits) +diff = tf.losses.softmax_cross_entropy(labels=y_, logits=logits) ``` Rerun with the `--debug` flag as follows: @@ -748,16 +747,16 @@ There are three possible workarounds or solutions: to which tfdbg dumps the debug data. You can use it to let tfdbg dump the debug data on a disk with larger free space. For example: - ``` python - # For LocalCLIDebugWrapperSession - sess = tf_debug.LocalCLIDebugWrapperSession(dump_root="/with/lots/of/space") - - # For LocalCLIDebugHook - hooks = [tf_debug.LocalCLIDebugHook(dump_root="/with/lots/of/space")] - ``` +```python +# For LocalCLIDebugWrapperSession +sess = tf_debug.LocalCLIDebugWrapperSession(dump_root="/with/lots/of/space") +# For LocalCLIDebugHook +hooks = [tf_debug.LocalCLIDebugHook(dump_root="/with/lots/of/space")] +``` Make sure that the directory pointed to by dump_root is empty or nonexistent. - tfdbg cleans up the dump directories before exiting. + `tfdbg` cleans up the dump directories before exiting. + * Reduce the batch size used during the runs. * Use the filtering options of tfdbg's `run` command to watch only specific nodes in the graph. For example: diff --git a/tensorflow/docs_src/programmers_guide/eager.md b/tensorflow/docs_src/programmers_guide/eager.md index 8db65737dcb610e478f1a954a1db9068a3258216..595e6be4af78d7d684ddeca0adea59e5a754134d 100644 --- a/tensorflow/docs_src/programmers_guide/eager.md +++ b/tensorflow/docs_src/programmers_guide/eager.md @@ -1,35 +1,34 @@ # Eager Execution TensorFlow's eager execution is an imperative programming environment that -evaluates operations immediately, without an extra graph-building step. 
-Operations return concrete values instead of constructing a computational graph -to run later. This makes it easy to get started with TensorFlow, debug models, -reduce boilerplate code, and is fun! To follow along with this guide, run the -code samples below in an interactive `python` interpreter. - -Eager execution supports most TensorFlow operations and GPU acceleration. -Automatic differentiation uses a dynamically-constructed tape instead of a static -graph to compute gradients. Eager execution is a flexible machine learning -platform for research and experimentation that provides: - -* *An intuitive interface* —Structure your code naturally and use Python data +evaluates operations immediately, without building graphs: operations return +concrete values instead of constructing a computational graph to run later. This +makes it easy to get started with TensorFlow and debug models, and it +reduces boilerplate as well. To follow along with this guide, run the code +samples below in an interactive `python` interpreter. + +Eager execution is a flexible machine learning platform for research and +experimentation, providing: + +* *An intuitive interface*—Structure your code naturally and use Python data structures. Quickly iterate on small models and small data. -* *Easier debugging* —Call ops directly to inspect running models and test +* *Easier debugging*—Call ops directly to inspect running models and test changes. Use standard Python debugging tools for immediate error reporting. -* *Natural control flow* —Use Python control flow instead of graph control flow, - including support for dynamic models. +* *Natural control flow*—Use Python control flow instead of graph control + flow, simplifying the specification of dynamic models. -For a collection of examples running in eager execution, see: +Eager execution supports most TensorFlow operations and GPU acceleration. 
For a +collection of examples running in eager execution, see: [tensorflow/contrib/eager/python/examples](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/eager/python/examples). -Note: Some models may experience increased overhead with eager execution enabled. -Performance improvements are ongoing, but please +Note: Some models may experience increased overhead with eager execution +enabled. Performance improvements are ongoing, but please [file a bug](https://github.com/tensorflow/tensorflow/issues) if you find a problem and share your benchmarks. ## Setup and basic usage -Upgrade to TensorFlow 1.7 to include updates for eager execution: +Upgrade to the latest version of TensorFlow: ``` $ pip install --upgrade tensorflow @@ -103,39 +102,136 @@ print(a.numpy()) # [3 4]] ``` -The `tfe` module contains symbols available to both eager and graph execution +The `tf.contrib.eager` module contains symbols available to both eager and graph execution environments and is useful for writing code to [work with graphs](#work_with_graphs): ```py -import tensorflow.contrib.eager as tfe +tfe = tf.contrib.eager +``` + +## Dynamic control flow + +A major benefit of eager execution is that all the functionality of the host +language is available while your model is executing. So, for example, +it is easy to write [fizzbuzz](https://en.wikipedia.org/wiki/Fizz_buzz): + +```py +def fizzbuzz(max_num): + counter = tf.constant(0) + for num in range(max_num): + num = tf.constant(num) + if num % 3 == 0 and num % 5 == 0: + print('FizzBuzz') + elif num % 3 == 0: + print('Fizz') + elif num % 5 == 0: + print('Buzz') + else: + print(num) + counter += 1 + return counter +``` + +This has conditionals that depend on tensor values and it prints these values +at runtime. + +## Build a model + +Many machine learning models are represented by composing layers. 
When +using TensorFlow with eager execution you can either write your own layers or +use a layer provided in the `tf.keras.layers` package. + +While you can use any Python object to represent a layer, +TensorFlow has `tf.keras.layers.Layer` as a convenient base class. Inherit from +it to implement your own layer: + +```py +class MySimpleLayer(tf.keras.layers.Layer): + def __init__(self, output_units): + self.output_units = output_units + + def build(self, input): + # The build method gets called the first time your layer is used. + # Creating variables on build() allows you to make their shape depend + # on the input shape and hence remove the need for the user to specify + # full shapes. It is possible to create variables during __init__() if + # you already know their full shapes. + self.kernel = self.add_variable( + "kernel", [input.shape[-1], self.output_units]) + + def call(self, input): + # Override call() instead of __call__ so we can perform some bookkeeping. + return tf.matmul(input, self.kernel) +``` + +Use `tf.keras.layers.Dense` layer instead of `MySimpleLayer` above as it has +a superset of its functionality (it can also add a bias). + +When composing layers into models you can use `tf.keras.Sequential` to represent +models which are a linear stack of layers. It is easy to use for basic models: + +```py +model = tf.keras.Sequential([ + tf.keras.layers.Dense(10, input_shape=(784,)), # must declare input shape + tf.keras.layers.Dense(10) +]) +``` + +Alternatively, organize models in classes by inheriting from `tf.keras.Model`. +This is a container for layers that is a layer itself, allowing `tf.keras.Model` +objects to contain other `tf.keras.Model` objects. 
+ +```py +class MNISTModel(tf.keras.Model): + def __init__(self): + super(MNISTModel, self).__init__() + self.dense1 = tf.keras.layers.Dense(units=10) + self.dense2 = tf.keras.layers.Dense(units=10) + + def call(self, input): + """Run the model.""" + result = self.dense1(input) + result = self.dense2(result) + result = self.dense2(result) # reuse variables from dense2 layer + return result + +model = MNISTModel() ``` +It's not required to set an input shape for the `tf.keras.Model` class since +the parameters are set the first time input is passed to the layer. + +`tf.keras.layers` classes create and contain their own model variables that +are tied to the lifetime of their layer objects. To share layer variables, share +their objects. + + ## Eager training -### Automatic differentiation +### Computing gradients [Automatic differentiation](https://en.wikipedia.org/wiki/Automatic_differentiation) is useful for implementing machine learning algorithms such as [backpropagation](https://en.wikipedia.org/wiki/Backpropagation) for training -neural networks. During eager execution, use `tfe.GradientTape` to trace +neural networks. During eager execution, use `tf.GradientTape` to trace operations for computing gradients later. -`tfe.GradientTape` is an opt-in feature to provide maximal performance when +`tf.GradientTape` is an opt-in feature to provide maximal performance when not tracing. Since different operations can occur during each call, all forward-pass operations get recorded to a "tape". To compute the gradient, play -the tape backwards and then discard. A particular `tfe.GradientTape` can only -be computed once, subsequent calls throw a runtime error. +the tape backwards and then discard. A particular `tf.GradientTape` can only +compute one gradient; subsequent calls throw a runtime error. 
```py w = tfe.Variable([[1.0]]) -with tfe.GradientTape() as tape: +with tf.GradientTape() as tape: loss = w * w grad = tape.gradient(loss, [w]) print(grad) # => [tf.Tensor([[ 2.]], shape=(1, 1), dtype=float32)] ``` -Here's an example of `tfe.GradientTape` that records forward-pass operations +Here's an example of `tf.GradientTape` that records forward-pass operations to train a simple model: ```py @@ -155,8 +251,8 @@ def loss(weights, biases): # Return the derivative of loss with respect to weight and bias def grad(weights, biases): - with tfe.GradientTape() as tape: - loss_value = loss(weights, biases) + with tf.GradientTape() as tape: + loss_value = loss(weights, biases) return tape.gradient(loss_value, [weights, biases]) train_steps = 200 @@ -196,7 +292,7 @@ Final loss: 0.974 W = 3.01582956314, B = 2.1191945076 ``` -Replay the `tfe.GradientTape` to compute the gradients and apply them in a +Replay the `tf.GradientTape` to compute the gradients and apply them in a training loop. This is demonstrated in an excerpt from the [mnist_eager.py](https://github.com/tensorflow/models/blob/master/official/mnist/mnist_eager.py) example: @@ -205,9 +301,9 @@ example: dataset = tf.data.Dataset.from_tensor_slices((data.train.images, data.train.labels)) ... -for (batch, (images, labels)) in enumerate(tfe.Iterator(dataset)): +for (batch, (images, labels)) in enumerate(dataset): ... - with tfe.GradientTape() as tape: + with tf.GradientTape() as tape: logits = model(images, training=True) loss_value = loss(logits, labels) ... @@ -216,189 +312,12 @@ for (batch, (images, labels)) in enumerate(tfe.Iterator(dataset)): global_step=tf.train.get_or_create_global_step()) ``` -#### Dynamic models - -`tfe.GradientTape` can also be used in dynamic models. 
This example for a -[backtracking line search](https://wikipedia.org/wiki/Backtracking_line_search) -algorithm looks like normal NumPy code, except there are gradients and is -differentiable, despite the complex control flow: - -```py -def line_search_step(fn, init_x, rate=1.0): - with tfe.GradientTape() as tape: - # Variables are automatically recorded, but manually watch a tensor - tape.watch(init_x) - value = fn(init_x) - grad, = tape.gradient(value, [init_x]) - grad_norm = tf.reduce_sum(grad * grad) - init_value = value - while value > init_value - rate * grad_norm: - x = init_x - rate * grad - value = fn(x) - rate /= 2.0 - return x, value -``` - -#### Additional functions to compute gradients - -`tfe.GradientTape` is a powerful interface for computing gradients, but there -is another [Autograd](https://github.com/HIPS/autograd)-style API available for -automatic differentiation. These functions are useful if writing math code with -only tensors and gradient functions, and without `tfe.Variables`: - -* `tfe.gradients_function` —Returns a function that computes the derivatives - of its input function parameter with respect to its arguments. The input - function parameter must return a scalar value. When the returned function is - invoked, it returns a list of `tf.Tensor` objects: one element for each - argument of the input function. Since anything of interest must be passed as a - function parameter, this becomes unwieldy if there's a dependency on many - trainable parameters. -* `tfe.value_and_gradients_function` —Similar to - `tfe.gradients_function`, but when the returned function is invoked, it - returns the value from the input function in addition to the list of - derivatives of the input function with respect to its arguments. - -In the following example, `tfe.gradients_function` takes the `square` -function as an argument and returns a function that computes the partial -derivatives of `square` with respect to its inputs. 
To calculate the derivative -of `square` at `3`, `grad(3.0)` returns `6`. - -```py -def square(x): - return tf.multiply(x, x) - -grad = tfe.gradients_function(square) - -square(3.) # => 9.0 -grad(3.) # => [6.0] - -# The second-order derivative of square: -gradgrad = tfe.gradients_function(lambda x: grad(x)[0]) -gradgrad(3.) # => [2.0] - -# The third-order derivative is None: -gradgradgrad = tfe.gradients_function(lambda x: gradgrad(x)[0]) -gradgradgrad(3.) # => [None] - - -# With flow control: -def abs(x): - return x if x > 0. else -x - -grad = tfe.gradients_function(abs) - -grad(3.) # => [1.0] -grad(-3.) # => [-1.0] -``` - -### Custom gradients - -Custom gradients are an easy way to override gradients in eager and graph -execution. Within the forward function, define the gradient with respect to the -inputs, outputs, or intermediate results. For example, here's an easy way to clip -the norm of the gradients in the backward pass: - -```py -@tf.custom_gradient -def clip_gradient_by_norm(x, norm): - y = tf.identity(x) - def grad_fn(dresult): - return [tf.clip_by_norm(dresult, norm), None] - return y, grad_fn -``` - -Custom gradients are commonly used to provide a numerically stable gradient for a -sequence of operations: - -```py -def log1pexp(x): - return tf.log(1 + tf.exp(x)) -grad_log1pexp = tfe.gradients_function(log1pexp) - -# The gradient computation works fine at x = 0. -grad_log1pexp(0.) # => [0.5] - -# However, x = 100 fails because of numerical instability. -grad_log1pexp(100.) # => [nan] -``` - -Here, the `log1pexp` function can be analytically simplified with a custom -gradient. 
The implementation below reuses the value for `tf.exp(x)` that is -computed during the forward pass—making it more efficient by eliminating -redundant calculations: - -```py -@tf.custom_gradient -def log1pexp(x): - e = tf.exp(x) - def grad(dy): - return dy * (1 - 1 / (1 + e)) - return tf.log(1 + e), grad - -grad_log1pexp = tfe.gradients_function(log1pexp) - -# As before, the gradient computation works fine at x = 0. -grad_log1pexp(0.) # => [0.5] - -# And the gradient computation also works at x = 100. -grad_log1pexp(100.) # => [1.0] -``` - - -## Build and train models - -There are many parameters to optimize when calculating derivatives. TensorFlow -code is easier to read when structured into reusable classes and objects instead -of a single top-level function. Eager execution encourages the use of the -Keras-style layer classes in the `tf.keras.layers` module. Additionally, the -`tf.train.Optimizer` classes provide sophisticated techniques to calculate -parameter updates. The following example creates a multi-layer model that classifies the standard [MNIST handwritten digits](https://www.tensorflow.org/tutorials/layers). It demonstrates the optimizer and layer APIs to build trainable graphs in an eager execution environment. -### Build a model - -The `tf.keras.Sequential` model is a linear stack of layers. It is easy to -use for basic models: - -```py -model = tf.keras.Sequential([ - tf.keras.layers.Dense(10, input_shape=(784,)), # must declare input shape - tf.keras.layers.Dense(10) -]) -``` - -Alternatively, organize models in classes by inheriting from `tf.keras.Model`. -This is a container for layers that is a layer itself, allowing `tf.keras.Model` -objects to contain other `tf.keras.Model` objects. 
- -```py -class MNISTModel(tf.keras.Model): - def __init__(self): - super(MNISTModel, self).__init__() - self.dense1 = tf.keras.layers.Dense(units=10) - self.dense2 = tf.keras.layers.Dense(units=10) - - def call(self, input): - """Run the model.""" - result = self.dense1(input) - result = self.dense2(result) - result = self.dense2(result) # reuse variables from dense2 layer - return result - -model = MNISTModel() -``` - -It's not required to set an input shape for the `tf.keras.Model` class since -the parameters are set the first time input is passed to the layer. - -`tf.keras.layers` classes create and contain their own model variables that -are tied to the lifetime of their layer objects. To share layer variables, share -their objects. - ### Train a model Even without training, call the model and inspect the output in eager execution: @@ -415,7 +334,7 @@ result = model(batch) This example uses the [dataset.py module](https://github.com/tensorflow/models/blob/master/official/mnist/dataset.py) from the -[TensorFlow MNIST example](https://github.com/tensorflow/models/tree/master/official/mnist), +[TensorFlow MNIST example](https://github.com/tensorflow/models/tree/master/official/mnist); download this file to your local directory. 
Run the following to download the MNIST data files to your working directory and prepare a `tf.data.Dataset` for training: @@ -434,17 +353,17 @@ def loss(model, x, y): return tf.losses.sparse_softmax_cross_entropy(labels=y, logits=prediction) def grad(model, inputs, targets): - with tfe.GradientTape() as tape: + with tf.GradientTape() as tape: loss_value = loss(model, inputs, targets) return tape.gradient(loss_value, model.variables) optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001) -x, y = tfe.Iterator(dataset_train).next() +x, y = iter(dataset_train).next() print("Initial loss: {:.3f}".format(loss(model, x, y))) # Training loop -for (i, (x, y)) in enumerate(tfe.Iterator(dataset_train)): +for (i, (x, y)) in enumerate(dataset_train): # Calculate derivatives of the input function with respect to its parameters. grads = grad(model, x, y) # Apply the gradient to the model @@ -479,7 +398,7 @@ And for faster training, move the computation to a GPU: ```py with tf.device("/gpu:0"): - for (i, (x, y)) in enumerate(tfe.Iterator(dataset_train)): + for (i, (x, y)) in enumerate(dataset_train): # minimize() is equivalent to the grad() and apply_gradients() calls. optimizer.minimize(lambda: loss(model, x, y), global_step=tf.train.get_or_create_global_step()) @@ -492,7 +411,7 @@ training to make automatic differentiation easier. The parameters of a model can be encapsulated in classes as variables. Better encapsulate model parameters by using `tfe.Variable` with -`tfe.GradientTape`. For example, the automatic differentiation example above +`tf.GradientTape`. 
For example, the automatic differentiation example above can be rewritten: ```py @@ -516,7 +435,7 @@ def loss(model, inputs, targets): return tf.reduce_mean(tf.square(error)) def grad(model, inputs, targets): - with tfe.GradientTape() as tape: + with tf.GradientTape() as tape: loss_value = loss(model, inputs, targets) return tape.gradient(loss_value, [model.W, model.B]) @@ -662,11 +581,141 @@ for _ in range(iterations): ... ``` +## Advanced automatic differentiation topics + +### Dynamic models + +`tf.GradientTape` can also be used in dynamic models. This example for a +[backtracking line search](https://wikipedia.org/wiki/Backtracking_line_search) +algorithm looks like normal NumPy code, except there are gradients and is +differentiable, despite the complex control flow: + +```py +def line_search_step(fn, init_x, rate=1.0): + with tf.GradientTape() as tape: + # Variables are automatically recorded, but manually watch a tensor + tape.watch(init_x) + value = fn(init_x) + grad, = tape.gradient(value, [init_x]) + grad_norm = tf.reduce_sum(grad * grad) + init_value = value + while value > init_value - rate * grad_norm: + x = init_x - rate * grad + value = fn(x) + rate /= 2.0 + return x, value +``` + +### Additional functions to compute gradients + +`tf.GradientTape` is a powerful interface for computing gradients, but there +is another [Autograd](https://github.com/HIPS/autograd)-style API available for +automatic differentiation. These functions are useful if writing math code with +only tensors and gradient functions, and without `tfe.Variables`: + +* `tfe.gradients_function` —Returns a function that computes the derivatives + of its input function parameter with respect to its arguments. The input + function parameter must return a scalar value. When the returned function is + invoked, it returns a list of `tf.Tensor` objects: one element for each + argument of the input function. 
Since anything of interest must be passed as a + function parameter, this becomes unwieldy if there's a dependency on many + trainable parameters. +* `tfe.value_and_gradients_function` —Similar to + `tfe.gradients_function`, but when the returned function is invoked, it + returns the value from the input function in addition to the list of + derivatives of the input function with respect to its arguments. + +In the following example, `tfe.gradients_function` takes the `square` +function as an argument and returns a function that computes the partial +derivatives of `square` with respect to its inputs. To calculate the derivative +of `square` at `3`, `grad(3.0)` returns `6`. + +```py +def square(x): + return tf.multiply(x, x) + +grad = tfe.gradients_function(square) + +square(3.) # => 9.0 +grad(3.) # => [6.0] + +# The second-order derivative of square: +gradgrad = tfe.gradients_function(lambda x: grad(x)[0]) +gradgrad(3.) # => [2.0] + +# The third-order derivative is None: +gradgradgrad = tfe.gradients_function(lambda x: gradgrad(x)[0]) +gradgradgrad(3.) # => [None] + + +# With flow control: +def abs(x): + return x if x > 0. else -x + +grad = tfe.gradients_function(abs) + +grad(3.) # => [1.0] +grad(-3.) # => [-1.0] +``` + +### Custom gradients + +Custom gradients are an easy way to override gradients in eager and graph +execution. Within the forward function, define the gradient with respect to the +inputs, outputs, or intermediate results. 
For example, here's an easy way to clip +the norm of the gradients in the backward pass: + +```py +@tf.custom_gradient +def clip_gradient_by_norm(x, norm): + y = tf.identity(x) + def grad_fn(dresult): + return [tf.clip_by_norm(dresult, norm), None] + return y, grad_fn +``` + +Custom gradients are commonly used to provide a numerically stable gradient for a +sequence of operations: + +```py +def log1pexp(x): + return tf.log(1 + tf.exp(x)) +grad_log1pexp = tfe.gradients_function(log1pexp) + +# The gradient computation works fine at x = 0. +grad_log1pexp(0.) # => [0.5] + +# However, x = 100 fails because of numerical instability. +grad_log1pexp(100.) # => [nan] +``` + +Here, the `log1pexp` function can be analytically simplified with a custom +gradient. The implementation below reuses the value for `tf.exp(x)` that is +computed during the forward pass—making it more efficient by eliminating +redundant calculations: + +```py +@tf.custom_gradient +def log1pexp(x): + e = tf.exp(x) + def grad(dy): + return dy * (1 - 1 / (1 + e)) + return tf.log(1 + e), grad + +grad_log1pexp = tfe.gradients_function(log1pexp) + +# As before, the gradient computation works fine at x = 0. +grad_log1pexp(0.) # => [0.5] + +# And the gradient computation also works at x = 100. +grad_log1pexp(100.) # => [1.0] +``` + ## Performance -Computation is not automatically offloaded to GPUs during eager execution. To -explicitly direct a computation to a GPU, enclose it in a -`tf.device('/gpu:0')` block: +Computation is automatically offloaded to GPUs during eager execution. 
If you +want control over where a computation runs you can enclose it in a +`tf.device('/gpu:0')` block (or the CPU equivalent): ```py import time diff --git a/tensorflow/docs_src/programmers_guide/faq.md b/tensorflow/docs_src/programmers_guide/faq.md index 392ac6f7f12532c3efce5bec1917691f55c7bee5..51c1a1e032baae7eff334da785fc5ffa2438e0ca 100644 --- a/tensorflow/docs_src/programmers_guide/faq.md +++ b/tensorflow/docs_src/programmers_guide/faq.md @@ -121,7 +121,7 @@ dimensions: devices, which makes it possible to speed up @{$deep_cnn$CIFAR-10 training using multiple GPUs}. * The Session API allows multiple concurrent steps (i.e. calls to - @{tf.Session.run} in parallel. This + @{tf.Session.run} in parallel). This enables the runtime to get higher throughput, if a single step does not use all of the resources in your computer. diff --git a/tensorflow/docs_src/programmers_guide/graphs.md b/tensorflow/docs_src/programmers_guide/graphs.md index e69b717432e6a8fab0085eb419dcbc0991cd9d28..f0dd8def17fd6dfed241167a5ebb5be678152c16 100644 --- a/tensorflow/docs_src/programmers_guide/graphs.md +++ b/tensorflow/docs_src/programmers_guide/graphs.md @@ -96,7 +96,7 @@ to all API functions in the same context. For example: (See @{$programmers_guide/variables} for more information about variables.) * Calling @{tf.train.Optimizer.minimize} will add operations and tensors to the - default graph that calculate gradients, and return a @{tf.Operation} that, + default graph that calculates gradients, and return a @{tf.Operation} that, when run, will apply those gradients to a set of variables. Most programs rely solely on the default graph. However, @@ -210,7 +210,7 @@ with tf.device("/device:GPU:0"): # Operations created in this context will be pinned to the GPU. 
result = tf.matmul(weights, img) ``` -If you are deploying TensorFlow in a @{$deploy/distributed$typical distributed configuration}, +If you are deploying TensorFlow in a @{$distributed$typical distributed configuration}, you might specify the job name and task ID to place variables on a task in the parameter server job (`"/job:ps"`), and the other operations on task in the worker job (`"/job:worker"`): @@ -362,7 +362,7 @@ operations that are needed to compute the result. @{tf.Session.run} requires you to specify a list of **fetches**, which determine the return values, and may be a @{tf.Operation}, a @{tf.Tensor}, or -a [tensor-like type](#tensor-like-objects) such as @{tf.Variable}. These fetches +a [tensor-like type](#tensor-like_objects) such as @{tf.Variable}. These fetches determine what **subgraph** of the overall @{tf.Graph} must be executed to produce the result: this is the subgraph that contains all operations named in the fetch list, plus all operations whose outputs are used to compute the value @@ -505,7 +505,7 @@ multiple graphs in the same process. As noted above, TensorFlow provides a "default graph" that is implicitly passed to all API functions in the same context. For many applications, a single graph is sufficient. However, TensorFlow also provides methods for manipulating -the default graph, which can be useful in more advanced used cases. For example: +the default graph, which can be useful in more advanced use cases. For example: * A @{tf.Graph} defines the namespace for @{tf.Operation} objects: each operation in a single graph must have a unique name. TensorFlow will diff --git a/tensorflow/docs_src/programmers_guide/index.md b/tensorflow/docs_src/programmers_guide/index.md index e8c2fa6990c8ecfca1cfe76b3f813b4ae6917742..648d001bd3535fe3dcc460c9ebdb6e6a997dc332 100644 --- a/tensorflow/docs_src/programmers_guide/index.md +++ b/tensorflow/docs_src/programmers_guide/index.md @@ -5,6 +5,7 @@ works. 
The units are as follows: ## High Level APIs + * @{$programmers_guide/eager}, which is the easiest way to use TensorFlow. * @{$programmers_guide/estimators}, which introduces a high-level TensorFlow API that greatly simplifies ML programming. * @{$programmers_guide/datasets}, which explains how to diff --git a/tensorflow/docs_src/programmers_guide/saved_model.md b/tensorflow/docs_src/programmers_guide/saved_model.md index 55ee42dd6405db6bd34b064d71deaeb94839b0fa..c6ef87c54a3bc37dbfc0553232a8e3d30f8ee2f6 100644 --- a/tensorflow/docs_src/programmers_guide/saved_model.md +++ b/tensorflow/docs_src/programmers_guide/saved_model.md @@ -485,31 +485,7 @@ portion of the signature. That is, when writing a to expect and how to map them to your model's expected inputs. By contrast, the *output* portion of the signature is determined by the model. - -### Perform the export - -To export your trained Estimator, call -@{tf.estimator.Estimator.export_savedmodel} with the export base path and -the `serving_input_receiver_fn`. - -```py -estimator.export_savedmodel(export_dir_base, serving_input_receiver_fn, - strip_default_attrs=True) -``` - -This method builds a new graph by first calling the -`serving_input_receiver_fn()` to obtain feature `Tensor`s, and then calling -this `Estimator`'s `model_fn()` to generate the model graph based on those -features. It starts a fresh `Session`, and, by default, restores the most recent -checkpoint into it. (A different checkpoint may be passed, if needed.) -Finally it creates a time-stamped export directory below the given -`export_dir_base` (i.e., `export_dir_base/`), and writes a -SavedModel into it containing a single `MetaGraphDef` saved from this -Session. - -> Note: It is your responsibility to garbage-collect old exports. -> Otherwise, successive exports will accumulate under `export_dir_base`. 
- + ### Specify the outputs of a custom model When writing a custom `model_fn`, you must populate the `export_outputs` element @@ -541,6 +517,30 @@ using [`signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY`](https://www.tens indicating which `SignatureDef` will be served when an inference request does not specify one. + +### Perform the export + +To export your trained Estimator, call +@{tf.estimator.Estimator.export_savedmodel} with the export base path and +the `serving_input_receiver_fn`. + +```py +estimator.export_savedmodel(export_dir_base, serving_input_receiver_fn, + strip_default_attrs=True) +``` + +This method builds a new graph by first calling the +`serving_input_receiver_fn()` to obtain feature `Tensor`s, and then calling +this `Estimator`'s `model_fn()` to generate the model graph based on those +features. It starts a fresh `Session`, and, by default, restores the most recent +checkpoint into it. (A different checkpoint may be passed, if needed.) +Finally it creates a time-stamped export directory below the given +`export_dir_base` (i.e., `export_dir_base/<timestamp>`), and writes a +SavedModel into it containing a single `MetaGraphDef` saved from this +Session. + +> Note: It is your responsibility to garbage-collect old exports. +> Otherwise, successive exports will accumulate under `export_dir_base`. ### Serve the exported model locally diff --git a/tensorflow/docs_src/programmers_guide/using_tpu.md b/tensorflow/docs_src/programmers_guide/using_tpu.md index cb0d86fc4c54ac690f13c93ebbd10805c7738c62..5e3e49d43402cd76f8b7062483259df4598bd8ff 100644 --- a/tensorflow/docs_src/programmers_guide/using_tpu.md +++ b/tensorflow/docs_src/programmers_guide/using_tpu.md @@ -280,8 +280,8 @@ Where `params['batch-size']` will contain the batch size. ### Static shapes and batch size The input pipeline generated by your `input_fn` is run on CPU. So it is mostly -free strict static shape requirements imposed by the XLA/TPU environment.
The -one requirement is that the batches of data fed from your input pipeline to +free from the strict static shape requirements imposed by the XLA/TPU environment. +The one requirement is that the batches of data fed from your input pipeline to the TPU have a static shape, as determined by the standard TensorFlow shape inference algorithm. Intermediate tensors are free to have a dynamic shapes. If shape inference has failed, but the shape is known it is possible to diff --git a/tensorflow/docs_src/tutorials/audio_recognition.md b/tensorflow/docs_src/tutorials/audio_recognition.md index 7d79f433c41b42a268816d8277ea69b0d62a04f3..372ab47df7df309ab926836ca19f34d2d0d38915 100644 --- a/tensorflow/docs_src/tutorials/audio_recognition.md +++ b/tensorflow/docs_src/tutorials/audio_recognition.md @@ -280,7 +280,7 @@ tool: ``` bazel run tensorflow/examples/wav_to_spectrogram:wav_to_spectrogram -- \ --input_wav=/tmp/speech_dataset/happy/ab00c4b2_nohash_0.wav \ ---output_png=/tmp/spectrogram.png +--output_image=/tmp/spectrogram.png ``` If you open up `/tmp/spectrogram.png` you should see something like this: diff --git a/tensorflow/docs_src/tutorials/image_retraining.md b/tensorflow/docs_src/tutorials/image_retraining.md index 93d7c86e42aa90d145d27b56edc0abfec7034686..27784eef9cdb5c6f8b9af44b3fc3f876cda39d13 100644 --- a/tensorflow/docs_src/tutorials/image_retraining.md +++ b/tensorflow/docs_src/tutorials/image_retraining.md @@ -1,404 +1,4 @@ # How to Retrain Inception's Final Layer for New Categories -Modern object recognition models have millions of parameters and can take weeks -to fully train. Transfer learning is a technique that shortcuts a lot of this -work by taking a fully-trained model for a set of categories like ImageNet, and -retrains from the existing weights for new classes. In this example we'll be -retraining the final layer from scratch, while leaving all the others untouched. 
-For more information on the approach you can see -[this paper on Decaf](https://arxiv.org/pdf/1310.1531v1.pdf). - -Though it's not as good as a full training run, this is surprisingly effective -for many applications, and can be run in as little as thirty minutes on a -laptop, without requiring a GPU. This tutorial will show you how to run the -example script on your own images, and will explain some of the options you have -to help control the training process. - -Note: A version of this tutorial is also available -[as a codelab](https://codelabs.developers.google.com/codelabs/tensorflow-for-poets/#0). - -Before you start, you must @{$install$install tensorflow}. - -[TOC] - -## Training on Flowers - -![Daisies by Kelly Sikkema](https://www.tensorflow.org/images/daisies.jpg) - -[Image by Kelly Sikkema](https://www.flickr.com/photos/95072945@N05/9922116524/) - -Before you start any training, you'll need a set of images to teach the network -about the new classes you want to recognize. There's a later section that -explains how to prepare your own images, but to make it easy we've created an -archive of creative-commons licensed flower photos to use initially. To get the -set of flower photos, run these commands: - -```sh -cd ~ -curl -O http://download.tensorflow.org/example_images/flower_photos.tgz -tar xzf flower_photos.tgz -``` - -Once you have the images, you can clone the tensorflow repository using the -following command (these examples are not included in the installation): - -```sh -git clone https://github.com/tensorflow/tensorflow -``` - -Then checkout the version of the tensorflow repository matching your -installation and this tutorial as follows: - -``` sh -cd tensorflow -git checkout {version} -``` - -In the simplest cases the retrainer can then be run like this: - -```sh -python tensorflow/examples/image_retraining/retrain.py --image_dir ~/flower_photos -``` - -The script has many other options. 
You can get a full listing with: - -```sh -python tensorflow/examples/image_retraining/retrain.py -h -``` - -This script loads the pre-trained Inception v3 model, removes the old top layer, -and trains a new one on the flower photos you've downloaded. None of the flower -species were in the original ImageNet classes the full network was trained on. -The magic of transfer learning is that lower layers that have been trained to -distinguish between some objects can be reused for many recognition tasks -without any alteration. - -## Bottlenecks - -The script can take thirty minutes or more to complete, depending on the speed -of your machine. The first phase analyzes all the images on disk and calculates -the bottleneck values for each of them. 'Bottleneck' is an informal term we -often use for the layer just before the final output layer that actually does -the classification. This penultimate layer has been trained to output a set of -values that's good enough for the classifier to use to distinguish between all -the classes it's been asked to recognize. That means it has to be a meaningful -and compact summary of the images, since it has to contain enough information -for the classifier to make a good choice in a very small set of values. The -reason our final layer retraining can work on new classes is that it turns out -the kind of information needed to distinguish between all the 1,000 classes in -ImageNet is often also useful to distinguish between new kinds of objects. - -Because every image is reused multiple times during training and calculating -each bottleneck takes a significant amount of time, it speeds things up to -cache these bottleneck values on disk so they don't have to be repeatedly -recalculated. By default they're stored in the `/tmp/bottleneck` directory, and -if you rerun the script they'll be reused so you don't have to wait for this -part again. 
- -## Training - -Once the bottlenecks are complete, the actual training of the top layer of the -network begins. You'll see a series of step outputs, each one showing training -accuracy, validation accuracy, and the cross entropy. The training accuracy -shows what percent of the images used in the current training batch were -labeled with the correct class. The validation accuracy is the precision on a -randomly-selected group of images from a different set. The key difference is -that the training accuracy is based on images that the network has been able -to learn from so the network can overfit to the noise in the training data. A -true measure of the performance of the network is to measure its performance on -a data set not contained in the training data -- this is measured by the -validation accuracy. If the train accuracy is high but the validation accuracy -remains low, that means the network is overfitting and memorizing particular -features in the training images that aren't helpful more generally. Cross -entropy is a loss function which gives a glimpse into how well the learning -process is progressing. The training's objective is to make the loss as small as -possible, so you can tell if the learning is working by keeping an eye on -whether the loss keeps trending downwards, ignoring the short-term noise. - -By default this script will run 4,000 training steps. Each step chooses 100 -images at random from the training set, finds their bottlenecks from the cache, -and feeds them into the final layer to get predictions. Those predictions are -then compared against the actual labels to update the final layer's weights -through the back-propagation process. As the process continues you should see -the reported accuracy improve, and after all the steps are done, a final test -accuracy evaluation is run on a set of images kept separate from the training -and validation pictures. 
This test evaluation is the best estimate of how the -trained model will perform on the classification task. You should see an -accuracy value of between 90% and 95%, though the exact value will vary from run -to run since there's randomness in the training process. This number is based on -the percent of the images in the test set that are given the correct label -after the model is fully trained. - -## Visualizing the Retraining with TensorBoard - -The script includes TensorBoard summaries that make it easier to understand, debug, and optimize the retraining. For example, you can visualize the graph and statistics, such as how the weights or accuracy varied during training. - -To launch TensorBoard, run this command during or after retraining: - -```sh -tensorboard --logdir /tmp/retrain_logs -``` - -Once TensorBoard is running, navigate your web browser to `localhost:6006` to view the TensorBoard. - -The script will log TensorBoard summaries to `/tmp/retrain_logs` by default. You can change the directory with the `--summaries_dir` flag. - -The [TensorBoard's GitHub](https://github.com/tensorflow/tensorboard) has a lot more information on TensorBoard usage, including tips & tricks, and debugging information. - -## Using the Retrained Model - -The script will write out a version of the Inception v3 network with a final -layer retrained to your categories to /tmp/output_graph.pb, and a text file -containing the labels to /tmp/output_labels.txt. These are both in a format that -the @{$image_recognition$C++ and Python image classification examples} -can read in, so you can start using your new model immediately. Since you've -replaced the top layer, you will need to specify the new name in the script, for -example with the flag `--output_layer=final_result` if you're using label_image. 
- -Here's an example of how to run the label_image example with your -retrained graphs: - -```sh -python tensorflow/examples/label_image/label_image.py \ ---graph=/tmp/output_graph.pb --labels=/tmp/output_labels.txt \ ---input_layer=Mul \ ---output_layer=final_result \ ---input_mean=128 --input_std=128 \ ---image=$HOME/flower_photos/daisy/21652746_cc379e0eea_m.jpg -``` - -You should see a list of flower labels, in most cases with daisy on top -(though each retrained model may be slightly different). You can replace the -`--image` parameter with your own images to try those out. - -If you'd like to use the retrained model in your own Python program, then the -above -[`label_image` script](https://www.tensorflow.org/code/tensorflow/examples/label_image/label_image.py) -is a reasonable starting point. The `label_image` -directory also contains C++ code which you can use as a template to integrate -tensorflow with your own applications. - -If you find the default Inception v3 model is too large or slow for your -application, take a look at the [Other Model Architectures section](/tutorials/image_retraining#other_model_architectures) -below for options to speed up and slim down your network. - -## Training on Your Own Categories - -If you've managed to get the script working on the flower example images, you -can start looking at teaching it to recognize categories you care about instead. -In theory all you'll need to do is point it at a set of sub-folders, each named -after one of your categories and containing only images from that category. If -you do that and pass the root folder of the subdirectories as the argument to -`--image_dir`, the script should train just like it did for the flowers. 
- -Here's what the folder structure of the flowers archive looks like, to give you -and example of the kind of layout the script is looking for: - -![Folder Structure](https://www.tensorflow.org/images/folder_structure.png) - -In practice it may take some work to get the accuracy you want. I'll try to -guide you through some of the common problems you might encounter below. - -## Creating a Set of Training Images - -The first place to start is by looking at the images you've gathered, since the -most common issues we see with training come from the data that's being fed in. - -For training to work well, you should gather at least a hundred photos of each -kind of object you want to recognize. The more you can gather, the better the -accuracy of your trained model is likely to be. You also need to make sure that -the photos are a good representation of what your application will actually -encounter. For example, if you take all your photos indoors against a blank wall -and your users are trying to recognize objects outdoors, you probably won't see -good results when you deploy. - -Another pitfall to avoid is that the learning process will pick up on anything -that the labeled images have in common with each other, and if you're not -careful that might be something that's not useful. For example if you photograph -one kind of object in a blue room, and another in a green one, then the model -will end up basing its prediction on the background color, not the features of -the object you actually care about. To avoid this, try to take pictures in as -wide a variety of situations as you can, at different times, and with different -devices. If you want to know more about this problem, you can read about the -classic (and possibly apocryphal) -[tank recognition problem](https://www.jefftk.com/p/detecting-tanks). - -You may also want to think about the categories you use. 
It might be worth -splitting big categories that cover a lot of different physical forms into -smaller ones that are more visually distinct. For example instead of 'vehicle' -you might use 'car', 'motorbike', and 'truck'. It's also worth thinking about -whether you have a 'closed world' or an 'open world' problem. In a closed world, -the only things you'll ever be asked to categorize are the classes of object you -know about. This might apply to a plant recognition app where you know the user -is likely to be taking a picture of a flower, so all you have to do is decide -which species. By contrast a roaming robot might see all sorts of different -things through its camera as it wanders around the world. In that case you'd -want the classifier to report if it wasn't sure what it was seeing. This can be -hard to do well, but often if you collect a large number of typical 'background' -photos with no relevant objects in them, you can add them to an extra 'unknown' -class in your image folders. - -It's also worth checking to make sure that all of your images are labeled -correctly. Often user-generated tags are unreliable for our purposes, for -example using #daisy for pictures of a person named Daisy. If you go through -your images and weed out any mistakes it can do wonders for your overall -accuracy. - -## Training Steps - -If you're happy with your images, you can take a look at improving your results -by altering the details of the learning process. The simplest one to try is -`--how_many_training_steps`. This defaults to 4,000, but if you increase it to -8,000 it will train for twice as long. The rate of improvement in the accuracy -slows the longer you train for, and at some point will stop altogether, but you -can experiment to see when you hit that limit for your model. - -## Distortions - -A common way of improving the results of image training is by deforming, -cropping, or brightening the training inputs in random ways. 
This has the -advantage of expanding the effective size of the training data thanks to all the -possible variations of the same images, and tends to help the network learn to -cope with all the distortions that will occur in real-life uses of the -classifier. The biggest disadvantage of enabling these distortions in our script -is that the bottleneck caching is no longer useful, since input images are never -reused exactly. This means the training process takes a lot longer, so I -recommend trying this as a way of fine-tuning your model once you've got one -that you're reasonably happy with. - -You enable these distortions by passing `--random_crop`, `--random_scale` and -`--random_brightness` to the script. These are all percentage values that -control how much of each of the distortions is applied to each image. It's -reasonable to start with values of 5 or 10 for each of them and then experiment -to see which of them help with your application. `--flip_left_right` will -randomly mirror half of the images horizontally, which makes sense as long as -those inversions are likely to happen in your application. For example it -wouldn't be a good idea if you were trying to recognize letters, since flipping -them destroys their meaning. - -## Hyper-parameters - -There are several other parameters you can try adjusting to see if they help -your results. The `--learning_rate` controls the magnitude of the updates to the -final layer during training. Intuitively if this is smaller then the learning -will take longer, but it can end up helping the overall precision. That's not -always the case though, so you need to experiment carefully to see what works -for your case. The `--train_batch_size` controls how many images are examined -during one training step, and because the learning rate is applied per batch -you'll need to reduce it if you have larger batches to get the same overall -effect. 
- -## Training, Validation, and Testing Sets - -One of the things the script does under the hood when you point it at a folder -of images is divide them up into three different sets. The largest is usually -the training set, which are all the images fed into the network during training, -with the results used to update the model's weights. You might wonder why we -don't use all the images for training? A big potential problem when we're doing -machine learning is that our model may just be memorizing irrelevant details of -the training images to come up with the right answers. For example, you could -imagine a network remembering a pattern in the background of each photo it was -shown, and using that to match labels with objects. It could produce good -results on all the images it's seen before during training, but then fail on new -images because it's not learned general characteristics of the objects, just -memorized unimportant details of the training images. - -This problem is known as overfitting, and to avoid it we keep some of our data -out of the training process, so that the model can't memorize them. We then use -those images as a check to make sure that overfitting isn't occurring, since if -we see good accuracy on them it's a good sign the network isn't overfitting. The -usual split is to put 80% of the images into the main training set, keep 10% -aside to run as validation frequently during training, and then have a final 10% -that are used less often as a testing set to predict the real-world performance -of the classifier. These ratios can be controlled using the -`--testing_percentage` and `--validation_percentage` flags. In general -you should be able to leave these values at their defaults, since you won't -usually find any advantage to training to adjusting them. - -Note that the script uses the image filenames (rather than a completely random -function) to divide the images among the training, validation, and test sets. 
-This is done to ensure that images don't get moved between training and testing -sets on different runs, since that could be a problem if images that had been -used for training a model were subsequently used in a validation set. - -You might notice that the validation accuracy fluctuates among iterations. Much -of this fluctuation arises from the fact that a random subset of the validation -set is chosen for each validation accuracy measurement. The fluctuations can be -greatly reduced, at the cost of some increase in training time, by choosing -`--validation_batch_size=-1`, which uses the entire validation set for each -accuracy computation. - -Once training is complete, you may find it insightful to examine misclassified -images in the test set. This can be done by adding the flag -`--print_misclassified_test_images`. This may help you get a feeling for which -types of images were most confusing for the model, and which categories were -most difficult to distinguish. For instance, you might discover that some -subtype of a particular category, or some unusual photo angle, is particularly -difficult to identify, which may encourage you to add more training images of -that subtype. Oftentimes, examining misclassified images can also point to -errors in the input data set, such as mislabeled, low-quality, or ambiguous -images. However, one should generally avoid point-fixing individual errors in -the test set, since they are likely to merely reflect more general problems in -the (much larger) training set. - -## Other Model Architectures - -By default the script uses a pretrained version of the Inception v3 model -architecture. This is a good place to start because it provides high accuracy -results, but if you intend to deploy your model on mobile devices or other -resource-constrained environments you may want to trade off a little accuracy -for much smaller file sizes or faster speeds. 
To help with that, the -[retrain.py script](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/image_retraining/retrain.py) -supports different variations on the [Mobilenet architecture](https://research.googleblog.com/2017/06/mobilenets-open-source-models-for.html). - -These are a little less precise than Inception v3, but can result in far -smaller file sizes (a few megabytes) and can be many times faster -to run. To train with one of these models, pass in the `--architecture` flag, -for example: - -``` -python tensorflow/examples/image_retraining/retrain.py \ - --image_dir ~/flower_photos --architecture mobilenet_0.25_128 -``` - -This will create a 1.9MB model file in `/tmp/output_graph.pb`, with only 25% of -the number of neurons of the full Mobilenet, and trained to take 128x128 sized -input images. - -You can choose '1.0', '0.75', '0.50', or '0.25' to control the number of -neurons (activations of hidden layers); the number of weights (and hence to -some extent the file size and speed) shrinks like the square of that fraction. -You can choose '224', '192', '160', or '128' for the input image size, -with smaller sizes giving faster speeds. - -The speed and size advantages come at a loss to accuracy of course, but for many -purposes this isn't critical. They can also be somewhat offset with improved -training data. For example, training with distortions allows me to get above 80% -accuracy on the flower data set even with the 0.25/128 graph above. - -If you're going to be using the Mobilenet models in label_image or your own -programs, you'll need to feed in an image of the specified size converted to a -float range into the 'input' tensor. Typically 24-bit images are in the range -[0,255], and you must convert them to the [-1,1] float range expected by the -model with the formula `(image - 128.)/128.`. - -The default arguments for the `label_image` script are set for Inception V3. 
-To use it with a MobileNet, specify the above normalization parameters as -`input_mean` and `input_std` on the command line. You also must specify the -image size that your model expects, as follows: - -```sh -python tensorflow/examples/label_image/label_image.py \ ---graph=/tmp/output_graph.pb --labels=/tmp/output_labels.txt \ ---input_layer=input \ ---output_layer=final_result \ ---input_height=224 --input_width=224 \ ---input_mean=128 --input_std=128 \ ---image=$HOME/flower_photos/daisy/21652746_cc379e0eea_m.jpg -``` - -For more information on deploying the retrained model to a mobile device, see -the [codelab version](https://codelabs.developers.google.com/codelabs/tensorflow-for-poets/#0) -of this tutorial, especially [part 2](https://codelabs.developers.google.com/codelabs/tensorflow-for-poets-2-tflite/#0), which describes -[TensorFlow Lite](/mobile/tflite/) and the additional optimizations it offers -(including quantization of model weights). +**NOTE: This tutorial has moved to** +https://github.com/tensorflow/hub/tree/master/docs/tutorials/image_retraining.md diff --git a/tensorflow/docs_src/tutorials/layers.md b/tensorflow/docs_src/tutorials/layers.md index cadaec391d8970faf5847c9b9e39bccb31f885ed..37cd2bb1397deaddec9f7194d8c5093145e08644 100644 --- a/tensorflow/docs_src/tutorials/layers.md +++ b/tensorflow/docs_src/tutorials/layers.md @@ -192,8 +192,7 @@ dive deeper into the `tf.layers` code used to create each layer, as well as how to calculate loss, configure the training op, and generate predictions. If you're already experienced with CNNs and @{$get_started/custom_estimators$TensorFlow `Estimator`s}, and find the above code intuitive, you may want to skim these sections or just -skip ahead to ["Training and Evaluating the CNN MNIST -Classifier"](#training_and_evaluating_the_cnn_mnist_classifier). +skip ahead to ["Training and Evaluating the CNN MNIST Classifier"](#train_eval_mnist). 
### Input Layer @@ -536,8 +535,9 @@ if mode == tf.estimator.ModeKeys.TRAIN: ``` > Note: For a more in-depth look at configuring training ops for Estimator model -> functions, see @{$get_started/custom_estimators#defining_the_training_op_for_the_model$"Defining the training op for the model"} -> in the @{$get_started/custom_estimators$"Creating Estimators in tf.estimator."} tutorial. +> functions, see @{$get_started/custom_estimators#defining-the-training-op-for-the-model$"Defining the training op for the model"} +> in the @{$get_started/custom_estimators$"Creating Estimators in tf.estimator"} tutorial. + ### Add evaluation metrics @@ -552,7 +552,8 @@ return tf.estimator.EstimatorSpec( mode=mode, loss=loss, eval_metric_ops=eval_metric_ops) ``` -## Training and Evaluating the CNN MNIST Classifier {#training_and_evaluating_the_cnn_mnist_classifier} + +## Training and Evaluating the CNN MNIST Classifier We've coded our MNIST CNN model function; now we're ready to train and evaluate it. @@ -612,9 +613,9 @@ following to `main()`: ```python # Set up logging for predictions - tensors_to_log = {"probabilities": "softmax_tensor"} - logging_hook = tf.train.LoggingTensorHook( - tensors=tensors_to_log, every_n_iter=50) +tensors_to_log = {"probabilities": "softmax_tensor"} +logging_hook = tf.train.LoggingTensorHook( + tensors=tensors_to_log, every_n_iter=50) ``` We store a dict of the tensors we want to log in `tensors_to_log`.
Each key is a diff --git a/tensorflow/examples/android/BUILD b/tensorflow/examples/android/BUILD index a088d7cf2f05c81cf2e60cb5aa8de79957a30de2..aa594a63c6ad5ab7129e452e7a6345114b994231 100644 --- a/tensorflow/examples/android/BUILD +++ b/tensorflow/examples/android/BUILD @@ -76,7 +76,6 @@ android_binary( custom_package = "org.tensorflow.demo", inline_constants = 1, manifest = "AndroidManifest.xml", - manifest_merger = "legacy", resource_files = glob(["res/**"]), tags = [ "manual", diff --git a/tensorflow/examples/image_retraining/BUILD b/tensorflow/examples/image_retraining/BUILD deleted file mode 100644 index ecd79a3b004d0ca9f50d2a6f140dbc353efe30cb..0000000000000000000000000000000000000000 --- a/tensorflow/examples/image_retraining/BUILD +++ /dev/null @@ -1,51 +0,0 @@ -# Description: -# Transfer learning example for TensorFlow. - -licenses(["notice"]) # Apache 2.0 - -exports_files(["LICENSE"]) - -load("//tensorflow:tensorflow.bzl", "py_test") - -py_binary( - name = "retrain", - srcs = [ - "retrain.py", - ], - srcs_version = "PY2AND3", - visibility = ["//tensorflow:__subpackages__"], - deps = [ - "//tensorflow:tensorflow_py", - "//tensorflow/python:framework", - "//tensorflow/python:framework_for_generated_wrappers", - "//tensorflow/python:graph_util", - "//tensorflow/python:platform", - "//tensorflow/python:util", - "//third_party/py/numpy", - ], -) - -py_test( - name = "retrain_test", - size = "small", - srcs = [ - "retrain.py", - "retrain_test.py", - ], - data = [ - ":data/labels.txt", - "//tensorflow/examples/label_image:data/grace_hopper.jpg", - ], - srcs_version = "PY2AND3", - deps = [ - ":retrain", - "//tensorflow:tensorflow_py", - "//tensorflow/python:framework_test_lib", - "//tensorflow/python:graph_util", - "//tensorflow/python:platform", - "//tensorflow/python:platform_test", - "//tensorflow/python:tensor_shape", - "//tensorflow/python:util", - "//third_party/py/numpy", - ], -) diff --git a/tensorflow/examples/image_retraining/README.md 
b/tensorflow/examples/image_retraining/README.md index 8a49525c6eff003f2c7acb592f213285e627eb51..3f0b3d12682b81e7c13ed6c2d32149746d506cd3 100644 --- a/tensorflow/examples/image_retraining/README.md +++ b/tensorflow/examples/image_retraining/README.md @@ -1,12 +1,15 @@ -retrain.py is an example script that shows how one can adapt a pretrained -network for other classification problems. A detailed overview of this script -can be found at: -https://codelabs.developers.google.com/codelabs/tensorflow-for-poets/#0 +**NOTE: This code has moved to** +https://github.com/tensorflow/hub/tree/master/examples/image_retraining -The script also shows how one can train layers -with quantized weights and activations instead of taking a pre-trained floating -point model and then quantizing weights and activations. -The output graphdef produced by this script is compatible with the TensorFlow -Lite Optimizing Converter and can be converted to TFLite format. +retrain.py is an example script that shows how one can adapt a pretrained +network for other classification problems (including use with TFLite and +quantization). +As of TensorFlow 1.7, it is recommended to use a pretrained network from +TensorFlow Hub, using the new version of this example found in the location +above, as explained in TensorFlow's revised [image retraining +tutorial](https://www.tensorflow.org/tutorials/image_retraining). +Older versions of this example (using frozen GraphDefs instead of +TensorFlow Hub modules) are available in the release branches of +TensorFlow versions up to and including 1.7. 
diff --git a/tensorflow/examples/image_retraining/data/labels.txt b/tensorflow/examples/image_retraining/data/labels.txt deleted file mode 100644 index bc1131ac4591ca1bdb840695b55f79a6feb95db3..0000000000000000000000000000000000000000 --- a/tensorflow/examples/image_retraining/data/labels.txt +++ /dev/null @@ -1,3 +0,0 @@ -Runner-up -Winner -Loser diff --git a/tensorflow/examples/image_retraining/retrain.py b/tensorflow/examples/image_retraining/retrain.py deleted file mode 100644 index 99a71206acbd533ec8bc5a9644435eacad564cd4..0000000000000000000000000000000000000000 --- a/tensorflow/examples/image_retraining/retrain.py +++ /dev/null @@ -1,1486 +0,0 @@ -# Copyright 2015 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -r"""Simple transfer learning with Inception v3 or Mobilenet models. - -With support for TensorBoard. - -This example shows how to take a Inception v3 or Mobilenet model trained on -ImageNet images, and train a new top layer that can recognize other classes of -images. - -The top layer receives as input a 2048-dimensional vector (1001-dimensional for -Mobilenet) for each image. We train a softmax layer on top of this -representation. Assuming the softmax layer contains N labels, this corresponds -to learning N + 2048*N (or 1001*N) model parameters corresponding to the -learned biases and weights. 
- -Here's an example, which assumes you have a folder containing class-named -subfolders, each full of images for each label. The example folder flower_photos -should have a structure like this: - -~/flower_photos/daisy/photo1.jpg -~/flower_photos/daisy/photo2.jpg -... -~/flower_photos/rose/anotherphoto77.jpg -... -~/flower_photos/sunflower/somepicture.jpg - -The subfolder names are important, since they define what label is applied to -each image, but the filenames themselves don't matter. Once your images are -prepared, you can run the training with a command like this: - -```bash -bazel build tensorflow/examples/image_retraining:retrain && \ -bazel-bin/tensorflow/examples/image_retraining/retrain \ - --image_dir ~/flower_photos -``` - -Or, if you have a pip installation of tensorflow, `retrain.py` can be run -without bazel: - -```bash -python tensorflow/examples/image_retraining/retrain.py \ - --image_dir ~/flower_photos -``` - -You can replace the image_dir argument with any folder containing subfolders of -images. The label for each image is taken from the name of the subfolder it's -in. - -This produces a new model file that can be loaded and run by any TensorFlow -program, for example the label_image sample code. - -By default this script will use the high accuracy, but comparatively large and -slow Inception v3 model architecture. It's recommended that you start with this -to validate that you have gathered good training data, but if you want to deploy -on resource-limited platforms, you can try the `--architecture` flag with a -Mobilenet model. 
For example: - -Run floating-point version of mobilenet: - -```bash -python tensorflow/examples/image_retraining/retrain.py \ - --image_dir ~/flower_photos --architecture mobilenet_1.0_224 -``` - -Run mobilenet, instrumented for quantization: - -```bash -python tensorflow/examples/image_retraining/retrain.py \ - --image_dir ~/flower_photos/ --architecture mobilenet_1.0_224_quant -``` - -These instrumented models can be converted to fully quantized mobile models via -TensorFlow Lite. - -There are 32 different Mobilenet models to choose from, with a variety of file -size and latency options. The first number can be '1.0', '0.75', '0.50', or -'0.25' to control the size, and the second controls the input image size, either -'224', '192', '160', or '128', with smaller sizes running faster. See -https://research.googleblog.com/2017/06/mobilenets-open-source-models-for.html -for more information on Mobilenet. - -To use with TensorBoard: - -By default, this script will log summaries to /tmp/retrain_logs directory - -Visualize the summaries with this command: - -tensorboard --logdir /tmp/retrain_logs - -To use with Tensorflow Serving: - -```bash -tensorflow_model_server --port=9000 --model_name=inception \ - --model_base_path=/tmp/saved_models/ -``` -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import argparse -from datetime import datetime -import hashlib -import os.path -import random -import re -import sys -import tarfile - -import numpy as np -from six.moves import urllib -import tensorflow as tf - -from tensorflow.python.framework import graph_util -from tensorflow.python.framework import tensor_shape -from tensorflow.python.platform import gfile -from tensorflow.python.util import compat - -FLAGS = None - -# These are all parameters that are tied to the particular model architecture -# we're using for Inception v3. These include things like tensor names and their -# sizes. 
If you want to adapt this script to work with another model, you will -# need to update these to reflect the values in the network you're using. -MAX_NUM_IMAGES_PER_CLASS = 2 ** 27 - 1 # ~134M - -# The location where variable checkpoints will be stored. -CHECKPOINT_NAME = '/tmp/_retrain_checkpoint' - - -def create_image_lists(image_dir, testing_percentage, validation_percentage): - """Builds a list of training images from the file system. - - Analyzes the sub folders in the image directory, splits them into stable - training, testing, and validation sets, and returns a data structure - describing the lists of images for each label and their paths. - - Args: - image_dir: String path to a folder containing subfolders of images. - testing_percentage: Integer percentage of the images to reserve for tests. - validation_percentage: Integer percentage of images reserved for validation. - - Returns: - A dictionary containing an entry for each label subfolder, with images split - into training, testing, and validation sets within each label. - """ - if not gfile.Exists(image_dir): - tf.logging.error("Image directory '" + image_dir + "' not found.") - return None - result = {} - sub_dirs = [x[0] for x in gfile.Walk(image_dir)] - # The root directory comes first, so skip it. - is_root_dir = True - for sub_dir in sub_dirs: - if is_root_dir: - is_root_dir = False - continue - extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] - file_list = [] - dir_name = os.path.basename(sub_dir) - if dir_name == image_dir: - continue - tf.logging.info("Looking for images in '" + dir_name + "'") - for extension in extensions: - file_glob = os.path.join(image_dir, dir_name, '*.' 
+ extension) - file_list.extend(gfile.Glob(file_glob)) - if not file_list: - tf.logging.warning('No files found') - continue - if len(file_list) < 20: - tf.logging.warning( - 'WARNING: Folder has less than 20 images, which may cause issues.') - elif len(file_list) > MAX_NUM_IMAGES_PER_CLASS: - tf.logging.warning( - 'WARNING: Folder {} has more than {} images. Some images will ' - 'never be selected.'.format(dir_name, MAX_NUM_IMAGES_PER_CLASS)) - label_name = re.sub(r'[^a-z0-9]+', ' ', dir_name.lower()) - training_images = [] - testing_images = [] - validation_images = [] - for file_name in file_list: - base_name = os.path.basename(file_name) - # We want to ignore anything after '_nohash_' in the file name when - # deciding which set to put an image in, the data set creator has a way of - # grouping photos that are close variations of each other. For example - # this is used in the plant disease data set to group multiple pictures of - # the same leaf. - hash_name = re.sub(r'_nohash_.*$', '', file_name) - # This looks a bit magical, but we need to decide whether this file should - # go into the training, testing, or validation sets, and we want to keep - # existing files in the same set even if more files are subsequently - # added. - # To do that, we need a stable way of deciding based on just the file name - # itself, so we do a hash of that and then use that to generate a - # probability value that we use to assign it. 
- hash_name_hashed = hashlib.sha1(compat.as_bytes(hash_name)).hexdigest() - percentage_hash = ((int(hash_name_hashed, 16) % - (MAX_NUM_IMAGES_PER_CLASS + 1)) * - (100.0 / MAX_NUM_IMAGES_PER_CLASS)) - if percentage_hash < validation_percentage: - validation_images.append(base_name) - elif percentage_hash < (testing_percentage + validation_percentage): - testing_images.append(base_name) - else: - training_images.append(base_name) - result[label_name] = { - 'dir': dir_name, - 'training': training_images, - 'testing': testing_images, - 'validation': validation_images, - } - return result - - -def get_image_path(image_lists, label_name, index, image_dir, category): - """"Returns a path to an image for a label at the given index. - - Args: - image_lists: Dictionary of training images for each label. - label_name: Label string we want to get an image for. - index: Int offset of the image we want. This will be moduloed by the - available number of images for the label, so it can be arbitrarily large. - image_dir: Root folder string of the subfolders containing the training - images. - category: Name string of set to pull images from - training, testing, or - validation. - - Returns: - File system path string to an image that meets the requested parameters. 
- - """ - if label_name not in image_lists: - tf.logging.fatal('Label does not exist %s.', label_name) - label_lists = image_lists[label_name] - if category not in label_lists: - tf.logging.fatal('Category does not exist %s.', category) - category_list = label_lists[category] - if not category_list: - tf.logging.fatal('Label %s has no images in the category %s.', - label_name, category) - mod_index = index % len(category_list) - base_name = category_list[mod_index] - sub_dir = label_lists['dir'] - full_path = os.path.join(image_dir, sub_dir, base_name) - return full_path - - -def get_bottleneck_path(image_lists, label_name, index, bottleneck_dir, - category, architecture): - """"Returns a path to a bottleneck file for a label at the given index. - - Args: - image_lists: Dictionary of training images for each label. - label_name: Label string we want to get an image for. - index: Integer offset of the image we want. This will be moduloed by the - available number of images for the label, so it can be arbitrarily large. - bottleneck_dir: Folder string holding cached files of bottleneck values. - category: Name string of set to pull images from - training, testing, or - validation. - architecture: The name of the model architecture. - - Returns: - File system path string to an image that meets the requested parameters. - """ - return get_image_path(image_lists, label_name, index, bottleneck_dir, - category) + '_' + architecture + '.txt' - - -def create_model_graph(model_info): - """"Creates a graph from saved GraphDef file and returns a Graph object. - - Args: - model_info: Dictionary containing information about the model architecture. - - Returns: - Graph holding the trained Inception network, and various tensors we'll be - manipulating. 
- """ - with tf.Graph().as_default() as graph: - model_path = os.path.join(FLAGS.model_dir, model_info['model_file_name']) - print('Model path: ', model_path) - with gfile.FastGFile(model_path, 'rb') as f: - graph_def = tf.GraphDef() - graph_def.ParseFromString(f.read()) - bottleneck_tensor, resized_input_tensor = (tf.import_graph_def( - graph_def, - name='', - return_elements=[ - model_info['bottleneck_tensor_name'], - model_info['resized_input_tensor_name'], - ])) - return graph, bottleneck_tensor, resized_input_tensor - - -def run_bottleneck_on_image(sess, image_data, image_data_tensor, - decoded_image_tensor, resized_input_tensor, - bottleneck_tensor): - """Runs inference on an image to extract the 'bottleneck' summary layer. - - Args: - sess: Current active TensorFlow Session. - image_data: String of raw JPEG data. - image_data_tensor: Input data layer in the graph. - decoded_image_tensor: Output of initial image resizing and preprocessing. - resized_input_tensor: The input node of the recognition graph. - bottleneck_tensor: Layer before the final softmax. - - Returns: - Numpy array of bottleneck values. - """ - # First decode the JPEG image, resize it, and rescale the pixel values. - resized_input_values = sess.run(decoded_image_tensor, - {image_data_tensor: image_data}) - # Then run it through the recognition network. - bottleneck_values = sess.run(bottleneck_tensor, - {resized_input_tensor: resized_input_values}) - bottleneck_values = np.squeeze(bottleneck_values) - return bottleneck_values - - -def maybe_download_and_extract(data_url): - """Download and extract model tar file. - - If the pretrained model we're using doesn't already exist, this function - downloads it from the TensorFlow.org website and unpacks it into a directory. - - Args: - data_url: Web location of the tar file containing the pretrained model. 
- """ - dest_directory = FLAGS.model_dir - if not os.path.exists(dest_directory): - os.makedirs(dest_directory) - filename = data_url.split('/')[-1] - filepath = os.path.join(dest_directory, filename) - if not os.path.exists(filepath): - - def _progress(count, block_size, total_size): - sys.stdout.write('\r>> Downloading %s %.1f%%' % - (filename, - float(count * block_size) / float(total_size) * 100.0)) - sys.stdout.flush() - - filepath, _ = urllib.request.urlretrieve(data_url, filepath, _progress) - print() - statinfo = os.stat(filepath) - tf.logging.info('Successfully downloaded %s %d bytes.', filename, - statinfo.st_size) - print('Extracting file from ', filepath) - tarfile.open(filepath, 'r:gz').extractall(dest_directory) - else: - print('Not extracting or downloading files, model already present in disk') - - -def ensure_dir_exists(dir_name): - """Makes sure the folder exists on disk. - - Args: - dir_name: Path string to the folder we want to create. - """ - if not os.path.exists(dir_name): - os.makedirs(dir_name) - - -bottleneck_path_2_bottleneck_values = {} - - -def create_bottleneck_file(bottleneck_path, image_lists, label_name, index, - image_dir, category, sess, jpeg_data_tensor, - decoded_image_tensor, resized_input_tensor, - bottleneck_tensor): - """Create a single bottleneck file.""" - tf.logging.info('Creating bottleneck at ' + bottleneck_path) - image_path = get_image_path(image_lists, label_name, index, - image_dir, category) - if not gfile.Exists(image_path): - tf.logging.fatal('File does not exist %s', image_path) - image_data = gfile.FastGFile(image_path, 'rb').read() - try: - bottleneck_values = run_bottleneck_on_image( - sess, image_data, jpeg_data_tensor, decoded_image_tensor, - resized_input_tensor, bottleneck_tensor) - except Exception as e: - raise RuntimeError('Error during processing file %s (%s)' % (image_path, - str(e))) - bottleneck_string = ','.join(str(x) for x in bottleneck_values) - with open(bottleneck_path, 'w') as 
bottleneck_file: - bottleneck_file.write(bottleneck_string) - - -def get_or_create_bottleneck(sess, image_lists, label_name, index, image_dir, - category, bottleneck_dir, jpeg_data_tensor, - decoded_image_tensor, resized_input_tensor, - bottleneck_tensor, architecture): - """Retrieves or calculates bottleneck values for an image. - - If a cached version of the bottleneck data exists on-disk, return that, - otherwise calculate the data and save it to disk for future use. - - Args: - sess: The current active TensorFlow Session. - image_lists: Dictionary of training images for each label. - label_name: Label string we want to get an image for. - index: Integer offset of the image we want. This will be modulo-ed by the - available number of images for the label, so it can be arbitrarily large. - image_dir: Root folder string of the subfolders containing the training - images. - category: Name string of which set to pull images from - training, testing, - or validation. - bottleneck_dir: Folder string holding cached files of bottleneck values. - jpeg_data_tensor: The tensor to feed loaded jpeg data into. - decoded_image_tensor: The output of decoding and resizing the image. - resized_input_tensor: The input node of the recognition graph. - bottleneck_tensor: The output tensor for the bottleneck values. - architecture: The name of the model architecture. - - Returns: - Numpy array of values produced by the bottleneck layer for the image. 
- """ - label_lists = image_lists[label_name] - sub_dir = label_lists['dir'] - sub_dir_path = os.path.join(bottleneck_dir, sub_dir) - ensure_dir_exists(sub_dir_path) - bottleneck_path = get_bottleneck_path(image_lists, label_name, index, - bottleneck_dir, category, architecture) - if not os.path.exists(bottleneck_path): - create_bottleneck_file(bottleneck_path, image_lists, label_name, index, - image_dir, category, sess, jpeg_data_tensor, - decoded_image_tensor, resized_input_tensor, - bottleneck_tensor) - with open(bottleneck_path, 'r') as bottleneck_file: - bottleneck_string = bottleneck_file.read() - did_hit_error = False - try: - bottleneck_values = [float(x) for x in bottleneck_string.split(',')] - except ValueError: - tf.logging.warning('Invalid float found, recreating bottleneck') - did_hit_error = True - if did_hit_error: - create_bottleneck_file(bottleneck_path, image_lists, label_name, index, - image_dir, category, sess, jpeg_data_tensor, - decoded_image_tensor, resized_input_tensor, - bottleneck_tensor) - with open(bottleneck_path, 'r') as bottleneck_file: - bottleneck_string = bottleneck_file.read() - # Allow exceptions to propagate here, since they shouldn't happen after a - # fresh creation - bottleneck_values = [float(x) for x in bottleneck_string.split(',')] - return bottleneck_values - - -def cache_bottlenecks(sess, image_lists, image_dir, bottleneck_dir, - jpeg_data_tensor, decoded_image_tensor, - resized_input_tensor, bottleneck_tensor, architecture): - """Ensures all the training, testing, and validation bottlenecks are cached. - - Because we're likely to read the same image multiple times (if there are no - distortions applied during training) it can speed things up a lot if we - calculate the bottleneck layer values once for each image during - preprocessing, and then just read those cached values repeatedly during - training. Here we go through all the images we've found, calculate those - values, and save them off. 
- - Args: - sess: The current active TensorFlow Session. - image_lists: Dictionary of training images for each label. - image_dir: Root folder string of the subfolders containing the training - images. - bottleneck_dir: Folder string holding cached files of bottleneck values. - jpeg_data_tensor: Input tensor for jpeg data from file. - decoded_image_tensor: The output of decoding and resizing the image. - resized_input_tensor: The input node of the recognition graph. - bottleneck_tensor: The penultimate output layer of the graph. - architecture: The name of the model architecture. - - Returns: - Nothing. - """ - how_many_bottlenecks = 0 - ensure_dir_exists(bottleneck_dir) - for label_name, label_lists in image_lists.items(): - for category in ['training', 'testing', 'validation']: - category_list = label_lists[category] - for index, unused_base_name in enumerate(category_list): - get_or_create_bottleneck( - sess, image_lists, label_name, index, image_dir, category, - bottleneck_dir, jpeg_data_tensor, decoded_image_tensor, - resized_input_tensor, bottleneck_tensor, architecture) - - how_many_bottlenecks += 1 - if how_many_bottlenecks % 100 == 0: - tf.logging.info( - str(how_many_bottlenecks) + ' bottleneck files created.') - - -def get_random_cached_bottlenecks(sess, image_lists, how_many, category, - bottleneck_dir, image_dir, jpeg_data_tensor, - decoded_image_tensor, resized_input_tensor, - bottleneck_tensor, architecture): - """Retrieves bottleneck values for cached images. - - If no distortions are being applied, this function can retrieve the cached - bottleneck values directly from disk for images. It picks a random set of - images from the specified category. - - Args: - sess: Current TensorFlow Session. - image_lists: Dictionary of training images for each label. - how_many: If positive, a random sample of this size will be chosen. - If negative, all bottlenecks will be retrieved. 
- category: Name string of which set to pull from - training, testing, or - validation. - bottleneck_dir: Folder string holding cached files of bottleneck values. - image_dir: Root folder string of the subfolders containing the training - images. - jpeg_data_tensor: The layer to feed jpeg image data into. - decoded_image_tensor: The output of decoding and resizing the image. - resized_input_tensor: The input node of the recognition graph. - bottleneck_tensor: The bottleneck output layer of the CNN graph. - architecture: The name of the model architecture. - - Returns: - List of bottleneck arrays, their corresponding ground truths, and the - relevant filenames. - """ - class_count = len(image_lists.keys()) - bottlenecks = [] - ground_truths = [] - filenames = [] - if how_many >= 0: - # Retrieve a random sample of bottlenecks. - for unused_i in range(how_many): - label_index = random.randrange(class_count) - label_name = list(image_lists.keys())[label_index] - image_index = random.randrange(MAX_NUM_IMAGES_PER_CLASS + 1) - image_name = get_image_path(image_lists, label_name, image_index, - image_dir, category) - bottleneck = get_or_create_bottleneck( - sess, image_lists, label_name, image_index, image_dir, category, - bottleneck_dir, jpeg_data_tensor, decoded_image_tensor, - resized_input_tensor, bottleneck_tensor, architecture) - bottlenecks.append(bottleneck) - ground_truths.append(label_index) - filenames.append(image_name) - else: - # Retrieve all bottlenecks. 
- for label_index, label_name in enumerate(image_lists.keys()): - for image_index, image_name in enumerate( - image_lists[label_name][category]): - image_name = get_image_path(image_lists, label_name, image_index, - image_dir, category) - bottleneck = get_or_create_bottleneck( - sess, image_lists, label_name, image_index, image_dir, category, - bottleneck_dir, jpeg_data_tensor, decoded_image_tensor, - resized_input_tensor, bottleneck_tensor, architecture) - bottlenecks.append(bottleneck) - ground_truths.append(label_index) - filenames.append(image_name) - return bottlenecks, ground_truths, filenames - - -def get_random_distorted_bottlenecks( - sess, image_lists, how_many, category, image_dir, input_jpeg_tensor, - distorted_image, resized_input_tensor, bottleneck_tensor): - """Retrieves bottleneck values for training images, after distortions. - - If we're training with distortions like crops, scales, or flips, we have to - recalculate the full model for every image, and so we can't use cached - bottleneck values. Instead we find random images for the requested category, - run them through the distortion graph, and then the full graph to get the - bottleneck results for each. - - Args: - sess: Current TensorFlow Session. - image_lists: Dictionary of training images for each label. - how_many: The integer number of bottleneck values to return. - category: Name string of which set of images to fetch - training, testing, - or validation. - image_dir: Root folder string of the subfolders containing the training - images. - input_jpeg_tensor: The input layer we feed the image data to. - distorted_image: The output node of the distortion graph. - resized_input_tensor: The input node of the recognition graph. - bottleneck_tensor: The bottleneck output layer of the CNN graph. - - Returns: - List of bottleneck arrays and their corresponding ground truths. 
- """ - class_count = len(image_lists.keys()) - bottlenecks = [] - ground_truths = [] - for unused_i in range(how_many): - label_index = random.randrange(class_count) - label_name = list(image_lists.keys())[label_index] - image_index = random.randrange(MAX_NUM_IMAGES_PER_CLASS + 1) - image_path = get_image_path(image_lists, label_name, image_index, image_dir, - category) - if not gfile.Exists(image_path): - tf.logging.fatal('File does not exist %s', image_path) - jpeg_data = gfile.FastGFile(image_path, 'rb').read() - # Note that we materialize the distorted_image_data as a numpy array before - # sending running inference on the image. This involves 2 memory copies and - # might be optimized in other implementations. - distorted_image_data = sess.run(distorted_image, - {input_jpeg_tensor: jpeg_data}) - bottleneck_values = sess.run(bottleneck_tensor, - {resized_input_tensor: distorted_image_data}) - bottleneck_values = np.squeeze(bottleneck_values) - bottlenecks.append(bottleneck_values) - ground_truths.append(label_index) - return bottlenecks, ground_truths - - -def should_distort_images(flip_left_right, random_crop, random_scale, - random_brightness): - """Whether any distortions are enabled, from the input flags. - - Args: - flip_left_right: Boolean whether to randomly mirror images horizontally. - random_crop: Integer percentage setting the total margin used around the - crop box. - random_scale: Integer percentage of how much to vary the scale by. - random_brightness: Integer range to randomly multiply the pixel values by. - - Returns: - Boolean value indicating whether any distortions should be applied. - """ - return (flip_left_right or (random_crop != 0) or (random_scale != 0) or - (random_brightness != 0)) - - -def add_input_distortions(flip_left_right, random_crop, random_scale, - random_brightness, input_width, input_height, - input_depth, input_mean, input_std): - """Creates the operations to apply the specified distortions. 
- - During training it can help to improve the results if we run the images - through simple distortions like crops, scales, and flips. These reflect the - kind of variations we expect in the real world, and so can help train the - model to cope with natural data more effectively. Here we take the supplied - parameters and construct a network of operations to apply them to an image. - - Cropping - ~~~~~~~~ - - Cropping is done by placing a bounding box at a random position in the full - image. The cropping parameter controls the size of that box relative to the - input image. If it's zero, then the box is the same size as the input and no - cropping is performed. If the value is 50%, then the crop box will be half the - width and height of the input. In a diagram it looks like this: - - < width > - +---------------------+ - | | - | width - crop% | - | < > | - | +------+ | - | | | | - | | | | - | | | | - | +------+ | - | | - | | - +---------------------+ - - Scaling - ~~~~~~~ - - Scaling is a lot like cropping, except that the bounding box is always - centered and its size varies randomly within the given range. For example if - the scale percentage is zero, then the bounding box is the same size as the - input and no scaling is applied. If it's 50%, then the bounding box will be in - a random range between half the width and height and full size. - - Args: - flip_left_right: Boolean whether to randomly mirror images horizontally. - random_crop: Integer percentage setting the total margin used around the - crop box. - random_scale: Integer percentage of how much to vary the scale by. - random_brightness: Integer range to randomly multiply the pixel values by. - graph. - input_width: Horizontal size of expected input image to model. - input_height: Vertical size of expected input image to model. - input_depth: How many channels the expected input image should have. - input_mean: Pixel value that should be zero in the image for the graph. 
- input_std: How much to divide the pixel values by before recognition. - - Returns: - The jpeg input layer and the distorted result tensor. - """ - - jpeg_data = tf.placeholder(tf.string, name='DistortJPGInput') - decoded_image = tf.image.decode_jpeg(jpeg_data, channels=input_depth) - decoded_image_as_float = tf.cast(decoded_image, dtype=tf.float32) - decoded_image_4d = tf.expand_dims(decoded_image_as_float, 0) - margin_scale = 1.0 + (random_crop / 100.0) - resize_scale = 1.0 + (random_scale / 100.0) - margin_scale_value = tf.constant(margin_scale) - resize_scale_value = tf.random_uniform(tensor_shape.scalar(), - minval=1.0, - maxval=resize_scale) - scale_value = tf.multiply(margin_scale_value, resize_scale_value) - precrop_width = tf.multiply(scale_value, input_width) - precrop_height = tf.multiply(scale_value, input_height) - precrop_shape = tf.stack([precrop_height, precrop_width]) - precrop_shape_as_int = tf.cast(precrop_shape, dtype=tf.int32) - precropped_image = tf.image.resize_bilinear(decoded_image_4d, - precrop_shape_as_int) - precropped_image_3d = tf.squeeze(precropped_image, squeeze_dims=[0]) - cropped_image = tf.random_crop(precropped_image_3d, - [input_height, input_width, input_depth]) - if flip_left_right: - flipped_image = tf.image.random_flip_left_right(cropped_image) - else: - flipped_image = cropped_image - brightness_min = 1.0 - (random_brightness / 100.0) - brightness_max = 1.0 + (random_brightness / 100.0) - brightness_value = tf.random_uniform(tensor_shape.scalar(), - minval=brightness_min, - maxval=brightness_max) - brightened_image = tf.multiply(flipped_image, brightness_value) - offset_image = tf.subtract(brightened_image, input_mean) - mul_image = tf.multiply(offset_image, 1.0 / input_std) - distort_result = tf.expand_dims(mul_image, 0, name='DistortResult') - return jpeg_data, distort_result - - -def variable_summaries(var): - """Attach a lot of summaries to a Tensor (for TensorBoard visualization).""" - with tf.name_scope('summaries'): 
- mean = tf.reduce_mean(var) - tf.summary.scalar('mean', mean) - with tf.name_scope('stddev'): - stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean))) - tf.summary.scalar('stddev', stddev) - tf.summary.scalar('max', tf.reduce_max(var)) - tf.summary.scalar('min', tf.reduce_min(var)) - tf.summary.histogram('histogram', var) - - -def add_final_retrain_ops(class_count, final_tensor_name, bottleneck_tensor, - bottleneck_tensor_size, quantize_layer, is_training): - """Adds a new softmax and fully-connected layer for training and eval. - - We need to retrain the top layer to identify our new classes, so this function - adds the right operations to the graph, along with some variables to hold the - weights, and then sets up all the gradients for the backward pass. - - The set up for the softmax and fully-connected layers is based on: - https://www.tensorflow.org/versions/master/tutorials/mnist/beginners/index.html - - Args: - class_count: Integer of how many categories of things we're trying to - recognize. - final_tensor_name: Name string for the new final node that produces results. - bottleneck_tensor: The output of the main CNN graph. - bottleneck_tensor_size: How many entries in the bottleneck vector. - quantize_layer: Boolean, specifying whether the newly added layer should be - instrumented for quantized. - is_training: Boolean, specifying whether the newly add layer is for training - or eval. - - Returns: - The tensors for the training and cross entropy results, and tensors for the - bottleneck input and ground truth input. - """ - with tf.name_scope('input'): - bottleneck_input = tf.placeholder_with_default( - bottleneck_tensor, - shape=[None, bottleneck_tensor_size], - name='BottleneckInputPlaceholder') - - ground_truth_input = tf.placeholder( - tf.int64, [None], name='GroundTruthInput') - - # Organizing the following ops so they are easier to see in TensorBoard. 
- layer_name = 'final_retrain_ops' - with tf.name_scope(layer_name): - with tf.name_scope('weights'): - initial_value = tf.truncated_normal( - [bottleneck_tensor_size, class_count], stddev=0.001) - layer_weights = tf.Variable(initial_value, name='final_weights') - variable_summaries(layer_weights) - - with tf.name_scope('biases'): - layer_biases = tf.Variable(tf.zeros([class_count]), name='final_biases') - variable_summaries(layer_biases) - - with tf.name_scope('Wx_plus_b'): - logits = tf.matmul(bottleneck_input, layer_weights) + layer_biases - tf.summary.histogram('pre_activations', logits) - - final_tensor = tf.nn.softmax(logits, name=final_tensor_name) - - # The tf.contrib.quantize functions rewrite the graph in place for - # quantization. The imported model graph has already been rewritten, so upon - # calling these rewrites, only the newly added final layer will be - # transformed. - if quantize_layer: - if is_training: - tf.contrib.quantize.create_training_graph() - else: - tf.contrib.quantize.create_eval_graph() - - tf.summary.histogram('activations', final_tensor) - - # If this is an eval graph, we don't need to add loss ops or an optimizer. - if not is_training: - return None, None, bottleneck_input, ground_truth_input, final_tensor - - with tf.name_scope('cross_entropy'): - cross_entropy_mean = tf.losses.sparse_softmax_cross_entropy( - labels=ground_truth_input, logits=logits) - - tf.summary.scalar('cross_entropy', cross_entropy_mean) - - with tf.name_scope('train'): - optimizer = tf.train.GradientDescentOptimizer(FLAGS.learning_rate) - train_step = optimizer.minimize(cross_entropy_mean) - - return (train_step, cross_entropy_mean, bottleneck_input, ground_truth_input, - final_tensor) - - -def add_evaluation_step(result_tensor, ground_truth_tensor): - """Inserts the operations we need to evaluate the accuracy of our results. - - Args: - result_tensor: The new final node that produces results. 
- ground_truth_tensor: The node we feed ground truth data - into. - - Returns: - Tuple of (evaluation step, prediction). - """ - with tf.name_scope('accuracy'): - with tf.name_scope('correct_prediction'): - prediction = tf.argmax(result_tensor, 1) - correct_prediction = tf.equal(prediction, ground_truth_tensor) - with tf.name_scope('accuracy'): - evaluation_step = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) - tf.summary.scalar('accuracy', evaluation_step) - return evaluation_step, prediction - - -def run_final_eval(sess, model_info, class_count, image_lists, jpeg_data_tensor, - decoded_image_tensor, resized_image_tensor, - bottleneck_tensor): - """Runs a final evaluation on an eval graph using the test data set. - - Args: - sess: Session for the train graph. - model_info: Model info dictionary from create_model_info() - class_count: Number of classes - image_lists: Dictionary of training images for each label. - jpeg_data_tensor: The layer to feed jpeg image data into. - decoded_image_tensor: The output of decoding and resizing the image. - resized_image_tensor: The input node of the recognition graph. - bottleneck_tensor: The bottleneck output layer of the CNN graph. 
- """ - (sess, bottleneck_input, ground_truth_input, evaluation_step, - prediction) = build_eval_session(model_info, class_count) - - test_bottlenecks, test_ground_truth, test_filenames = ( - get_random_cached_bottlenecks(sess, image_lists, FLAGS.test_batch_size, - 'testing', FLAGS.bottleneck_dir, - FLAGS.image_dir, jpeg_data_tensor, - decoded_image_tensor, resized_image_tensor, - bottleneck_tensor, FLAGS.architecture)) - test_accuracy, predictions = sess.run( - [evaluation_step, prediction], - feed_dict={ - bottleneck_input: test_bottlenecks, - ground_truth_input: test_ground_truth - }) - tf.logging.info('Final test accuracy = %.1f%% (N=%d)' % - (test_accuracy * 100, len(test_bottlenecks))) - - if FLAGS.print_misclassified_test_images: - tf.logging.info('=== MISCLASSIFIED TEST IMAGES ===') - for i, test_filename in enumerate(test_filenames): - if predictions[i] != test_ground_truth[i]: - tf.logging.info('%70s %s' % (test_filename, - list(image_lists.keys())[predictions[i]])) - - -def build_eval_session(model_info, class_count): - """Builds an restored eval session without train operations for exporting. - - Args: - model_info: Model info dictionary from create_model_info() - class_count: Number of classes - - Returns: - Eval session containing the restored eval graph. - The bottleneck input, ground truth, eval step, and prediction tensors. - """ - # If quantized, we need to create the correct eval graph for exporting. - eval_graph, bottleneck_tensor, _ = create_model_graph(model_info) - - eval_sess = tf.Session(graph=eval_graph) - with eval_graph.as_default(): - # Add the new layer for exporting. - (_, _, bottleneck_input, - ground_truth_input, final_tensor) = add_final_retrain_ops( - class_count, FLAGS.final_tensor_name, bottleneck_tensor, - model_info['bottleneck_tensor_size'], model_info['quantize_layer'], - False) - - # Now we need to restore the values from the training graph to the eval - # graph. 
- tf.train.Saver().restore(eval_sess, CHECKPOINT_NAME) - - evaluation_step, prediction = add_evaluation_step(final_tensor, - ground_truth_input) - - return (eval_sess, bottleneck_input, ground_truth_input, evaluation_step, - prediction) - - -def save_graph_to_file(graph, graph_file_name, model_info, class_count): - """Saves an graph to file, creating a valid quantized one if necessary.""" - sess, _, _, _, _ = build_eval_session(model_info, class_count) - graph = sess.graph - - output_graph_def = graph_util.convert_variables_to_constants( - sess, graph.as_graph_def(), [FLAGS.final_tensor_name]) - - with gfile.FastGFile(graph_file_name, 'wb') as f: - f.write(output_graph_def.SerializeToString()) - - -def prepare_file_system(): - # Setup the directory we'll write summaries to for TensorBoard - if tf.gfile.Exists(FLAGS.summaries_dir): - tf.gfile.DeleteRecursively(FLAGS.summaries_dir) - tf.gfile.MakeDirs(FLAGS.summaries_dir) - if FLAGS.intermediate_store_frequency > 0: - ensure_dir_exists(FLAGS.intermediate_output_graphs_dir) - return - - -def create_model_info(architecture): - """Given the name of a model architecture, returns information about it. - - There are different base image recognition pretrained models that can be - retrained using transfer learning, and this function translates from the name - of a model to the attributes that are needed to download and train with it. - - Args: - architecture: Name of a model architecture. - - Returns: - Dictionary of information about the model, or None if the name isn't - recognized - - Raises: - ValueError: If architecture name is unknown. 
- """ - architecture = architecture.lower() - is_quantized = False - if architecture == 'inception_v3': - # pylint: disable=line-too-long - data_url = 'http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz' - # pylint: enable=line-too-long - bottleneck_tensor_name = 'pool_3/_reshape:0' - bottleneck_tensor_size = 2048 - input_width = 299 - input_height = 299 - input_depth = 3 - resized_input_tensor_name = 'Mul:0' - model_file_name = 'classify_image_graph_def.pb' - input_mean = 128 - input_std = 128 - elif architecture.startswith('mobilenet_'): - parts = architecture.split('_') - if len(parts) != 3 and len(parts) != 4: - tf.logging.error("Couldn't understand architecture name '%s'", - architecture) - return None - version_string = parts[1] - if (version_string != '1.0' and version_string != '0.75' and - version_string != '0.5' and version_string != '0.25'): - tf.logging.error( - """"The Mobilenet version should be '1.0', '0.75', '0.5', or '0.25', - but found '%s' for architecture '%s'""", version_string, architecture) - return None - size_string = parts[2] - if (size_string != '224' and size_string != '192' and - size_string != '160' and size_string != '128'): - tf.logging.error( - """The Mobilenet input size should be '224', '192', '160', or '128', - but found '%s' for architecture '%s'""", - size_string, architecture) - return None - if len(parts) == 3: - is_quantized = False - else: - if parts[3] != 'quant': - tf.logging.error( - "Couldn't understand architecture suffix '%s' for '%s'", parts[3], - architecture) - return None - is_quantized = True - - data_url = 'http://download.tensorflow.org/models/mobilenet_v1_2018_02_22/' - model_name = 'mobilenet_v1_' + version_string + '_' + size_string - if is_quantized: - model_name += '_quant' - data_url += model_name + '.tgz' - bottleneck_tensor_name = 'MobilenetV1/Predictions/Reshape:0' - resized_input_tensor_name = 'input:0' - model_file_name = model_name + '_frozen.pb' - - bottleneck_tensor_size 
= 1001 - input_width = int(size_string) - input_height = int(size_string) - input_depth = 3 - input_mean = 127.5 - input_std = 127.5 - else: - tf.logging.error("Couldn't understand architecture name '%s'", architecture) - raise ValueError('Unknown architecture', architecture) - - return { - 'data_url': data_url, - 'bottleneck_tensor_name': bottleneck_tensor_name, - 'bottleneck_tensor_size': bottleneck_tensor_size, - 'input_width': input_width, - 'input_height': input_height, - 'input_depth': input_depth, - 'resized_input_tensor_name': resized_input_tensor_name, - 'model_file_name': model_file_name, - 'input_mean': input_mean, - 'input_std': input_std, - 'quantize_layer': is_quantized, - } - - -def add_jpeg_decoding(input_width, input_height, input_depth, input_mean, - input_std): - """Adds operations that perform JPEG decoding and resizing to the graph.. - - Args: - input_width: Desired width of the image fed into the recognizer graph. - input_height: Desired width of the image fed into the recognizer graph. - input_depth: Desired channels of the image fed into the recognizer graph. - input_mean: Pixel value that should be zero in the image for the graph. - input_std: How much to divide the pixel values by before recognition. - - Returns: - Tensors for the node to feed JPEG data into, and the output of the - preprocessing steps. 
- """ - jpeg_data = tf.placeholder(tf.string, name='DecodeJPGInput') - decoded_image = tf.image.decode_jpeg(jpeg_data, channels=input_depth) - decoded_image_as_float = tf.cast(decoded_image, dtype=tf.float32) - decoded_image_4d = tf.expand_dims(decoded_image_as_float, 0) - resize_shape = tf.stack([input_height, input_width]) - resize_shape_as_int = tf.cast(resize_shape, dtype=tf.int32) - resized_image = tf.image.resize_bilinear(decoded_image_4d, - resize_shape_as_int) - offset_image = tf.subtract(resized_image, input_mean) - mul_image = tf.multiply(offset_image, 1.0 / input_std) - return jpeg_data, mul_image - - -def export_model(model_info, class_count, saved_model_dir): - """Exports model for serving. - - Args: - model_info: The modelinfo for the current model. - class_count: The number of classes. - saved_model_dir: Directory in which to save exported model and variables. - """ - # The SavedModel should hold the eval graph. - sess, _, _, _, _ = build_eval_session(model_info, class_count) - graph = sess.graph - with graph.as_default(): - input_tensor = model_info['resized_input_tensor_name'] - in_image = sess.graph.get_tensor_by_name(input_tensor) - inputs = {'image': tf.saved_model.utils.build_tensor_info(in_image)} - - out_classes = sess.graph.get_tensor_by_name('final_result:0') - outputs = { - 'prediction': tf.saved_model.utils.build_tensor_info(out_classes) - } - - signature = tf.saved_model.signature_def_utils.build_signature_def( - inputs=inputs, - outputs=outputs, - method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME) - - legacy_init_op = tf.group(tf.tables_initializer(), name='legacy_init_op') - - # Save out the SavedModel. - builder = tf.saved_model.builder.SavedModelBuilder(saved_model_dir) - builder.add_meta_graph_and_variables( - sess, [tf.saved_model.tag_constants.SERVING], - signature_def_map={ - tf.saved_model.signature_constants. 
- DEFAULT_SERVING_SIGNATURE_DEF_KEY: - signature - }, - legacy_init_op=legacy_init_op) - builder.save() - - -def main(_): - # Needed to make sure the logging output is visible. - # See https://github.com/tensorflow/tensorflow/issues/3047 - tf.logging.set_verbosity(tf.logging.INFO) - - # Prepare necessary directories that can be used during training - prepare_file_system() - - # Gather information about the model architecture we'll be using. - model_info = create_model_info(FLAGS.architecture) - if not model_info: - tf.logging.error('Did not recognize architecture flag') - return -1 - - # Look at the folder structure, and create lists of all the images. - image_lists = create_image_lists(FLAGS.image_dir, FLAGS.testing_percentage, - FLAGS.validation_percentage) - class_count = len(image_lists.keys()) - if class_count == 0: - tf.logging.error('No valid folders of images found at ' + FLAGS.image_dir) - return -1 - if class_count == 1: - tf.logging.error('Only one valid folder of images found at ' + - FLAGS.image_dir + - ' - multiple classes are needed for classification.') - return -1 - - # See if the command-line flags mean we're applying any distortions. - do_distort_images = should_distort_images( - FLAGS.flip_left_right, FLAGS.random_crop, FLAGS.random_scale, - FLAGS.random_brightness) - - # Set up the pre-trained graph. - maybe_download_and_extract(model_info['data_url']) - graph, bottleneck_tensor, resized_image_tensor = ( - create_model_graph(model_info)) - - # Add the new layer that we'll be training. - with graph.as_default(): - (train_step, cross_entropy, bottleneck_input, - ground_truth_input, final_tensor) = add_final_retrain_ops( - class_count, FLAGS.final_tensor_name, bottleneck_tensor, - model_info['bottleneck_tensor_size'], model_info['quantize_layer'], - True) - - with tf.Session(graph=graph) as sess: - # Set up the image decoding sub-graph. 
- jpeg_data_tensor, decoded_image_tensor = add_jpeg_decoding( - model_info['input_width'], model_info['input_height'], - model_info['input_depth'], model_info['input_mean'], - model_info['input_std']) - - if do_distort_images: - # We will be applying distortions, so setup the operations we'll need. - (distorted_jpeg_data_tensor, - distorted_image_tensor) = add_input_distortions( - FLAGS.flip_left_right, FLAGS.random_crop, FLAGS.random_scale, - FLAGS.random_brightness, model_info['input_width'], - model_info['input_height'], model_info['input_depth'], - model_info['input_mean'], model_info['input_std']) - else: - # We'll make sure we've calculated the 'bottleneck' image summaries and - # cached them on disk. - cache_bottlenecks(sess, image_lists, FLAGS.image_dir, - FLAGS.bottleneck_dir, jpeg_data_tensor, - decoded_image_tensor, resized_image_tensor, - bottleneck_tensor, FLAGS.architecture) - - # Create the operations we need to evaluate the accuracy of our new layer. - evaluation_step, _ = add_evaluation_step(final_tensor, ground_truth_input) - - # Merge all the summaries and write them out to the summaries_dir - merged = tf.summary.merge_all() - train_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/train', - sess.graph) - - validation_writer = tf.summary.FileWriter( - FLAGS.summaries_dir + '/validation') - - # Create a train saver that is used to restore values into an eval graph - # when exporting models. - train_saver = tf.train.Saver() - - # Set up all our weights to their initial default values. - init = tf.global_variables_initializer() - sess.run(init) - - # Run the training for as many cycles as requested on the command line. - for i in range(FLAGS.how_many_training_steps): - # Get a batch of input bottleneck values, either calculated fresh every - # time with distortions applied, or from the cache stored on disk. 
- if do_distort_images: - (train_bottlenecks, - train_ground_truth) = get_random_distorted_bottlenecks( - sess, image_lists, FLAGS.train_batch_size, 'training', - FLAGS.image_dir, distorted_jpeg_data_tensor, - distorted_image_tensor, resized_image_tensor, bottleneck_tensor) - else: - (train_bottlenecks, - train_ground_truth, _) = get_random_cached_bottlenecks( - sess, image_lists, FLAGS.train_batch_size, 'training', - FLAGS.bottleneck_dir, FLAGS.image_dir, jpeg_data_tensor, - decoded_image_tensor, resized_image_tensor, bottleneck_tensor, - FLAGS.architecture) - # Feed the bottlenecks and ground truth into the graph, and run a training - # step. Capture training summaries for TensorBoard with the `merged` op. - train_summary, _ = sess.run( - [merged, train_step], - feed_dict={bottleneck_input: train_bottlenecks, - ground_truth_input: train_ground_truth}) - train_writer.add_summary(train_summary, i) - - # Every so often, print out how well the graph is training. - is_last_step = (i + 1 == FLAGS.how_many_training_steps) - if (i % FLAGS.eval_step_interval) == 0 or is_last_step: - train_accuracy, cross_entropy_value = sess.run( - [evaluation_step, cross_entropy], - feed_dict={bottleneck_input: train_bottlenecks, - ground_truth_input: train_ground_truth}) - tf.logging.info('%s: Step %d: Train accuracy = %.1f%%' % - (datetime.now(), i, train_accuracy * 100)) - tf.logging.info('%s: Step %d: Cross entropy = %f' % - (datetime.now(), i, cross_entropy_value)) - # TODO(suharshs): Make this use an eval graph, to avoid quantization - # moving averages being updated by the validation set, though in - # practice this makes a negligable difference. 
- validation_bottlenecks, validation_ground_truth, _ = ( - get_random_cached_bottlenecks( - sess, image_lists, FLAGS.validation_batch_size, 'validation', - FLAGS.bottleneck_dir, FLAGS.image_dir, jpeg_data_tensor, - decoded_image_tensor, resized_image_tensor, bottleneck_tensor, - FLAGS.architecture)) - # Run a validation step and capture training summaries for TensorBoard - # with the `merged` op. - validation_summary, validation_accuracy = sess.run( - [merged, evaluation_step], - feed_dict={bottleneck_input: validation_bottlenecks, - ground_truth_input: validation_ground_truth}) - validation_writer.add_summary(validation_summary, i) - tf.logging.info('%s: Step %d: Validation accuracy = %.1f%% (N=%d)' % - (datetime.now(), i, validation_accuracy * 100, - len(validation_bottlenecks))) - - # Store intermediate results - intermediate_frequency = FLAGS.intermediate_store_frequency - - if (intermediate_frequency > 0 and (i % intermediate_frequency == 0) - and i > 0): - # If we want to do an intermediate save, save a checkpoint of the train - # graph, to restore into the eval graph. - train_saver.save(sess, CHECKPOINT_NAME) - intermediate_file_name = (FLAGS.intermediate_output_graphs_dir + - 'intermediate_' + str(i) + '.pb') - tf.logging.info('Save intermediate result to : ' + - intermediate_file_name) - save_graph_to_file(graph, intermediate_file_name, model_info, - class_count) - - # After training is complete, force one last save of the train checkpoint. - train_saver.save(sess, CHECKPOINT_NAME) - - # We've completed all our training, so run a final test evaluation on - # some new images we haven't used before. - run_final_eval(sess, model_info, class_count, image_lists, jpeg_data_tensor, - decoded_image_tensor, resized_image_tensor, - bottleneck_tensor) - - # Write out the trained graph and labels with the weights stored as - # constants. 
- save_graph_to_file(graph, FLAGS.output_graph, model_info, class_count) - with gfile.FastGFile(FLAGS.output_labels, 'w') as f: - f.write('\n'.join(image_lists.keys()) + '\n') - - export_model(model_info, class_count, FLAGS.saved_model_dir) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument( - '--image_dir', - type=str, - default='', - help='Path to folders of labeled images.' - ) - parser.add_argument( - '--output_graph', - type=str, - default='/tmp/output_graph.pb', - help='Where to save the trained graph.' - ) - parser.add_argument( - '--intermediate_output_graphs_dir', - type=str, - default='/tmp/intermediate_graph/', - help='Where to save the intermediate graphs.' - ) - parser.add_argument( - '--intermediate_store_frequency', - type=int, - default=0, - help="""\ - How many steps to store intermediate graph. If "0" then will not - store.\ - """ - ) - parser.add_argument( - '--output_labels', - type=str, - default='/tmp/output_labels.txt', - help='Where to save the trained graph\'s labels.' - ) - parser.add_argument( - '--summaries_dir', - type=str, - default='/tmp/retrain_logs', - help='Where to save summary logs for TensorBoard.' - ) - parser.add_argument( - '--how_many_training_steps', - type=int, - default=4000, - help='How many training steps to run before ending.' - ) - parser.add_argument( - '--learning_rate', - type=float, - default=0.01, - help='How large a learning rate to use when training.' - ) - parser.add_argument( - '--testing_percentage', - type=int, - default=10, - help='What percentage of images to use as a test set.' - ) - parser.add_argument( - '--validation_percentage', - type=int, - default=10, - help='What percentage of images to use as a validation set.' - ) - parser.add_argument( - '--eval_step_interval', - type=int, - default=10, - help='How often to evaluate the training results.' 
- ) - parser.add_argument( - '--train_batch_size', - type=int, - default=100, - help='How many images to train on at a time.' - ) - parser.add_argument( - '--test_batch_size', - type=int, - default=-1, - help="""\ - How many images to test on. This test set is only used once, to evaluate - the final accuracy of the model after training completes. - A value of -1 causes the entire test set to be used, which leads to more - stable results across runs.\ - """ - ) - parser.add_argument( - '--validation_batch_size', - type=int, - default=100, - help="""\ - How many images to use in an evaluation batch. This validation set is - used much more often than the test set, and is an early indicator of how - accurate the model is during training. - A value of -1 causes the entire validation set to be used, which leads to - more stable results across training iterations, but may be slower on large - training sets.\ - """ - ) - parser.add_argument( - '--print_misclassified_test_images', - default=False, - help="""\ - Whether to print out a list of all misclassified test images.\ - """, - action='store_true' - ) - parser.add_argument( - '--model_dir', - type=str, - default='/tmp/imagenet', - help="""\ - Path to classify_image_graph_def.pb, - imagenet_synset_to_human_label_map.txt, and - imagenet_2012_challenge_label_map_proto.pbtxt.\ - """ - ) - parser.add_argument( - '--bottleneck_dir', - type=str, - default='/tmp/bottleneck', - help='Path to cache bottleneck layer values as files.' 
- ) - parser.add_argument( - '--final_tensor_name', - type=str, - default='final_result', - help="""\ - The name of the output classification layer in the retrained graph.\ - """ - ) - parser.add_argument( - '--flip_left_right', - default=False, - help="""\ - Whether to randomly flip half of the training images horizontally.\ - """, - action='store_true' - ) - parser.add_argument( - '--random_crop', - type=int, - default=0, - help="""\ - A percentage determining how much of a margin to randomly crop off the - training images.\ - """ - ) - parser.add_argument( - '--random_scale', - type=int, - default=0, - help="""\ - A percentage determining how much to randomly scale up the size of the - training images by.\ - """ - ) - parser.add_argument( - '--random_brightness', - type=int, - default=0, - help="""\ - A percentage determining how much to randomly multiply the training image - input pixels up or down by.\ - """ - ) - parser.add_argument( - '--architecture', - type=str, - default='inception_v3', - help="""\ - Which model architecture to use. 'inception_v3' is the most accurate, but - also the slowest. For faster or smaller models, chose a MobileNet with the - form 'mobilenet__[_quantized]'. For example, - 'mobilenet_1.0_224' will pick a model that is 17 MB in size and takes 224 - pixel input images, while 'mobilenet_0.25_128_quantized' will choose a much - smaller and less accurate model, taking 128x128 images, and instrumented - for eventual quantization via TensorFlow Lite. 
- See https://research.googleblog.com/2017/06/mobilenets-open-source-models-for.html - for more information on Mobilenet.\ - """) - parser.add_argument( - '--saved_model_dir', - type=str, - default='/tmp/saved_models/1/', - help='Where to save the exported graph.') - FLAGS, unparsed = parser.parse_known_args() - tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) diff --git a/tensorflow/examples/image_retraining/retrain_test.py b/tensorflow/examples/image_retraining/retrain_test.py deleted file mode 100644 index fb7324c58ac1be60baad840207f31a61ec6182be..0000000000000000000000000000000000000000 --- a/tensorflow/examples/image_retraining/retrain_test.py +++ /dev/null @@ -1,148 +0,0 @@ -# Copyright 2015 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -# pylint: disable=g-bad-import-order,unused-import -"""Tests the graph freezing tool.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf -import os - -from tensorflow.examples.image_retraining import retrain -from tensorflow.python.framework import test_util - - -class ImageRetrainingTest(test_util.TensorFlowTestCase): - - def dummyImageLists(self): - return {'label_one': {'dir': 'somedir', 'training': ['image_one.jpg', - 'image_two.jpg'], - 'testing': ['image_three.jpg', 'image_four.jpg'], - 'validation': ['image_five.jpg', 'image_six.jpg']}, - 'label_two': {'dir': 'otherdir', 'training': ['image_one.jpg', - 'image_two.jpg'], - 'testing': ['image_three.jpg', 'image_four.jpg'], - 'validation': ['image_five.jpg', 'image_six.jpg']}} - - def testGetImagePath(self): - image_lists = self.dummyImageLists() - self.assertEqual('image_dir/somedir/image_one.jpg', retrain.get_image_path( - image_lists, 'label_one', 0, 'image_dir', 'training')) - self.assertEqual('image_dir/otherdir/image_four.jpg', - retrain.get_image_path(image_lists, 'label_two', 1, - 'image_dir', 'testing')) - - def testGetBottleneckPath(self): - image_lists = self.dummyImageLists() - self.assertEqual('bottleneck_dir/somedir/image_five.jpg_imagenet_v3.txt', - retrain.get_bottleneck_path( - image_lists, 'label_one', 0, 'bottleneck_dir', - 'validation', 'imagenet_v3')) - - def testShouldDistortImage(self): - self.assertEqual(False, retrain.should_distort_images(False, 0, 0, 0)) - self.assertEqual(True, retrain.should_distort_images(True, 0, 0, 0)) - self.assertEqual(True, retrain.should_distort_images(False, 10, 0, 0)) - self.assertEqual(True, retrain.should_distort_images(False, 0, 1, 0)) - self.assertEqual(True, retrain.should_distort_images(False, 0, 0, 50)) - - def testAddInputDistortions(self): - with tf.Graph().as_default(): - with 
tf.Session() as sess: - retrain.add_input_distortions(True, 10, 10, 10, 299, 299, 3, 128, 128) - self.assertIsNotNone(sess.graph.get_tensor_by_name('DistortJPGInput:0')) - self.assertIsNotNone(sess.graph.get_tensor_by_name('DistortResult:0')) - - @tf.test.mock.patch.object(retrain, 'FLAGS', learning_rate=0.01) - def testAddFinalRetrainOps(self, flags_mock): - with tf.Graph().as_default(): - with tf.Session() as sess: - bottleneck = tf.placeholder(tf.float32, [1, 1024], name='bottleneck') - # Test creating final training op with quantization. - retrain.add_final_retrain_ops(5, 'final', bottleneck, 1024, False, - False) - self.assertIsNotNone(sess.graph.get_tensor_by_name('final:0')) - - @tf.test.mock.patch.object(retrain, 'FLAGS', learning_rate=0.01) - def testAddFinalRetrainOpsQuantized(self, flags_mock): - # Ensure that the training and eval graph for quantized models are correctly - # created. - with tf.Graph().as_default() as g: - with tf.Session() as sess: - bottleneck = tf.placeholder(tf.float32, [1, 1024], name='bottleneck') - # Test creating final training op with quantization, set is_training to - # true. - retrain.add_final_retrain_ops(5, 'final', bottleneck, 1024, True, True) - self.assertIsNotNone(sess.graph.get_tensor_by_name('final:0')) - found_fake_quant = 0 - for op in g.get_operations(): - if op.type == 'FakeQuantWithMinMaxVars': - found_fake_quant += 1 - # Ensure that the inputs of each FakeQuant operations has 2 Assign - # operations in the training graph (Assign[Min,Max]Last, - # Assign[Min,Max]Ema) - self.assertEqual(2, - len([i for i in op.inputs if 'Assign' in i.name])) - self.assertEqual(found_fake_quant, 2) - with tf.Graph().as_default() as g: - with tf.Session() as sess: - bottleneck = tf.placeholder(tf.float32, [1, 1024], name='bottleneck') - # Test creating final training op with quantization, set is_training to - # false. 
- retrain.add_final_retrain_ops(5, 'final', bottleneck, 1024, True, False) - self.assertIsNotNone(sess.graph.get_tensor_by_name('final:0')) - found_fake_quant = 0 - for op in g.get_operations(): - if op.type == 'FakeQuantWithMinMaxVars': - found_fake_quant += 1 - for i in op.inputs: - # Ensure that no operations are Assign operation since this is the - # evaluation graph. - self.assertTrue('Assign' not in i.name) - self.assertEqual(found_fake_quant, 2) - - def testAddEvaluationStep(self): - with tf.Graph().as_default(): - final = tf.placeholder(tf.float32, [1], name='final') - gt = tf.placeholder(tf.int64, [1], name='gt') - self.assertIsNotNone(retrain.add_evaluation_step(final, gt)) - - def testAddJpegDecoding(self): - with tf.Graph().as_default(): - jpeg_data, mul_image = retrain.add_jpeg_decoding(10, 10, 3, 0, 255) - self.assertIsNotNone(jpeg_data) - self.assertIsNotNone(mul_image) - - def testCreateModelInfo(self): - did_raise_value_error = False - try: - retrain.create_model_info('no_such_model_name') - except ValueError: - did_raise_value_error = True - self.assertTrue(did_raise_value_error) - model_info = retrain.create_model_info('inception_v3') - self.assertIsNotNone(model_info) - self.assertEqual(299, model_info['input_width']) - - def testCreateModelInfoQuantized(self): - # Test for mobilenet_quantized - model_info = retrain.create_model_info('mobilenet_1.0_224') - self.assertIsNotNone(model_info) - self.assertEqual(224, model_info['input_width']) - - -if __name__ == '__main__': - tf.test.main() diff --git a/tensorflow/examples/label_image/main.cc b/tensorflow/examples/label_image/main.cc index 63bc39de6c0a420e03adada56cbc8b0f895b6155..baa65d3243ffbebdf3ccf8a786a2434dfb7cfdad 100644 --- a/tensorflow/examples/label_image/main.cc +++ b/tensorflow/examples/label_image/main.cc @@ -49,6 +49,7 @@ limitations under the License. 
#include "tensorflow/core/lib/core/stringpiece.h" #include "tensorflow/core/lib/core/threadpool.h" #include "tensorflow/core/lib/io/path.h" +#include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/lib/strings/stringprintf.h" #include "tensorflow/core/platform/env.h" #include "tensorflow/core/platform/init_main.h" @@ -137,15 +138,15 @@ Status ReadTensorFromImageFile(const string& file_name, const int input_height, // Now try to figure out what kind of file it is and decode it. const int wanted_channels = 3; tensorflow::Output image_reader; - if (tensorflow::StringPiece(file_name).ends_with(".png")) { + if (tensorflow::str_util::EndsWith(file_name, ".png")) { image_reader = DecodePng(root.WithOpName("png_reader"), file_reader, DecodePng::Channels(wanted_channels)); - } else if (tensorflow::StringPiece(file_name).ends_with(".gif")) { + } else if (tensorflow::str_util::EndsWith(file_name, ".gif")) { // gif decoder returns 4-D tensor, remove the first dim image_reader = Squeeze(root.WithOpName("squeeze_first_dim"), DecodeGif(root.WithOpName("gif_reader"), file_reader)); - } else if (tensorflow::StringPiece(file_name).ends_with(".bmp")) { + } else if (tensorflow::str_util::EndsWith(file_name, ".bmp")) { image_reader = DecodeBmp(root.WithOpName("bmp_reader"), file_reader); } else { // Assume if it's neither a PNG nor a GIF then it must be a JPEG. diff --git a/tensorflow/examples/multibox_detector/main.cc b/tensorflow/examples/multibox_detector/main.cc index e38704fd98cea6928231f2fc2bc989705ae46bb4..96ea525a4e74c68da17d0310f0ad475789314215 100644 --- a/tensorflow/examples/multibox_detector/main.cc +++ b/tensorflow/examples/multibox_detector/main.cc @@ -30,6 +30,7 @@ limitations under the License. 
#include "tensorflow/core/lib/core/stringpiece.h" #include "tensorflow/core/lib/core/threadpool.h" #include "tensorflow/core/lib/io/path.h" +#include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/lib/strings/stringprintf.h" #include "tensorflow/core/platform/init_main.h" #include "tensorflow/core/platform/logging.h" @@ -84,10 +85,10 @@ Status ReadTensorFromImageFile(const string& file_name, const int input_height, // Now try to figure out what kind of file it is and decode it. const int wanted_channels = 3; tensorflow::Output image_reader; - if (tensorflow::StringPiece(file_name).ends_with(".png")) { + if (tensorflow::str_util::EndsWith(file_name, ".png")) { image_reader = DecodePng(root.WithOpName("png_reader"), file_reader, DecodePng::Channels(wanted_channels)); - } else if (tensorflow::StringPiece(file_name).ends_with(".gif")) { + } else if (tensorflow::str_util::EndsWith(file_name, ".gif")) { image_reader = DecodeGif(root.WithOpName("gif_reader"), file_reader); } else { // Assume if it's neither a PNG nor a GIF then it must be a JPEG. @@ -131,7 +132,7 @@ Status ReadTensorFromImageFile(const string& file_name, const int input_height, Status SaveImage(const Tensor& tensor, const string& file_path) { LOG(INFO) << "Saving image to " << file_path; - CHECK(tensorflow::StringPiece(file_path).ends_with(".png")) + CHECK(tensorflow::str_util::EndsWith(file_path, ".png")) << "Only saving of png files is supported."; auto root = tensorflow::Scope::NewRootScope(); diff --git a/tensorflow/examples/speech_commands/input_data.py b/tensorflow/examples/speech_commands/input_data.py index e7db9cddf02daf9a32d3ed859ee9bd35b2cae838..63dd18457fea42acb09058b9ddd4623d72d1fd04 100644 --- a/tensorflow/examples/speech_commands/input_data.py +++ b/tensorflow/examples/speech_commands/input_data.py @@ -457,7 +457,7 @@ class AudioProcessor(object): self.time_shift_offset_placeholder_: time_shift_offset, } # Choose a section of background noise to mix in. 
- if use_background: + if use_background or sample['label'] == SILENCE_LABEL: background_index = np.random.randint(len(self.background_data)) background_samples = self.background_data[background_index] background_offset = np.random.randint( @@ -465,7 +465,9 @@ class AudioProcessor(object): background_clipped = background_samples[background_offset:( background_offset + desired_samples)] background_reshaped = background_clipped.reshape([desired_samples, 1]) - if np.random.uniform(0, 1) < background_frequency: + if sample['label'] == SILENCE_LABEL: + background_volume = np.random.uniform(0, 1) + elif np.random.uniform(0, 1) < background_frequency: background_volume = np.random.uniform(0, background_volume_range) else: background_volume = 0 diff --git a/tensorflow/examples/image_retraining/__init__.py b/tensorflow/examples/tutorials/estimators/__init__.py similarity index 100% rename from tensorflow/examples/image_retraining/__init__.py rename to tensorflow/examples/tutorials/estimators/__init__.py diff --git a/tensorflow/examples/tutorials/input_fn/__init__.py b/tensorflow/examples/tutorials/input_fn/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/tensorflow/examples/tutorials/layers/__init__.py b/tensorflow/examples/tutorials/layers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/tensorflow/examples/tutorials/mnist/BUILD b/tensorflow/examples/tutorials/mnist/BUILD index aa1b2ec2db34f3cb0350bfde88a1598ed71456de..d7bc6a5a7d1e4cd3927c7c5067ccc22993885994 100644 --- a/tensorflow/examples/tutorials/mnist/BUILD +++ b/tensorflow/examples/tutorials/mnist/BUILD @@ -51,6 +51,7 @@ py_binary( "fully_connected_feed.py", ], srcs_version = "PY2AND3", + tags = ["optonly"], deps = [ ":input_data", ":mnist", diff --git a/tensorflow/examples/tutorials/monitors/__init__.py 
b/tensorflow/examples/tutorials/monitors/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/tensorflow/examples/tutorials/monitors/iris_monitors.py b/tensorflow/examples/tutorials/monitors/iris_monitors.py index 850d105f7b1b33fadd40bc6a6cab3d08c0da3734..a2b7fe60237da0604f74f31c0a09951f708e908b 100644 --- a/tensorflow/examples/tutorials/monitors/iris_monitors.py +++ b/tensorflow/examples/tutorials/monitors/iris_monitors.py @@ -32,9 +32,9 @@ IRIS_TEST = os.path.join(os.path.dirname(__file__), "iris_test.csv") def main(unused_argv): # Load datasets. training_set = tf.contrib.learn.datasets.base.load_csv_with_header( - filename=IRIS_TRAINING, target_dtype=np.int, features_dtype=np.float) + filename=IRIS_TRAINING, target_dtype=np.int, features_dtype=np.float32) test_set = tf.contrib.learn.datasets.base.load_csv_with_header( - filename=IRIS_TEST, target_dtype=np.int, features_dtype=np.float) + filename=IRIS_TEST, target_dtype=np.int, features_dtype=np.float32) validation_metrics = { "accuracy": @@ -83,7 +83,7 @@ def main(unused_argv): # Classify two new flower samples. new_samples = np.array( - [[6.4, 3.2, 4.5, 1.5], [5.8, 3.1, 5.0, 1.7]], dtype=float) + [[6.4, 3.2, 4.5, 1.5], [5.8, 3.1, 5.0, 1.7]], dtype=np.float32) y = list(classifier.predict(new_samples)) print("Predictions: {}".format(str(y))) diff --git a/tensorflow/examples/tutorials/word2vec/word2vec_basic.py b/tensorflow/examples/tutorials/word2vec/word2vec_basic.py index 14ae7fbf35836ad7f5d56101ae0fc33a3f3fb9ba..b09ee9976897fcab2e90fdc17e8030532080aca8 100644 --- a/tensorflow/examples/tutorials/word2vec/word2vec_basic.py +++ b/tensorflow/examples/tutorials/word2vec/word2vec_basic.py @@ -224,7 +224,7 @@ with graph.as_default(): optimizer = tf.train.GradientDescentOptimizer(1.0).minimize(loss) # Compute the cosine similarity between minibatch examples and all embeddings. 
- norm = tf.sqrt(tf.reduce_sum(tf.square(embeddings), 1, keep_dims=True)) + norm = tf.sqrt(tf.reduce_sum(tf.square(embeddings), 1, keepdims=True)) normalized_embeddings = embeddings / norm valid_embeddings = tf.nn.embedding_lookup(normalized_embeddings, valid_dataset) diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index a33703ad6fa3e17b96312376d35171af27a85c75..c31ca8b67a10707e4e145243b6f1a1992601b4b1 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -43,7 +43,7 @@ type FakeQuantWithMinMaxVarsPerChannelGradientAttr func(optionalAttr) // FakeQuantWithMinMaxVarsPerChannelGradientNumBits sets the optional num_bits attribute to value. // -// value: The bitwidth of the quantization; between 2 and 8, inclusive. +// value: The bitwidth of the quantization; between 2 and 16, inclusive. // If not specified, defaults to 8 func FakeQuantWithMinMaxVarsPerChannelGradientNumBits(value int64) FakeQuantWithMinMaxVarsPerChannelGradientAttr { return func(m optionalAttr) { @@ -124,7 +124,7 @@ func FakeQuantWithMinMaxVarsPerChannelNarrowRange(value bool) FakeQuantWithMinMa // `inputs` values are quantized into the quantization range (`[0; 2^num_bits - 1]` // when `narrow_range` is false and `[1; 2^num_bits - 1]` when it is true) and // then de-quantized and output as floats in `[min; max]` interval. -// `num_bits` is the bitwidth of the quantization; between 2 and 8, inclusive. +// `num_bits` is the bitwidth of the quantization; between 2 and 16, inclusive. // // This operation has a gradient and thus allows for training `min` and `max` // values. @@ -305,7 +305,7 @@ func FakeQuantWithMinMaxArgsNarrowRange(value bool) FakeQuantWithMinMaxArgsAttr // `inputs` values are quantized into the quantization range (`[0; 2^num_bits - 1]` // when `narrow_range` is false and `[1; 2^num_bits - 1]` when it is true) and // then de-quantized and output as floats in `[min; max]` interval. 
-// `num_bits` is the bitwidth of the quantization; between 2 and 8, inclusive. +// `num_bits` is the bitwidth of the quantization; between 2 and 16, inclusive. // // Quantization is called fake since the output is still in floating point. func FakeQuantWithMinMaxArgs(scope *Scope, inputs tf.Output, optional ...FakeQuantWithMinMaxArgsAttr) (outputs tf.Output) { @@ -401,6 +401,9 @@ func FakeQuantWithMinMaxArgs(scope *Scope, inputs tf.Output, optional ...FakeQua // [[5, 5, 5, 5], [6, 6, 6, 6], [7, 7, 7, 7], [8, 8, 8, 8]], // [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]] // +// Note that on CPU, if an out of bound index is found, an error is returned. +// On GPU, if an out of bound index is found, the index is ignored. +// // Arguments: // indices: Index tensor. // updates: Updates to scatter into output. @@ -1720,27 +1723,27 @@ func Size(scope *Scope, input tf.Output, optional ...SizeAttr) (output tf.Output return op.Output(0) } -// Returns the complex conjugate of a complex number. -// -// Given a tensor `input` of complex numbers, this operation returns a tensor of -// complex numbers that are the complex conjugate of each element in `input`. The -// complex numbers in `input` must be of the form \\(a + bj\\), where *a* is the -// real part and *b* is the imaginary part. +// Returns the rank of a tensor. // -// The complex conjugate returned by this operation is of the form \\(a - bj\\). +// This operation returns an integer representing the rank of `input`. // // For example: // // ``` -// # tensor 'input' is [-2.25 + 4.75j, 3.25 + 5.75j] -// tf.conj(input) ==> [-2.25 - 4.75j, 3.25 - 5.75j] +// # 't' is [[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]]] +// # shape of tensor 't' is [2, 2, 3] +// rank(t) ==> 3 // ``` -func Conj(scope *Scope, input tf.Output) (output tf.Output) { +// +// **Note**: The rank of a tensor is not the same as the rank of a matrix. 
The rank +// of a tensor is the number of indices required to uniquely select each element +// of the tensor. Rank is also known as "order", "degree", or "ndims." +func Rank(scope *Scope, input tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Conj", + Type: "Rank", Input: []tf.Input{ input, }, @@ -1749,277 +1752,360 @@ func Conj(scope *Scope, input tf.Output) (output tf.Output) { return op.Output(0) } -// ResourceSparseApplyMomentumAttr is an optional argument to ResourceSparseApplyMomentum. -type ResourceSparseApplyMomentumAttr func(optionalAttr) +// ReverseSequenceAttr is an optional argument to ReverseSequence. +type ReverseSequenceAttr func(optionalAttr) -// ResourceSparseApplyMomentumUseLocking sets the optional use_locking attribute to value. +// ReverseSequenceBatchDim sets the optional batch_dim attribute to value. // -// value: If `True`, updating of the var and accum tensors will be protected -// by a lock; otherwise the behavior is undefined, but may exhibit less -// contention. -// If not specified, defaults to false -func ResourceSparseApplyMomentumUseLocking(value bool) ResourceSparseApplyMomentumAttr { +// value: The dimension along which reversal is performed. +// If not specified, defaults to 0 +func ReverseSequenceBatchDim(value int64) ReverseSequenceAttr { return func(m optionalAttr) { - m["use_locking"] = value + m["batch_dim"] = value } } -// ResourceSparseApplyMomentumUseNesterov sets the optional use_nesterov attribute to value. +// Reverses variable length slices. // -// value: If `True`, the tensor passed to compute grad will be -// var - lr * momentum * accum, so in the end, the var you get is actually -// var - lr * momentum * accum. 
-// If not specified, defaults to false -func ResourceSparseApplyMomentumUseNesterov(value bool) ResourceSparseApplyMomentumAttr { - return func(m optionalAttr) { - m["use_nesterov"] = value - } -} - -// Update relevant entries in '*var' and '*accum' according to the momentum scheme. +// This op first slices `input` along the dimension `batch_dim`, and for each +// slice `i`, reverses the first `seq_lengths[i]` elements along +// the dimension `seq_dim`. // -// Set use_nesterov = True if you want to use Nesterov momentum. +// The elements of `seq_lengths` must obey `seq_lengths[i] <= input.dims[seq_dim]`, +// and `seq_lengths` must be a vector of length `input.dims[batch_dim]`. // -// That is for rows we have grad for, we update var and accum as follows: +// The output slice `i` along dimension `batch_dim` is then given by input +// slice `i`, with the first `seq_lengths[i]` slices along dimension +// `seq_dim` reversed. // -// accum = accum * momentum + grad -// var -= lr * accum +// For example: +// +// ``` +// # Given this: +// batch_dim = 0 +// seq_dim = 1 +// input.dims = (4, 8, ...) +// seq_lengths = [7, 2, 3, 5] +// +// # then slices of input are reversed on seq_dim, but only up to seq_lengths: +// output[0, 0:7, :, ...] = input[0, 7:0:-1, :, ...] +// output[1, 0:2, :, ...] = input[1, 2:0:-1, :, ...] +// output[2, 0:3, :, ...] = input[2, 3:0:-1, :, ...] +// output[3, 0:5, :, ...] = input[3, 5:0:-1, :, ...] +// +// # while entries past seq_lens are copied through: +// output[0, 7:, :, ...] = input[0, 7:, :, ...] +// output[1, 2:, :, ...] = input[1, 2:, :, ...] +// output[2, 3:, :, ...] = input[2, 3:, :, ...] +// output[3, 2:, :, ...] = input[3, 2:, :, ...] +// ``` +// +// In contrast, if: +// +// ``` +// # Given this: +// batch_dim = 2 +// seq_dim = 0 +// input.dims = (8, ?, 4, ...) +// seq_lengths = [7, 2, 3, 5] +// +// # then slices of input are reversed on seq_dim, but only up to seq_lengths: +// output[0:7, :, 0, :, ...] = input[7:0:-1, :, 0, :, ...] 
+// output[0:2, :, 1, :, ...] = input[2:0:-1, :, 1, :, ...] +// output[0:3, :, 2, :, ...] = input[3:0:-1, :, 2, :, ...] +// output[0:5, :, 3, :, ...] = input[5:0:-1, :, 3, :, ...] +// +// # while entries past seq_lens are copied through: +// output[7:, :, 0, :, ...] = input[7:, :, 0, :, ...] +// output[2:, :, 1, :, ...] = input[2:, :, 1, :, ...] +// output[3:, :, 2, :, ...] = input[3:, :, 2, :, ...] +// output[2:, :, 3, :, ...] = input[2:, :, 3, :, ...] +// ``` // // Arguments: -// var_: Should be from a Variable(). -// accum: Should be from a Variable(). -// lr: Learning rate. Must be a scalar. -// grad: The gradient. -// indices: A vector of indices into the first dimension of var and accum. -// momentum: Momentum. Must be a scalar. +// input: The input to reverse. +// seq_lengths: 1-D with length `input.dims(batch_dim)` and +// `max(seq_lengths) <= input.dims(seq_dim)` +// seq_dim: The dimension which is partially reversed. // -// Returns the created operation. -func ResourceSparseApplyMomentum(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, grad tf.Output, indices tf.Output, momentum tf.Output, optional ...ResourceSparseApplyMomentumAttr) (o *tf.Operation) { +// Returns The partially reversed input. It has the same shape as `input`. +func ReverseSequence(scope *Scope, input tf.Output, seq_lengths tf.Output, seq_dim int64, optional ...ReverseSequenceAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"seq_dim": seq_dim} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "ResourceSparseApplyMomentum", + Type: "ReverseSequence", Input: []tf.Input{ - var_, accum, lr, grad, indices, momentum, + input, seq_lengths, }, Attrs: attrs, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0) } -// Creates a sequence of numbers. +// UniqueWithCountsV2Attr is an optional argument to UniqueWithCountsV2. 
+type UniqueWithCountsV2Attr func(optionalAttr) + +// UniqueWithCountsV2OutIdx sets the optional out_idx attribute to value. +// If not specified, defaults to DT_INT32 +func UniqueWithCountsV2OutIdx(value tf.DataType) UniqueWithCountsV2Attr { + return func(m optionalAttr) { + m["out_idx"] = value + } +} + +// Finds unique elements along an axis of a tensor. // -// This operation creates a sequence of numbers that begins at `start` and -// extends by increments of `delta` up to but not including `limit`. +// This operation either returns a tensor `y` containing unique elements +// along the `axis` of a tensor. The returned unique elements is sorted +// in the same order as they occur along `axis` in `x`. +// This operation also returns a tensor `idx` and a tensor `count` +// that are the same size as the number of the elements in `x` along the +// `axis` dimension. The `idx` contains the index in the unique output `y` +// and the `count` contains the count in the unique output `y`. +// In other words, for an `1-D` tensor `x` with `axis = None: +// +// `y[idx[i]] = x[i] for i in [0, 1,...,rank(x) - 1]` // // For example: // // ``` -// # 'start' is 3 -// # 'limit' is 18 -// # 'delta' is 3 -// tf.range(start, limit, delta) ==> [3, 6, 9, 12, 15] +// # tensor 'x' is [1, 1, 2, 4, 4, 4, 7, 8, 8] +// y, idx, count = unique_with_counts(x) +// y ==> [1, 2, 4, 7, 8] +// idx ==> [0, 0, 1, 2, 2, 2, 3, 4, 4] +// count ==> [2, 1, 3, 1, 2] +// ``` +// +// For an `2-D` tensor `x` with `axis = 0`: +// +// ``` +// # tensor 'x' is [[1, 0, 0], +// # [1, 0, 0], +// # [2, 0, 0]] +// y, idx, count = unique_with_counts(x, axis=0) +// y ==> [[1, 0, 0], +// [2, 0, 0]] +// idx ==> [0, 0, 1] +// count ==> [2, 1] +// ``` +// +// For an `2-D` tensor `x` with `axis = 1`: +// +// ``` +// # tensor 'x' is [[1, 0, 0], +// # [1, 0, 0], +// # [2, 0, 0]] +// y, idx, count = unique_with_counts(x, axis=1) +// y ==> [[1, 0], +// [1, 0], +// [2, 0]] +// idx ==> [0, 1, 1] +// count ==> [1, 2] // ``` // // 
Arguments: -// start: 0-D (scalar). First entry in the sequence. -// limit: 0-D (scalar). Upper limit of sequence, exclusive. -// delta: 0-D (scalar). Optional. Default is 1. Number that increments `start`. +// x: A `Tensor`. +// axis: A `Tensor` of type `int32` (default: None). The axis of the Tensor to +// find the unique elements. // -// Returns 1-D. -func Range(scope *Scope, start tf.Output, limit tf.Output, delta tf.Output) (output tf.Output) { +// Returns A `Tensor`. Unique elements along the `axis` of `Tensor` x.A 1-D Tensor. Has the same type as x that contains the index of each +// value of x in the output y.A 1-D Tensor. The count of each value of x in the output y. +func UniqueWithCountsV2(scope *Scope, x tf.Output, axis tf.Output, optional ...UniqueWithCountsV2Attr) (y tf.Output, idx tf.Output, count tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "Range", + Type: "UniqueWithCountsV2", Input: []tf.Input{ - start, limit, delta, + x, axis, }, + Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1), op.Output(2) } -// Computes gradients for SparseSegmentSqrtN. -// -// Returns tensor "output" with same shape as grad, except for dimension 0 whose -// value is output_dim0. -// -// Arguments: -// grad: gradient propagated to the SparseSegmentSqrtN op. -// indices: indices passed to the corresponding SparseSegmentSqrtN op. -// segment_ids: segment_ids passed to the corresponding SparseSegmentSqrtN op. -// output_dim0: dimension 0 of "data" passed to SparseSegmentSqrtN op. 
-func SparseSegmentSqrtNGrad(scope *Scope, grad tf.Output, indices tf.Output, segment_ids tf.Output, output_dim0 tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SparseSegmentSqrtNGrad", - Input: []tf.Input{ - grad, indices, segment_ids, output_dim0, - }, +// UniqueWithCountsAttr is an optional argument to UniqueWithCounts. +type UniqueWithCountsAttr func(optionalAttr) + +// UniqueWithCountsOutIdx sets the optional out_idx attribute to value. +// If not specified, defaults to DT_INT32 +func UniqueWithCountsOutIdx(value tf.DataType) UniqueWithCountsAttr { + return func(m optionalAttr) { + m["out_idx"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// Computes the mean along sparse segments of a tensor. +// Finds unique elements in a 1-D tensor. // -// Read @{$math_ops#segmentation$the section on segmentation} for an explanation of -// segments. +// This operation returns a tensor `y` containing all of the unique elements of `x` +// sorted in the same order that they occur in `x`. This operation also returns a +// tensor `idx` the same size as `x` that contains the index of each value of `x` +// in the unique output `y`. Finally, it returns a third tensor `count` that +// contains the count of each element of `y` in `x`. In other words: // -// Like `SegmentMean`, but `segment_ids` can have rank less than `data`'s first -// dimension, selecting a subset of dimension 0, specified by `indices`. +// `y[idx[i]] = x[i] for i in [0, 1,...,rank(x) - 1]` // -// Arguments: +// For example: // -// indices: A 1-D tensor. Has same rank as `segment_ids`. -// segment_ids: A 1-D tensor. Values should be sorted and can be repeated. 
+// ``` +// # tensor 'x' is [1, 1, 2, 4, 4, 4, 7, 8, 8] +// y, idx, count = unique_with_counts(x) +// y ==> [1, 2, 4, 7, 8] +// idx ==> [0, 0, 1, 2, 2, 2, 3, 4, 4] +// count ==> [2, 1, 3, 1, 2] +// ``` // -// Returns Has same shape as data, except for dimension 0 which -// has size `k`, the number of segments. -func SparseSegmentMean(scope *Scope, data tf.Output, indices tf.Output, segment_ids tf.Output) (output tf.Output) { +// Arguments: +// x: 1-D. +// +// Returns 1-D.1-D.1-D. +func UniqueWithCounts(scope *Scope, x tf.Output, optional ...UniqueWithCountsAttr) (y tf.Output, idx tf.Output, count tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "SparseSegmentMean", + Type: "UniqueWithCounts", Input: []tf.Input{ - data, indices, segment_ids, + x, }, + Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1), op.Output(2) } -// Pop the element at the top of the stack. -// -// Arguments: -// handle: The handle to a stack. -// elem_type: The type of the elem that is popped. -// -// Returns The tensor that is popped from the top of the stack. -func StackPopV2(scope *Scope, handle tf.Output, elem_type tf.DataType) (elem tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"elem_type": elem_type} - opspec := tf.OpSpec{ - Type: "StackPopV2", - Input: []tf.Input{ - handle, - }, - Attrs: attrs, +// UniqueV2Attr is an optional argument to UniqueV2. +type UniqueV2Attr func(optionalAttr) + +// UniqueV2OutIdx sets the optional out_idx attribute to value. +// If not specified, defaults to DT_INT32 +func UniqueV2OutIdx(value tf.DataType) UniqueV2Attr { + return func(m optionalAttr) { + m["out_idx"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// Computes the sum along sparse segments of a tensor. +// Finds unique elements along an axis of a tensor. 
// -// Like `SparseSegmentSum`, but allows missing ids in `segment_ids`. If an id is -// misisng, the `output` tensor at that position will be zeroed. +// This operation either returns a tensor `y` containing unique elements +// along the `axis` of a tensor. The returned unique elements is sorted +// in the same order as they occur along `axis` in `x`. +// This operation also returns a tensor `idx` that is the same size as +// the number of the elements in `x` along the `axis` dimension. It +// contains the index in the unique output `y`. +// In other words, for an `1-D` tensor `x` with `axis = None: // -// Read @{$math_ops#segmentation$the section on segmentation} for an explanation of -// segments. +// `y[idx[i]] = x[i] for i in [0, 1,...,rank(x) - 1]` // // For example: // -// ```python -// c = tf.constant([[1,2,3,4], [-1,-2,-3,-4], [5,6,7,8]]) +// ``` +// # tensor 'x' is [1, 1, 2, 4, 4, 4, 7, 8, 8] +// y, idx = unique(x) +// y ==> [1, 2, 4, 7, 8] +// idx ==> [0, 0, 1, 2, 2, 2, 3, 4, 4] +// ``` // -// tf.sparse_segment_sum_with_num_segments( -// c, tf.constant([0, 1]), tf.constant([0, 0]), num_segments=3) -// # => [[0 0 0 0] -// # [0 0 0 0] -// # [0 0 0 0]] +// For an `2-D` tensor `x` with `axis = 0`: // -// tf.sparse_segment_sum_with_num_segments(c, -// tf.constant([0, 1]), -// tf.constant([0, 2], -// num_segments=4)) -// # => [[ 1 2 3 4] -// # [ 0 0 0 0] -// # [-1 -2 -3 -4] -// # [ 0 0 0 0]] +// ``` +// # tensor 'x' is [[1, 0, 0], +// # [1, 0, 0], +// # [2, 0, 0]] +// y, idx = unique(x, axis=0) +// y ==> [[1, 0, 0], +// [2, 0, 0]] +// idx ==> [0, 0, 1] // ``` // -// Arguments: +// For an `2-D` tensor `x` with `axis = 1`: // -// indices: A 1-D tensor. Has same rank as `segment_ids`. -// segment_ids: A 1-D tensor. Values should be sorted and can be repeated. -// num_segments: Should equal the number of distinct segment IDs. 
+// ``` +// # tensor 'x' is [[1, 0, 0], +// # [1, 0, 0], +// # [2, 0, 0]] +// y, idx = unique(x, axis=1) +// y ==> [[1, 0], +// [1, 0], +// [2, 0]] +// idx ==> [0, 1, 1] +// ``` // -// Returns Has same shape as data, except for dimension 0 which -// has size `num_segments`. -func SparseSegmentSumWithNumSegments(scope *Scope, data tf.Output, indices tf.Output, segment_ids tf.Output, num_segments tf.Output) (output tf.Output) { +// Arguments: +// x: A `Tensor`. +// axis: A `Tensor` of type `int32` (default: None). The axis of the Tensor to +// find the unique elements. +// +// Returns A `Tensor`. Unique elements along the `axis` of `Tensor` x.A 1-D Tensor. Has the same type as x that contains the index of each +// value of x in the output y. +func UniqueV2(scope *Scope, x tf.Output, axis tf.Output, optional ...UniqueV2Attr) (y tf.Output, idx tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "SparseSegmentSumWithNumSegments", + Type: "UniqueV2", Input: []tf.Input{ - data, indices, segment_ids, num_segments, + x, axis, }, + Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1) } -// SparseToDenseAttr is an optional argument to SparseToDense. -type SparseToDenseAttr func(optionalAttr) +// UniqueAttr is an optional argument to Unique. +type UniqueAttr func(optionalAttr) -// SparseToDenseValidateIndices sets the optional validate_indices attribute to value. -// -// value: If true, indices are checked to make sure they are sorted in -// lexicographic order and that there are no repeats. -// If not specified, defaults to true -func SparseToDenseValidateIndices(value bool) SparseToDenseAttr { +// UniqueOutIdx sets the optional out_idx attribute to value. 
+// If not specified, defaults to DT_INT32 +func UniqueOutIdx(value tf.DataType) UniqueAttr { return func(m optionalAttr) { - m["validate_indices"] = value + m["out_idx"] = value } } -// Converts a sparse representation into a dense tensor. +// Finds unique elements in a 1-D tensor. // -// Builds an array `dense` with shape `output_shape` such that +// This operation returns a tensor `y` containing all of the unique elements of `x` +// sorted in the same order that they occur in `x`. This operation also returns a +// tensor `idx` the same size as `x` that contains the index of each value of `x` +// in the unique output `y`. In other words: // -// ``` -// # If sparse_indices is scalar -// dense[i] = (i == sparse_indices ? sparse_values : default_value) +// `y[idx[i]] = x[i] for i in [0, 1,...,rank(x) - 1]` // -// # If sparse_indices is a vector, then for each i -// dense[sparse_indices[i]] = sparse_values[i] +// For example: // -// # If sparse_indices is an n by d matrix, then for each i in [0, n) -// dense[sparse_indices[i][0], ..., sparse_indices[i][d-1]] = sparse_values[i] // ``` -// -// All other values in `dense` are set to `default_value`. If `sparse_values` is a -// scalar, all sparse indices are set to this single value. -// -// Indices should be sorted in lexicographic order, and indices must not -// contain any repeats. If `validate_indices` is true, these properties -// are checked during execution. +// # tensor 'x' is [1, 1, 2, 4, 4, 4, 7, 8, 8] +// y, idx = unique(x) +// y ==> [1, 2, 4, 7, 8] +// idx ==> [0, 0, 1, 2, 2, 2, 3, 4, 4] +// ``` // // Arguments: -// sparse_indices: 0-D, 1-D, or 2-D. `sparse_indices[i]` contains the complete -// index where `sparse_values[i]` will be placed. -// output_shape: 1-D. Shape of the dense output tensor. -// sparse_values: 1-D. Values corresponding to each row of `sparse_indices`, -// or a scalar value to be used for all sparse indices. 
-// default_value: Scalar value to set for indices not specified in -// `sparse_indices`. +// x: 1-D. // -// Returns Dense output tensor of shape `output_shape`. -func SparseToDense(scope *Scope, sparse_indices tf.Output, output_shape tf.Output, sparse_values tf.Output, default_value tf.Output, optional ...SparseToDenseAttr) (dense tf.Output) { +// Returns 1-D.1-D. +func Unique(scope *Scope, x tf.Output, optional ...UniqueAttr) (y tf.Output, idx tf.Output) { if scope.Err() != nil { return } @@ -2028,228 +2114,210 @@ func SparseToDense(scope *Scope, sparse_indices tf.Output, output_shape tf.Outpu a(attrs) } opspec := tf.OpSpec{ - Type: "SparseToDense", + Type: "Unique", Input: []tf.Input{ - sparse_indices, output_shape, sparse_values, default_value, + x, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1) } -// Counts the number of occurrences of each value in an integer array. -// -// Outputs a vector with length `size` and the same dtype as `weights`. If -// `weights` are empty, then index `i` stores the number of times the value `i` is -// counted in `arr`. If `weights` are non-empty, then index `i` stores the sum of -// the value in `weights` at each index where the corresponding value in `arr` is -// `i`. -// -// Values in `arr` outside of the range [0, size) are ignored. -// -// Arguments: -// arr: int32 `Tensor`. -// size: non-negative int32 scalar `Tensor`. -// weights: is an int32, int64, float32, or float64 `Tensor` with the same -// shape as `arr`, or a length-0 `Tensor`, in which case it acts as all weights -// equal to 1. +// Shuffle dimensions of x according to a permutation and conjugate the result. // -// Returns 1D `Tensor` with length equal to `size`. The counts or summed weights for -// each value in the range [0, size). -func Bincount(scope *Scope, arr tf.Output, size tf.Output, weights tf.Output) (bins tf.Output) { +// The output `y` has the same rank as `x`. 
The shapes of `x` and `y` satisfy: +// `y.shape[i] == x.shape[perm[i]] for i in [0, 1, ..., rank(x) - 1]` +// `y[i,j,k,...,s,t,u] == conj(x[perm[i], perm[j], perm[k],...,perm[s], perm[t], perm[u]])` +func ConjugateTranspose(scope *Scope, x tf.Output, perm tf.Output) (y tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Bincount", + Type: "ConjugateTranspose", Input: []tf.Input{ - arr, size, weights, + x, perm, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Computes the sum along sparse segments of a tensor. +// Reshapes a tensor. // -// Read @{$math_ops#segmentation$the section on segmentation} for an explanation of -// segments. +// Given `tensor`, this operation returns a tensor that has the same values +// as `tensor` with shape `shape`. // -// Like `SegmentSum`, but `segment_ids` can have rank less than `data`'s first -// dimension, selecting a subset of dimension 0, specified by `indices`. +// If one component of `shape` is the special value -1, the size of that dimension +// is computed so that the total size remains constant. In particular, a `shape` +// of `[-1]` flattens into 1-D. At most one component of `shape` can be -1. +// +// If `shape` is 1-D or higher, then the operation returns a tensor with shape +// `shape` filled with the values of `tensor`. In this case, the number of elements +// implied by `shape` must be the same as the number of elements in `tensor`. // // For example: // -// ```python -// c = tf.constant([[1,2,3,4], [-1,-2,-3,-4], [5,6,7,8]]) +// ``` +// # tensor 't' is [1, 2, 3, 4, 5, 6, 7, 8, 9] +// # tensor 't' has shape [9] +// reshape(t, [3, 3]) ==> [[1, 2, 3], +// [4, 5, 6], +// [7, 8, 9]] // -// # Select two rows, one segment. 
-// tf.sparse_segment_sum(c, tf.constant([0, 1]), tf.constant([0, 0])) -// # => [[0 0 0 0]] +// # tensor 't' is [[[1, 1], [2, 2]], +// # [[3, 3], [4, 4]]] +// # tensor 't' has shape [2, 2, 2] +// reshape(t, [2, 4]) ==> [[1, 1, 2, 2], +// [3, 3, 4, 4]] // -// # Select two rows, two segment. -// tf.sparse_segment_sum(c, tf.constant([0, 1]), tf.constant([0, 1])) -// # => [[ 1 2 3 4] -// # [-1 -2 -3 -4]] +// # tensor 't' is [[[1, 1, 1], +// # [2, 2, 2]], +// # [[3, 3, 3], +// # [4, 4, 4]], +// # [[5, 5, 5], +// # [6, 6, 6]]] +// # tensor 't' has shape [3, 2, 3] +// # pass '[-1]' to flatten 't' +// reshape(t, [-1]) ==> [1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6] // -// # Select all rows, two segments. -// tf.sparse_segment_sum(c, tf.constant([0, 1, 2]), tf.constant([0, 0, 1])) -// # => [[0 0 0 0] -// # [5 6 7 8]] +// # -1 can also be used to infer the shape // -// # Which is equivalent to: -// tf.segment_sum(c, tf.constant([0, 0, 1])) +// # -1 is inferred to be 9: +// reshape(t, [2, -1]) ==> [[1, 1, 1, 2, 2, 2, 3, 3, 3], +// [4, 4, 4, 5, 5, 5, 6, 6, 6]] +// # -1 is inferred to be 2: +// reshape(t, [-1, 9]) ==> [[1, 1, 1, 2, 2, 2, 3, 3, 3], +// [4, 4, 4, 5, 5, 5, 6, 6, 6]] +// # -1 is inferred to be 3: +// reshape(t, [ 2, -1, 3]) ==> [[[1, 1, 1], +// [2, 2, 2], +// [3, 3, 3]], +// [[4, 4, 4], +// [5, 5, 5], +// [6, 6, 6]]] +// +// # tensor 't' is [7] +// # shape `[]` reshapes to a scalar +// reshape(t, []) ==> 7 // ``` // // Arguments: // -// indices: A 1-D tensor. Has same rank as `segment_ids`. -// segment_ids: A 1-D tensor. Values should be sorted and can be repeated. -// -// Returns Has same shape as data, except for dimension 0 which -// has size `k`, the number of segments. -func SparseSegmentSum(scope *Scope, data tf.Output, indices tf.Output, segment_ids tf.Output) (output tf.Output) { +// shape: Defines the shape of the output tensor. 
+func Reshape(scope *Scope, tensor tf.Output, shape tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "SparseSegmentSum", + Type: "Reshape", Input: []tf.Input{ - data, indices, segment_ids, + tensor, shape, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Computes hyperbolic sine of x element-wise. -func Sinh(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Sinh", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes rectified linear 6: `min(max(features, 0), 6)`. -func Relu6(scope *Scope, features tf.Output) (activations tf.Output) { +// Checks a tensor for NaN and Inf values. +// +// When run, reports an `InvalidArgument` error if `tensor` has any values +// that are not a number (NaN) or infinity (Inf). Otherwise, passes `tensor` as-is. +// +// Arguments: +// +// message: Prefix of the error message. +func CheckNumerics(scope *Scope, tensor tf.Output, message string) (output tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"message": message} opspec := tf.OpSpec{ - Type: "Relu6", + Type: "CheckNumerics", Input: []tf.Input{ - features, + tensor, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Computes the sum along segments of a tensor. -// -// Read @{$math_ops#segmentation$the section on segmentation} for an explanation of -// segments. -// -// Computes a tensor such that -// `(output[i] = sum_{j...} data[j...]` where the sum is over tuples `j...` such -// that `segment_ids[j...] == i`. Unlike `SegmentSum`, `segment_ids` -// need not be sorted and need not cover all values in the full -// range of valid values. -// -// If the sum is empty for a given segment ID `i`, `output[i] = 0`. -// If the given segment ID `i` is negative, the value is dropped and will not be -// added to the sum of the segment. 
-// -// `num_segments` should equal the number of distinct segment IDs. -// -//
-// -//
+// Returns the complex conjugate of a complex number. // -// Arguments: +// Given a tensor `input` of complex numbers, this operation returns a tensor of +// complex numbers that are the complex conjugate of each element in `input`. The +// complex numbers in `input` must be of the form \\(a + bj\\), where *a* is the +// real part and *b* is the imaginary part. // -// segment_ids: A tensor whose shape is a prefix of `data.shape`. +// The complex conjugate returned by this operation is of the form \\(a - bj\\). // +// For example: // -// Returns Has same shape as data, except for the first `segment_ids.rank` -// dimensions, which are replaced with a single dimension which has size -// `num_segments`. -func UnsortedSegmentSum(scope *Scope, data tf.Output, segment_ids tf.Output, num_segments tf.Output) (output tf.Output) { +// ``` +// # tensor 'input' is [-2.25 + 4.75j, 3.25 + 5.75j] +// tf.conj(input) ==> [-2.25 - 4.75j, 3.25 - 5.75j] +// ``` +func Conj(scope *Scope, input tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "UnsortedSegmentSum", + Type: "Conj", Input: []tf.Input{ - data, segment_ids, num_segments, + input, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// ResourceStridedSliceAssignAttr is an optional argument to ResourceStridedSliceAssign. -type ResourceStridedSliceAssignAttr func(optionalAttr) - -// ResourceStridedSliceAssignBeginMask sets the optional begin_mask attribute to value. -// If not specified, defaults to 0 -func ResourceStridedSliceAssignBeginMask(value int64) ResourceStridedSliceAssignAttr { - return func(m optionalAttr) { - m["begin_mask"] = value - } -} - -// ResourceStridedSliceAssignEndMask sets the optional end_mask attribute to value. 
-// If not specified, defaults to 0 -func ResourceStridedSliceAssignEndMask(value int64) ResourceStridedSliceAssignAttr { - return func(m optionalAttr) { - m["end_mask"] = value - } -} - -// ResourceStridedSliceAssignEllipsisMask sets the optional ellipsis_mask attribute to value. -// If not specified, defaults to 0 -func ResourceStridedSliceAssignEllipsisMask(value int64) ResourceStridedSliceAssignAttr { - return func(m optionalAttr) { - m["ellipsis_mask"] = value - } -} +// ResourceSparseApplyMomentumAttr is an optional argument to ResourceSparseApplyMomentum. +type ResourceSparseApplyMomentumAttr func(optionalAttr) -// ResourceStridedSliceAssignNewAxisMask sets the optional new_axis_mask attribute to value. -// If not specified, defaults to 0 -func ResourceStridedSliceAssignNewAxisMask(value int64) ResourceStridedSliceAssignAttr { +// ResourceSparseApplyMomentumUseLocking sets the optional use_locking attribute to value. +// +// value: If `True`, updating of the var and accum tensors will be protected +// by a lock; otherwise the behavior is undefined, but may exhibit less +// contention. +// If not specified, defaults to false +func ResourceSparseApplyMomentumUseLocking(value bool) ResourceSparseApplyMomentumAttr { return func(m optionalAttr) { - m["new_axis_mask"] = value + m["use_locking"] = value } } -// ResourceStridedSliceAssignShrinkAxisMask sets the optional shrink_axis_mask attribute to value. -// If not specified, defaults to 0 -func ResourceStridedSliceAssignShrinkAxisMask(value int64) ResourceStridedSliceAssignAttr { +// ResourceSparseApplyMomentumUseNesterov sets the optional use_nesterov attribute to value. +// +// value: If `True`, the tensor passed to compute grad will be +// var - lr * momentum * accum, so in the end, the var you get is actually +// var - lr * momentum * accum. 
+// If not specified, defaults to false +func ResourceSparseApplyMomentumUseNesterov(value bool) ResourceSparseApplyMomentumAttr { return func(m optionalAttr) { - m["shrink_axis_mask"] = value + m["use_nesterov"] = value } } -// Assign `value` to the sliced l-value reference of `ref`. +// Update relevant entries in '*var' and '*accum' according to the momentum scheme. // -// The values of `value` are assigned to the positions in the variable -// `ref` that are selected by the slice parameters. The slice parameters -// `begin, `end`, `strides`, etc. work exactly as in `StridedSlice`. +// Set use_nesterov = True if you want to use Nesterov momentum. // -// NOTE this op currently does not support broadcasting and so `value`'s -// shape must be exactly the shape produced by the slice of `ref`. +// That is for rows we have grad for, we update var and accum as follows: +// +// accum = accum * momentum + grad +// var -= lr * accum +// +// Arguments: +// var_: Should be from a Variable(). +// accum: Should be from a Variable(). +// lr: Learning rate. Must be a scalar. +// grad: The gradient. +// indices: A vector of indices into the first dimension of var and accum. +// momentum: Momentum. Must be a scalar. // // Returns the created operation. 
-func ResourceStridedSliceAssign(scope *Scope, ref tf.Output, begin tf.Output, end tf.Output, strides tf.Output, value tf.Output, optional ...ResourceStridedSliceAssignAttr) (o *tf.Operation) { +func ResourceSparseApplyMomentum(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, grad tf.Output, indices tf.Output, momentum tf.Output, optional ...ResourceSparseApplyMomentumAttr) (o *tf.Operation) { if scope.Err() != nil { return } @@ -2258,558 +2326,549 @@ func ResourceStridedSliceAssign(scope *Scope, ref tf.Output, begin tf.Output, en a(attrs) } opspec := tf.OpSpec{ - Type: "ResourceStridedSliceAssign", + Type: "ResourceSparseApplyMomentum", Input: []tf.Input{ - ref, begin, end, strides, value, + var_, accum, lr, grad, indices, momentum, }, Attrs: attrs, } return scope.AddOperation(opspec) } -// ArgMaxAttr is an optional argument to ArgMax. -type ArgMaxAttr func(optionalAttr) - -// ArgMaxOutputType sets the optional output_type attribute to value. -// If not specified, defaults to DT_INT64 -func ArgMaxOutputType(value tf.DataType) ArgMaxAttr { - return func(m optionalAttr) { - m["output_type"] = value - } -} - -// Returns the index with the largest value across dimensions of a tensor. +// Clips tensor values to a specified min and max. // -// Note that in case of ties the identity of the return value is not guaranteed. +// Given a tensor `t`, this operation returns a tensor of the same type and +// shape as `t` with its values clipped to `clip_value_min` and `clip_value_max`. +// Any values less than `clip_value_min` are set to `clip_value_min`. Any values +// greater than `clip_value_max` are set to `clip_value_max`. // // Arguments: +// t: A `Tensor`. +// clip_value_min: A 0-D (scalar) `Tensor`, or a `Tensor` with the same shape +// as `t`. The minimum value to clip by. +// clip_value_max: A 0-D (scalar) `Tensor`, or a `Tensor` with the same shape +// as `t`. The maximum value to clip by. 
// -// dimension: int32 or int64, must be in the range `[-rank(input), rank(input))`. -// Describes which dimension of the input Tensor to reduce across. For vectors, -// use dimension = 0. -func ArgMax(scope *Scope, input tf.Output, dimension tf.Output, optional ...ArgMaxAttr) (output tf.Output) { +// Returns A clipped `Tensor` with the same shape as input 't'. +func ClipByValue(scope *Scope, t tf.Output, clip_value_min tf.Output, clip_value_max tf.Output) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "ArgMax", + Type: "ClipByValue", Input: []tf.Input{ - input, dimension, + t, clip_value_min, clip_value_max, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Returns which elements of x are finite. +// Creates a sequence of numbers. // -// @compatibility(numpy) -// Equivalent to np.isfinite -// @end_compatibility -func IsFinite(scope *Scope, x tf.Output) (y tf.Output) { +// This operation creates a sequence of numbers that begins at `start` and +// extends by increments of `delta` up to but not including `limit`. +// +// For example: +// +// ``` +// # 'start' is 3 +// # 'limit' is 18 +// # 'delta' is 3 +// tf.range(start, limit, delta) ==> [3, 6, 9, 12, 15] +// ``` +// +// Arguments: +// start: 0-D (scalar). First entry in the sequence. +// limit: 0-D (scalar). Upper limit of sequence, exclusive. +// delta: 0-D (scalar). Optional. Default is 1. Number that increments `start`. +// +// Returns 1-D. +func Range(scope *Scope, start tf.Output, limit tf.Output, delta tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "IsFinite", + Type: "Range", Input: []tf.Input{ - x, + start, limit, delta, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// MatMulAttr is an optional argument to MatMul. 
-type MatMulAttr func(optionalAttr) - -// MatMulTransposeA sets the optional transpose_a attribute to value. -// -// value: If true, "a" is transposed before multiplication. -// If not specified, defaults to false -func MatMulTransposeA(value bool) MatMulAttr { - return func(m optionalAttr) { - m["transpose_a"] = value - } -} - -// MatMulTransposeB sets the optional transpose_b attribute to value. -// -// value: If true, "b" is transposed before multiplication. -// If not specified, defaults to false -func MatMulTransposeB(value bool) MatMulAttr { - return func(m optionalAttr) { - m["transpose_b"] = value - } -} - -// Multiply the matrix "a" by the matrix "b". +// Computes gradients for SparseSegmentSqrtN. // -// The inputs must be two-dimensional matrices and the inner dimension of -// "a" (after being transposed if transpose_a is true) must match the -// outer dimension of "b" (after being transposed if transposed_b is -// true). +// Returns tensor "output" with same shape as grad, except for dimension 0 whose +// value is output_dim0. // -// *Note*: The default kernel implementation for MatMul on GPUs uses -// cublas. -func MatMul(scope *Scope, a tf.Output, b tf.Output, optional ...MatMulAttr) (product tf.Output) { +// Arguments: +// grad: gradient propagated to the SparseSegmentSqrtN op. +// indices: indices passed to the corresponding SparseSegmentSqrtN op. +// segment_ids: segment_ids passed to the corresponding SparseSegmentSqrtN op. +// output_dim0: dimension 0 of "data" passed to SparseSegmentSqrtN op. 
+func SparseSegmentSqrtNGrad(scope *Scope, grad tf.Output, indices tf.Output, segment_ids tf.Output, output_dim0 tf.Output) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "MatMul", + Type: "SparseSegmentSqrtNGrad", Input: []tf.Input{ - a, b, + grad, indices, segment_ids, output_dim0, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Selects elements from `x` or `y`, depending on `condition`. +// Computes the mean along sparse segments of a tensor. // -// The `x`, and `y` tensors must all have the same shape, and the -// output will also have that shape. +// Read @{$math_ops#Segmentation$the section on segmentation} for an explanation of +// segments. // -// The `condition` tensor must be a scalar if `x` and `y` are scalars. -// If `x` and `y` are vectors or higher rank, then `condition` must be either a -// scalar, a vector with size matching the first dimension of `x`, or must have -// the same shape as `x`. -// -// The `condition` tensor acts as a mask that chooses, based on the value at each -// element, whether the corresponding element / row in the output should be -// taken from `x` (if true) or `y` (if false). -// -// If `condition` is a vector and `x` and `y` are higher rank matrices, then -// it chooses which row (outer dimension) to copy from `x` and `y`. -// If `condition` has the same shape as `x` and `y`, then it chooses which -// element to copy from `x` and `y`. 
-// -// For example: -// -// ```python -// # 'condition' tensor is [[True, False] -// # [False, True]] -// # 't' is [[1, 2], -// # [3, 4]] -// # 'e' is [[5, 6], -// # [7, 8]] -// select(condition, t, e) # => [[1, 6], [7, 4]] -// -// -// # 'condition' tensor is [True, False] -// # 't' is [[1, 2], -// # [3, 4]] -// # 'e' is [[5, 6], -// # [7, 8]] -// select(condition, t, e) ==> [[1, 2], -// [7, 8]] -// -// ``` +// Like `SegmentMean`, but `segment_ids` can have rank less than `data`'s first +// dimension, selecting a subset of dimension 0, specified by `indices`. // // Arguments: // -// x: = A `Tensor` which may have the same shape as `condition`. -// If `condition` is rank 1, `x` may have higher rank, -// but its first dimension must match the size of `condition`. -// y: = A `Tensor` with the same type and shape as `x`. +// indices: A 1-D tensor. Has same rank as `segment_ids`. +// segment_ids: A 1-D tensor. Values should be sorted and can be repeated. // -// Returns = A `Tensor` with the same type and shape as `x` and `y`. -func Select(scope *Scope, condition tf.Output, x tf.Output, y tf.Output) (output tf.Output) { +// Returns Has same shape as data, except for dimension 0 which +// has size `k`, the number of segments. +func SparseSegmentMean(scope *Scope, data tf.Output, indices tf.Output, segment_ids tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Select", + Type: "SparseSegmentMean", Input: []tf.Input{ - condition, x, y, + data, indices, segment_ids, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Returns the truth value of x OR y element-wise. +// Pop the element at the top of the stack. // -// *NOTE*: `LogicalOr` supports broadcasting. More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func LogicalOr(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { +// Arguments: +// handle: The handle to a stack. 
+// elem_type: The type of the elem that is popped. +// +// Returns The tensor that is popped from the top of the stack. +func StackPopV2(scope *Scope, handle tf.Output, elem_type tf.DataType) (elem tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"elem_type": elem_type} opspec := tf.OpSpec{ - Type: "LogicalOr", + Type: "StackPopV2", Input: []tf.Input{ - x, y, + handle, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Compute the regularized incomplete beta integral \\(I_x(a, b)\\). +// Computes the sum along sparse segments of a tensor. // -// The regularized incomplete beta integral is defined as: +// Like `SparseSegmentSum`, but allows missing ids in `segment_ids`. If an id is +// misisng, the `output` tensor at that position will be zeroed. // +// Read @{$math_ops#Segmentation$the section on segmentation} for an explanation of +// segments. // -// \\(I_x(a, b) = \frac{B(x; a, b)}{B(a, b)}\\) +// For example: // -// where +// ```python +// c = tf.constant([[1,2,3,4], [-1,-2,-3,-4], [5,6,7,8]]) // +// tf.sparse_segment_sum_with_num_segments( +// c, tf.constant([0, 1]), tf.constant([0, 0]), num_segments=3) +// # => [[0 0 0 0] +// # [0 0 0 0] +// # [0 0 0 0]] // -// \\(B(x; a, b) = \int_0^x t^{a-1} (1 - t)^{b-1} dt\\) +// tf.sparse_segment_sum_with_num_segments(c, +// tf.constant([0, 1]), +// tf.constant([0, 2], +// num_segments=4)) +// # => [[ 1 2 3 4] +// # [ 0 0 0 0] +// # [-1 -2 -3 -4] +// # [ 0 0 0 0]] +// ``` +// +// Arguments: // +// indices: A 1-D tensor. Has same rank as `segment_ids`. +// segment_ids: A 1-D tensor. Values should be sorted and can be repeated. +// num_segments: Should equal the number of distinct segment IDs. // -// is the incomplete beta function and \\(B(a, b)\\) is the *complete* -// beta function. -func Betainc(scope *Scope, a tf.Output, b tf.Output, x tf.Output) (z tf.Output) { +// Returns Has same shape as data, except for dimension 0 which +// has size `num_segments`. 
+func SparseSegmentSumWithNumSegments(scope *Scope, data tf.Output, indices tf.Output, segment_ids tf.Output, num_segments tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Betainc", + Type: "SparseSegmentSumWithNumSegments", Input: []tf.Input{ - a, b, x, + data, indices, segment_ids, num_segments, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Computes the sum along sparse segments of a tensor divided by the sqrt of N. +// PreventGradientAttr is an optional argument to PreventGradient. +type PreventGradientAttr func(optionalAttr) + +// PreventGradientMessage sets the optional message attribute to value. // -// N is the size of the segment being reduced. +// value: Will be printed in the error when anyone tries to differentiate +// this operation. +// If not specified, defaults to "" +func PreventGradientMessage(value string) PreventGradientAttr { + return func(m optionalAttr) { + m["message"] = value + } +} + +// An identity op that triggers an error if a gradient is requested. // -// Like `SparseSegmentSqrtN`, but allows missing ids in `segment_ids`. If an id is -// misisng, the `output` tensor at that position will be zeroed. +// When executed in a graph, this op outputs its input tensor as-is. // -// Read @{$math_ops#segmentation$the section on segmentation} for an explanation of -// segments. +// When building ops to compute gradients, the TensorFlow gradient system +// will return an error when trying to lookup the gradient of this op, +// because no gradient must ever be registered for this function. This +// op exists to prevent subtle bugs from silently returning unimplemented +// gradients in some corner cases. // // Arguments: +// input: any tensor. // -// indices: A 1-D tensor. Has same rank as `segment_ids`. -// segment_ids: A 1-D tensor. Values should be sorted and can be repeated. -// num_segments: Should equal the number of distinct segment IDs. 
-// -// Returns Has same shape as data, except for dimension 0 which -// has size `k`, the number of segments. -func SparseSegmentSqrtNWithNumSegments(scope *Scope, data tf.Output, indices tf.Output, segment_ids tf.Output, num_segments tf.Output) (output tf.Output) { +// Returns the same input tensor. +func PreventGradient(scope *Scope, input tf.Output, optional ...PreventGradientAttr) (output tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "SparseSegmentSqrtNWithNumSegments", + Type: "PreventGradient", Input: []tf.Input{ - data, indices, segment_ids, num_segments, + input, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Compute the upper regularized incomplete Gamma function `Q(a, x)`. -// -// The upper regularized incomplete Gamma function is defined as: -// -// \\(Q(a, x) = Gamma(a, x) / Gamma(a) = 1 - P(a, x)\\) -// -// where -// -// \\(Gamma(a, x) = int_{x}^{\infty} t^{a-1} exp(-t) dt\\) -// -// is the upper incomplete Gama function. -// -// Note, above `P(a, x)` (`Igamma`) is the lower regularized complete -// Gamma function. -func Igammac(scope *Scope, a tf.Output, x tf.Output) (z tf.Output) { +// Computes asin of x element-wise. +func Asin(scope *Scope, x tf.Output) (y tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Igammac", + Type: "Asin", Input: []tf.Input{ - a, x, + x, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// LogUniformCandidateSamplerAttr is an optional argument to LogUniformCandidateSampler. -type LogUniformCandidateSamplerAttr func(optionalAttr) +// SparseToDenseAttr is an optional argument to SparseToDense. +type SparseToDenseAttr func(optionalAttr) -// LogUniformCandidateSamplerSeed sets the optional seed attribute to value. +// SparseToDenseValidateIndices sets the optional validate_indices attribute to value. 
// -// value: If either seed or seed2 are set to be non-zero, the random number -// generator is seeded by the given seed. Otherwise, it is seeded by a -// random seed. -// If not specified, defaults to 0 -func LogUniformCandidateSamplerSeed(value int64) LogUniformCandidateSamplerAttr { +// value: If true, indices are checked to make sure they are sorted in +// lexicographic order and that there are no repeats. +// If not specified, defaults to true +func SparseToDenseValidateIndices(value bool) SparseToDenseAttr { return func(m optionalAttr) { - m["seed"] = value + m["validate_indices"] = value } } -// LogUniformCandidateSamplerSeed2 sets the optional seed2 attribute to value. +// Converts a sparse representation into a dense tensor. // -// value: An second seed to avoid seed collision. -// If not specified, defaults to 0 -func LogUniformCandidateSamplerSeed2(value int64) LogUniformCandidateSamplerAttr { - return func(m optionalAttr) { - m["seed2"] = value - } -} - -// Generates labels for candidate sampling with a log-uniform distribution. +// Builds an array `dense` with shape `output_shape` such that // -// See explanations of candidate sampling and the data formats at -// go/candidate-sampling. +// ``` +// # If sparse_indices is scalar +// dense[i] = (i == sparse_indices ? sparse_values : default_value) // -// For each batch, this op picks a single set of sampled candidate labels. +// # If sparse_indices is a vector, then for each i +// dense[sparse_indices[i]] = sparse_values[i] // -// The advantages of sampling candidates per-batch are simplicity and the -// possibility of efficient dense matrix multiplication. The disadvantage is that -// the sampled candidates must be chosen independently of the context and of the -// true labels. +// # If sparse_indices is an n by d matrix, then for each i in [0, n) +// dense[sparse_indices[i][0], ..., sparse_indices[i][d-1]] = sparse_values[i] +// ``` +// +// All other values in `dense` are set to `default_value`. 
If `sparse_values` is a +// scalar, all sparse indices are set to this single value. +// +// Indices should be sorted in lexicographic order, and indices must not +// contain any repeats. If `validate_indices` is true, these properties +// are checked during execution. // // Arguments: -// true_classes: A batch_size * num_true matrix, in which each row contains the -// IDs of the num_true target_classes in the corresponding original label. -// num_true: Number of true labels per context. -// num_sampled: Number of candidates to randomly sample. -// unique: If unique is true, we sample with rejection, so that all sampled -// candidates in a batch are unique. This requires some approximation to -// estimate the post-rejection sampling probabilities. -// range_max: The sampler will sample integers from the interval [0, range_max). +// sparse_indices: 0-D, 1-D, or 2-D. `sparse_indices[i]` contains the complete +// index where `sparse_values[i]` will be placed. +// output_shape: 1-D. Shape of the dense output tensor. +// sparse_values: 1-D. Values corresponding to each row of `sparse_indices`, +// or a scalar value to be used for all sparse indices. +// default_value: Scalar value to set for indices not specified in +// `sparse_indices`. // -// Returns A vector of length num_sampled, in which each element is -// the ID of a sampled candidate.A batch_size * num_true matrix, representing -// the number of times each candidate is expected to occur in a batch -// of sampled candidates. If unique=true, then this is a probability.A vector of length num_sampled, for each sampled -// candidate representing the number of times the candidate is expected -// to occur in a batch of sampled candidates. If unique=true, then this is a -// probability. 
-func LogUniformCandidateSampler(scope *Scope, true_classes tf.Output, num_true int64, num_sampled int64, unique bool, range_max int64, optional ...LogUniformCandidateSamplerAttr) (sampled_candidates tf.Output, true_expected_count tf.Output, sampled_expected_count tf.Output) { +// Returns Dense output tensor of shape `output_shape`. +func SparseToDense(scope *Scope, sparse_indices tf.Output, output_shape tf.Output, sparse_values tf.Output, default_value tf.Output, optional ...SparseToDenseAttr) (dense tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"num_true": num_true, "num_sampled": num_sampled, "unique": unique, "range_max": range_max} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "LogUniformCandidateSampler", + Type: "SparseToDense", Input: []tf.Input{ - true_classes, + sparse_indices, output_shape, sparse_values, default_value, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return op.Output(0) } -// Returns (x - y)(x - y) element-wise. +// Computes the sum along sparse segments of a tensor. // -// *NOTE*: `SquaredDifference` supports broadcasting. More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func SquaredDifference(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SquaredDifference", - Input: []tf.Input{ - x, y, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Forwards the input to the output. +// Read @{$math_ops#Segmentation$the section on segmentation} for an explanation of +// segments. // -// This operator represents the loop termination condition used by the -// "pivot" switches of a loop. +// Like `SegmentSum`, but `segment_ids` can have rank less than `data`'s first +// dimension, selecting a subset of dimension 0, specified by `indices`. 
+// +// For example: +// +// ```python +// c = tf.constant([[1,2,3,4], [-1,-2,-3,-4], [5,6,7,8]]) +// +// # Select two rows, one segment. +// tf.sparse_segment_sum(c, tf.constant([0, 1]), tf.constant([0, 0])) +// # => [[0 0 0 0]] +// +// # Select two rows, two segment. +// tf.sparse_segment_sum(c, tf.constant([0, 1]), tf.constant([0, 1])) +// # => [[ 1 2 3 4] +// # [-1 -2 -3 -4]] +// +// # Select all rows, two segments. +// tf.sparse_segment_sum(c, tf.constant([0, 1, 2]), tf.constant([0, 0, 1])) +// # => [[0 0 0 0] +// # [5 6 7 8]] +// +// # Which is equivalent to: +// tf.segment_sum(c, tf.constant([0, 0, 1])) +// ``` // // Arguments: -// input: A boolean scalar, representing the branch predicate of the Switch op. // -// Returns The same tensor as `input`. -func LoopCond(scope *Scope, input tf.Output) (output tf.Output) { +// indices: A 1-D tensor. Has same rank as `segment_ids`. +// segment_ids: A 1-D tensor. Values should be sorted and can be repeated. +// +// Returns Has same shape as data, except for dimension 0 which +// has size `k`, the number of segments. +func SparseSegmentSum(scope *Scope, data tf.Output, indices tf.Output, segment_ids tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "LoopCond", + Type: "SparseSegmentSum", Input: []tf.Input{ - input, + data, indices, segment_ids, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// ApproximateEqualAttr is an optional argument to ApproximateEqual. -type ApproximateEqualAttr func(optionalAttr) - -// ApproximateEqualTolerance sets the optional tolerance attribute to value. -// If not specified, defaults to 1e-05 -func ApproximateEqualTolerance(value float32) ApproximateEqualAttr { - return func(m optionalAttr) { - m["tolerance"] = value - } -} - -// Returns the truth value of abs(x-y) < tolerance element-wise. 
-func ApproximateEqual(scope *Scope, x tf.Output, y tf.Output, optional ...ApproximateEqualAttr) (z tf.Output) { +// Computes hyperbolic sine of x element-wise. +func Sinh(scope *Scope, x tf.Output) (y tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "ApproximateEqual", + Type: "Sinh", Input: []tf.Input{ - x, y, + x, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Returns x / y element-wise. +// Computes the minimum along segments of a tensor. // -// *NOTE*: `Div` supports broadcasting. More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func Div(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { +// Read @{$math_ops#segmentation$the section on segmentation} for an explanation of +// segments. +// +// This operator is similar to the unsorted segment sum operator found +// [(here)](../../../api_docs/python/math_ops.md#UnsortedSegmentSum). +// Instead of computing the sum over segments, it computes the minimum such that: +// +// \\(output_i = \min_j data_j\\) where min is over `j` such +// that `segment_ids[j] == i`. +// +// If the minimum is empty for a given segment ID `i`, it outputs the largest +// possible value for the specific numeric type, +// `output[i] = numeric_limits::max()`. +// +// Arguments: +// +// segment_ids: A 1-D tensor whose rank is equal to the rank of `data`'s +// first dimension. +// +// +// Returns Has same shape as data, except for dimension 0 which +// has size `num_segments`. +func UnsortedSegmentMin(scope *Scope, data tf.Output, segment_ids tf.Output, num_segments tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Div", + Type: "UnsortedSegmentMin", Input: []tf.Input{ - x, y, + data, segment_ids, num_segments, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Returns x * y element-wise. 
-// -// *NOTE*: `Multiply` supports broadcasting. More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func Mul(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { +// Computes rectified linear 6: `min(max(features, 0), 6)`. +func Relu6(scope *Scope, features tf.Output) (activations tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Mul", + Type: "Relu6", Input: []tf.Input{ - x, y, + features, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// BiasAddAttr is an optional argument to BiasAdd. -type BiasAddAttr func(optionalAttr) - -// BiasAddDataFormat sets the optional data_format attribute to value. +// Computes the sum along segments of a tensor. // -// value: Specify the data format of the input and output data. With the -// default format "NHWC", the bias tensor will be added to the last dimension -// of the value tensor. -// Alternatively, the format could be "NCHW", the data storage order of: -// [batch, in_channels, in_height, in_width]. -// The tensor will be added to "in_channels", the third-to-the-last -// dimension. -// If not specified, defaults to "NHWC" -func BiasAddDataFormat(value string) BiasAddAttr { - return func(m optionalAttr) { - m["data_format"] = value - } -} - -// Adds `bias` to `value`. +// Read @{$math_ops#Segmentation$the section on segmentation} for an explanation of +// segments. // -// This is a special case of `tf.add` where `bias` is restricted to be 1-D. -// Broadcasting is supported, so `value` may have any number of dimensions. +// Computes a tensor such that +// `(output[i] = sum_{j...} data[j...]` where the sum is over tuples `j...` such +// that `segment_ids[j...] == i`. Unlike `SegmentSum`, `segment_ids` +// need not be sorted and need not cover all values in the full +// range of valid values. +// +// If the sum is empty for a given segment ID `i`, `output[i] = 0`. 
+// If the given segment ID `i` is negative, the value is dropped and will not be +// added to the sum of the segment. +// +// `num_segments` should equal the number of distinct segment IDs. +// +//
+// +//
// // Arguments: -// value: Any number of dimensions. -// bias: 1-D with size the last dimension of `value`. // -// Returns Broadcasted sum of `value` and `bias`. -func BiasAdd(scope *Scope, value tf.Output, bias tf.Output, optional ...BiasAddAttr) (output tf.Output) { +// segment_ids: A tensor whose shape is a prefix of `data.shape`. +// +// +// Returns Has same shape as data, except for the first `segment_ids.rank` +// dimensions, which are replaced with a single dimension which has size +// `num_segments`. +func UnsortedSegmentSum(scope *Scope, data tf.Output, segment_ids tf.Output, num_segments tf.Output) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "BiasAdd", + Type: "UnsortedSegmentSum", Input: []tf.Input{ - value, bias, + data, segment_ids, num_segments, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// SparseReduceSumSparseAttr is an optional argument to SparseReduceSumSparse. -type SparseReduceSumSparseAttr func(optionalAttr) +// ResourceStridedSliceAssignAttr is an optional argument to ResourceStridedSliceAssign. +type ResourceStridedSliceAssignAttr func(optionalAttr) -// SparseReduceSumSparseKeepDims sets the optional keep_dims attribute to value. -// -// value: If true, retain reduced dimensions with length 1. -// If not specified, defaults to false -func SparseReduceSumSparseKeepDims(value bool) SparseReduceSumSparseAttr { +// ResourceStridedSliceAssignBeginMask sets the optional begin_mask attribute to value. +// If not specified, defaults to 0 +func ResourceStridedSliceAssignBeginMask(value int64) ResourceStridedSliceAssignAttr { return func(m optionalAttr) { - m["keep_dims"] = value + m["begin_mask"] = value } } -// Computes the sum of elements across dimensions of a SparseTensor. -// -// This Op takes a SparseTensor and is the sparse counterpart to -// `tf.reduce_sum()`. 
In contrast to SparseReduceSum, this Op returns a -// SparseTensor. +// ResourceStridedSliceAssignEndMask sets the optional end_mask attribute to value. +// If not specified, defaults to 0 +func ResourceStridedSliceAssignEndMask(value int64) ResourceStridedSliceAssignAttr { + return func(m optionalAttr) { + m["end_mask"] = value + } +} + +// ResourceStridedSliceAssignEllipsisMask sets the optional ellipsis_mask attribute to value. +// If not specified, defaults to 0 +func ResourceStridedSliceAssignEllipsisMask(value int64) ResourceStridedSliceAssignAttr { + return func(m optionalAttr) { + m["ellipsis_mask"] = value + } +} + +// ResourceStridedSliceAssignNewAxisMask sets the optional new_axis_mask attribute to value. +// If not specified, defaults to 0 +func ResourceStridedSliceAssignNewAxisMask(value int64) ResourceStridedSliceAssignAttr { + return func(m optionalAttr) { + m["new_axis_mask"] = value + } +} + +// ResourceStridedSliceAssignShrinkAxisMask sets the optional shrink_axis_mask attribute to value. +// If not specified, defaults to 0 +func ResourceStridedSliceAssignShrinkAxisMask(value int64) ResourceStridedSliceAssignAttr { + return func(m optionalAttr) { + m["shrink_axis_mask"] = value + } +} + +// Assign `value` to the sliced l-value reference of `ref`. // -// Reduces `sp_input` along the dimensions given in `reduction_axes`. Unless -// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in -// `reduction_axes`. If `keep_dims` is true, the reduced dimensions are retained -// with length 1. +// The values of `value` are assigned to the positions in the variable +// `ref` that are selected by the slice parameters. The slice parameters +// `begin`, `end`, `strides`, etc. work exactly as in `StridedSlice`. // -// If `reduction_axes` has no entries, all dimensions are reduced, and a tensor -// with a single element is returned. Additionally, the axes can be negative, -// which are interpreted according to the indexing rules in Python.
+// NOTE this op currently does not support broadcasting and so `value`'s +// shape must be exactly the shape produced by the slice of `ref`. // -// Arguments: -// input_indices: 2-D. `N x R` matrix with the indices of non-empty values in a -// SparseTensor, possibly not in canonical ordering. -// input_values: 1-D. `N` non-empty values corresponding to `input_indices`. -// input_shape: 1-D. Shape of the input SparseTensor. -// reduction_axes: 1-D. Length-`K` vector containing the reduction axes. -func SparseReduceSumSparse(scope *Scope, input_indices tf.Output, input_values tf.Output, input_shape tf.Output, reduction_axes tf.Output, optional ...SparseReduceSumSparseAttr) (output_indices tf.Output, output_values tf.Output, output_shape tf.Output) { +// Returns the created operation. +func ResourceStridedSliceAssign(scope *Scope, ref tf.Output, begin tf.Output, end tf.Output, strides tf.Output, value tf.Output, optional ...ResourceStridedSliceAssignAttr) (o *tf.Operation) { if scope.Err() != nil { return } @@ -2818,83 +2877,106 @@ func SparseReduceSumSparse(scope *Scope, input_indices tf.Output, input_values t a(attrs) } opspec := tf.OpSpec{ - Type: "SparseReduceSumSparse", + Type: "ResourceStridedSliceAssign", Input: []tf.Input{ - input_indices, input_values, input_shape, reduction_axes, + ref, begin, end, strides, value, }, Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return scope.AddOperation(opspec) } -// Returns x + y element-wise. -// -// *NOTE*: `Add` supports broadcasting. `AddN` does not. More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func AddV2(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { +// ArgMaxAttr is an optional argument to ArgMax. +type ArgMaxAttr func(optionalAttr) + +// ArgMaxOutputType sets the optional output_type attribute to value. 
+// If not specified, defaults to DT_INT64 +func ArgMaxOutputType(value tf.DataType) ArgMaxAttr { + return func(m optionalAttr) { + m["output_type"] = value + } +} + +// Returns the index with the largest value across dimensions of a tensor. +// +// Note that in case of ties the identity of the return value is not guaranteed. +// +// Arguments: +// +// dimension: int32 or int64, must be in the range `[-rank(input), rank(input))`. +// Describes which dimension of the input Tensor to reduce across. For vectors, +// use dimension = 0. +func ArgMax(scope *Scope, input tf.Output, dimension tf.Output, optional ...ArgMaxAttr) (output tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "AddV2", + Type: "ArgMax", Input: []tf.Input{ - x, y, + input, dimension, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Returns x + y element-wise. +// Returns which elements of x are finite. // -// *NOTE*: `Add` supports broadcasting. `AddN` does not. More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func Add(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { +// @compatibility(numpy) +// Equivalent to np.isfinite +// @end_compatibility +func IsFinite(scope *Scope, x tf.Output) (y tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Add", + Type: "IsFinite", Input: []tf.Input{ - x, y, + x, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// NthElementAttr is an optional argument to NthElement. -type NthElementAttr func(optionalAttr) +// MatMulAttr is an optional argument to MatMul. +type MatMulAttr func(optionalAttr) -// NthElementReverse sets the optional reverse attribute to value. +// MatMulTransposeA sets the optional transpose_a attribute to value. // -// value: When set to True, find the nth-largest value in the vector and vice -// versa. 
+// value: If true, "a" is transposed before multiplication. // If not specified, defaults to false -func NthElementReverse(value bool) NthElementAttr { +func MatMulTransposeA(value bool) MatMulAttr { return func(m optionalAttr) { - m["reverse"] = value + m["transpose_a"] = value } } -// Finds values of the `n`-th order statistic for the last dimension. -// -// If the input is a vector (rank-1), finds the entries which is the nth-smallest -// value in the vector and outputs their values as scalar tensor. -// -// For matrices (resp. higher rank input), computes the entries which is the -// nth-smallest value in each row (resp. vector along the last dimension). Thus, +// MatMulTransposeB sets the optional transpose_b attribute to value. // -// values.shape = input.shape[:-1] +// value: If true, "b" is transposed before multiplication. +// If not specified, defaults to false +func MatMulTransposeB(value bool) MatMulAttr { + return func(m optionalAttr) { + m["transpose_b"] = value + } +} + +// Multiply the matrix "a" by the matrix "b". // -// Arguments: -// input: 1-D or higher with last dimension at least `n+1`. -// n: 0-D. Position of sorted vector to select along the last dimension (along -// each row for matrices). Valid range of n is `[0, input.shape[:-1])` +// The inputs must be two-dimensional matrices and the inner dimension of +// "a" (after being transposed if transpose_a is true) must match the +// outer dimension of "b" (after being transposed if transpose_b is +// true). // -// Returns The `n`-th order statistic along each last dimensional slice. -func NthElement(scope *Scope, input tf.Output, n tf.Output, optional ...NthElementAttr) (values tf.Output) { +// *Note*: The default kernel implementation for MatMul on GPUs uses +// cublas.
+func MatMul(scope *Scope, a tf.Output, b tf.Output, optional ...MatMulAttr) (product tf.Output) { if scope.Err() != nil { return } @@ -2903,9 +2985,9 @@ func NthElement(scope *Scope, input tf.Output, n tf.Output, optional ...NthEleme a(attrs) } opspec := tf.OpSpec{ - Type: "NthElement", + Type: "MatMul", Input: []tf.Input{ - input, n, + a, b, }, Attrs: attrs, } @@ -2913,293 +2995,346 @@ func NthElement(scope *Scope, input tf.Output, n tf.Output, optional ...NthEleme return op.Output(0) } -// Computes the Max along segments of a tensor. +// Selects elements from `x` or `y`, depending on `condition`. // -// Read @{$math_ops#segmentation$the section on segmentation} for an explanation of -// segments. +// The `x`, and `y` tensors must all have the same shape, and the +// output will also have that shape. // -// This operator is similar to the [unsorted segment sum operator](../../../api_docs/python/math_ops.md#UnsortedSegmentSum). -// Instead of computing the sum over segments, it computes the maximum -// such that: +// The `condition` tensor must be a scalar if `x` and `y` are scalars. +// If `x` and `y` are vectors or higher rank, then `condition` must be either a +// scalar, a vector with size matching the first dimension of `x`, or must have +// the same shape as `x`. // -// \\(output_i = \max_j data_j\\) where max is over `j` such -// that `segment_ids[j] == i`. +// The `condition` tensor acts as a mask that chooses, based on the value at each +// element, whether the corresponding element / row in the output should be +// taken from `x` (if true) or `y` (if false). // -// If the maximum is empty for a given segment ID `i`, it outputs the smallest possible value for specific numeric type, -// `output[i] = numeric_limits::min()`. +// If `condition` is a vector and `x` and `y` are higher rank matrices, then +// it chooses which row (outer dimension) to copy from `x` and `y`. 
+// If `condition` has the same shape as `x` and `y`, then it chooses which +// element to copy from `x` and `y`. // -//
-// -//
+// For example: // -// Arguments: +// ```python +// # 'condition' tensor is [[True, False] +// # [False, True]] +// # 't' is [[1, 2], +// # [3, 4]] +// # 'e' is [[5, 6], +// # [7, 8]] +// select(condition, t, e) # => [[1, 6], [7, 4]] // -// segment_ids: A 1-D tensor whose rank is equal to the rank of `data`'s -// first dimension. // +// # 'condition' tensor is [True, False] +// # 't' is [[1, 2], +// # [3, 4]] +// # 'e' is [[5, 6], +// # [7, 8]] +// select(condition, t, e) ==> [[1, 2], +// [7, 8]] +// +// ``` // -// Returns Has same shape as data, except for dimension 0 which -// has size `num_segments`. -func UnsortedSegmentMax(scope *Scope, data tf.Output, segment_ids tf.Output, num_segments tf.Output) (output tf.Output) { +// Arguments: +// +// x: = A `Tensor` which may have the same shape as `condition`. +// If `condition` is rank 1, `x` may have higher rank, +// but its first dimension must match the size of `condition`. +// y: = A `Tensor` with the same type and shape as `x`. +// +// Returns = A `Tensor` with the same type and shape as `x` and `y`. +func Select(scope *Scope, condition tf.Output, x tf.Output, y tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "UnsortedSegmentMax", + Type: "Select", Input: []tf.Input{ - data, segment_ids, num_segments, + condition, x, y, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Computes exponential of x element-wise. \\(y = e^x\\). -func Exp(scope *Scope, x tf.Output) (y tf.Output) { +// Returns the truth value of x OR y element-wise. +// +// *NOTE*: `LogicalOr` supports broadcasting. 
More about broadcasting +// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +func LogicalOr(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Exp", + Type: "LogicalOr", Input: []tf.Input{ - x, + x, y, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Returns an element-wise indication of the sign of a number. +// Compute the regularized incomplete beta integral \\(I_x(a, b)\\). // -// `y = sign(x) = -1` if `x < 0`; 0 if `x == 0`; 1 if `x > 0`. +// The regularized incomplete beta integral is defined as: // -// For complex numbers, `y = sign(x) = x / |x|` if `x != 0`, otherwise `y = 0`. -func Sign(scope *Scope, x tf.Output) (y tf.Output) { +// +// \\(I_x(a, b) = \frac{B(x; a, b)}{B(a, b)}\\) +// +// where +// +// +// \\(B(x; a, b) = \int_0^x t^{a-1} (1 - t)^{b-1} dt\\) +// +// +// is the incomplete beta function and \\(B(a, b)\\) is the *complete* +// beta function. +func Betainc(scope *Scope, a tf.Output, b tf.Output, x tf.Output) (z tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Sign", + Type: "Betainc", Input: []tf.Input{ - x, + a, b, x, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// ArgMinAttr is an optional argument to ArgMin. -type ArgMinAttr func(optionalAttr) - -// ArgMinOutputType sets the optional output_type attribute to value. -// If not specified, defaults to DT_INT64 -func ArgMinOutputType(value tf.DataType) ArgMinAttr { - return func(m optionalAttr) { - m["output_type"] = value +// Return a tensor with the same shape and contents as the input tensor or value. +func Identity(scope *Scope, input tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Identity", + Input: []tf.Input{ + input, + }, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// Returns the index with the smallest value across dimensions of a tensor. 
-// -// Note that in case of ties the identity of the return value is not guaranteed. -// -// Arguments: +// Computes arctangent of `y/x` element-wise, respecting signs of the arguments. // -// dimension: int32 or int64, must be in the range `[-rank(input), rank(input))`. -// Describes which dimension of the input Tensor to reduce across. For vectors, -// use dimension = 0. -func ArgMin(scope *Scope, input tf.Output, dimension tf.Output, optional ...ArgMinAttr) (output tf.Output) { +// This is the angle \( \theta \in [-\pi, \pi] \) such that +// \[ x = r \cos(\theta) \] +// and +// \[ y = r \sin(\theta) \] +// where \(r = \sqrt(x^2 + y^2) \). +func Atan2(scope *Scope, y tf.Output, x tf.Output) (z tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "ArgMin", + Type: "Atan2", Input: []tf.Input{ - input, dimension, + y, x, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Convert the quantized 'input' tensor into a lower-precision 'output', using the -// -// output range specified with 'requested_output_min' and 'requested_output_max'. -// -// [input_min, input_max] are scalar floats that specify the range for the float -// interpretation of the 'input' data. For example, if input_min is -1.0f and -// input_max is 1.0f, and we are dealing with quint16 quantized data, then a 0 -// value in the 16-bit data should be interpreted as -1.0f, and a 65535 means 1.0f. +// Creates a dataset that passes a sliding window over `input_dataset`. // // Arguments: // -// input_min: The float value that the minimum quantized input value represents. -// input_max: The float value that the maximum quantized input value represents. -// requested_output_min: The float value that the minimum quantized output value represents. -// requested_output_max: The float value that the maximum quantized output value represents. -// out_type: The type of the output. 
Should be a lower bit depth than Tinput. +// window_size: A scalar representing the number of elements in the +// sliding window. +// stride: A scalar representing the steps moving the sliding window +// forward in one iteration. It must be in `[1, window_size)`. // -// Returns The requested_output_min value is copied into this output.The requested_output_max value is copied into this output. -func Requantize(scope *Scope, input tf.Output, input_min tf.Output, input_max tf.Output, requested_output_min tf.Output, requested_output_max tf.Output, out_type tf.DataType) (output tf.Output, output_min tf.Output, output_max tf.Output) { +// +func SlideDataset(scope *Scope, input_dataset tf.Output, window_size tf.Output, stride tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"out_type": out_type} + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} opspec := tf.OpSpec{ - Type: "Requantize", + Type: "SlideDataset", Input: []tf.Input{ - input, input_min, input_max, requested_output_min, requested_output_max, + input_dataset, window_size, stride, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return op.Output(0) } -// Computes the determinant of one or more square matrices. +// Computes the sum along sparse segments of a tensor divided by the sqrt of N. // -// The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions -// form square matrices. The output is a tensor containing the determinants -// for all input submatrices `[..., :, :]`. +// N is the size of the segment being reduced. +// +// Like `SparseSegmentSqrtN`, but allows missing ids in `segment_ids`. If an id is +// missing, the `output` tensor at that position will be zeroed. +// +// Read @{$math_ops#Segmentation$the section on segmentation} for an explanation of +// segments.
// // Arguments: -// input: Shape is `[..., M, M]`. // -// Returns Shape is `[...]`. -func MatrixDeterminant(scope *Scope, input tf.Output) (output tf.Output) { +// indices: A 1-D tensor. Has same rank as `segment_ids`. +// segment_ids: A 1-D tensor. Values should be sorted and can be repeated. +// num_segments: Should equal the number of distinct segment IDs. +// +// Returns Has same shape as data, except for dimension 0 which +// has size `k`, the number of segments. +func SparseSegmentSqrtNWithNumSegments(scope *Scope, data tf.Output, indices tf.Output, segment_ids tf.Output, num_segments tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "MatrixDeterminant", + Type: "SparseSegmentSqrtNWithNumSegments", Input: []tf.Input{ - input, + data, indices, segment_ids, num_segments, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Computes sin of x element-wise. -func Sin(scope *Scope, x tf.Output) (y tf.Output) { +// Compute the upper regularized incomplete Gamma function `Q(a, x)`. +// +// The upper regularized incomplete Gamma function is defined as: +// +// \\(Q(a, x) = Gamma(a, x) / Gamma(a) = 1 - P(a, x)\\) +// +// where +// +// \\(Gamma(a, x) = \int_{x}^{\infty} t^{a-1} exp(-t) dt\\) +// +// is the upper incomplete Gamma function. +// +// Note, above `P(a, x)` (`Igamma`) is the lower regularized incomplete +// Gamma function. +func Igammac(scope *Scope, a tf.Output, x tf.Output) (z tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Sin", + Type: "Igammac", Input: []tf.Input{ - x, + a, x, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Computes the complementary error function of `x` element-wise. -func Erfc(scope *Scope, x tf.Output) (y tf.Output) { +// ApproximateEqualAttr is an optional argument to ApproximateEqual. +type ApproximateEqualAttr func(optionalAttr) + +// ApproximateEqualTolerance sets the optional tolerance attribute to value.
+// If not specified, defaults to 1e-05 +func ApproximateEqualTolerance(value float32) ApproximateEqualAttr { + return func(m optionalAttr) { + m["tolerance"] = value + } +} + +// Returns the truth value of abs(x-y) < tolerance element-wise. +func ApproximateEqual(scope *Scope, x tf.Output, y tf.Output, optional ...ApproximateEqualAttr) (z tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "Erfc", + Type: "ApproximateEqual", Input: []tf.Input{ - x, + x, y, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Computes Psi, the derivative of Lgamma (the log of the absolute value of +// Returns x / y element-wise. // -// `Gamma(x)`), element-wise. -func Digamma(scope *Scope, x tf.Output) (y tf.Output) { +// *NOTE*: `Div` supports broadcasting. More about broadcasting +// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +func Div(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Digamma", + Type: "Div", Input: []tf.Input{ - x, + x, y, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Conv2DBackpropFilterAttr is an optional argument to Conv2DBackpropFilter. -type Conv2DBackpropFilterAttr func(optionalAttr) - -// Conv2DBackpropFilterUseCudnnOnGpu sets the optional use_cudnn_on_gpu attribute to value. -// If not specified, defaults to true -func Conv2DBackpropFilterUseCudnnOnGpu(value bool) Conv2DBackpropFilterAttr { - return func(m optionalAttr) { - m["use_cudnn_on_gpu"] = value +// Returns x * y element-wise. +// +// *NOTE*: `Multiply` supports broadcasting. 
More about broadcasting +// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +func Mul(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Mul", + Input: []tf.Input{ + x, y, + }, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// Conv2DBackpropFilterDataFormat sets the optional data_format attribute to value. +// BiasAddAttr is an optional argument to BiasAdd. +type BiasAddAttr func(optionalAttr) + +// BiasAddDataFormat sets the optional data_format attribute to value. // // value: Specify the data format of the input and output data. With the -// default format "NHWC", the data is stored in the order of: -// [batch, in_height, in_width, in_channels]. +// default format "NHWC", the bias tensor will be added to the last dimension +// of the value tensor. // Alternatively, the format could be "NCHW", the data storage order of: // [batch, in_channels, in_height, in_width]. +// The tensor will be added to "in_channels", the third-to-the-last +// dimension. // If not specified, defaults to "NHWC" -func Conv2DBackpropFilterDataFormat(value string) Conv2DBackpropFilterAttr { +func BiasAddDataFormat(value string) BiasAddAttr { return func(m optionalAttr) { m["data_format"] = value } } -// Conv2DBackpropFilterDilations sets the optional dilations attribute to value. +// Adds `bias` to `value`. // -// value: 1-D tensor of length 4. The dilation factor for each dimension of -// `input`. If set to k > 1, there will be k-1 skipped cells between each filter -// element on that dimension. The dimension order is determined by the value of -// `data_format`, see above for details. Dilations in the batch and depth -// dimensions must be 1. 
-// If not specified, defaults to -func Conv2DBackpropFilterDilations(value []int64) Conv2DBackpropFilterAttr { - return func(m optionalAttr) { - m["dilations"] = value - } -} - -// Computes the gradients of convolution with respect to the filter. +// This is a special case of `tf.add` where `bias` is restricted to be 1-D. +// Broadcasting is supported, so `value` may have any number of dimensions. // // Arguments: -// input: 4-D with shape `[batch, in_height, in_width, in_channels]`. -// filter_sizes: An integer vector representing the tensor shape of `filter`, -// where `filter` is a 4-D -// `[filter_height, filter_width, in_channels, out_channels]` tensor. -// out_backprop: 4-D with shape `[batch, out_height, out_width, out_channels]`. -// Gradients w.r.t. the output of the convolution. -// strides: The stride of the sliding window for each dimension of the input -// of the convolution. Must be in the same order as the dimension specified with -// format. -// padding: The type of padding algorithm to use. +// value: Any number of dimensions. +// bias: 1-D with size the last dimension of `value`. // -// Returns 4-D with shape -// `[filter_height, filter_width, in_channels, out_channels]`. Gradient w.r.t. -// the `filter` input of the convolution. -func Conv2DBackpropFilter(scope *Scope, input tf.Output, filter_sizes tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...Conv2DBackpropFilterAttr) (output tf.Output) { +// Returns Broadcasted sum of `value` and `bias`. 
+func BiasAdd(scope *Scope, value tf.Output, bias tf.Output, optional ...BiasAddAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"strides": strides, "padding": padding} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "Conv2DBackpropFilter", + Type: "BiasAdd", Input: []tf.Input{ - input, filter_sizes, out_backprop, + value, bias, }, Attrs: attrs, } @@ -3207,138 +3342,212 @@ func Conv2DBackpropFilter(scope *Scope, input tf.Output, filter_sizes tf.Output, return op.Output(0) } -// Returns the number of work units this Reader has finished processing. +// SparseReduceSumSparseAttr is an optional argument to SparseReduceSumSparse. +type SparseReduceSumSparseAttr func(optionalAttr) + +// SparseReduceSumSparseKeepDims sets the optional keep_dims attribute to value. +// +// value: If true, retain reduced dimensions with length 1. +// If not specified, defaults to false +func SparseReduceSumSparseKeepDims(value bool) SparseReduceSumSparseAttr { + return func(m optionalAttr) { + m["keep_dims"] = value + } +} + +// Computes the sum of elements across dimensions of a SparseTensor. +// +// This Op takes a SparseTensor and is the sparse counterpart to +// `tf.reduce_sum()`. In contrast to SparseReduceSum, this Op returns a +// SparseTensor. +// +// Reduces `sp_input` along the dimensions given in `reduction_axes`. Unless +// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in +// `reduction_axes`. If `keep_dims` is true, the reduced dimensions are retained +// with length 1. +// +// If `reduction_axes` has no entries, all dimensions are reduced, and a tensor +// with a single element is returned. Additionally, the axes can be negative, +// which are interpreted according to the indexing rules in Python. // // Arguments: -// reader_handle: Handle to a Reader. 
-func ReaderNumWorkUnitsCompletedV2(scope *Scope, reader_handle tf.Output) (units_completed tf.Output) { +// input_indices: 2-D. `N x R` matrix with the indices of non-empty values in a +// SparseTensor, possibly not in canonical ordering. +// input_values: 1-D. `N` non-empty values corresponding to `input_indices`. +// input_shape: 1-D. Shape of the input SparseTensor. +// reduction_axes: 1-D. Length-`K` vector containing the reduction axes. +func SparseReduceSumSparse(scope *Scope, input_indices tf.Output, input_values tf.Output, input_shape tf.Output, reduction_axes tf.Output, optional ...SparseReduceSumSparseAttr) (output_indices tf.Output, output_values tf.Output, output_shape tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "ReaderNumWorkUnitsCompletedV2", + Type: "SparseReduceSumSparse", Input: []tf.Input{ - reader_handle, + input_indices, input_values, input_shape, reduction_axes, }, + Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1), op.Output(2) } -// Computes the log of the absolute value of `Gamma(x)` element-wise. -func Lgamma(scope *Scope, x tf.Output) (y tf.Output) { +// Returns x + y element-wise. +// +// *NOTE*: `Add` supports broadcasting. `AddN` does not. More about broadcasting +// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +func AddV2(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Lgamma", + Type: "AddV2", Input: []tf.Input{ - x, + x, y, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Computes the reverse mode backpropagated gradient of the Cholesky algorithm. -// -// For an explanation see "Differentiation of the Cholesky algorithm" by -// Iain Murray http://arxiv.org/abs/1602.07527. -// -// Arguments: -// l: Output of batch Cholesky algorithm l = cholesky(A). 
Shape is `[..., M, M]`. -// Algorithm depends only on lower triangular part of the innermost matrices of -// this tensor. -// grad: df/dl where f is some scalar function. Shape is `[..., M, M]`. -// Algorithm depends only on lower triangular part of the innermost matrices of -// this tensor. +// Returns x + y element-wise. // -// Returns Symmetrized version of df/dA . Shape is `[..., M, M]` -func CholeskyGrad(scope *Scope, l tf.Output, grad tf.Output) (output tf.Output) { +// *NOTE*: `Add` supports broadcasting. `AddN` does not. More about broadcasting +// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +func Add(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "CholeskyGrad", + Type: "Add", Input: []tf.Input{ - l, grad, + x, y, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Computes the mean along sparse segments of a tensor. +// NthElementAttr is an optional argument to NthElement. +type NthElementAttr func(optionalAttr) + +// NthElementReverse sets the optional reverse attribute to value. // -// Like `SparseSegmentMean`, but allows missing ids in `segment_ids`. If an id is -// misisng, the `output` tensor at that position will be zeroed. +// value: When set to True, find the nth-largest value in the vector and vice +// versa. +// If not specified, defaults to false +func NthElementReverse(value bool) NthElementAttr { + return func(m optionalAttr) { + m["reverse"] = value + } +} + +// Finds values of the `n`-th order statistic for the last dimension. // -// Read @{$math_ops#segmentation$the section on segmentation} for an explanation of -// segments. +// If the input is a vector (rank-1), finds the entries which is the nth-smallest +// value in the vector and outputs their values as scalar tensor. // -// Arguments: +// For matrices (resp. higher rank input), computes the entries which is the +// nth-smallest value in each row (resp. 
vector along the last dimension). Thus, // -// indices: A 1-D tensor. Has same rank as `segment_ids`. -// segment_ids: A 1-D tensor. Values should be sorted and can be repeated. -// num_segments: Should equal the number of distinct segment IDs. +// values.shape = input.shape[:-1] // -// Returns Has same shape as data, except for dimension 0 which has size -// `num_segments`. -func SparseSegmentMeanWithNumSegments(scope *Scope, data tf.Output, indices tf.Output, segment_ids tf.Output, num_segments tf.Output) (output tf.Output) { +// Arguments: +// input: 1-D or higher with last dimension at least `n+1`. +// n: 0-D. Position of sorted vector to select along the last dimension (along +// each row for matrices). Valid range of n is `[0, input.shape[:-1])` +// +// Returns The `n`-th order statistic along each last dimensional slice. +func NthElement(scope *Scope, input tf.Output, n tf.Output, optional ...NthElementAttr) (values tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "SparseSegmentMeanWithNumSegments", + Type: "NthElement", Input: []tf.Input{ - data, indices, segment_ids, num_segments, + input, n, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Computes hyperbolic cosine of x element-wise. -func Cosh(scope *Scope, x tf.Output) (y tf.Output) { +// Computes the maximum along segments of a tensor. +// +// Read @{$math_ops#Segmentation$the section on segmentation} for an explanation of +// segments. +// +// This operator is similar to the unsorted segment sum operator found +// [(here)](../../../api_docs/python/math_ops.md#UnsortedSegmentSum). +// Instead of computing the sum over segments, it computes the maximum such that: +// +// \\(output_i = \max_j data_j\\) where max is over `j` such +// that `segment_ids[j] == i`. 
+// +// If the maximum is empty for a given segment ID `i`, it outputs the smallest +// possible value for the specific numeric type, +// `output[i] = numeric_limits::lowest()`. +// +//
+// +//
+// +// Arguments: +// +// segment_ids: A 1-D tensor whose rank is equal to the rank of `data`'s +// first dimension. +// +// +// Returns Has same shape as data, except for dimension 0 which +// has size `num_segments`. +func UnsortedSegmentMax(scope *Scope, data tf.Output, segment_ids tf.Output, num_segments tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Cosh", + Type: "UnsortedSegmentMax", Input: []tf.Input{ - x, + data, segment_ids, num_segments, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Creates a dataset that emits each dim-0 slice of `components` once. -func TensorSliceDataset(scope *Scope, components []tf.Output, output_shapes []tf.Shape) (handle tf.Output) { +// Computes exponential of x element-wise. \\(y = e^x\\). +func Exp(scope *Scope, x tf.Output) (y tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"output_shapes": output_shapes} opspec := tf.OpSpec{ - Type: "TensorSliceDataset", + Type: "Exp", Input: []tf.Input{ - tf.OutputList(components), + x, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Computes natural logarithm of (1 + x) element-wise. +// Returns an element-wise indication of the sign of a number. // -// I.e., \\(y = \log_e (1 + x)\\). -func Log1p(scope *Scope, x tf.Output) (y tf.Output) { +// `y = sign(x) = -1` if `x < 0`; 0 if `x == 0`; 1 if `x > 0`. +// +// For complex numbers, `y = sign(x) = x / |x|` if `x != 0`, otherwise `y = 0`. +func Sign(scope *Scope, x tf.Output) (y tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Log1p", + Type: "Sign", Input: []tf.Input{ x, }, @@ -3347,83 +3556,110 @@ func Log1p(scope *Scope, x tf.Output) (y tf.Output) { return op.Output(0) } -// Computes rectified linear 6 gradients for a Relu6 operation. +// ArgMinAttr is an optional argument to ArgMin. 
+type ArgMinAttr func(optionalAttr) + +// ArgMinOutputType sets the optional output_type attribute to value. +// If not specified, defaults to DT_INT64 +func ArgMinOutputType(value tf.DataType) ArgMinAttr { + return func(m optionalAttr) { + m["output_type"] = value + } +} + +// Returns the index with the smallest value across dimensions of a tensor. +// +// Note that in case of ties the identity of the return value is not guaranteed. // // Arguments: -// gradients: The backpropagated gradients to the corresponding Relu6 operation. -// features: The features passed as input to the corresponding Relu6 operation, or -// its output; using either one produces the same result. // -// Returns The gradients: -// `gradients * (features > 0) * (features < 6)`. -func Relu6Grad(scope *Scope, gradients tf.Output, features tf.Output) (backprops tf.Output) { +// dimension: int32 or int64, must be in the range `[-rank(input), rank(input))`. +// Describes which dimension of the input Tensor to reduce across. For vectors, +// use dimension = 0. +func ArgMin(scope *Scope, input tf.Output, dimension tf.Output, optional ...ArgMinAttr) (output tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "Relu6Grad", + Type: "ArgMin", Input: []tf.Input{ - gradients, features, + input, dimension, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// ResizeBicubicAttr is an optional argument to ResizeBicubic. -type ResizeBicubicAttr func(optionalAttr) - -// ResizeBicubicAlignCorners sets the optional align_corners attribute to value. +// Convert the quantized 'input' tensor into a lower-precision 'output', using the // -// value: If true, rescale input by (new_height - 1) / (height - 1), which -// exactly aligns the 4 corners of images and resized images. If false, rescale -// by new_height / height. Treat similarly the width dimension. 
-// If not specified, defaults to false -func ResizeBicubicAlignCorners(value bool) ResizeBicubicAttr { - return func(m optionalAttr) { - m["align_corners"] = value +// output range specified with 'requested_output_min' and 'requested_output_max'. +// +// [input_min, input_max] are scalar floats that specify the range for the float +// interpretation of the 'input' data. For example, if input_min is -1.0f and +// input_max is 1.0f, and we are dealing with quint16 quantized data, then a 0 +// value in the 16-bit data should be interpreted as -1.0f, and a 65535 means 1.0f. +// +// Arguments: +// +// input_min: The float value that the minimum quantized input value represents. +// input_max: The float value that the maximum quantized input value represents. +// requested_output_min: The float value that the minimum quantized output value represents. +// requested_output_max: The float value that the maximum quantized output value represents. +// out_type: The type of the output. Should be a lower bit depth than Tinput. +// +// Returns The requested_output_min value is copied into this output.The requested_output_max value is copied into this output. +func Requantize(scope *Scope, input tf.Output, input_min tf.Output, input_max tf.Output, requested_output_min tf.Output, requested_output_max tf.Output, out_type tf.DataType) (output tf.Output, output_min tf.Output, output_max tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"out_type": out_type} + opspec := tf.OpSpec{ + Type: "Requantize", + Input: []tf.Input{ + input, input_min, input_max, requested_output_min, requested_output_max, + }, + Attrs: attrs, } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2) } -// Resize `images` to `size` using bicubic interpolation. +// Computes the determinant of one or more square matrices. // -// Input images can be of different types but output images are always float. 
+// The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions +// form square matrices. The output is a tensor containing the determinants +// for all input submatrices `[..., :, :]`. // // Arguments: -// images: 4-D with shape `[batch, height, width, channels]`. -// size: = A 1-D int32 Tensor of 2 elements: `new_height, new_width`. The -// new size for the images. +// input: Shape is `[..., M, M]`. // -// Returns 4-D with shape -// `[batch, new_height, new_width, channels]`. -func ResizeBicubic(scope *Scope, images tf.Output, size tf.Output, optional ...ResizeBicubicAttr) (resized_images tf.Output) { +// Returns Shape is `[...]`. +func MatrixDeterminant(scope *Scope, input tf.Output) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "ResizeBicubic", + Type: "MatrixDeterminant", Input: []tf.Input{ - images, size, + input, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Computes natural logarithm of x element-wise. -// -// I.e., \\(y = \log_e x\\). -func Log(scope *Scope, x tf.Output) (y tf.Output) { +// Computes sin of x element-wise. +func Sin(scope *Scope, x tf.Output) (y tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Log", + Type: "Sin", Input: []tf.Input{ x, }, @@ -3432,16 +3668,13 @@ func Log(scope *Scope, x tf.Output) (y tf.Output) { return op.Output(0) } -// Rounds the values of a tensor to the nearest integer, element-wise. -// -// Rounds half to even. Also known as bankers rounding. If you want to round -// according to the current system rounding mode use std::cint. -func Round(scope *Scope, x tf.Output) (y tf.Output) { +// Computes the complementary error function of `x` element-wise. 
+func Erfc(scope *Scope, x tf.Output) (y tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Round", + Type: "Erfc", Input: []tf.Input{ x, }, @@ -3450,156 +3683,154 @@ func Round(scope *Scope, x tf.Output) (y tf.Output) { return op.Output(0) } -// RecordInputAttr is an optional argument to RecordInput. -type RecordInputAttr func(optionalAttr) - -// RecordInputFileRandomSeed sets the optional file_random_seed attribute to value. +// Computes Psi, the derivative of Lgamma (the log of the absolute value of // -// value: Random seeds used to produce randomized records. -// If not specified, defaults to 301 -func RecordInputFileRandomSeed(value int64) RecordInputAttr { - return func(m optionalAttr) { - m["file_random_seed"] = value +// `Gamma(x)`), element-wise. +func Digamma(scope *Scope, x tf.Output) (y tf.Output) { + if scope.Err() != nil { + return } -} - -// RecordInputFileShuffleShiftRatio sets the optional file_shuffle_shift_ratio attribute to value. -// -// value: Shifts the list of files after the list is randomly -// shuffled. -// If not specified, defaults to 0 -func RecordInputFileShuffleShiftRatio(value float32) RecordInputAttr { - return func(m optionalAttr) { - m["file_shuffle_shift_ratio"] = value + opspec := tf.OpSpec{ + Type: "Digamma", + Input: []tf.Input{ + x, + }, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// RecordInputFileBufferSize sets the optional file_buffer_size attribute to value. +// Shuffle dimensions of x according to a permutation. // -// value: The randomization shuffling buffer. -// If not specified, defaults to 10000 -func RecordInputFileBufferSize(value int64) RecordInputAttr { - return func(m optionalAttr) { - m["file_buffer_size"] = value +// The output `y` has the same rank as `x`. 
The shapes of `x` and `y` satisfy: +// `y.shape[i] == x.shape[perm[i]] for i in [0, 1, ..., rank(x) - 1]` +func Transpose(scope *Scope, x tf.Output, perm tf.Output) (y tf.Output) { + if scope.Err() != nil { + return } -} - -// RecordInputFileParallelism sets the optional file_parallelism attribute to value. -// -// value: How many sstables are opened and concurrently iterated over. -// If not specified, defaults to 16 -func RecordInputFileParallelism(value int64) RecordInputAttr { - return func(m optionalAttr) { - m["file_parallelism"] = value + opspec := tf.OpSpec{ + Type: "Transpose", + Input: []tf.Input{ + x, perm, + }, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// RecordInputBatchSize sets the optional batch_size attribute to value. +// MinAttr is an optional argument to Min. +type MinAttr func(optionalAttr) + +// MinKeepDims sets the optional keep_dims attribute to value. // -// value: The batch size. -// If not specified, defaults to 32 -func RecordInputBatchSize(value int64) RecordInputAttr { +// value: If true, retain reduced dimensions with length 1. +// If not specified, defaults to false +func MinKeepDims(value bool) MinAttr { return func(m optionalAttr) { - m["batch_size"] = value + m["keep_dims"] = value } } -// RecordInputCompressionType sets the optional compression_type attribute to value. +// Computes the minimum of elements across dimensions of a tensor. // -// value: The type of compression for the file. Currently ZLIB and -// GZIP are supported. Defaults to none. -// If not specified, defaults to "" -func RecordInputCompressionType(value string) RecordInputAttr { - return func(m optionalAttr) { - m["compression_type"] = value - } -} - -// Emits randomized records. +// Reduces `input` along the dimensions given in `axis`. Unless +// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in +// `axis`. If `keep_dims` is true, the reduced dimensions are +// retained with length 1. 
// // Arguments: -// file_pattern: Glob pattern for the data files. +// input: The tensor to reduce. +// axis: The dimensions to reduce. Must be in the range +// `[-rank(input), rank(input))`. // -// Returns A tensor of shape [batch_size]. -func RecordInput(scope *Scope, file_pattern string, optional ...RecordInputAttr) (records tf.Output) { +// Returns The reduced tensor. +func Min(scope *Scope, input tf.Output, axis tf.Output, optional ...MinAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"file_pattern": file_pattern} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "RecordInput", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes reciprocal of square root of x element-wise. -// -// I.e., \\(y = 1 / \sqrt{x}\\). -func Rsqrt(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Rsqrt", + Type: "Min", Input: []tf.Input{ - x, + input, axis, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// MatrixInverseAttr is an optional argument to MatrixInverse. -type MatrixInverseAttr func(optionalAttr) +// Conv2DBackpropFilterAttr is an optional argument to Conv2DBackpropFilter. +type Conv2DBackpropFilterAttr func(optionalAttr) -// MatrixInverseAdjoint sets the optional adjoint attribute to value. -// If not specified, defaults to false -func MatrixInverseAdjoint(value bool) MatrixInverseAttr { +// Conv2DBackpropFilterUseCudnnOnGpu sets the optional use_cudnn_on_gpu attribute to value. +// If not specified, defaults to true +func Conv2DBackpropFilterUseCudnnOnGpu(value bool) Conv2DBackpropFilterAttr { return func(m optionalAttr) { - m["adjoint"] = value + m["use_cudnn_on_gpu"] = value } } -// Computes the inverse of one or more square invertible matrices or their -// -// adjoints (conjugate transposes). 
-// -// The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions -// form square matrices. The output is a tensor of the same shape as the input -// containing the inverse for all input submatrices `[..., :, :]`. +// Conv2DBackpropFilterDataFormat sets the optional data_format attribute to value. // -// The op uses LU decomposition with partial pivoting to compute the inverses. +// value: Specify the data format of the input and output data. With the +// default format "NHWC", the data is stored in the order of: +// [batch, in_height, in_width, in_channels]. +// Alternatively, the format could be "NCHW", the data storage order of: +// [batch, in_channels, in_height, in_width]. +// If not specified, defaults to "NHWC" +func Conv2DBackpropFilterDataFormat(value string) Conv2DBackpropFilterAttr { + return func(m optionalAttr) { + m["data_format"] = value + } +} + +// Conv2DBackpropFilterDilations sets the optional dilations attribute to value. // -// If a matrix is not invertible there is no guarantee what the op does. It -// may detect the condition and raise an exception or it may simply return a -// garbage result. +// value: 1-D tensor of length 4. The dilation factor for each dimension of +// `input`. If set to k > 1, there will be k-1 skipped cells between each filter +// element on that dimension. The dimension order is determined by the value of +// `data_format`, see above for details. Dilations in the batch and depth +// dimensions must be 1. +// If not specified, defaults to +func Conv2DBackpropFilterDilations(value []int64) Conv2DBackpropFilterAttr { + return func(m optionalAttr) { + m["dilations"] = value + } +} + +// Computes the gradients of convolution with respect to the filter. // // Arguments: -// input: Shape is `[..., M, M]`. -// -// Returns Shape is `[..., M, M]`. +// input: 4-D with shape `[batch, in_height, in_width, in_channels]`. 
+// filter_sizes: An integer vector representing the tensor shape of `filter`, +// where `filter` is a 4-D +// `[filter_height, filter_width, in_channels, out_channels]` tensor. +// out_backprop: 4-D with shape `[batch, out_height, out_width, out_channels]`. +// Gradients w.r.t. the output of the convolution. +// strides: The stride of the sliding window for each dimension of the input +// of the convolution. Must be in the same order as the dimension specified with +// format. +// padding: The type of padding algorithm to use. // -// @compatibility(numpy) -// Equivalent to np.linalg.inv -// @end_compatibility -func MatrixInverse(scope *Scope, input tf.Output, optional ...MatrixInverseAttr) (output tf.Output) { +// Returns 4-D with shape +// `[filter_height, filter_width, in_channels, out_channels]`. Gradient w.r.t. +// the `filter` input of the convolution. +func Conv2DBackpropFilter(scope *Scope, input tf.Output, filter_sizes tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...Conv2DBackpropFilterAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"strides": strides, "padding": padding} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "MatrixInverse", + Type: "Conv2DBackpropFilter", Input: []tf.Input{ - input, + input, filter_sizes, out_backprop, }, Attrs: attrs, } @@ -3607,231 +3838,195 @@ func MatrixInverse(scope *Scope, input tf.Output, optional ...MatrixInverseAttr) return op.Output(0) } -// Computes square of x element-wise. +// Returns the number of work units this Reader has finished processing. // -// I.e., \\(y = x * x = x^2\\). -func Square(scope *Scope, x tf.Output) (y tf.Output) { +// Arguments: +// reader_handle: Handle to a Reader. 
+func ReaderNumWorkUnitsCompletedV2(scope *Scope, reader_handle tf.Output) (units_completed tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Square", + Type: "ReaderNumWorkUnitsCompletedV2", Input: []tf.Input{ - x, + reader_handle, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Computes exponential linear: `exp(features) - 1` if < 0, `features` otherwise. -// -// See [Fast and Accurate Deep Network Learning by Exponential Linear Units (ELUs) -// ](http://arxiv.org/abs/1511.07289) -func Elu(scope *Scope, features tf.Output) (activations tf.Output) { +// Computes the log of the absolute value of `Gamma(x)` element-wise. +func Lgamma(scope *Scope, x tf.Output) (y tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Elu", + Type: "Lgamma", Input: []tf.Input{ - features, + x, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Computes the reciprocal of x element-wise. +// Computes the reverse mode backpropagated gradient of the Cholesky algorithm. // -// I.e., \\(y = 1 / x\\). -func Reciprocal(scope *Scope, x tf.Output) (y tf.Output) { +// For an explanation see "Differentiation of the Cholesky algorithm" by +// Iain Murray http://arxiv.org/abs/1602.07527. +// +// Arguments: +// l: Output of batch Cholesky algorithm l = cholesky(A). Shape is `[..., M, M]`. +// Algorithm depends only on lower triangular part of the innermost matrices of +// this tensor. +// grad: df/dl where f is some scalar function. Shape is `[..., M, M]`. +// Algorithm depends only on lower triangular part of the innermost matrices of +// this tensor. +// +// Returns Symmetrized version of df/dA . 
Shape is `[..., M, M]` +func CholeskyGrad(scope *Scope, l tf.Output, grad tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Reciprocal", + Type: "CholeskyGrad", Input: []tf.Input{ - x, + l, grad, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// OrderedMapClearAttr is an optional argument to OrderedMapClear. -type OrderedMapClearAttr func(optionalAttr) - -// OrderedMapClearCapacity sets the optional capacity attribute to value. -// If not specified, defaults to 0 +// Computes the mean along sparse segments of a tensor. // -// REQUIRES: value >= 0 -func OrderedMapClearCapacity(value int64) OrderedMapClearAttr { - return func(m optionalAttr) { - m["capacity"] = value - } -} - -// OrderedMapClearMemoryLimit sets the optional memory_limit attribute to value. -// If not specified, defaults to 0 +// Like `SparseSegmentMean`, but allows missing ids in `segment_ids`. If an id is +// misisng, the `output` tensor at that position will be zeroed. // -// REQUIRES: value >= 0 -func OrderedMapClearMemoryLimit(value int64) OrderedMapClearAttr { - return func(m optionalAttr) { - m["memory_limit"] = value - } -} - -// OrderedMapClearContainer sets the optional container attribute to value. -// If not specified, defaults to "" -func OrderedMapClearContainer(value string) OrderedMapClearAttr { - return func(m optionalAttr) { - m["container"] = value +// Read @{$math_ops#Segmentation$the section on segmentation} for an explanation of +// segments. +// +// Arguments: +// +// indices: A 1-D tensor. Has same rank as `segment_ids`. +// segment_ids: A 1-D tensor. Values should be sorted and can be repeated. +// num_segments: Should equal the number of distinct segment IDs. +// +// Returns Has same shape as data, except for dimension 0 which has size +// `num_segments`. 
+func SparseSegmentMeanWithNumSegments(scope *Scope, data tf.Output, indices tf.Output, segment_ids tf.Output, num_segments tf.Output) (output tf.Output) { + if scope.Err() != nil { + return } -} - -// OrderedMapClearSharedName sets the optional shared_name attribute to value. -// If not specified, defaults to "" -func OrderedMapClearSharedName(value string) OrderedMapClearAttr { - return func(m optionalAttr) { - m["shared_name"] = value + opspec := tf.OpSpec{ + Type: "SparseSegmentMeanWithNumSegments", + Input: []tf.Input{ + data, indices, segment_ids, num_segments, + }, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// Op removes all elements in the underlying container. -// -// Returns the created operation. -func OrderedMapClear(scope *Scope, dtypes []tf.DataType, optional ...OrderedMapClearAttr) (o *tf.Operation) { +// Computes hyperbolic cosine of x element-wise. +func Cosh(scope *Scope, x tf.Output) (y tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"dtypes": dtypes} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "OrderedMapClear", - - Attrs: attrs, + Type: "Cosh", + Input: []tf.Input{ + x, + }, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0) } -// Computes the reciprocal of x element-wise. -// -// I.e., \\(y = 1 / x\\). -func Inv(scope *Scope, x tf.Output) (y tf.Output) { +// Creates a dataset that emits each dim-0 slice of `components` once. +func TensorSliceDataset(scope *Scope, components []tf.Output, output_shapes []tf.Shape) (handle tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"output_shapes": output_shapes} opspec := tf.OpSpec{ - Type: "Inv", + Type: "TensorSliceDataset", Input: []tf.Input{ - x, + tf.OutputList(components), }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// ComplexAbsAttr is an optional argument to ComplexAbs. 
-type ComplexAbsAttr func(optionalAttr) - -// ComplexAbsTout sets the optional Tout attribute to value. -// If not specified, defaults to DT_FLOAT -func ComplexAbsTout(value tf.DataType) ComplexAbsAttr { - return func(m optionalAttr) { - m["Tout"] = value - } -} - -// Computes the complex absolute value of a tensor. +// Computes natural logarithm of (1 + x) element-wise. // -// Given a tensor `x` of complex numbers, this operation returns a tensor of type -// `float` or `double` that is the absolute value of each element in `x`. All -// elements in `x` must be complex numbers of the form \\(a + bj\\). The absolute -// value is computed as \\( \sqrt{a^2 + b^2}\\). -func ComplexAbs(scope *Scope, x tf.Output, optional ...ComplexAbsAttr) (y tf.Output) { +// I.e., \\(y = \log_e (1 + x)\\). +func Log1p(scope *Scope, x tf.Output) (y tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "ComplexAbs", + Type: "Log1p", Input: []tf.Input{ x, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Returns the truth value of x AND y element-wise. +// Computes rectified linear 6 gradients for a Relu6 operation. // -// *NOTE*: `LogicalAnd` supports broadcasting. More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func LogicalAnd(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "LogicalAnd", - Input: []tf.Input{ - x, y, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Cast x of type SrcT to y of DstT. -func Cast(scope *Scope, x tf.Output, DstT tf.DataType) (y tf.Output) { +// Arguments: +// gradients: The backpropagated gradients to the corresponding Relu6 operation. +// features: The features passed as input to the corresponding Relu6 operation, or +// its output; using either one produces the same result. 
+// +// Returns The gradients: +// `gradients * (features > 0) * (features < 6)`. +func Relu6Grad(scope *Scope, gradients tf.Output, features tf.Output) (backprops tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"DstT": DstT} opspec := tf.OpSpec{ - Type: "Cast", + Type: "Relu6Grad", Input: []tf.Input{ - x, + gradients, features, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// MaxAttr is an optional argument to Max. -type MaxAttr func(optionalAttr) +// ResizeBicubicAttr is an optional argument to ResizeBicubic. +type ResizeBicubicAttr func(optionalAttr) -// MaxKeepDims sets the optional keep_dims attribute to value. +// ResizeBicubicAlignCorners sets the optional align_corners attribute to value. // -// value: If true, retain reduced dimensions with length 1. +// value: If true, the centers of the 4 corner pixels of the input and output tensors are +// aligned, preserving the values at the corner pixels. Defaults to false. // If not specified, defaults to false -func MaxKeepDims(value bool) MaxAttr { +func ResizeBicubicAlignCorners(value bool) ResizeBicubicAttr { return func(m optionalAttr) { - m["keep_dims"] = value + m["align_corners"] = value } } -// Computes the maximum of elements across dimensions of a tensor. +// Resize `images` to `size` using bicubic interpolation. // -// Reduces `input` along the dimensions given in `axis`. Unless -// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in -// `axis`. If `keep_dims` is true, the reduced dimensions are -// retained with length 1. +// Input images can be of different types but output images are always float. // // Arguments: -// input: The tensor to reduce. -// axis: The dimensions to reduce. Must be in the range -// `[-rank(input), rank(input))`. +// images: 4-D with shape `[batch, height, width, channels]`. +// size: = A 1-D int32 Tensor of 2 elements: `new_height, new_width`. The +// new size for the images. 
// -// Returns The reduced tensor. -func Max(scope *Scope, input tf.Output, axis tf.Output, optional ...MaxAttr) (output tf.Output) { +// Returns 4-D with shape +// `[batch, new_height, new_width, channels]`. +func ResizeBicubic(scope *Scope, images tf.Output, size tf.Output, optional ...ResizeBicubicAttr) (resized_images tf.Output) { if scope.Err() != nil { return } @@ -3840,9 +4035,9 @@ func Max(scope *Scope, input tf.Output, axis tf.Output, optional ...MaxAttr) (ou a(attrs) } opspec := tf.OpSpec{ - Type: "Max", + Type: "ResizeBicubic", Input: []tf.Input{ - input, axis, + images, size, }, Attrs: attrs, } @@ -3850,315 +4045,343 @@ func Max(scope *Scope, input tf.Output, axis tf.Output, optional ...MaxAttr) (ou return op.Output(0) } -// Quantized Batch normalization. -// -// This op is deprecated and will be removed in the future. Prefer -// `tf.nn.batch_normalization`. +// Computes natural logarithm of x element-wise. // -// Arguments: -// t: A 4D input Tensor. -// t_min: The value represented by the lowest quantized input. -// t_max: The value represented by the highest quantized input. -// m: A 1D mean Tensor with size matching the last dimension of t. -// This is the first output from tf.nn.moments, -// or a saved moving average thereof. -// m_min: The value represented by the lowest quantized mean. -// m_max: The value represented by the highest quantized mean. -// v: A 1D variance Tensor with size matching the last dimension of t. -// This is the second output from tf.nn.moments, -// or a saved moving average thereof. -// v_min: The value represented by the lowest quantized variance. -// v_max: The value represented by the highest quantized variance. -// beta: A 1D beta Tensor with size matching the last dimension of t. -// An offset to be added to the normalized tensor. -// beta_min: The value represented by the lowest quantized offset. -// beta_max: The value represented by the highest quantized offset. 
-// gamma: A 1D gamma Tensor with size matching the last dimension of t. -// If "scale_after_normalization" is true, this tensor will be multiplied -// with the normalized tensor. -// gamma_min: The value represented by the lowest quantized gamma. -// gamma_max: The value represented by the highest quantized gamma. +// I.e., \\(y = \log_e x\\). +func Log(scope *Scope, x tf.Output) (y tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Log", + Input: []tf.Input{ + x, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Rounds the values of a tensor to the nearest integer, element-wise. // -// variance_epsilon: A small float number to avoid dividing by 0. -// scale_after_normalization: A bool indicating whether the resulted tensor -// needs to be multiplied with gamma. -func QuantizedBatchNormWithGlobalNormalization(scope *Scope, t tf.Output, t_min tf.Output, t_max tf.Output, m tf.Output, m_min tf.Output, m_max tf.Output, v tf.Output, v_min tf.Output, v_max tf.Output, beta tf.Output, beta_min tf.Output, beta_max tf.Output, gamma tf.Output, gamma_min tf.Output, gamma_max tf.Output, out_type tf.DataType, variance_epsilon float32, scale_after_normalization bool) (result tf.Output, result_min tf.Output, result_max tf.Output) { +// Rounds half to even. Also known as bankers rounding. If you want to round +// according to the current system rounding mode use std::rint.
+func Round(scope *Scope, x tf.Output) (y tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"out_type": out_type, "variance_epsilon": variance_epsilon, "scale_after_normalization": scale_after_normalization} opspec := tf.OpSpec{ - Type: "QuantizedBatchNormWithGlobalNormalization", + Type: "Round", Input: []tf.Input{ - t, t_min, t_max, m, m_min, m_max, v, v_min, v_max, beta, beta_min, beta_max, gamma, gamma_min, gamma_max, + x, }, - Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return op.Output(0) } -// HistogramFixedWidthAttr is an optional argument to HistogramFixedWidth. -type HistogramFixedWidthAttr func(optionalAttr) +// RecordInputAttr is an optional argument to RecordInput. +type RecordInputAttr func(optionalAttr) -// HistogramFixedWidthDtype sets the optional dtype attribute to value. -// If not specified, defaults to DT_INT32 -func HistogramFixedWidthDtype(value tf.DataType) HistogramFixedWidthAttr { +// RecordInputFileRandomSeed sets the optional file_random_seed attribute to value. +// +// value: Random seeds used to produce randomized records. +// If not specified, defaults to 301 +func RecordInputFileRandomSeed(value int64) RecordInputAttr { return func(m optionalAttr) { - m["dtype"] = value + m["file_random_seed"] = value } } -// Return histogram of values. +// RecordInputFileShuffleShiftRatio sets the optional file_shuffle_shift_ratio attribute to value. // -// Given the tensor `values`, this operation returns a rank 1 histogram counting -// the number of entries in `values` that fall into every bin. The bins are -// equal width and determined by the arguments `value_range` and `nbins`. +// value: Shifts the list of files after the list is randomly +// shuffled. 
+// If not specified, defaults to 0 +func RecordInputFileShuffleShiftRatio(value float32) RecordInputAttr { + return func(m optionalAttr) { + m["file_shuffle_shift_ratio"] = value + } +} + +// RecordInputFileBufferSize sets the optional file_buffer_size attribute to value. // -// ```python -// # Bins will be: (-inf, 1), [1, 2), [2, 3), [3, 4), [4, inf) -// nbins = 5 -// value_range = [0.0, 5.0] -// new_values = [-1.0, 0.0, 1.5, 2.0, 5.0, 15] +// value: The randomization shuffling buffer. +// If not specified, defaults to 10000 +func RecordInputFileBufferSize(value int64) RecordInputAttr { + return func(m optionalAttr) { + m["file_buffer_size"] = value + } +} + +// RecordInputFileParallelism sets the optional file_parallelism attribute to value. // -// with tf.get_default_session() as sess: -// hist = tf.histogram_fixed_width(new_values, value_range, nbins=5) -// variables.global_variables_initializer().run() -// sess.run(hist) => [2, 1, 1, 0, 2] -// ``` +// value: How many sstables are opened and concurrently iterated over. +// If not specified, defaults to 16 +func RecordInputFileParallelism(value int64) RecordInputAttr { + return func(m optionalAttr) { + m["file_parallelism"] = value + } +} + +// RecordInputBatchSize sets the optional batch_size attribute to value. +// +// value: The batch size. +// If not specified, defaults to 32 +func RecordInputBatchSize(value int64) RecordInputAttr { + return func(m optionalAttr) { + m["batch_size"] = value + } +} + +// RecordInputCompressionType sets the optional compression_type attribute to value. +// +// value: The type of compression for the file. Currently ZLIB and +// GZIP are supported. Defaults to none. +// If not specified, defaults to "" +func RecordInputCompressionType(value string) RecordInputAttr { + return func(m optionalAttr) { + m["compression_type"] = value + } +} + +// Emits randomized records. // // Arguments: -// values: Numeric `Tensor`. -// value_range: Shape [2] `Tensor` of same `dtype` as `values`. 
-// values <= value_range[0] will be mapped to hist[0], -// values >= value_range[1] will be mapped to hist[-1]. -// nbins: Scalar `int32 Tensor`. Number of histogram bins. +// file_pattern: Glob pattern for the data files. // -// Returns A 1-D `Tensor` holding histogram of values. -func HistogramFixedWidth(scope *Scope, values tf.Output, value_range tf.Output, nbins tf.Output, optional ...HistogramFixedWidthAttr) (out tf.Output) { +// Returns A tensor of shape [batch_size]. +func RecordInput(scope *Scope, file_pattern string, optional ...RecordInputAttr) (records tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"file_pattern": file_pattern} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "HistogramFixedWidth", - Input: []tf.Input{ - values, value_range, nbins, - }, + Type: "RecordInput", + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Adds Tensor 'bias' to Tensor 'input' for Quantized types. -// -// Broadcasts the values of bias on dimensions 0..N-2 of 'input'. -// -// Arguments: +// Computes reciprocal of square root of x element-wise. // -// bias: A 1D bias Tensor with size matching the last dimension of 'input'. -// min_input: The float value that the lowest quantized input value represents. -// max_input: The float value that the highest quantized input value represents. -// min_bias: The float value that the lowest quantized bias value represents. -// max_bias: The float value that the highest quantized bias value represents. -// -// -// Returns The float value that the lowest quantized output value represents.The float value that the highest quantized output value represents. -func QuantizedBiasAdd(scope *Scope, input tf.Output, bias tf.Output, min_input tf.Output, max_input tf.Output, min_bias tf.Output, max_bias tf.Output, out_type tf.DataType) (output tf.Output, min_out tf.Output, max_out tf.Output) { +// I.e., \\(y = 1 / \sqrt{x}\\). 
+func Rsqrt(scope *Scope, x tf.Output) (y tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"out_type": out_type} opspec := tf.OpSpec{ - Type: "QuantizedBiasAdd", + Type: "Rsqrt", Input: []tf.Input{ - input, bias, min_input, max_input, min_bias, max_bias, + x, }, - Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return op.Output(0) } -// Produces the average pool of the input tensor for quantized types. +// MatrixInverseAttr is an optional argument to MatrixInverse. +type MatrixInverseAttr func(optionalAttr) + +// MatrixInverseAdjoint sets the optional adjoint attribute to value. +// If not specified, defaults to false +func MatrixInverseAdjoint(value bool) MatrixInverseAttr { + return func(m optionalAttr) { + m["adjoint"] = value + } +} + +// Computes the inverse of one or more square invertible matrices or their +// +// adjoints (conjugate transposes). +// +// The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions +// form square matrices. The output is a tensor of the same shape as the input +// containing the inverse for all input submatrices `[..., :, :]`. +// +// The op uses LU decomposition with partial pivoting to compute the inverses. +// +// If a matrix is not invertible there is no guarantee what the op does. It +// may detect the condition and raise an exception or it may simply return a +// garbage result. // // Arguments: -// input: 4-D with shape `[batch, height, width, channels]`. -// min_input: The float value that the lowest quantized input value represents. -// max_input: The float value that the highest quantized input value represents. -// ksize: The size of the window for each dimension of the input tensor. -// The length must be 4 to match the number of dimensions of the input. -// strides: The stride of the sliding window for each dimension of the input -// tensor. The length must be 4 to match the number of dimensions of the input. 
-// padding: The type of padding algorithm to use. +// input: Shape is `[..., M, M]`. // -// Returns The float value that the lowest quantized output value represents.The float value that the highest quantized output value represents. -func QuantizedAvgPool(scope *Scope, input tf.Output, min_input tf.Output, max_input tf.Output, ksize []int64, strides []int64, padding string) (output tf.Output, min_output tf.Output, max_output tf.Output) { +// Returns Shape is `[..., M, M]`. +// +// @compatibility(numpy) +// Equivalent to np.linalg.inv +// @end_compatibility +func MatrixInverse(scope *Scope, input tf.Output, optional ...MatrixInverseAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "QuantizedAvgPool", + Type: "MatrixInverse", Input: []tf.Input{ - input, min_input, max_input, + input, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return op.Output(0) } -// FractionalAvgPoolAttr is an optional argument to FractionalAvgPool. -type FractionalAvgPoolAttr func(optionalAttr) - -// FractionalAvgPoolPseudoRandom sets the optional pseudo_random attribute to value. +// Computes square of x element-wise. // -// value: When set to True, generates the pooling sequence in a -// pseudorandom fashion, otherwise, in a random fashion. Check paper [Benjamin -// Graham, Fractional Max-Pooling](http://arxiv.org/abs/1412.6071) for -// difference between pseudorandom and random. -// If not specified, defaults to false -func FractionalAvgPoolPseudoRandom(value bool) FractionalAvgPoolAttr { - return func(m optionalAttr) { - m["pseudo_random"] = value +// I.e., \\(y = x * x = x^2\\). 
+func Square(scope *Scope, x tf.Output) (y tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Square", + Input: []tf.Input{ + x, + }, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// FractionalAvgPoolOverlapping sets the optional overlapping attribute to value. -// -// value: When set to True, it means when pooling, the values at the boundary -// of adjacent pooling cells are used by both cells. For example: +// Computes exponential linear: `exp(features) - 1` if < 0, `features` otherwise. // -// `index 0 1 2 3 4` +// See [Fast and Accurate Deep Network Learning by Exponential Linear Units (ELUs) +// ](http://arxiv.org/abs/1511.07289) +func Elu(scope *Scope, features tf.Output) (activations tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Elu", + Input: []tf.Input{ + features, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Computes the reciprocal of x element-wise. // -// `value 20 5 16 3 7` +// I.e., \\(y = 1 / x\\). +func Reciprocal(scope *Scope, x tf.Output) (y tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Reciprocal", + Input: []tf.Input{ + x, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// OrderedMapClearAttr is an optional argument to OrderedMapClear. +type OrderedMapClearAttr func(optionalAttr) + +// OrderedMapClearCapacity sets the optional capacity attribute to value. +// If not specified, defaults to 0 // -// If the pooling sequence is [0, 2, 4], then 16, at index 2 will be used twice. -// The result would be [41/3, 26/3] for fractional avg pooling. 
-// If not specified, defaults to false -func FractionalAvgPoolOverlapping(value bool) FractionalAvgPoolAttr { +// REQUIRES: value >= 0 +func OrderedMapClearCapacity(value int64) OrderedMapClearAttr { return func(m optionalAttr) { - m["overlapping"] = value + m["capacity"] = value } } -// FractionalAvgPoolDeterministic sets the optional deterministic attribute to value. +// OrderedMapClearMemoryLimit sets the optional memory_limit attribute to value. +// If not specified, defaults to 0 // -// value: When set to True, a fixed pooling region will be used when -// iterating over a FractionalAvgPool node in the computation graph. Mainly used -// in unit test to make FractionalAvgPool deterministic. -// If not specified, defaults to false -func FractionalAvgPoolDeterministic(value bool) FractionalAvgPoolAttr { +// REQUIRES: value >= 0 +func OrderedMapClearMemoryLimit(value int64) OrderedMapClearAttr { return func(m optionalAttr) { - m["deterministic"] = value + m["memory_limit"] = value } } -// FractionalAvgPoolSeed sets the optional seed attribute to value. -// -// value: If either seed or seed2 are set to be non-zero, the random number -// generator is seeded by the given seed. Otherwise, it is seeded by a -// random seed. -// If not specified, defaults to 0 -func FractionalAvgPoolSeed(value int64) FractionalAvgPoolAttr { +// OrderedMapClearContainer sets the optional container attribute to value. +// If not specified, defaults to "" +func OrderedMapClearContainer(value string) OrderedMapClearAttr { return func(m optionalAttr) { - m["seed"] = value + m["container"] = value } } -// FractionalAvgPoolSeed2 sets the optional seed2 attribute to value. -// -// value: An second seed to avoid seed collision. -// If not specified, defaults to 0 -func FractionalAvgPoolSeed2(value int64) FractionalAvgPoolAttr { +// OrderedMapClearSharedName sets the optional shared_name attribute to value. 
+// If not specified, defaults to "" +func OrderedMapClearSharedName(value string) OrderedMapClearAttr { return func(m optionalAttr) { - m["seed2"] = value + m["shared_name"] = value } } -// Performs fractional average pooling on the input. -// -// Fractional average pooling is similar to Fractional max pooling in the pooling -// region generation step. The only difference is that after pooling regions are -// generated, a mean operation is performed instead of a max operation in each -// pooling region. -// -// Arguments: -// value: 4-D with shape `[batch, height, width, channels]`. -// pooling_ratio: Pooling ratio for each dimension of `value`, currently only -// supports row and col dimension and should be >= 1.0. For example, a valid -// pooling ratio looks like [1.0, 1.44, 1.73, 1.0]. The first and last elements -// must be 1.0 because we don't allow pooling on batch and channels -// dimensions. 1.44 and 1.73 are pooling ratio on height and width dimensions -// respectively. +// Op removes all elements in the underlying container. // -// Returns output tensor after fractional avg pooling.row pooling sequence, needed to calculate gradient.column pooling sequence, needed to calculate gradient. -func FractionalAvgPool(scope *Scope, value tf.Output, pooling_ratio []float32, optional ...FractionalAvgPoolAttr) (output tf.Output, row_pooling_sequence tf.Output, col_pooling_sequence tf.Output) { +// Returns the created operation. 
+func OrderedMapClear(scope *Scope, dtypes []tf.DataType, optional ...OrderedMapClearAttr) (o *tf.Operation) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"pooling_ratio": pooling_ratio} + attrs := map[string]interface{}{"dtypes": dtypes} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "FractionalAvgPool", - Input: []tf.Input{ - value, - }, + Type: "OrderedMapClear", + Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return scope.AddOperation(opspec) } -// RandomCropAttr is an optional argument to RandomCrop. -type RandomCropAttr func(optionalAttr) - -// RandomCropSeed sets the optional seed attribute to value. +// Computes the reciprocal of x element-wise. // -// value: If either seed or seed2 are set to be non-zero, the random number -// generator is seeded by the given seed. Otherwise, it is seeded by a -// random seed. -// If not specified, defaults to 0 -func RandomCropSeed(value int64) RandomCropAttr { - return func(m optionalAttr) { - m["seed"] = value +// I.e., \\(y = 1 / x\\). +func Inv(scope *Scope, x tf.Output) (y tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Inv", + Input: []tf.Input{ + x, + }, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// RandomCropSeed2 sets the optional seed2 attribute to value. -// -// value: An second seed to avoid seed collision. -// If not specified, defaults to 0 -func RandomCropSeed2(value int64) RandomCropAttr { +// ComplexAbsAttr is an optional argument to ComplexAbs. +type ComplexAbsAttr func(optionalAttr) + +// ComplexAbsTout sets the optional Tout attribute to value. +// If not specified, defaults to DT_FLOAT +func ComplexAbsTout(value tf.DataType) ComplexAbsAttr { return func(m optionalAttr) { - m["seed2"] = value + m["Tout"] = value } } -// Randomly crop `image`. 
-// -// DEPRECATED at GraphDef version 8: Random crop is now pure Python -// -// `size` is a 1-D int64 tensor with 2 elements representing the crop height and -// width. The values must be non negative. +// Computes the complex absolute value of a tensor. // -// This Op picks a random location in `image` and crops a `height` by `width` -// rectangle from that location. The random location is picked so the cropped -// area will fit inside the original image. -// -// Arguments: -// image: 3-D of shape `[height, width, channels]`. -// size: 1-D of length 2 containing: `crop_height`, `crop_width`.. -// -// Returns 3-D of shape `[crop_height, crop_width, channels].` -func RandomCrop(scope *Scope, image tf.Output, size tf.Output, optional ...RandomCropAttr) (output tf.Output) { +// Given a tensor `x` of complex numbers, this operation returns a tensor of type +// `float` or `double` that is the absolute value of each element in `x`. All +// elements in `x` must be complex numbers of the form \\(a + bj\\). The absolute +// value is computed as \\( \sqrt{a^2 + b^2}\\). +func ComplexAbs(scope *Scope, x tf.Output, optional ...ComplexAbsAttr) (y tf.Output) { if scope.Err() != nil { return } @@ -4167,9 +4390,9 @@ func RandomCrop(scope *Scope, image tf.Output, size tf.Output, optional ...Rando a(attrs) } opspec := tf.OpSpec{ - Type: "RandomCrop", + Type: "ComplexAbs", Input: []tf.Input{ - image, size, + x, }, Attrs: attrs, } @@ -4177,192 +4400,192 @@ func RandomCrop(scope *Scope, image tf.Output, size tf.Output, optional ...Rando return op.Output(0) } -// TopKV2Attr is an optional argument to TopKV2. -type TopKV2Attr func(optionalAttr) - -// TopKV2Sorted sets the optional sorted attribute to value. -// -// value: If true the resulting `k` elements will be sorted by the values in -// descending order. 
-// If not specified, defaults to true -func TopKV2Sorted(value bool) TopKV2Attr { - return func(m optionalAttr) { - m["sorted"] = value - } -} - -// Finds values and indices of the `k` largest elements for the last dimension. -// -// If the input is a vector (rank-1), finds the `k` largest entries in the vector -// and outputs their values and indices as vectors. Thus `values[j]` is the -// `j`-th largest entry in `input`, and its index is `indices[j]`. -// -// For matrices (resp. higher rank input), computes the top `k` entries in each -// row (resp. vector along the last dimension). Thus, -// -// values.shape = indices.shape = input.shape[:-1] + [k] -// -// If two elements are equal, the lower-index element appears first. -// -// Arguments: -// input: 1-D or higher with last dimension at least `k`. -// k: 0-D. Number of top elements to look for along the last dimension (along each -// row for matrices). +// Returns the truth value of x AND y element-wise. // -// Returns The `k` largest elements along each last dimensional slice.The indices of `values` within the last dimension of `input`. -func TopKV2(scope *Scope, input tf.Output, k tf.Output, optional ...TopKV2Attr) (values tf.Output, indices tf.Output) { +// *NOTE*: `LogicalAnd` supports broadcasting. More about broadcasting +// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +func LogicalAnd(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "TopKV2", + Type: "LogicalAnd", Input: []tf.Input{ - input, k, + x, y, }, - Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) + return op.Output(0) } -// Returns x // y element-wise. +// Checks whether a tree ensemble has been initialized. // -// *NOTE*: `FloorDiv` supports broadcasting. 
More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func FloorDiv(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { +// Arguments: +// tree_ensemble_handle: Handle to the tree ensemble resource. +// +// Returns output boolean on whether it is initialized or not. +func IsBoostedTreesEnsembleInitialized(scope *Scope, tree_ensemble_handle tf.Output) (is_initialized tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "FloorDiv", + Type: "IsBoostedTreesEnsembleInitialized", Input: []tf.Input{ - x, y, + tree_ensemble_handle, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Returns a batched diagonal tensor with a given batched diagonal values. -// -// Given a `diagonal`, this operation returns a tensor with the `diagonal` and -// everything else padded with zeros. The diagonal is computed as follows: -// -// Assume `diagonal` has `k` dimensions `[I, J, K, ..., N]`, then the output is a -// tensor of rank `k+1` with dimensions [I, J, K, ..., N, N]` where: -// -// `output[i, j, k, ..., m, n] = 1{m=n} * diagonal[i, j, k, ..., n]`. -// -// For example: -// -// ``` -// # 'diagonal' is [[1, 2, 3, 4], [5, 6, 7, 8]] -// -// and diagonal.shape = (2, 4) -// -// tf.matrix_diag(diagonal) ==> [[[1, 0, 0, 0] -// [0, 2, 0, 0] -// [0, 0, 3, 0] -// [0, 0, 0, 4]], -// [[5, 0, 0, 0] -// [0, 6, 0, 0] -// [0, 0, 7, 0] -// [0, 0, 0, 8]]] -// -// which has shape (2, 4, 4) -// ``` -// -// Arguments: -// diagonal: Rank `k`, where `k >= 1`. -// -// Returns Rank `k+1`, with `output.shape = diagonal.shape + [diagonal.shape[-1]]`. -func MatrixDiag(scope *Scope, diagonal tf.Output) (output tf.Output) { +// Cast x of type SrcT to y of DstT.
+func Cast(scope *Scope, x tf.Output, DstT tf.DataType) (y tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"DstT": DstT} opspec := tf.OpSpec{ - Type: "MatrixDiag", + Type: "Cast", Input: []tf.Input{ - diagonal, + x, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Returns the truth value of (x <= y) element-wise. +// MaxAttr is an optional argument to Max. +type MaxAttr func(optionalAttr) + +// MaxKeepDims sets the optional keep_dims attribute to value. // -// *NOTE*: `LessEqual` supports broadcasting. More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func LessEqual(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { +// value: If true, retain reduced dimensions with length 1. +// If not specified, defaults to false +func MaxKeepDims(value bool) MaxAttr { + return func(m optionalAttr) { + m["keep_dims"] = value + } +} + +// Computes the maximum of elements across dimensions of a tensor. +// +// Reduces `input` along the dimensions given in `axis`. Unless +// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in +// `axis`. If `keep_dims` is true, the reduced dimensions are +// retained with length 1. +// +// Arguments: +// input: The tensor to reduce. +// axis: The dimensions to reduce. Must be in the range +// `[-rank(input), rank(input))`. +// +// Returns The reduced tensor. +func Max(scope *Scope, input tf.Output, axis tf.Output, optional ...MaxAttr) (output tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "LessEqual", + Type: "Max", Input: []tf.Input{ - x, y, + input, axis, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Computes softmax activations. -// -// For each batch `i` and class `j` we have +// Quantized Batch normalization. 
// -// softmax[i, j] = exp(logits[i, j]) / sum_j(exp(logits[i, j])) +// This op is deprecated and will be removed in the future. Prefer +// `tf.nn.batch_normalization`. // // Arguments: -// logits: 2-D with shape `[batch_size, num_classes]`. +// t: A 4D input Tensor. +// t_min: The value represented by the lowest quantized input. +// t_max: The value represented by the highest quantized input. +// m: A 1D mean Tensor with size matching the last dimension of t. +// This is the first output from tf.nn.moments, +// or a saved moving average thereof. +// m_min: The value represented by the lowest quantized mean. +// m_max: The value represented by the highest quantized mean. +// v: A 1D variance Tensor with size matching the last dimension of t. +// This is the second output from tf.nn.moments, +// or a saved moving average thereof. +// v_min: The value represented by the lowest quantized variance. +// v_max: The value represented by the highest quantized variance. +// beta: A 1D beta Tensor with size matching the last dimension of t. +// An offset to be added to the normalized tensor. +// beta_min: The value represented by the lowest quantized offset. +// beta_max: The value represented by the highest quantized offset. +// gamma: A 1D gamma Tensor with size matching the last dimension of t. +// If "scale_after_normalization" is true, this tensor will be multiplied +// with the normalized tensor. +// gamma_min: The value represented by the lowest quantized gamma. +// gamma_max: The value represented by the highest quantized gamma. // -// Returns Same shape as `logits`. -func Softmax(scope *Scope, logits tf.Output) (softmax tf.Output) { +// variance_epsilon: A small float number to avoid dividing by 0. +// scale_after_normalization: A bool indicating whether the resulted tensor +// needs to be multiplied with gamma. 
+func QuantizedBatchNormWithGlobalNormalization(scope *Scope, t tf.Output, t_min tf.Output, t_max tf.Output, m tf.Output, m_min tf.Output, m_max tf.Output, v tf.Output, v_min tf.Output, v_max tf.Output, beta tf.Output, beta_min tf.Output, beta_max tf.Output, gamma tf.Output, gamma_min tf.Output, gamma_max tf.Output, out_type tf.DataType, variance_epsilon float32, scale_after_normalization bool) (result tf.Output, result_min tf.Output, result_max tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"out_type": out_type, "variance_epsilon": variance_epsilon, "scale_after_normalization": scale_after_normalization} opspec := tf.OpSpec{ - Type: "Softmax", + Type: "QuantizedBatchNormWithGlobalNormalization", Input: []tf.Input{ - logits, + t, t_min, t_max, m, m_min, m_max, v, v_min, v_max, beta, beta_min, beta_max, gamma, gamma_min, gamma_max, }, + Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1), op.Output(2) } -// DecodeBmpAttr is an optional argument to DecodeBmp. -type DecodeBmpAttr func(optionalAttr) +// HistogramFixedWidthAttr is an optional argument to HistogramFixedWidth. +type HistogramFixedWidthAttr func(optionalAttr) -// DecodeBmpChannels sets the optional channels attribute to value. -// If not specified, defaults to 0 -func DecodeBmpChannels(value int64) DecodeBmpAttr { +// HistogramFixedWidthDtype sets the optional dtype attribute to value. +// If not specified, defaults to DT_INT32 +func HistogramFixedWidthDtype(value tf.DataType) HistogramFixedWidthAttr { return func(m optionalAttr) { - m["channels"] = value + m["dtype"] = value } } -// Decode the first frame of a BMP-encoded image to a uint8 tensor. +// Return histogram of values. // -// The attr `channels` indicates the desired number of color channels for the -// decoded image. 
+// Given the tensor `values`, this operation returns a rank 1 histogram counting +// the number of entries in `values` that fall into every bin. The bins are +// equal width and determined by the arguments `value_range` and `nbins`. // -// Accepted values are: +// ```python +// # Bins will be: (-inf, 1), [1, 2), [2, 3), [3, 4), [4, inf) +// nbins = 5 +// value_range = [0.0, 5.0] +// new_values = [-1.0, 0.0, 1.5, 2.0, 5.0, 15] // -// * 0: Use the number of channels in the BMP-encoded image. -// * 3: output an RGB image. -// * 4: output an RGBA image. +// with tf.get_default_session() as sess: +// hist = tf.histogram_fixed_width(new_values, value_range, nbins=5) +// variables.global_variables_initializer().run() +// sess.run(hist) => [2, 1, 1, 0, 2] +// ``` // // Arguments: -// contents: 0-D. The BMP-encoded image. +// values: Numeric `Tensor`. +// value_range: Shape [2] `Tensor` of same `dtype` as `values`. +// values <= value_range[0] will be mapped to hist[0], +// values >= value_range[1] will be mapped to hist[-1]. +// nbins: Scalar `int32 Tensor`. Number of histogram bins. // -// Returns 3-D with shape `[height, width, channels]`. RGB order -func DecodeBmp(scope *Scope, contents tf.Output, optional ...DecodeBmpAttr) (image tf.Output) { +// Returns A 1-D `Tensor` holding histogram of values. +func HistogramFixedWidth(scope *Scope, values tf.Output, value_range tf.Output, nbins tf.Output, optional ...HistogramFixedWidthAttr) (out tf.Output) { if scope.Err() != nil { return } @@ -4371,9 +4594,9 @@ func DecodeBmp(scope *Scope, contents tf.Output, optional ...DecodeBmpAttr) (ima a(attrs) } opspec := tf.OpSpec{ - Type: "DecodeBmp", + Type: "HistogramFixedWidth", Input: []tf.Input{ - contents, + values, value_range, nbins, }, Attrs: attrs, } @@ -4381,158 +4604,222 @@ func DecodeBmp(scope *Scope, contents tf.Output, optional ...DecodeBmpAttr) (ima return op.Output(0) } -// Computes softsign gradients for a softsign operation. 
+// Adds Tensor 'bias' to Tensor 'input' for Quantized types. +// +// Broadcasts the values of bias on dimensions 0..N-2 of 'input'. // // Arguments: -// gradients: The backpropagated gradients to the corresponding softsign operation. -// features: The features passed as input to the corresponding softsign operation. // -// Returns The gradients: `gradients / (1 + abs(features)) ** 2`. -func SoftsignGrad(scope *Scope, gradients tf.Output, features tf.Output) (backprops tf.Output) { - if scope.Err() != nil { +// bias: A 1D bias Tensor with size matching the last dimension of 'input'. +// min_input: The float value that the lowest quantized input value represents. +// max_input: The float value that the highest quantized input value represents. +// min_bias: The float value that the lowest quantized bias value represents. +// max_bias: The float value that the highest quantized bias value represents. +// +// +// Returns The float value that the lowest quantized output value represents.The float value that the highest quantized output value represents. +func QuantizedBiasAdd(scope *Scope, input tf.Output, bias tf.Output, min_input tf.Output, max_input tf.Output, min_bias tf.Output, max_bias tf.Output, out_type tf.DataType) (output tf.Output, min_out tf.Output, max_out tf.Output) { + if scope.Err() != nil { return } + attrs := map[string]interface{}{"out_type": out_type} opspec := tf.OpSpec{ - Type: "SoftsignGrad", + Type: "QuantizedBiasAdd", Input: []tf.Input{ - gradients, features, + input, bias, min_input, max_input, min_bias, max_bias, }, + Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1), op.Output(2) } -// BatchMatMulAttr is an optional argument to BatchMatMul. -type BatchMatMulAttr func(optionalAttr) +// Produces the average pool of the input tensor for quantized types. +// +// Arguments: +// input: 4-D with shape `[batch, height, width, channels]`. 
+// min_input: The float value that the lowest quantized input value represents. +// max_input: The float value that the highest quantized input value represents. +// ksize: The size of the window for each dimension of the input tensor. +// The length must be 4 to match the number of dimensions of the input. +// strides: The stride of the sliding window for each dimension of the input +// tensor. The length must be 4 to match the number of dimensions of the input. +// padding: The type of padding algorithm to use. +// +// Returns The float value that the lowest quantized output value represents.The float value that the highest quantized output value represents. +func QuantizedAvgPool(scope *Scope, input tf.Output, min_input tf.Output, max_input tf.Output, ksize []int64, strides []int64, padding string) (output tf.Output, min_output tf.Output, max_output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} + opspec := tf.OpSpec{ + Type: "QuantizedAvgPool", + Input: []tf.Input{ + input, min_input, max_input, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2) +} -// BatchMatMulAdjX sets the optional adj_x attribute to value. +// FractionalAvgPoolAttr is an optional argument to FractionalAvgPool. +type FractionalAvgPoolAttr func(optionalAttr) + +// FractionalAvgPoolPseudoRandom sets the optional pseudo_random attribute to value. // -// value: If `True`, adjoint the slices of `x`. Defaults to `False`. +// value: When set to True, generates the pooling sequence in a +// pseudorandom fashion, otherwise, in a random fashion. Check paper [Benjamin +// Graham, Fractional Max-Pooling](http://arxiv.org/abs/1412.6071) for +// difference between pseudorandom and random. 
// If not specified, defaults to false -func BatchMatMulAdjX(value bool) BatchMatMulAttr { +func FractionalAvgPoolPseudoRandom(value bool) FractionalAvgPoolAttr { return func(m optionalAttr) { - m["adj_x"] = value + m["pseudo_random"] = value } } -// BatchMatMulAdjY sets the optional adj_y attribute to value. +// FractionalAvgPoolOverlapping sets the optional overlapping attribute to value. // -// value: If `True`, adjoint the slices of `y`. Defaults to `False`. +// value: When set to True, it means when pooling, the values at the boundary +// of adjacent pooling cells are used by both cells. For example: +// +// `index 0 1 2 3 4` +// +// `value 20 5 16 3 7` +// +// If the pooling sequence is [0, 2, 4], then 16, at index 2 will be used twice. +// The result would be [41/3, 26/3] for fractional avg pooling. // If not specified, defaults to false -func BatchMatMulAdjY(value bool) BatchMatMulAttr { +func FractionalAvgPoolOverlapping(value bool) FractionalAvgPoolAttr { return func(m optionalAttr) { - m["adj_y"] = value + m["overlapping"] = value } } -// Multiplies slices of two tensors in batches. -// -// Multiplies all slices of `Tensor` `x` and `y` (each slice can be -// viewed as an element of a batch), and arranges the individual results -// in a single output tensor of the same batch size. Each of the -// individual slices can optionally be adjointed (to adjoint a matrix -// means to transpose and conjugate it) before multiplication by setting -// the `adj_x` or `adj_y` flag to `True`, which are by default `False`. -// -// The input tensors `x` and `y` are 2-D or higher with shape `[..., r_x, c_x]` -// and `[..., r_y, c_y]`. +// FractionalAvgPoolDeterministic sets the optional deterministic attribute to value. // -// The output tensor is 2-D or higher with shape `[..., r_o, c_o]`, where: +// value: When set to True, a fixed pooling region will be used when +// iterating over a FractionalAvgPool node in the computation graph. 
Mainly used +// in unit test to make FractionalAvgPool deterministic. +// If not specified, defaults to false +func FractionalAvgPoolDeterministic(value bool) FractionalAvgPoolAttr { + return func(m optionalAttr) { + m["deterministic"] = value + } +} + +// FractionalAvgPoolSeed sets the optional seed attribute to value. // -// r_o = c_x if adj_x else r_x -// c_o = r_y if adj_y else c_y +// value: If either seed or seed2 are set to be non-zero, the random number +// generator is seeded by the given seed. Otherwise, it is seeded by a +// random seed. +// If not specified, defaults to 0 +func FractionalAvgPoolSeed(value int64) FractionalAvgPoolAttr { + return func(m optionalAttr) { + m["seed"] = value + } +} + +// FractionalAvgPoolSeed2 sets the optional seed2 attribute to value. // -// It is computed as: +// value: An second seed to avoid seed collision. +// If not specified, defaults to 0 +func FractionalAvgPoolSeed2(value int64) FractionalAvgPoolAttr { + return func(m optionalAttr) { + m["seed2"] = value + } +} + +// Performs fractional average pooling on the input. // -// output[..., :, :] = matrix(x[..., :, :]) * matrix(y[..., :, :]) +// Fractional average pooling is similar to Fractional max pooling in the pooling +// region generation step. The only difference is that after pooling regions are +// generated, a mean operation is performed instead of a max operation in each +// pooling region. // // Arguments: -// x: 2-D or higher with shape `[..., r_x, c_x]`. -// y: 2-D or higher with shape `[..., r_y, c_y]`. +// value: 4-D with shape `[batch, height, width, channels]`. +// pooling_ratio: Pooling ratio for each dimension of `value`, currently only +// supports row and col dimension and should be >= 1.0. For example, a valid +// pooling ratio looks like [1.0, 1.44, 1.73, 1.0]. The first and last elements +// must be 1.0 because we don't allow pooling on batch and channels +// dimensions. 
1.44 and 1.73 are pooling ratio on height and width dimensions +// respectively. // -// Returns 3-D or higher with shape `[..., r_o, c_o]` -func BatchMatMul(scope *Scope, x tf.Output, y tf.Output, optional ...BatchMatMulAttr) (output tf.Output) { +// Returns output tensor after fractional avg pooling.row pooling sequence, needed to calculate gradient.column pooling sequence, needed to calculate gradient. +func FractionalAvgPool(scope *Scope, value tf.Output, pooling_ratio []float32, optional ...FractionalAvgPoolAttr) (output tf.Output, row_pooling_sequence tf.Output, col_pooling_sequence tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"pooling_ratio": pooling_ratio} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "BatchMatMul", + Type: "FractionalAvgPool", Input: []tf.Input{ - x, y, + value, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1), op.Output(2) } -// Returns which elements of x are NaN. +// RandomCropAttr is an optional argument to RandomCrop. +type RandomCropAttr func(optionalAttr) + +// RandomCropSeed sets the optional seed attribute to value. // -// @compatibility(numpy) -// Equivalent to np.isnan -// @end_compatibility -func IsNan(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "IsNan", - Input: []tf.Input{ - x, - }, +// value: If either seed or seed2 are set to be non-zero, the random number +// generator is seeded by the given seed. Otherwise, it is seeded by a +// random seed. +// If not specified, defaults to 0 +func RandomCropSeed(value int64) RandomCropAttr { + return func(m optionalAttr) { + m["seed"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// Computes rectified linear gradients for a Relu operation. -// -// Arguments: -// gradients: The backpropagated gradients to the corresponding Relu operation. 
-// features: The features passed as input to the corresponding Relu operation, OR -// the outputs of that operation (both work equivalently). +// RandomCropSeed2 sets the optional seed2 attribute to value. // -// Returns `gradients * (features > 0)`. -func ReluGrad(scope *Scope, gradients tf.Output, features tf.Output) (backprops tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ReluGrad", - Input: []tf.Input{ - gradients, features, - }, +// value: An second seed to avoid seed collision. +// If not specified, defaults to 0 +func RandomCropSeed2(value int64) RandomCropAttr { + return func(m optionalAttr) { + m["seed2"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// Computes the gradient of morphological 2-D dilation with respect to the input. +// Randomly crop `image`. +// +// DEPRECATED at GraphDef version 8: Random crop is now pure Python +// +// `size` is a 1-D int64 tensor with 2 elements representing the crop height and +// width. The values must be non negative. +// +// This Op picks a random location in `image` and crops a `height` by `width` +// rectangle from that location. The random location is picked so the cropped +// area will fit inside the original image. // // Arguments: -// input: 4-D with shape `[batch, in_height, in_width, depth]`. -// filter: 3-D with shape `[filter_height, filter_width, depth]`. -// out_backprop: 4-D with shape `[batch, out_height, out_width, depth]`. -// strides: 1-D of length 4. The stride of the sliding window for each dimension of -// the input tensor. Must be: `[1, stride_height, stride_width, 1]`. -// rates: 1-D of length 4. The input stride for atrous morphological dilation. -// Must be: `[1, rate_height, rate_width, 1]`. -// padding: The type of padding algorithm to use. +// image: 3-D of shape `[height, width, channels]`. +// size: 1-D of length 2 containing: `crop_height`, `crop_width`.. // -// Returns 4-D with shape `[batch, in_height, in_width, depth]`. 
-func Dilation2DBackpropInput(scope *Scope, input tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, rates []int64, padding string) (in_backprop tf.Output) { +// Returns 3-D of shape `[crop_height, crop_width, channels].` +func RandomCrop(scope *Scope, image tf.Output, size tf.Output, optional ...RandomCropAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"strides": strides, "rates": rates, "padding": padding} + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "Dilation2DBackpropInput", + Type: "RandomCrop", Input: []tf.Input{ - input, filter, out_backprop, + image, size, }, Attrs: attrs, } @@ -4540,314 +4827,263 @@ func Dilation2DBackpropInput(scope *Scope, input tf.Output, filter tf.Output, ou return op.Output(0) } -// CTCBeamSearchDecoderAttr is an optional argument to CTCBeamSearchDecoder. -type CTCBeamSearchDecoderAttr func(optionalAttr) +// TopKV2Attr is an optional argument to TopKV2. +type TopKV2Attr func(optionalAttr) -// CTCBeamSearchDecoderMergeRepeated sets the optional merge_repeated attribute to value. +// TopKV2Sorted sets the optional sorted attribute to value. // -// value: If true, merge repeated classes in output. +// value: If true the resulting `k` elements will be sorted by the values in +// descending order. // If not specified, defaults to true -func CTCBeamSearchDecoderMergeRepeated(value bool) CTCBeamSearchDecoderAttr { +func TopKV2Sorted(value bool) TopKV2Attr { return func(m optionalAttr) { - m["merge_repeated"] = value + m["sorted"] = value } } -// Performs beam search decoding on the logits given in input. -// -// A note about the attribute merge_repeated: For the beam search decoder, -// this means that if consecutive entries in a beam are the same, only -// the first of these is emitted. 
That is, when the top path is "A B B B B", -// "A B" is returned if merge_repeated = True but "A B B B B" is -// returned if merge_repeated = False. +// Finds values and indices of the `k` largest elements for the last dimension. +// +// If the input is a vector (rank-1), finds the `k` largest entries in the vector +// and outputs their values and indices as vectors. Thus `values[j]` is the +// `j`-th largest entry in `input`, and its index is `indices[j]`. +// +// For matrices (resp. higher rank input), computes the top `k` entries in each +// row (resp. vector along the last dimension). Thus, +// +// values.shape = indices.shape = input.shape[:-1] + [k] +// +// If two elements are equal, the lower-index element appears first. // // Arguments: -// inputs: 3-D, shape: `(max_time x batch_size x num_classes)`, the logits. -// sequence_length: A vector containing sequence lengths, size `(batch)`. -// beam_width: A scalar >= 0 (beam search beam width). -// top_paths: A scalar >= 0, <= beam_width (controls output size). +// input: 1-D or higher with last dimension at least `k`. +// k: 0-D. Number of top elements to look for along the last dimension (along each +// row for matrices). // -// Returns A list (length: top_paths) of indices matrices. Matrix j, -// size `(total_decoded_outputs[j] x 2)`, has indices of a -// `SparseTensor`. The rows store: [batch, time].A list (length: top_paths) of values vectors. Vector j, -// size `(length total_decoded_outputs[j])`, has the values of a -// `SparseTensor`. The vector stores the decoded classes for beam j.A list (length: top_paths) of shape vector. Vector j, -// size `(2)`, stores the shape of the decoded `SparseTensor[j]`. -// Its values are: `[batch_size, max_decoded_length[j]]`.A matrix, shaped: `(batch_size x top_paths)`. The -// sequence log-probabilities. 
-func CTCBeamSearchDecoder(scope *Scope, inputs tf.Output, sequence_length tf.Output, beam_width int64, top_paths int64, optional ...CTCBeamSearchDecoderAttr) (decoded_indices []tf.Output, decoded_values []tf.Output, decoded_shape []tf.Output, log_probability tf.Output) { +// Returns The `k` largest elements along each last dimensional slice.The indices of `values` within the last dimension of `input`. +func TopKV2(scope *Scope, input tf.Output, k tf.Output, optional ...TopKV2Attr) (values tf.Output, indices tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"beam_width": beam_width, "top_paths": top_paths} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "CTCBeamSearchDecoder", + Type: "TopKV2", Input: []tf.Input{ - inputs, sequence_length, + input, k, }, Attrs: attrs, } op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1) +} + +// Returns x // y element-wise. +// +// *NOTE*: `FloorDiv` supports broadcasting. 
More about broadcasting +// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +func FloorDiv(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { if scope.Err() != nil { return } - var idx int - var err error - if decoded_indices, idx, err = makeOutputList(op, idx, "decoded_indices"); err != nil { - scope.UpdateErr("CTCBeamSearchDecoder", err) - return - } - if decoded_values, idx, err = makeOutputList(op, idx, "decoded_values"); err != nil { - scope.UpdateErr("CTCBeamSearchDecoder", err) - return - } - if decoded_shape, idx, err = makeOutputList(op, idx, "decoded_shape"); err != nil { - scope.UpdateErr("CTCBeamSearchDecoder", err) - return + opspec := tf.OpSpec{ + Type: "FloorDiv", + Input: []tf.Input{ + x, y, + }, } - log_probability = op.Output(idx) - return decoded_indices, decoded_values, decoded_shape, log_probability + op := scope.AddOperation(opspec) + return op.Output(0) } -// AudioSpectrogramAttr is an optional argument to AudioSpectrogram. -type AudioSpectrogramAttr func(optionalAttr) - -// AudioSpectrogramMagnitudeSquared sets the optional magnitude_squared attribute to value. +// Returns a batched diagonal tensor with a given batched diagonal values. // -// value: Whether to return the squared magnitude or just the -// magnitude. Using squared magnitude can avoid extra calculations. -// If not specified, defaults to false -func AudioSpectrogramMagnitudeSquared(value bool) AudioSpectrogramAttr { - return func(m optionalAttr) { - m["magnitude_squared"] = value - } -} - -// Produces a visualization of audio data over time. +// Given a `diagonal`, this operation returns a tensor with the `diagonal` and +// everything else padded with zeros. The diagonal is computed as follows: // -// Spectrograms are a standard way of representing audio information as a series of -// slices of frequency information, one slice for each window of time. 
By joining -// these together into a sequence, they form a distinctive fingerprint of the sound -// over time. +// Assume `diagonal` has `k` dimensions `[I, J, K, ..., N]`, then the output is a +// tensor of rank `k+1` with dimensions [I, J, K, ..., N, N]` where: // -// This op expects to receive audio data as an input, stored as floats in the range -// -1 to 1, together with a window width in samples, and a stride specifying how -// far to move the window between slices. From this it generates a three -// dimensional output. The lowest dimension has an amplitude value for each -// frequency during that time slice. The next dimension is time, with successive -// frequency slices. The final dimension is for the channels in the input, so a -// stereo audio input would have two here for example. +// `output[i, j, k, ..., m, n] = 1{m=n} * diagonal[i, j, k, ..., n]`. // -// This means the layout when converted and saved as an image is rotated 90 degrees -// clockwise from a typical spectrogram. Time is descending down the Y axis, and -// the frequency decreases from left to right. +// For example: // -// Each value in the result represents the square root of the sum of the real and -// imaginary parts of an FFT on the current window of samples. In this way, the -// lowest dimension represents the power of each frequency in the current window, -// and adjacent windows are concatenated in the next dimension. +// ``` +// # 'diagonal' is [[1, 2, 3, 4], [5, 6, 7, 8]] // -// To get a more intuitive and visual look at what this operation does, you can run -// tensorflow/examples/wav_to_spectrogram to read in an audio file and save out the -// resulting spectrogram as a PNG image. 
+// and diagonal.shape = (2, 4) +// +// tf.matrix_diag(diagonal) ==> [[[1, 0, 0, 0] +// [0, 2, 0, 0] +// [0, 0, 3, 0] +// [0, 0, 0, 4]], +// [[5, 0, 0, 0] +// [0, 6, 0, 0] +// [0, 0, 7, 0] +// [0, 0, 0, 8]]] +// +// which has shape (2, 4, 4) +// ``` // // Arguments: -// input: Float representation of audio data. -// window_size: How wide the input window is in samples. For the highest efficiency -// this should be a power of two, but other values are accepted. -// stride: How widely apart the center of adjacent sample windows should be. +// diagonal: Rank `k`, where `k >= 1`. // -// Returns 3D representation of the audio frequencies as an image. -func AudioSpectrogram(scope *Scope, input tf.Output, window_size int64, stride int64, optional ...AudioSpectrogramAttr) (spectrogram tf.Output) { +// Returns Rank `k+1`, with `output.shape = diagonal.shape + [diagonal.shape[-1]]`. +func MatrixDiag(scope *Scope, diagonal tf.Output) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"window_size": window_size, "stride": stride} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "AudioSpectrogram", + Type: "MatrixDiag", Input: []tf.Input{ - input, + diagonal, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Compute the polygamma function \\(\psi^{(n)}(x)\\). +// Computes the inverse permutation of a tensor. // -// The polygamma function is defined as: +// This operation computes the inverse of an index permutation. It takes a 1-D +// integer tensor `x`, which represents the indices of a zero-based array, and +// swaps each value with its index position. In other words, for an output tensor +// `y` and an input tensor `x`, this operation computes the following: // +// `y[x[i]] = i for i in [0, 1, ..., len(x) - 1]` // -// \\(\psi^{(n)}(x) = \frac{d^n}{dx^n} \psi(x)\\) +// The values must include 0. There can be no duplicate values or negative values. 
// -// where \\(\psi(x)\\) is the digamma function. -func Polygamma(scope *Scope, a tf.Output, x tf.Output) (z tf.Output) { +// For example: +// +// ``` +// # tensor `x` is [3, 4, 0, 2, 1] +// invert_permutation(x) ==> [2, 4, 3, 0, 1] +// ``` +// +// Arguments: +// x: 1-D. +// +// Returns 1-D. +func InvertPermutation(scope *Scope, x tf.Output) (y tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Polygamma", + Type: "InvertPermutation", Input: []tf.Input{ - a, x, + x, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Computes second-order gradients of the maxpooling function. +// Computes log softmax activations. +// +// For each batch `i` and class `j` we have +// +// logsoftmax[i, j] = logits[i, j] - log(sum(exp(logits[i]))) // // Arguments: -// input: The original input. -// grad: 4-D with shape `[batch, height, width, channels]`. Gradients w.r.t. the -// input of `max_pool`. -// argmax: The indices of the maximum values chosen for each output of `max_pool`. -// ksize: The size of the window for each dimension of the input tensor. -// strides: The stride of the sliding window for each dimension of the -// input tensor. -// padding: The type of padding algorithm to use. +// logits: 2-D with shape `[batch_size, num_classes]`. // -// Returns Gradients of gradients w.r.t. the input of `max_pool`. -func MaxPoolGradGradWithArgmax(scope *Scope, input tf.Output, grad tf.Output, argmax tf.Output, ksize []int64, strides []int64, padding string) (output tf.Output) { +// Returns Same shape as `logits`. 
+func LogSoftmax(scope *Scope, logits tf.Output) (logsoftmax tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} opspec := tf.OpSpec{ - Type: "MaxPoolGradGradWithArgmax", + Type: "LogSoftmax", Input: []tf.Input{ - input, grad, argmax, + logits, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// MaxPoolGradGradV2Attr is an optional argument to MaxPoolGradGradV2. -type MaxPoolGradGradV2Attr func(optionalAttr) - -// MaxPoolGradGradV2DataFormat sets the optional data_format attribute to value. -// -// value: Specify the data format of the input and output data. With the -// default format "NHWC", the data is stored in the order of: -// [batch, in_height, in_width, in_channels]. -// Alternatively, the format could be "NCHW", the data storage order of: -// [batch, in_channels, in_height, in_width]. -// If not specified, defaults to "NHWC" -func MaxPoolGradGradV2DataFormat(value string) MaxPoolGradGradV2Attr { - return func(m optionalAttr) { - m["data_format"] = value - } -} - -// Computes second-order gradients of the maxpooling function. -// -// Arguments: -// orig_input: The original input tensor. -// orig_output: The original output tensor. -// grad: 4-D. Gradients of gradients w.r.t. the input of `max_pool`. -// ksize: The size of the window for each dimension of the input tensor. -// strides: The stride of the sliding window for each dimension of the -// input tensor. -// padding: The type of padding algorithm to use. +// Returns the truth value of (x <= y) element-wise. // -// Returns Gradients of gradients w.r.t. the input to `max_pool`. -func MaxPoolGradGradV2(scope *Scope, orig_input tf.Output, orig_output tf.Output, grad tf.Output, ksize tf.Output, strides tf.Output, padding string, optional ...MaxPoolGradGradV2Attr) (output tf.Output) { +// *NOTE*: `LessEqual` supports broadcasting. 
More about broadcasting +// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +func LessEqual(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"padding": padding} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "MaxPoolGradGradV2", + Type: "LessEqual", Input: []tf.Input{ - orig_input, orig_output, grad, ksize, strides, + x, y, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Computes gradients of the maxpooling function. +// Computes softmax activations. +// +// For each batch `i` and class `j` we have +// +// softmax[i, j] = exp(logits[i, j]) / sum_j(exp(logits[i, j])) // // Arguments: -// input: The original input. -// grad: 4-D with shape `[batch, height, width, channels]`. Gradients w.r.t. the -// output of `max_pool`. -// argmax: The indices of the maximum values chosen for each output of `max_pool`. -// ksize: The size of the window for each dimension of the input tensor. -// strides: The stride of the sliding window for each dimension of the -// input tensor. -// padding: The type of padding algorithm to use. +// logits: 2-D with shape `[batch_size, num_classes]`. // -// Returns Gradients w.r.t. the input of `max_pool`. -func MaxPoolGradWithArgmax(scope *Scope, input tf.Output, grad tf.Output, argmax tf.Output, ksize []int64, strides []int64, padding string) (output tf.Output) { +// Returns Same shape as `logits`. +func Softmax(scope *Scope, logits tf.Output) (softmax tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} opspec := tf.OpSpec{ - Type: "MaxPoolGradWithArgmax", + Type: "Softmax", Input: []tf.Input{ - input, grad, argmax, + logits, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// AvgPool3DAttr is an optional argument to AvgPool3D. 
-type AvgPool3DAttr func(optionalAttr) +// DecodeBmpAttr is an optional argument to DecodeBmp. +type DecodeBmpAttr func(optionalAttr) -// AvgPool3DDataFormat sets the optional data_format attribute to value. -// -// value: The data format of the input and output data. With the -// default format "NDHWC", the data is stored in the order of: -// [batch, in_depth, in_height, in_width, in_channels]. -// Alternatively, the format could be "NCDHW", the data storage order is: -// [batch, in_channels, in_depth, in_height, in_width]. -// If not specified, defaults to "NDHWC" -func AvgPool3DDataFormat(value string) AvgPool3DAttr { +// DecodeBmpChannels sets the optional channels attribute to value. +// If not specified, defaults to 0 +func DecodeBmpChannels(value int64) DecodeBmpAttr { return func(m optionalAttr) { - m["data_format"] = value + m["channels"] = value } } -// Performs 3D average pooling on the input. +// Decode the first frame of a BMP-encoded image to a uint8 tensor. +// +// The attr `channels` indicates the desired number of color channels for the +// decoded image. +// +// Accepted values are: +// +// * 0: Use the number of channels in the BMP-encoded image. +// * 3: output an RGB image. +// * 4: output an RGBA image. // // Arguments: -// input: Shape `[batch, depth, rows, cols, channels]` tensor to pool over. -// ksize: 1-D tensor of length 5. The size of the window for each dimension of -// the input tensor. Must have `ksize[0] = ksize[4] = 1`. -// strides: 1-D tensor of length 5. The stride of the sliding window for each -// dimension of `input`. Must have `strides[0] = strides[4] = 1`. -// padding: The type of padding algorithm to use. +// contents: 0-D. The BMP-encoded image. // -// Returns The average pooled output tensor. -func AvgPool3D(scope *Scope, input tf.Output, ksize []int64, strides []int64, padding string, optional ...AvgPool3DAttr) (output tf.Output) { +// Returns 3-D with shape `[height, width, channels]`. 
RGB order +func DecodeBmp(scope *Scope, contents tf.Output, optional ...DecodeBmpAttr) (image tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "AvgPool3D", + Type: "DecodeBmp", Input: []tf.Input{ - input, + contents, }, Attrs: attrs, } @@ -4855,208 +5091,105 @@ func AvgPool3D(scope *Scope, input tf.Output, ksize []int64, strides []int64, pa return op.Output(0) } -// Returns element-wise remainder of division. This emulates C semantics in that +// Computes softsign gradients for a softsign operation. // -// the result here is consistent with a truncating divide. E.g. -// `tf.truncatediv(x, y) * y + truncate_mod(x, y) = x`. +// Arguments: +// gradients: The backpropagated gradients to the corresponding softsign operation. +// features: The features passed as input to the corresponding softsign operation. // -// *NOTE*: `Mod` supports broadcasting. More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func Mod(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { +// Returns The gradients: `gradients / (1 + abs(features)) ** 2`. +func SoftsignGrad(scope *Scope, gradients tf.Output, features tf.Output) (backprops tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Mod", + Type: "SoftsignGrad", Input: []tf.Input{ - x, y, + gradients, features, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// DepthToSpaceAttr is an optional argument to DepthToSpace. -type DepthToSpaceAttr func(optionalAttr) - -// DepthToSpaceDataFormat sets the optional data_format attribute to value. -// If not specified, defaults to "NHWC" -func DepthToSpaceDataFormat(value string) DepthToSpaceAttr { - return func(m optionalAttr) { - m["data_format"] = value - } -} - -// DepthToSpace for tensors of type T. 
-// -// Rearranges data from depth into blocks of spatial data. -// This is the reverse transformation of SpaceToDepth. More specifically, -// this op outputs a copy of the input tensor where values from the `depth` -// dimension are moved in spatial blocks to the `height` and `width` dimensions. -// The attr `block_size` indicates the input block size and how the data is moved. -// -// * Chunks of data of size `block_size * block_size` from depth are rearranged -// into non-overlapping blocks of size `block_size x block_size` -// * The width the output tensor is `input_depth * block_size`, whereas the -// height is `input_height * block_size`. -// * The Y, X coordinates within each block of the output image are determined -// by the high order component of the input channel index. -// * The depth of the input tensor must be divisible by -// `block_size * block_size`. -// -// The `data_format` attr specifies the layout of the input and output tensors -// with the following options: -// "NHWC": `[ batch, height, width, channels ]` -// "NCHW": `[ batch, channels, height, width ]` -// "NCHW_VECT_C": -// `qint8 [ batch, channels / 4, height, width, 4 ]` -// -// It is useful to consider the operation as transforming a 6-D Tensor. -// e.g. for data_format = NHWC, -// Each element in the input tensor can be specified via 6 coordinates, -// ordered by decreasing memory layout significance as: -// n,iY,iX,bY,bX,oC (where n=batch index, iX, iY means X or Y coordinates -// within the input image, bX, bY means coordinates -// within the output block, oC means output channels). -// The output would be the input transposed to the following layout: -// n,iY,bY,iX,bX,oC -// -// This operation is useful for resizing the activations between convolutions -// (but keeping all data), e.g. instead of pooling. It is also useful for training -// purely convolutional models. 
-// -// For example, given an input of shape `[1, 1, 1, 4]`, data_format = "NHWC" and -// block_size = 2: -// -// ``` -// x = [[[[1, 2, 3, 4]]]] -// -// ``` -// -// This operation will output a tensor of shape `[1, 2, 2, 1]`: -// -// ``` -// [[[[1], [2]], -// [[3], [4]]]] -// ``` -// -// Here, the input has a batch of 1 and each batch element has shape `[1, 1, 4]`, -// the corresponding output will have 2x2 elements and will have a depth of -// 1 channel (1 = `4 / (block_size * block_size)`). -// The output element shape is `[2, 2, 1]`. -// -// For an input tensor with larger depth, here of shape `[1, 1, 1, 12]`, e.g. -// -// ``` -// x = [[[[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]]]] -// ``` -// -// This operation, for block size of 2, will return the following tensor of shape -// `[1, 2, 2, 3]` -// -// ``` -// [[[[1, 2, 3], [4, 5, 6]], -// [[7, 8, 9], [10, 11, 12]]]] -// -// ``` -// -// Similarly, for the following input of shape `[1 2 2 4]`, and a block size of 2: -// -// ``` -// x = [[[[1, 2, 3, 4], -// [5, 6, 7, 8]], -// [[9, 10, 11, 12], -// [13, 14, 15, 16]]]] -// ``` -// -// the operator will return the following tensor of shape `[1 4 4 1]`: +// Provides the time since epoch in seconds. // -// ``` -// x = [[[ [1], [2], [5], [6]], -// [ [3], [4], [7], [8]], -// [ [9], [10], [13], [14]], -// [ [11], [12], [15], [16]]]] -// -// ``` -// -// Arguments: +// Returns the timestamp as a `float64` for seconds since the Unix epoch. // -// block_size: The size of the spatial block, same as in Space2Depth. -func DepthToSpace(scope *Scope, input tf.Output, block_size int64, optional ...DepthToSpaceAttr) (output tf.Output) { +// Note: the timestamp is computed when the op is executed, not when it is added +// to the graph. 
+func Timestamp(scope *Scope) (ts tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"block_size": block_size} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "DepthToSpace", - Input: []tf.Input{ - input, - }, - Attrs: attrs, + Type: "Timestamp", } op := scope.AddOperation(opspec) return op.Output(0) } -// Conv3DBackpropInputV2Attr is an optional argument to Conv3DBackpropInputV2. -type Conv3DBackpropInputV2Attr func(optionalAttr) +// BatchMatMulAttr is an optional argument to BatchMatMul. +type BatchMatMulAttr func(optionalAttr) -// Conv3DBackpropInputV2DataFormat sets the optional data_format attribute to value. +// BatchMatMulAdjX sets the optional adj_x attribute to value. // -// value: The data format of the input and output data. With the -// default format "NDHWC", the data is stored in the order of: -// [batch, in_depth, in_height, in_width, in_channels]. -// Alternatively, the format could be "NCDHW", the data storage order is: -// [batch, in_channels, in_depth, in_height, in_width]. -// If not specified, defaults to "NDHWC" -func Conv3DBackpropInputV2DataFormat(value string) Conv3DBackpropInputV2Attr { +// value: If `True`, adjoint the slices of `x`. Defaults to `False`. +// If not specified, defaults to false +func BatchMatMulAdjX(value bool) BatchMatMulAttr { return func(m optionalAttr) { - m["data_format"] = value + m["adj_x"] = value } } -// Conv3DBackpropInputV2Dilations sets the optional dilations attribute to value. +// BatchMatMulAdjY sets the optional adj_y attribute to value. // -// value: 1-D tensor of length 5. The dilation factor for each dimension of -// `input`. If set to k > 1, there will be k-1 skipped cells between each -// filter element on that dimension. The dimension order is determined by the -// value of `data_format`, see above for details. Dilations in the batch and -// depth dimensions must be 1. 
-// If not specified, defaults to -func Conv3DBackpropInputV2Dilations(value []int64) Conv3DBackpropInputV2Attr { +// value: If `True`, adjoint the slices of `y`. Defaults to `False`. +// If not specified, defaults to false +func BatchMatMulAdjY(value bool) BatchMatMulAttr { return func(m optionalAttr) { - m["dilations"] = value + m["adj_y"] = value } } -// Computes the gradients of 3-D convolution with respect to the input. +// Multiplies slices of two tensors in batches. +// +// Multiplies all slices of `Tensor` `x` and `y` (each slice can be +// viewed as an element of a batch), and arranges the individual results +// in a single output tensor of the same batch size. Each of the +// individual slices can optionally be adjointed (to adjoint a matrix +// means to transpose and conjugate it) before multiplication by setting +// the `adj_x` or `adj_y` flag to `True`, which are by default `False`. +// +// The input tensors `x` and `y` are 2-D or higher with shape `[..., r_x, c_x]` +// and `[..., r_y, c_y]`. +// +// The output tensor is 2-D or higher with shape `[..., r_o, c_o]`, where: +// +// r_o = c_x if adj_x else r_x +// c_o = r_y if adj_y else c_y +// +// It is computed as: +// +// output[..., :, :] = matrix(x[..., :, :]) * matrix(y[..., :, :]) // // Arguments: -// input_sizes: An integer vector representing the tensor shape of `input`, -// where `input` is a 5-D -// `[batch, depth, rows, cols, in_channels]` tensor. -// filter: Shape `[depth, rows, cols, in_channels, out_channels]`. -// `in_channels` must match between `input` and `filter`. -// out_backprop: Backprop signal of shape `[batch, out_depth, out_rows, out_cols, -// out_channels]`. -// strides: 1-D tensor of length 5. The stride of the sliding window for each -// dimension of `input`. Must have `strides[0] = strides[4] = 1`. -// padding: The type of padding algorithm to use. 
-func Conv3DBackpropInputV2(scope *Scope, input_sizes tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...Conv3DBackpropInputV2Attr) (output tf.Output) { +// x: 2-D or higher with shape `[..., r_x, c_x]`. +// y: 2-D or higher with shape `[..., r_y, c_y]`. +// +// Returns 3-D or higher with shape `[..., r_o, c_o]` +func BatchMatMul(scope *Scope, x tf.Output, y tf.Output, optional ...BatchMatMulAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"strides": strides, "padding": padding} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "Conv3DBackpropInputV2", + Type: "BatchMatMul", Input: []tf.Input{ - input_sizes, filter, out_backprop, + x, y, }, Attrs: attrs, } @@ -5064,15 +5197,17 @@ func Conv3DBackpropInputV2(scope *Scope, input_sizes tf.Output, filter tf.Output return op.Output(0) } -// Computes square root of x element-wise. +// Returns which elements of x are NaN. // -// I.e., \\(y = \sqrt{x} = x^{1/2}\\). -func Sqrt(scope *Scope, x tf.Output) (y tf.Output) { +// @compatibility(numpy) +// Equivalent to np.isnan +// @end_compatibility +func IsNan(scope *Scope, x tf.Output) (y tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Sqrt", + Type: "IsNan", Input: []tf.Input{ x, }, @@ -5081,219 +5216,114 @@ func Sqrt(scope *Scope, x tf.Output) (y tf.Output) { return op.Output(0) } -// Computes the gradients of 3-D convolution with respect to the filter. -// -// DEPRECATED at GraphDef version 10: Use Conv3DBackpropFilterV2 -// -// Arguments: -// input: Shape `[batch, depth, rows, cols, in_channels]`. -// filter: Shape `[depth, rows, cols, in_channels, out_channels]`. -// `in_channels` must match between `input` and `filter`. -// out_backprop: Backprop signal of shape `[batch, out_depth, out_rows, out_cols, -// out_channels]`. -// strides: 1-D tensor of length 5. 
The stride of the sliding window for each -// dimension of `input`. Must have `strides[0] = strides[4] = 1`. -// padding: The type of padding algorithm to use. -func Conv3DBackpropFilter(scope *Scope, input tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, padding string) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"strides": strides, "padding": padding} - opspec := tf.OpSpec{ - Type: "Conv3DBackpropFilter", - Input: []tf.Input{ - input, filter, out_backprop, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes the gradient for the rsqrt of `x` wrt its input. +// Identity op for gradient debugging. // -// Specifically, `grad = dy * -0.5 * y^3`, where `y = rsqrt(x)`, and `dy` -// is the corresponding input gradient. -func RsqrtGrad(scope *Scope, y tf.Output, dy tf.Output) (z tf.Output) { +// This op is hidden from public in Python. It is used by TensorFlow Debugger to +// register gradient tensors for gradient debugging. +// This op operates on non-reference-type tensors. +func DebugGradientIdentity(scope *Scope, input tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "RsqrtGrad", + Type: "DebugGradientIdentity", Input: []tf.Input{ - y, dy, + input, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// ReverseSequenceAttr is an optional argument to ReverseSequence. -type ReverseSequenceAttr func(optionalAttr) +// ResourceSparseApplyAdadeltaAttr is an optional argument to ResourceSparseApplyAdadelta. +type ResourceSparseApplyAdadeltaAttr func(optionalAttr) -// ReverseSequenceBatchDim sets the optional batch_dim attribute to value. +// ResourceSparseApplyAdadeltaUseLocking sets the optional use_locking attribute to value. // -// value: The dimension along which reversal is performed. 
-// If not specified, defaults to 0 -func ReverseSequenceBatchDim(value int64) ReverseSequenceAttr { +// value: If True, updating of the var and accum tensors will be protected by +// a lock; otherwise the behavior is undefined, but may exhibit less contention. +// If not specified, defaults to false +func ResourceSparseApplyAdadeltaUseLocking(value bool) ResourceSparseApplyAdadeltaAttr { return func(m optionalAttr) { - m["batch_dim"] = value + m["use_locking"] = value } } -// Reverses variable length slices. -// -// This op first slices `input` along the dimension `batch_dim`, and for each -// slice `i`, reverses the first `seq_lengths[i]` elements along -// the dimension `seq_dim`. -// -// The elements of `seq_lengths` must obey `seq_lengths[i] <= input.dims[seq_dim]`, -// and `seq_lengths` must be a vector of length `input.dims[batch_dim]`. -// -// The output slice `i` along dimension `batch_dim` is then given by input -// slice `i`, with the first `seq_lengths[i]` slices along dimension -// `seq_dim` reversed. -// -// For example: -// -// ``` -// # Given this: -// batch_dim = 0 -// seq_dim = 1 -// input.dims = (4, 8, ...) -// seq_lengths = [7, 2, 3, 5] -// -// # then slices of input are reversed on seq_dim, but only up to seq_lengths: -// output[0, 0:7, :, ...] = input[0, 7:0:-1, :, ...] -// output[1, 0:2, :, ...] = input[1, 2:0:-1, :, ...] -// output[2, 0:3, :, ...] = input[2, 3:0:-1, :, ...] -// output[3, 0:5, :, ...] = input[3, 5:0:-1, :, ...] -// -// # while entries past seq_lens are copied through: -// output[0, 7:, :, ...] = input[0, 7:, :, ...] -// output[1, 2:, :, ...] = input[1, 2:, :, ...] -// output[2, 3:, :, ...] = input[2, 3:, :, ...] -// output[3, 2:, :, ...] = input[3, 2:, :, ...] -// ``` -// -// In contrast, if: -// -// ``` -// # Given this: -// batch_dim = 2 -// seq_dim = 0 -// input.dims = (8, ?, 4, ...) 
-// seq_lengths = [7, 2, 3, 5] -// -// # then slices of input are reversed on seq_dim, but only up to seq_lengths: -// output[0:7, :, 0, :, ...] = input[7:0:-1, :, 0, :, ...] -// output[0:2, :, 1, :, ...] = input[2:0:-1, :, 1, :, ...] -// output[0:3, :, 2, :, ...] = input[3:0:-1, :, 2, :, ...] -// output[0:5, :, 3, :, ...] = input[5:0:-1, :, 3, :, ...] -// -// # while entries past seq_lens are copied through: -// output[7:, :, 0, :, ...] = input[7:, :, 0, :, ...] -// output[2:, :, 1, :, ...] = input[2:, :, 1, :, ...] -// output[3:, :, 2, :, ...] = input[3:, :, 2, :, ...] -// output[2:, :, 3, :, ...] = input[2:, :, 3, :, ...] -// ``` +// var: Should be from a Variable(). // // Arguments: -// input: The input to reverse. -// seq_lengths: 1-D with length `input.dims(batch_dim)` and -// `max(seq_lengths) <= input.dims(seq_dim)` -// seq_dim: The dimension which is partially reversed. // -// Returns The partially reversed input. It has the same shape as `input`. -func ReverseSequence(scope *Scope, input tf.Output, seq_lengths tf.Output, seq_dim int64, optional ...ReverseSequenceAttr) (output tf.Output) { +// accum: Should be from a Variable(). +// accum_update: : Should be from a Variable(). +// lr: Learning rate. Must be a scalar. +// rho: Decay factor. Must be a scalar. +// epsilon: Constant factor. Must be a scalar. +// grad: The gradient. +// indices: A vector of indices into the first dimension of var and accum. +// +// Returns the created operation. 
+func ResourceSparseApplyAdadelta(scope *Scope, var_ tf.Output, accum tf.Output, accum_update tf.Output, lr tf.Output, rho tf.Output, epsilon tf.Output, grad tf.Output, indices tf.Output, optional ...ResourceSparseApplyAdadeltaAttr) (o *tf.Operation) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"seq_dim": seq_dim} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "ReverseSequence", + Type: "ResourceSparseApplyAdadelta", Input: []tf.Input{ - input, seq_lengths, + var_, accum, accum_update, lr, rho, epsilon, grad, indices, }, Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } -// DepthwiseConv2dNativeAttr is an optional argument to DepthwiseConv2dNative. -type DepthwiseConv2dNativeAttr func(optionalAttr) - -// DepthwiseConv2dNativeDataFormat sets the optional data_format attribute to value. +// Computes rectified linear gradients for a Relu operation. // -// value: Specify the data format of the input and output data. With the -// default format "NHWC", the data is stored in the order of: -// [batch, height, width, channels]. -// Alternatively, the format could be "NCHW", the data storage order of: -// [batch, channels, height, width]. -// If not specified, defaults to "NHWC" -func DepthwiseConv2dNativeDataFormat(value string) DepthwiseConv2dNativeAttr { - return func(m optionalAttr) { - m["data_format"] = value - } -} - -// DepthwiseConv2dNativeDilations sets the optional dilations attribute to value. +// Arguments: +// gradients: The backpropagated gradients to the corresponding Relu operation. +// features: The features passed as input to the corresponding Relu operation, OR +// the outputs of that operation (both work equivalently). // -// value: 1-D tensor of length 4. The dilation factor for each dimension of -// `input`. If set to k > 1, there will be k-1 skipped cells between each filter -// element on that dimension. 
The dimension order is determined by the value of -// `data_format`, see above for details. Dilations in the batch and depth -// dimensions must be 1. -// If not specified, defaults to -func DepthwiseConv2dNativeDilations(value []int64) DepthwiseConv2dNativeAttr { - return func(m optionalAttr) { - m["dilations"] = value +// Returns `gradients * (features > 0)`. +func ReluGrad(scope *Scope, gradients tf.Output, features tf.Output) (backprops tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "ReluGrad", + Input: []tf.Input{ + gradients, features, + }, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// Computes a 2-D depthwise convolution given 4-D `input` and `filter` tensors. -// -// Given an input tensor of shape `[batch, in_height, in_width, in_channels]` -// and a filter / kernel tensor of shape -// `[filter_height, filter_width, in_channels, channel_multiplier]`, containing -// `in_channels` convolutional filters of depth 1, `depthwise_conv2d` applies -// a different filter to each input channel (expanding from 1 channel to -// `channel_multiplier` channels for each), then concatenates the results -// together. Thus, the output has `in_channels * channel_multiplier` channels. -// -// ``` -// for k in 0..in_channels-1 -// for q in 0..channel_multiplier-1 -// output[b, i, j, k * channel_multiplier + q] = -// sum_{di, dj} input[b, strides[1] * i + di, strides[2] * j + dj, k] * -// filter[di, dj, k, q] -// ``` -// -// Must have `strides[0] = strides[3] = 1`. For the most common case of the same -// horizontal and vertices strides, `strides = [1, stride, stride, 1]`. +// Computes the gradient of morphological 2-D dilation with respect to the input. // // Arguments: -// -// -// strides: 1-D of length 4. The stride of the sliding window for each dimension -// of `input`. +// input: 4-D with shape `[batch, in_height, in_width, depth]`. +// filter: 3-D with shape `[filter_height, filter_width, depth]`. 
+// out_backprop: 4-D with shape `[batch, out_height, out_width, depth]`. +// strides: 1-D of length 4. The stride of the sliding window for each dimension of +// the input tensor. Must be: `[1, stride_height, stride_width, 1]`. +// rates: 1-D of length 4. The input stride for atrous morphological dilation. +// Must be: `[1, rate_height, rate_width, 1]`. // padding: The type of padding algorithm to use. -func DepthwiseConv2dNative(scope *Scope, input tf.Output, filter tf.Output, strides []int64, padding string, optional ...DepthwiseConv2dNativeAttr) (output tf.Output) { +// +// Returns 4-D with shape `[batch, in_height, in_width, depth]`. +func Dilation2DBackpropInput(scope *Scope, input tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, rates []int64, padding string) (in_backprop tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"strides": strides, "padding": padding} - for _, a := range optional { - a(attrs) - } + attrs := map[string]interface{}{"strides": strides, "rates": rates, "padding": padding} opspec := tf.OpSpec{ - Type: "DepthwiseConv2dNative", + Type: "Dilation2DBackpropInput", Input: []tf.Input{ - input, filter, + input, filter, out_backprop, }, Attrs: attrs, } @@ -5301,47 +5331,139 @@ func DepthwiseConv2dNative(scope *Scope, input tf.Output, filter tf.Output, stri return op.Output(0) } -// MaxPoolGradV2Attr is an optional argument to MaxPoolGradV2. -type MaxPoolGradV2Attr func(optionalAttr) +// CTCBeamSearchDecoderAttr is an optional argument to CTCBeamSearchDecoder. +type CTCBeamSearchDecoderAttr func(optionalAttr) -// MaxPoolGradV2DataFormat sets the optional data_format attribute to value. +// CTCBeamSearchDecoderMergeRepeated sets the optional merge_repeated attribute to value. // -// value: Specify the data format of the input and output data. With the -// default format "NHWC", the data is stored in the order of: -// [batch, in_height, in_width, in_channels]. 
-// Alternatively, the format could be "NCHW", the data storage order of: -// [batch, in_channels, in_height, in_width]. -// If not specified, defaults to "NHWC" -func MaxPoolGradV2DataFormat(value string) MaxPoolGradV2Attr { +// value: If true, merge repeated classes in output. +// If not specified, defaults to true +func CTCBeamSearchDecoderMergeRepeated(value bool) CTCBeamSearchDecoderAttr { return func(m optionalAttr) { - m["data_format"] = value + m["merge_repeated"] = value } } -// Computes gradients of the maxpooling function. +// Performs beam search decoding on the logits given in input. // -// Arguments: -// orig_input: The original input tensor. -// orig_output: The original output tensor. -// grad: 4-D. Gradients w.r.t. the output of `max_pool`. -// ksize: The size of the window for each dimension of the input tensor. -// strides: The stride of the sliding window for each dimension of the -// input tensor. -// padding: The type of padding algorithm to use. +// A note about the attribute merge_repeated: For the beam search decoder, +// this means that if consecutive entries in a beam are the same, only +// the first of these is emitted. That is, when the top path is "A B B B B", +// "A B" is returned if merge_repeated = True but "A B B B B" is +// returned if merge_repeated = False. // -// Returns Gradients w.r.t. the input to `max_pool`. -func MaxPoolGradV2(scope *Scope, orig_input tf.Output, orig_output tf.Output, grad tf.Output, ksize tf.Output, strides tf.Output, padding string, optional ...MaxPoolGradV2Attr) (output tf.Output) { +// Arguments: +// inputs: 3-D, shape: `(max_time x batch_size x num_classes)`, the logits. +// sequence_length: A vector containing sequence lengths, size `(batch)`. +// beam_width: A scalar >= 0 (beam search beam width). +// top_paths: A scalar >= 0, <= beam_width (controls output size). +// +// Returns A list (length: top_paths) of indices matrices. 
Matrix j, +// size `(total_decoded_outputs[j] x 2)`, has indices of a +// `SparseTensor`. The rows store: [batch, time].A list (length: top_paths) of values vectors. Vector j, +// size `(length total_decoded_outputs[j])`, has the values of a +// `SparseTensor`. The vector stores the decoded classes for beam j.A list (length: top_paths) of shape vector. Vector j, +// size `(2)`, stores the shape of the decoded `SparseTensor[j]`. +// Its values are: `[batch_size, max_decoded_length[j]]`.A matrix, shaped: `(batch_size x top_paths)`. The +// sequence log-probabilities. +func CTCBeamSearchDecoder(scope *Scope, inputs tf.Output, sequence_length tf.Output, beam_width int64, top_paths int64, optional ...CTCBeamSearchDecoderAttr) (decoded_indices []tf.Output, decoded_values []tf.Output, decoded_shape []tf.Output, log_probability tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"padding": padding} + attrs := map[string]interface{}{"beam_width": beam_width, "top_paths": top_paths} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "MaxPoolGradV2", + Type: "CTCBeamSearchDecoder", Input: []tf.Input{ - orig_input, orig_output, grad, ksize, strides, + inputs, sequence_length, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + if scope.Err() != nil { + return + } + var idx int + var err error + if decoded_indices, idx, err = makeOutputList(op, idx, "decoded_indices"); err != nil { + scope.UpdateErr("CTCBeamSearchDecoder", err) + return + } + if decoded_values, idx, err = makeOutputList(op, idx, "decoded_values"); err != nil { + scope.UpdateErr("CTCBeamSearchDecoder", err) + return + } + if decoded_shape, idx, err = makeOutputList(op, idx, "decoded_shape"); err != nil { + scope.UpdateErr("CTCBeamSearchDecoder", err) + return + } + log_probability = op.Output(idx) + return decoded_indices, decoded_values, decoded_shape, log_probability +} + +// AudioSpectrogramAttr is an optional argument to AudioSpectrogram. 
+type AudioSpectrogramAttr func(optionalAttr) + +// AudioSpectrogramMagnitudeSquared sets the optional magnitude_squared attribute to value. +// +// value: Whether to return the squared magnitude or just the +// magnitude. Using squared magnitude can avoid extra calculations. +// If not specified, defaults to false +func AudioSpectrogramMagnitudeSquared(value bool) AudioSpectrogramAttr { + return func(m optionalAttr) { + m["magnitude_squared"] = value + } +} + +// Produces a visualization of audio data over time. +// +// Spectrograms are a standard way of representing audio information as a series of +// slices of frequency information, one slice for each window of time. By joining +// these together into a sequence, they form a distinctive fingerprint of the sound +// over time. +// +// This op expects to receive audio data as an input, stored as floats in the range +// -1 to 1, together with a window width in samples, and a stride specifying how +// far to move the window between slices. From this it generates a three +// dimensional output. The lowest dimension has an amplitude value for each +// frequency during that time slice. The next dimension is time, with successive +// frequency slices. The final dimension is for the channels in the input, so a +// stereo audio input would have two here for example. +// +// This means the layout when converted and saved as an image is rotated 90 degrees +// clockwise from a typical spectrogram. Time is descending down the Y axis, and +// the frequency decreases from left to right. +// +// Each value in the result represents the square root of the sum of the real and +// imaginary parts of an FFT on the current window of samples. In this way, the +// lowest dimension represents the power of each frequency in the current window, +// and adjacent windows are concatenated in the next dimension. 
+// +// To get a more intuitive and visual look at what this operation does, you can run +// tensorflow/examples/wav_to_spectrogram to read in an audio file and save out the +// resulting spectrogram as a PNG image. +// +// Arguments: +// input: Float representation of audio data. +// window_size: How wide the input window is in samples. For the highest efficiency +// this should be a power of two, but other values are accepted. +// stride: How widely apart the center of adjacent sample windows should be. +// +// Returns 3D representation of the audio frequencies as an image. +func AudioSpectrogram(scope *Scope, input tf.Output, window_size int64, stride int64, optional ...AudioSpectrogramAttr) (spectrogram tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"window_size": window_size, "stride": stride} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "AudioSpectrogram", + Input: []tf.Input{ + input, }, Attrs: attrs, } @@ -5349,34 +5471,61 @@ func MaxPoolGradV2(scope *Scope, orig_input tf.Output, orig_output tf.Output, gr return op.Output(0) } -// Restore a reader to a previously saved state. +// Compute the polygamma function \\(\psi^{(n)}(x)\\). // -// Not all Readers support being restored, so this can produce an -// Unimplemented error. +// The polygamma function is defined as: +// +// +// \\(\psi^{(n)}(x) = \frac{d^n}{dx^n} \psi(x)\\) +// +// where \\(\psi(x)\\) is the digamma function. +func Polygamma(scope *Scope, a tf.Output, x tf.Output) (z tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Polygamma", + Input: []tf.Input{ + a, x, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Computes second-order gradients of the maxpooling function. // // Arguments: -// reader_handle: Handle to a Reader. -// state: Result of a ReaderSerializeState of a Reader with type -// matching reader_handle. +// input: The original input. 
+// grad: 4-D with shape `[batch, height, width, channels]`. Gradients w.r.t. the +// input of `max_pool`. +// argmax: The indices of the maximum values chosen for each output of `max_pool`. +// ksize: The size of the window for each dimension of the input tensor. +// strides: The stride of the sliding window for each dimension of the +// input tensor. +// padding: The type of padding algorithm to use. // -// Returns the created operation. -func ReaderRestoreStateV2(scope *Scope, reader_handle tf.Output, state tf.Output) (o *tf.Operation) { +// Returns Gradients of gradients w.r.t. the input of `max_pool`. +func MaxPoolGradGradWithArgmax(scope *Scope, input tf.Output, grad tf.Output, argmax tf.Output, ksize []int64, strides []int64, padding string) (output tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} opspec := tf.OpSpec{ - Type: "ReaderRestoreStateV2", + Type: "MaxPoolGradGradWithArgmax", Input: []tf.Input{ - reader_handle, state, + input, grad, argmax, }, + Attrs: attrs, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0) } -// MaxPoolGradAttr is an optional argument to MaxPoolGrad. -type MaxPoolGradAttr func(optionalAttr) +// MaxPoolGradGradV2Attr is an optional argument to MaxPoolGradGradV2. +type MaxPoolGradGradV2Attr func(optionalAttr) -// MaxPoolGradDataFormat sets the optional data_format attribute to value. +// MaxPoolGradGradV2DataFormat sets the optional data_format attribute to value. // // value: Specify the data format of the input and output data. With the // default format "NHWC", the data is stored in the order of: @@ -5384,36 +5533,36 @@ type MaxPoolGradAttr func(optionalAttr) // Alternatively, the format could be "NCHW", the data storage order of: // [batch, in_channels, in_height, in_width]. 
// If not specified, defaults to "NHWC" -func MaxPoolGradDataFormat(value string) MaxPoolGradAttr { +func MaxPoolGradGradV2DataFormat(value string) MaxPoolGradGradV2Attr { return func(m optionalAttr) { m["data_format"] = value } } -// Computes gradients of the maxpooling function. +// Computes second-order gradients of the maxpooling function. // // Arguments: // orig_input: The original input tensor. // orig_output: The original output tensor. -// grad: 4-D. Gradients w.r.t. the output of `max_pool`. +// grad: 4-D. Gradients of gradients w.r.t. the input of `max_pool`. // ksize: The size of the window for each dimension of the input tensor. // strides: The stride of the sliding window for each dimension of the // input tensor. // padding: The type of padding algorithm to use. // -// Returns Gradients w.r.t. the input to `max_pool`. -func MaxPoolGrad(scope *Scope, orig_input tf.Output, orig_output tf.Output, grad tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPoolGradAttr) (output tf.Output) { +// Returns Gradients of gradients w.r.t. the input to `max_pool`. +func MaxPoolGradGradV2(scope *Scope, orig_input tf.Output, orig_output tf.Output, grad tf.Output, ksize tf.Output, strides tf.Output, padding string, optional ...MaxPoolGradGradV2Attr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} + attrs := map[string]interface{}{"padding": padding} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "MaxPoolGrad", + Type: "MaxPoolGradGradV2", Input: []tf.Input{ - orig_input, orig_output, grad, + orig_input, orig_output, grad, ksize, strides, }, Attrs: attrs, } @@ -5421,66 +5570,64 @@ func MaxPoolGrad(scope *Scope, orig_input tf.Output, orig_output tf.Output, grad return op.Output(0) } -// CropAndResizeAttr is an optional argument to CropAndResize. 
-type CropAndResizeAttr func(optionalAttr) +// Computes gradients of the maxpooling function. +// +// Arguments: +// input: The original input. +// grad: 4-D with shape `[batch, height, width, channels]`. Gradients w.r.t. the +// output of `max_pool`. +// argmax: The indices of the maximum values chosen for each output of `max_pool`. +// ksize: The size of the window for each dimension of the input tensor. +// strides: The stride of the sliding window for each dimension of the +// input tensor. +// padding: The type of padding algorithm to use. +// +// Returns Gradients w.r.t. the input of `max_pool`. +func MaxPoolGradWithArgmax(scope *Scope, input tf.Output, grad tf.Output, argmax tf.Output, ksize []int64, strides []int64, padding string) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} + opspec := tf.OpSpec{ + Type: "MaxPoolGradWithArgmax", + Input: []tf.Input{ + input, grad, argmax, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} -// CropAndResizeMethod sets the optional method attribute to value. +// MutexV2Attr is an optional argument to MutexV2. +type MutexV2Attr func(optionalAttr) + +// MutexV2Container sets the optional container attribute to value. // -// value: A string specifying the interpolation method. Only 'bilinear' is -// supported for now. -// If not specified, defaults to "bilinear" -func CropAndResizeMethod(value string) CropAndResizeAttr { +// value: If non-empty, this variable is placed in the given container. +// Otherwise, a default container is used. +// If not specified, defaults to "" +func MutexV2Container(value string) MutexV2Attr { return func(m optionalAttr) { - m["method"] = value + m["container"] = value } } -// CropAndResizeExtrapolationValue sets the optional extrapolation_value attribute to value. +// MutexV2SharedName sets the optional shared_name attribute to value. 
// -// value: Value used for extrapolation, when applicable. -// If not specified, defaults to 0 -func CropAndResizeExtrapolationValue(value float32) CropAndResizeAttr { +// value: If non-empty, this variable is named in the given bucket +// with this shared_name. Otherwise, the node name is used instead. +// If not specified, defaults to "" +func MutexV2SharedName(value string) MutexV2Attr { return func(m optionalAttr) { - m["extrapolation_value"] = value + m["shared_name"] = value } } -// Extracts crops from the input image tensor and bilinearly resizes them (possibly -// -// with aspect ratio change) to a common output size specified by `crop_size`. This -// is more general than the `crop_to_bounding_box` op which extracts a fixed size -// slice from the input image and does not allow resizing or aspect ratio change. -// -// Returns a tensor with `crops` from the input `image` at positions defined at the -// bounding box locations in `boxes`. The cropped boxes are all resized (with -// bilinear interpolation) to a fixed `size = [crop_height, crop_width]`. The -// result is a 4-D tensor `[num_boxes, crop_height, crop_width, depth]`. The -// resizing is corner aligned. In particular, if `boxes = [[0, 0, 1, 1]]`, the -// method will give identical results to using `tf.image.resize_bilinear()` -// with `align_corners=True`. +// Creates a Mutex resource that can be locked by `MutexLock`. // -// Arguments: -// image: A 4-D tensor of shape `[batch, image_height, image_width, depth]`. -// Both `image_height` and `image_width` need to be positive. -// boxes: A 2-D tensor of shape `[num_boxes, 4]`. The `i`-th row of the tensor -// specifies the coordinates of a box in the `box_ind[i]` image and is specified -// in normalized coordinates `[y1, x1, y2, x2]`. 
A normalized coordinate value of -// `y` is mapped to the image coordinate at `y * (image_height - 1)`, so as the -// `[0, 1]` interval of normalized image height is mapped to -// `[0, image_height - 1]` in image height coordinates. We do allow `y1` > `y2`, in -// which case the sampled crop is an up-down flipped version of the original -// image. The width dimension is treated similarly. Normalized coordinates -// outside the `[0, 1]` range are allowed, in which case we use -// `extrapolation_value` to extrapolate the input image values. -// box_ind: A 1-D tensor of shape `[num_boxes]` with int32 values in `[0, batch)`. -// The value of `box_ind[i]` specifies the image that the `i`-th box refers to. -// crop_size: A 1-D tensor of 2 elements, `size = [crop_height, crop_width]`. All -// cropped image patches are resized to this size. The aspect ratio of the image -// content is not preserved. Both `crop_height` and `crop_width` need to be -// positive. -// -// Returns A 4-D tensor of shape `[num_boxes, crop_height, crop_width, depth]`. -func CropAndResize(scope *Scope, image tf.Output, boxes tf.Output, box_ind tf.Output, crop_size tf.Output, optional ...CropAndResizeAttr) (crops tf.Output) { +// Returns The mutex resource. +func MutexV2(scope *Scope, optional ...MutexV2Attr) (resource tf.Output) { if scope.Err() != nil { return } @@ -5489,246 +5636,200 @@ func CropAndResize(scope *Scope, image tf.Output, boxes tf.Output, box_ind tf.Ou a(attrs) } opspec := tf.OpSpec{ - Type: "CropAndResize", - Input: []tf.Input{ - image, boxes, box_ind, crop_size, - }, + Type: "MutexV2", + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Fills empty rows in the input 2-D `SparseTensor` with a default value. -// -// The input `SparseTensor` is represented via the tuple of inputs -// (`indices`, `values`, `dense_shape`). The output `SparseTensor` has the -// same `dense_shape` but with indices `output_indices` and values -// `output_values`. 
-// -// This op inserts a single entry for every row that doesn't have any values. -// The index is created as `[row, 0, ..., 0]` and the inserted value -// is `default_value`. -// -// For example, suppose `sp_input` has shape `[5, 6]` and non-empty values: -// -// [0, 1]: a -// [0, 3]: b -// [2, 0]: c -// [3, 1]: d -// -// Rows 1 and 4 are empty, so the output will be of shape `[5, 6]` with values: -// -// [0, 1]: a -// [0, 3]: b -// [1, 0]: default_value -// [2, 0]: c -// [3, 1]: d -// [4, 0]: default_value -// -// The output `SparseTensor` will be in row-major order and will have the -// same shape as the input. -// -// This op also returns an indicator vector shaped `[dense_shape[0]]` such that -// -// empty_row_indicator[i] = True iff row i was an empty row. -// -// And a reverse index map vector shaped `[indices.shape[0]]` that is used during -// backpropagation, -// -// reverse_index_map[j] = out_j s.t. indices[j, :] == output_indices[out_j, :] -// -// Arguments: -// indices: 2-D. the indices of the sparse tensor. -// values: 1-D. the values of the sparse tensor. -// dense_shape: 1-D. the shape of the sparse tensor. -// default_value: 0-D. default value to insert into location `[row, 0, ..., 0]` -// for rows missing from the input sparse tensor. -// output indices: 2-D. the indices of the filled sparse tensor. -// -// Returns 1-D. the values of the filled sparse tensor.1-D. whether the dense row was missing in the -// input sparse tensor.1-D. a map from the input indices to the output indices. 
-func SparseFillEmptyRows(scope *Scope, indices tf.Output, values tf.Output, dense_shape tf.Output, default_value tf.Output) (output_indices tf.Output, output_values tf.Output, empty_row_indicator tf.Output, reverse_index_map tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SparseFillEmptyRows", - Input: []tf.Input{ - indices, values, dense_shape, default_value, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2), op.Output(3) -} +// AvgPool3DAttr is an optional argument to AvgPool3D. +type AvgPool3DAttr func(optionalAttr) -// Reverses specific dimensions of a tensor. -// -// Given a `tensor`, and a `bool` tensor `dims` representing the dimensions -// of `tensor`, this operation reverses each dimension i of `tensor` where -// `dims[i]` is `True`. -// -// `tensor` can have up to 8 dimensions. The number of dimensions -// of `tensor` must equal the number of elements in `dims`. In other words: -// -// `rank(tensor) = size(dims)` -// -// For example: -// -// ``` -// # tensor 't' is [[[[ 0, 1, 2, 3], -// # [ 4, 5, 6, 7], -// # [ 8, 9, 10, 11]], -// # [[12, 13, 14, 15], -// # [16, 17, 18, 19], -// # [20, 21, 22, 23]]]] -// # tensor 't' shape is [1, 2, 3, 4] -// -// # 'dims' is [False, False, False, True] -// reverse(t, dims) ==> [[[[ 3, 2, 1, 0], -// [ 7, 6, 5, 4], -// [ 11, 10, 9, 8]], -// [[15, 14, 13, 12], -// [19, 18, 17, 16], -// [23, 22, 21, 20]]]] -// -// # 'dims' is [False, True, False, False] -// reverse(t, dims) ==> [[[[12, 13, 14, 15], -// [16, 17, 18, 19], -// [20, 21, 22, 23] -// [[ 0, 1, 2, 3], -// [ 4, 5, 6, 7], -// [ 8, 9, 10, 11]]]] -// -// # 'dims' is [False, False, True, False] -// reverse(t, dims) ==> [[[[8, 9, 10, 11], -// [4, 5, 6, 7], -// [0, 1, 2, 3]] -// [[20, 21, 22, 23], -// [16, 17, 18, 19], -// [12, 13, 14, 15]]]] -// ``` -// -// Arguments: -// tensor: Up to 8-D. -// dims: 1-D. The dimensions to reverse. 
+// AvgPool3DDataFormat sets the optional data_format attribute to value. // -// Returns The same shape as `tensor`. -func Reverse(scope *Scope, tensor tf.Output, dims tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Reverse", - Input: []tf.Input{ - tensor, dims, - }, +// value: The data format of the input and output data. With the +// default format "NDHWC", the data is stored in the order of: +// [batch, in_depth, in_height, in_width, in_channels]. +// Alternatively, the format could be "NCDHW", the data storage order is: +// [batch, in_channels, in_depth, in_height, in_width]. +// If not specified, defaults to "NDHWC" +func AvgPool3DDataFormat(value string) AvgPool3DAttr { + return func(m optionalAttr) { + m["data_format"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// Computes log softmax activations. -// -// For each batch `i` and class `j` we have -// -// logsoftmax[i, j] = logits[i, j] - log(sum(exp(logits[i]))) +// Performs 3D average pooling on the input. // // Arguments: -// logits: 2-D with shape `[batch_size, num_classes]`. +// input: Shape `[batch, depth, rows, cols, channels]` tensor to pool over. +// ksize: 1-D tensor of length 5. The size of the window for each dimension of +// the input tensor. Must have `ksize[0] = ksize[4] = 1`. +// strides: 1-D tensor of length 5. The stride of the sliding window for each +// dimension of `input`. Must have `strides[0] = strides[4] = 1`. +// padding: The type of padding algorithm to use. // -// Returns Same shape as `logits`. -func LogSoftmax(scope *Scope, logits tf.Output) (logsoftmax tf.Output) { +// Returns The average pooled output tensor. 
+func AvgPool3D(scope *Scope, input tf.Output, ksize []int64, strides []int64, padding string, optional ...AvgPool3DAttr) (output tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "LogSoftmax", + Type: "AvgPool3D", Input: []tf.Input{ - logits, + input, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Computes the inverse permutation of a tensor. -// -// This operation computes the inverse of an index permutation. It takes a 1-D -// integer tensor `x`, which represents the indices of a zero-based array, and -// swaps each value with its index position. In other words, for an output tensor -// `y` and an input tensor `x`, this operation computes the following: -// -// `y[x[i]] = i for i in [0, 1, ..., len(x) - 1]` -// -// The values must include 0. There can be no duplicate values or negative values. -// -// For example: -// -// ``` -// # tensor `x` is [3, 4, 0, 2, 1] -// invert_permutation(x) ==> [2, 4, 3, 0, 1] -// ``` +// Returns element-wise remainder of division. This emulates C semantics in that // -// Arguments: -// x: 1-D. +// the result here is consistent with a truncating divide. E.g. +// `tf.truncatediv(x, y) * y + truncate_mod(x, y) = x`. // -// Returns 1-D. -func InvertPermutation(scope *Scope, x tf.Output) (y tf.Output) { +// *NOTE*: `Mod` supports broadcasting. More about broadcasting +// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +func Mod(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "InvertPermutation", + Type: "Mod", Input: []tf.Input{ - x, + x, y, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// BiasAddGradAttr is an optional argument to BiasAddGrad. 
-type BiasAddGradAttr func(optionalAttr) +// DepthToSpaceAttr is an optional argument to DepthToSpace. +type DepthToSpaceAttr func(optionalAttr) -// BiasAddGradDataFormat sets the optional data_format attribute to value. -// -// value: Specify the data format of the input and output data. With the -// default format "NHWC", the bias tensor will be added to the last dimension -// of the value tensor. -// Alternatively, the format could be "NCHW", the data storage order of: -// [batch, in_channels, in_height, in_width]. -// The tensor will be added to "in_channels", the third-to-the-last -// dimension. +// DepthToSpaceDataFormat sets the optional data_format attribute to value. // If not specified, defaults to "NHWC" -func BiasAddGradDataFormat(value string) BiasAddGradAttr { +func DepthToSpaceDataFormat(value string) DepthToSpaceAttr { return func(m optionalAttr) { m["data_format"] = value } } -// The backward operation for "BiasAdd" on the "bias" tensor. +// DepthToSpace for tensors of type T. // -// It accumulates all the values from out_backprop into the feature dimension. -// For NHWC data format, the feature dimension is the last. For NCHW data format, -// the feature dimension is the third-to-last. +// Rearranges data from depth into blocks of spatial data. +// This is the reverse transformation of SpaceToDepth. More specifically, +// this op outputs a copy of the input tensor where values from the `depth` +// dimension are moved in spatial blocks to the `height` and `width` dimensions. +// The attr `block_size` indicates the input block size and how the data is moved. // -// Arguments: -// out_backprop: Any number of dimensions. +// * Chunks of data of size `block_size * block_size` from depth are rearranged +// into non-overlapping blocks of size `block_size x block_size` +// * The width the output tensor is `input_depth * block_size`, whereas the +// height is `input_height * block_size`. 
+// * The Y, X coordinates within each block of the output image are determined +// by the high order component of the input channel index. +// * The depth of the input tensor must be divisible by +// `block_size * block_size`. // -// Returns 1-D with size the feature dimension of `out_backprop`. -func BiasAddGrad(scope *Scope, out_backprop tf.Output, optional ...BiasAddGradAttr) (output tf.Output) { - if scope.Err() != nil { +// The `data_format` attr specifies the layout of the input and output tensors +// with the following options: +// "NHWC": `[ batch, height, width, channels ]` +// "NCHW": `[ batch, channels, height, width ]` +// "NCHW_VECT_C": +// `qint8 [ batch, channels / 4, height, width, 4 ]` +// +// It is useful to consider the operation as transforming a 6-D Tensor. +// e.g. for data_format = NHWC, +// Each element in the input tensor can be specified via 6 coordinates, +// ordered by decreasing memory layout significance as: +// n,iY,iX,bY,bX,oC (where n=batch index, iX, iY means X or Y coordinates +// within the input image, bX, bY means coordinates +// within the output block, oC means output channels). +// The output would be the input transposed to the following layout: +// n,iY,bY,iX,bX,oC +// +// This operation is useful for resizing the activations between convolutions +// (but keeping all data), e.g. instead of pooling. It is also useful for training +// purely convolutional models. +// +// For example, given an input of shape `[1, 1, 1, 4]`, data_format = "NHWC" and +// block_size = 2: +// +// ``` +// x = [[[[1, 2, 3, 4]]]] +// +// ``` +// +// This operation will output a tensor of shape `[1, 2, 2, 1]`: +// +// ``` +// [[[[1], [2]], +// [[3], [4]]]] +// ``` +// +// Here, the input has a batch of 1 and each batch element has shape `[1, 1, 4]`, +// the corresponding output will have 2x2 elements and will have a depth of +// 1 channel (1 = `4 / (block_size * block_size)`). +// The output element shape is `[2, 2, 1]`. 
+// +// For an input tensor with larger depth, here of shape `[1, 1, 1, 12]`, e.g. +// +// ``` +// x = [[[[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]]]] +// ``` +// +// This operation, for block size of 2, will return the following tensor of shape +// `[1, 2, 2, 3]` +// +// ``` +// [[[[1, 2, 3], [4, 5, 6]], +// [[7, 8, 9], [10, 11, 12]]]] +// +// ``` +// +// Similarly, for the following input of shape `[1 2 2 4]`, and a block size of 2: +// +// ``` +// x = [[[[1, 2, 3, 4], +// [5, 6, 7, 8]], +// [[9, 10, 11, 12], +// [13, 14, 15, 16]]]] +// ``` +// +// the operator will return the following tensor of shape `[1 4 4 1]`: +// +// ``` +// x = [[[ [1], [2], [5], [6]], +// [ [3], [4], [7], [8]], +// [ [9], [10], [13], [14]], +// [ [11], [12], [15], [16]]]] +// +// ``` +// +// Arguments: +// +// block_size: The size of the spatial block, same as in Space2Depth. +func DepthToSpace(scope *Scope, input tf.Output, block_size int64, optional ...DepthToSpaceAttr) (output tf.Output) { + if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"block_size": block_size} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "BiasAddGrad", + Type: "DepthToSpace", Input: []tf.Input{ - out_backprop, + input, }, Attrs: attrs, } @@ -5736,167 +5837,122 @@ func BiasAddGrad(scope *Scope, out_backprop tf.Output, optional ...BiasAddGradAt return op.Output(0) } -// FusedBatchNormV2Attr is an optional argument to FusedBatchNormV2. -type FusedBatchNormV2Attr func(optionalAttr) - -// FusedBatchNormV2Epsilon sets the optional epsilon attribute to value. -// -// value: A small float number added to the variance of x. -// If not specified, defaults to 0.0001 -func FusedBatchNormV2Epsilon(value float32) FusedBatchNormV2Attr { - return func(m optionalAttr) { - m["epsilon"] = value - } -} +// Conv3DBackpropInputV2Attr is an optional argument to Conv3DBackpropInputV2. 
+type Conv3DBackpropInputV2Attr func(optionalAttr) -// FusedBatchNormV2DataFormat sets the optional data_format attribute to value. +// Conv3DBackpropInputV2DataFormat sets the optional data_format attribute to value. // -// value: The data format for x and y. Either "NHWC" (default) or "NCHW". -// If not specified, defaults to "NHWC" -func FusedBatchNormV2DataFormat(value string) FusedBatchNormV2Attr { +// value: The data format of the input and output data. With the +// default format "NDHWC", the data is stored in the order of: +// [batch, in_depth, in_height, in_width, in_channels]. +// Alternatively, the format could be "NCDHW", the data storage order is: +// [batch, in_channels, in_depth, in_height, in_width]. +// If not specified, defaults to "NDHWC" +func Conv3DBackpropInputV2DataFormat(value string) Conv3DBackpropInputV2Attr { return func(m optionalAttr) { m["data_format"] = value } } -// FusedBatchNormV2IsTraining sets the optional is_training attribute to value. +// Conv3DBackpropInputV2Dilations sets the optional dilations attribute to value. // -// value: A bool value to indicate the operation is for training (default) -// or inference. -// If not specified, defaults to true -func FusedBatchNormV2IsTraining(value bool) FusedBatchNormV2Attr { +// value: 1-D tensor of length 5. The dilation factor for each dimension of +// `input`. If set to k > 1, there will be k-1 skipped cells between each +// filter element on that dimension. The dimension order is determined by the +// value of `data_format`, see above for details. Dilations in the batch and +// depth dimensions must be 1. +// If not specified, defaults to +func Conv3DBackpropInputV2Dilations(value []int64) Conv3DBackpropInputV2Attr { return func(m optionalAttr) { - m["is_training"] = value + m["dilations"] = value } } -// Batch normalization. -// -// Note that the size of 4D Tensors are defined by either "NHWC" or "NCHW". -// The size of 1D Tensors matches the dimension C of the 4D Tensors. 
+// Computes the gradients of 3-D convolution with respect to the input. // // Arguments: -// x: A 4D Tensor for input data. -// scale: A 1D Tensor for scaling factor, to scale the normalized x. -// offset: A 1D Tensor for offset, to shift to the normalized x. -// mean: A 1D Tensor for population mean. Used for inference only; -// must be empty for training. -// variance: A 1D Tensor for population variance. Used for inference only; -// must be empty for training. -// -// Returns A 4D Tensor for output data.A 1D Tensor for the computed batch mean, to be used by TensorFlow -// to compute the running mean.A 1D Tensor for the computed batch variance, to be used by -// TensorFlow to compute the running variance.A 1D Tensor for the computed batch mean, to be reused -// in the gradient computation.A 1D Tensor for the computed batch variance (inverted variance -// in the cuDNN case), to be reused in the gradient computation. -func FusedBatchNormV2(scope *Scope, x tf.Output, scale tf.Output, offset tf.Output, mean tf.Output, variance tf.Output, optional ...FusedBatchNormV2Attr) (y tf.Output, batch_mean tf.Output, batch_variance tf.Output, reserve_space_1 tf.Output, reserve_space_2 tf.Output) { +// input_sizes: An integer vector representing the tensor shape of `input`, +// where `input` is a 5-D +// `[batch, depth, rows, cols, in_channels]` tensor. +// filter: Shape `[depth, rows, cols, in_channels, out_channels]`. +// `in_channels` must match between `input` and `filter`. +// out_backprop: Backprop signal of shape `[batch, out_depth, out_rows, out_cols, +// out_channels]`. +// strides: 1-D tensor of length 5. The stride of the sliding window for each +// dimension of `input`. Must have `strides[0] = strides[4] = 1`. +// padding: The type of padding algorithm to use. 
+func Conv3DBackpropInputV2(scope *Scope, input_sizes tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...Conv3DBackpropInputV2Attr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"strides": strides, "padding": padding} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "FusedBatchNormV2", + Type: "Conv3DBackpropInputV2", Input: []tf.Input{ - x, scale, offset, mean, variance, + input_sizes, filter, out_backprop, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4) -} - -// Returns the rank of a tensor. -// -// This operation returns an integer representing the rank of `input`. -// -// For example: -// -// ``` -// # 't' is [[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]]] -// # shape of tensor 't' is [2, 2, 3] -// rank(t) ==> 3 -// ``` -// -// **Note**: The rank of a tensor is not the same as the rank of a matrix. The rank -// of a tensor is the number of indices required to uniquely select each element -// of the tensor. Rank is also known as "order", "degree", or "ndims." -func Rank(scope *Scope, input tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Rank", - Input: []tf.Input{ - input, - }, - } - op := scope.AddOperation(opspec) return op.Output(0) } -// Transforms a Tensor into a serialized TensorProto proto. -// -// Arguments: -// tensor: A Tensor of type `T`. +// Computes square root of x element-wise. // -// Returns A serialized TensorProto proto of the input tensor. -func SerializeTensor(scope *Scope, tensor tf.Output) (serialized tf.Output) { +// I.e., \\(y = \sqrt{x} = x^{1/2}\\). 
+func Sqrt(scope *Scope, x tf.Output) (y tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "SerializeTensor", + Type: "Sqrt", Input: []tf.Input{ - tensor, + x, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// MatrixSolveAttr is an optional argument to MatrixSolve. -type MatrixSolveAttr func(optionalAttr) +// Conv3DBackpropFilterAttr is an optional argument to Conv3DBackpropFilter. +type Conv3DBackpropFilterAttr func(optionalAttr) -// MatrixSolveAdjoint sets the optional adjoint attribute to value. -// -// value: Boolean indicating whether to solve with `matrix` or its (block-wise) -// adjoint. -// If not specified, defaults to false -func MatrixSolveAdjoint(value bool) MatrixSolveAttr { +// Conv3DBackpropFilterDilations sets the optional dilations attribute to value. +// If not specified, defaults to +func Conv3DBackpropFilterDilations(value []int64) Conv3DBackpropFilterAttr { return func(m optionalAttr) { - m["adjoint"] = value + m["dilations"] = value } } -// Solves systems of linear equations. +// Computes the gradients of 3-D convolution with respect to the filter. // -// `Matrix` is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions -// form square matrices. `Rhs` is a tensor of shape `[..., M, K]`. The `output` is -// a tensor shape `[..., M, K]`. If `adjoint` is `False` then each output matrix -// satisfies `matrix[..., :, :] * output[..., :, :] = rhs[..., :, :]`. -// If `adjoint` is `True` then each output matrix satisfies -// `adjoint(matrix[..., :, :]) * output[..., :, :] = rhs[..., :, :]`. +// DEPRECATED at GraphDef version 10: Use Conv3DBackpropFilterV2 // // Arguments: -// matrix: Shape is `[..., M, M]`. -// rhs: Shape is `[..., M, K]`. -// -// Returns Shape is `[..., M, K]`. -func MatrixSolve(scope *Scope, matrix tf.Output, rhs tf.Output, optional ...MatrixSolveAttr) (output tf.Output) { +// input: Shape `[batch, depth, rows, cols, in_channels]`. 
+// filter: Shape `[depth, rows, cols, in_channels, out_channels]`. +// `in_channels` must match between `input` and `filter`. +// out_backprop: Backprop signal of shape `[batch, out_depth, out_rows, out_cols, +// out_channels]`. +// strides: 1-D tensor of length 5. The stride of the sliding window for each +// dimension of `input`. Must have `strides[0] = strides[4] = 1`. +// padding: The type of padding algorithm to use. +func Conv3DBackpropFilter(scope *Scope, input tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...Conv3DBackpropFilterAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"strides": strides, "padding": padding} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "MatrixSolve", + Type: "Conv3DBackpropFilter", Input: []tf.Input{ - matrix, rhs, + input, filter, out_backprop, }, Attrs: attrs, } @@ -5904,63 +5960,28 @@ func MatrixSolve(scope *Scope, matrix tf.Output, rhs tf.Output, optional ...Matr return op.Output(0) } -// Computes acos of x element-wise. -func Acos(scope *Scope, x tf.Output) (y tf.Output) { +// Computes the gradient for the rsqrt of `x` wrt its input. +// +// Specifically, `grad = dy * -0.5 * y^3`, where `y = rsqrt(x)`, and `dy` +// is the corresponding input gradient. +func RsqrtGrad(scope *Scope, y tf.Output, dy tf.Output) (z tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Acos", + Type: "RsqrtGrad", Input: []tf.Input{ - x, + y, dy, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Real-valued fast Fourier transform. -// -// Computes the 1-dimensional discrete Fourier transform of a real-valued signal -// over the inner-most dimension of `input`. 
-// -// Since the DFT of a real signal is Hermitian-symmetric, `RFFT` only returns the -// `fft_length / 2 + 1` unique components of the FFT: the zero-frequency term, -// followed by the `fft_length / 2` positive-frequency terms. -// -// Along the axis `RFFT` is computed on, if `fft_length` is smaller than the -// corresponding dimension of `input`, the dimension is cropped. If it is larger, -// the dimension is padded with zeros. -// -// Arguments: -// input: A float32 tensor. -// fft_length: An int32 tensor of shape [1]. The FFT length. -// -// Returns A complex64 tensor of the same rank as `input`. The inner-most -// dimension of `input` is replaced with the `fft_length / 2 + 1` unique -// frequency components of its 1D Fourier transform. -// -// @compatibility(numpy) -// Equivalent to np.fft.rfft -// @end_compatibility -func RFFT(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "RFFT", - Input: []tf.Input{ - input, fft_length, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// DepthwiseConv2dNativeBackpropFilterAttr is an optional argument to DepthwiseConv2dNativeBackpropFilter. -type DepthwiseConv2dNativeBackpropFilterAttr func(optionalAttr) +// DepthwiseConv2dNativeAttr is an optional argument to DepthwiseConv2dNative. +type DepthwiseConv2dNativeAttr func(optionalAttr) -// DepthwiseConv2dNativeBackpropFilterDataFormat sets the optional data_format attribute to value. +// DepthwiseConv2dNativeDataFormat sets the optional data_format attribute to value. // // value: Specify the data format of the input and output data. With the // default format "NHWC", the data is stored in the order of: @@ -5968,13 +5989,13 @@ type DepthwiseConv2dNativeBackpropFilterAttr func(optionalAttr) // Alternatively, the format could be "NCHW", the data storage order of: // [batch, channels, height, width]. 
// If not specified, defaults to "NHWC" -func DepthwiseConv2dNativeBackpropFilterDataFormat(value string) DepthwiseConv2dNativeBackpropFilterAttr { +func DepthwiseConv2dNativeDataFormat(value string) DepthwiseConv2dNativeAttr { return func(m optionalAttr) { m["data_format"] = value } } -// DepthwiseConv2dNativeBackpropFilterDilations sets the optional dilations attribute to value. +// DepthwiseConv2dNativeDilations sets the optional dilations attribute to value. // // value: 1-D tensor of length 4. The dilation factor for each dimension of // `input`. If set to k > 1, there will be k-1 skipped cells between each filter @@ -5982,33 +6003,40 @@ func DepthwiseConv2dNativeBackpropFilterDataFormat(value string) DepthwiseConv2d // `data_format`, see above for details. Dilations in the batch and depth // dimensions must be 1. // If not specified, defaults to -func DepthwiseConv2dNativeBackpropFilterDilations(value []int64) DepthwiseConv2dNativeBackpropFilterAttr { +func DepthwiseConv2dNativeDilations(value []int64) DepthwiseConv2dNativeAttr { return func(m optionalAttr) { m["dilations"] = value } } -// Computes the gradients of depthwise convolution with respect to the filter. +// Computes a 2-D depthwise convolution given 4-D `input` and `filter` tensors. +// +// Given an input tensor of shape `[batch, in_height, in_width, in_channels]` +// and a filter / kernel tensor of shape +// `[filter_height, filter_width, in_channels, channel_multiplier]`, containing +// `in_channels` convolutional filters of depth 1, `depthwise_conv2d` applies +// a different filter to each input channel (expanding from 1 channel to +// `channel_multiplier` channels for each), then concatenates the results +// together. Thus, the output has `in_channels * channel_multiplier` channels. 
+// +// ``` +// for k in 0..in_channels-1 +// for q in 0..channel_multiplier-1 +// output[b, i, j, k * channel_multiplier + q] = +// sum_{di, dj} input[b, strides[1] * i + di, strides[2] * j + dj, k] * +// filter[di, dj, k, q] +// ``` +// +// Must have `strides[0] = strides[3] = 1`. For the most common case of the same +// horizontal and vertices strides, `strides = [1, stride, stride, 1]`. // // Arguments: -// input: 4-D with shape based on `data_format`. For example, if -// `data_format` is 'NHWC' then `input` is a 4-D `[batch, in_height, -// in_width, in_channels]` tensor. -// filter_sizes: An integer vector representing the tensor shape of `filter`, -// where `filter` is a 4-D -// `[filter_height, filter_width, in_channels, depthwise_multiplier]` tensor. -// out_backprop: 4-D with shape based on `data_format`. -// For example, if `data_format` is 'NHWC' then -// out_backprop shape is `[batch, out_height, out_width, out_channels]`. -// Gradients w.r.t. the output of the convolution. -// strides: The stride of the sliding window for each dimension of the input -// of the convolution. -// padding: The type of padding algorithm to use. // -// Returns 4-D with shape -// `[filter_height, filter_width, in_channels, out_channels]`. Gradient w.r.t. -// the `filter` input of the convolution. -func DepthwiseConv2dNativeBackpropFilter(scope *Scope, input tf.Output, filter_sizes tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...DepthwiseConv2dNativeBackpropFilterAttr) (output tf.Output) { +// +// strides: 1-D of length 4. The stride of the sliding window for each dimension +// of `input`. +// padding: The type of padding algorithm to use. 
+func DepthwiseConv2dNative(scope *Scope, input tf.Output, filter tf.Output, strides []int64, padding string, optional ...DepthwiseConv2dNativeAttr) (output tf.Output) { if scope.Err() != nil { return } @@ -6017,9 +6045,9 @@ func DepthwiseConv2dNativeBackpropFilter(scope *Scope, input tf.Output, filter_s a(attrs) } opspec := tf.OpSpec{ - Type: "DepthwiseConv2dNativeBackpropFilter", + Type: "DepthwiseConv2dNative", Input: []tf.Input{ - input, filter_sizes, out_backprop, + input, filter, }, Attrs: attrs, } @@ -6027,69 +6055,47 @@ func DepthwiseConv2dNativeBackpropFilter(scope *Scope, input tf.Output, filter_s return op.Output(0) } -// LRNGradAttr is an optional argument to LRNGrad. -type LRNGradAttr func(optionalAttr) - -// LRNGradDepthRadius sets the optional depth_radius attribute to value. -// -// value: A depth radius. -// If not specified, defaults to 5 -func LRNGradDepthRadius(value int64) LRNGradAttr { - return func(m optionalAttr) { - m["depth_radius"] = value - } -} - -// LRNGradBias sets the optional bias attribute to value. -// -// value: An offset (usually > 0 to avoid dividing by 0). -// If not specified, defaults to 1 -func LRNGradBias(value float32) LRNGradAttr { - return func(m optionalAttr) { - m["bias"] = value - } -} - -// LRNGradAlpha sets the optional alpha attribute to value. -// -// value: A scale factor, usually positive. -// If not specified, defaults to 1 -func LRNGradAlpha(value float32) LRNGradAttr { - return func(m optionalAttr) { - m["alpha"] = value - } -} +// MaxPoolGradV2Attr is an optional argument to MaxPoolGradV2. +type MaxPoolGradV2Attr func(optionalAttr) -// LRNGradBeta sets the optional beta attribute to value. +// MaxPoolGradV2DataFormat sets the optional data_format attribute to value. // -// value: An exponent. -// If not specified, defaults to 0.5 -func LRNGradBeta(value float32) LRNGradAttr { +// value: Specify the data format of the input and output data. 
With the +// default format "NHWC", the data is stored in the order of: +// [batch, in_height, in_width, in_channels]. +// Alternatively, the format could be "NCHW", the data storage order of: +// [batch, in_channels, in_height, in_width]. +// If not specified, defaults to "NHWC" +func MaxPoolGradV2DataFormat(value string) MaxPoolGradV2Attr { return func(m optionalAttr) { - m["beta"] = value + m["data_format"] = value } } -// Gradients for Local Response Normalization. +// Computes gradients of the maxpooling function. // // Arguments: -// input_grads: 4-D with shape `[batch, height, width, channels]`. -// input_image: 4-D with shape `[batch, height, width, channels]`. -// output_image: 4-D with shape `[batch, height, width, channels]`. +// orig_input: The original input tensor. +// orig_output: The original output tensor. +// grad: 4-D. Gradients w.r.t. the output of `max_pool`. +// ksize: The size of the window for each dimension of the input tensor. +// strides: The stride of the sliding window for each dimension of the +// input tensor. +// padding: The type of padding algorithm to use. // -// Returns The gradients for LRN. -func LRNGrad(scope *Scope, input_grads tf.Output, input_image tf.Output, output_image tf.Output, optional ...LRNGradAttr) (output tf.Output) { +// Returns Gradients w.r.t. the input to `max_pool`. 
+func MaxPoolGradV2(scope *Scope, orig_input tf.Output, orig_output tf.Output, grad tf.Output, ksize tf.Output, strides tf.Output, padding string, optional ...MaxPoolGradV2Attr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"padding": padding} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "LRNGrad", + Type: "MaxPoolGradV2", Input: []tf.Input{ - input_grads, input_image, output_image, + orig_input, orig_output, grad, ksize, strides, }, Attrs: attrs, } @@ -6097,44 +6103,71 @@ func LRNGrad(scope *Scope, input_grads tf.Output, input_image tf.Output, output_ return op.Output(0) } -// AnyAttr is an optional argument to Any. -type AnyAttr func(optionalAttr) +// Restore a reader to a previously saved state. +// +// Not all Readers support being restored, so this can produce an +// Unimplemented error. +// +// Arguments: +// reader_handle: Handle to a Reader. +// state: Result of a ReaderSerializeState of a Reader with type +// matching reader_handle. +// +// Returns the created operation. +func ReaderRestoreStateV2(scope *Scope, reader_handle tf.Output, state tf.Output) (o *tf.Operation) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "ReaderRestoreStateV2", + Input: []tf.Input{ + reader_handle, state, + }, + } + return scope.AddOperation(opspec) +} -// AnyKeepDims sets the optional keep_dims attribute to value. +// MaxPoolGradAttr is an optional argument to MaxPoolGrad. +type MaxPoolGradAttr func(optionalAttr) + +// MaxPoolGradDataFormat sets the optional data_format attribute to value. // -// value: If true, retain reduced dimensions with length 1. -// If not specified, defaults to false -func AnyKeepDims(value bool) AnyAttr { +// value: Specify the data format of the input and output data. With the +// default format "NHWC", the data is stored in the order of: +// [batch, in_height, in_width, in_channels]. 
+// Alternatively, the format could be "NCHW", the data storage order of: +// [batch, in_channels, in_height, in_width]. +// If not specified, defaults to "NHWC" +func MaxPoolGradDataFormat(value string) MaxPoolGradAttr { return func(m optionalAttr) { - m["keep_dims"] = value + m["data_format"] = value } } -// Computes the "logical or" of elements across dimensions of a tensor. -// -// Reduces `input` along the dimensions given in `axis`. Unless -// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in -// `axis`. If `keep_dims` is true, the reduced dimensions are -// retained with length 1. +// Computes gradients of the maxpooling function. // // Arguments: -// input: The tensor to reduce. -// axis: The dimensions to reduce. Must be in the range -// `[-rank(input), rank(input))`. +// orig_input: The original input tensor. +// orig_output: The original output tensor. +// grad: 4-D. Gradients w.r.t. the output of `max_pool`. +// ksize: The size of the window for each dimension of the input tensor. +// strides: The stride of the sliding window for each dimension of the +// input tensor. +// padding: The type of padding algorithm to use. // -// Returns The reduced tensor. -func Any(scope *Scope, input tf.Output, axis tf.Output, optional ...AnyAttr) (output tf.Output) { +// Returns Gradients w.r.t. the input to `max_pool`. 
+func MaxPoolGrad(scope *Scope, orig_input tf.Output, orig_output tf.Output, grad tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPoolGradAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "Any", + Type: "MaxPoolGrad", Input: []tf.Input{ - input, axis, + orig_input, orig_output, grad, }, Attrs: attrs, } @@ -6142,41 +6175,66 @@ func Any(scope *Scope, input tf.Output, axis tf.Output, optional ...AnyAttr) (ou return op.Output(0) } -// ResourceApplyFtrlAttr is an optional argument to ResourceApplyFtrl. -type ResourceApplyFtrlAttr func(optionalAttr) +// CropAndResizeAttr is an optional argument to CropAndResize. +type CropAndResizeAttr func(optionalAttr) -// ResourceApplyFtrlUseLocking sets the optional use_locking attribute to value. +// CropAndResizeMethod sets the optional method attribute to value. // -// value: If `True`, updating of the var and accum tensors will be protected -// by a lock; otherwise the behavior is undefined, but may exhibit less -// contention. -// If not specified, defaults to false -func ResourceApplyFtrlUseLocking(value bool) ResourceApplyFtrlAttr { +// value: A string specifying the interpolation method. Only 'bilinear' is +// supported for now. +// If not specified, defaults to "bilinear" +func CropAndResizeMethod(value string) CropAndResizeAttr { return func(m optionalAttr) { - m["use_locking"] = value + m["method"] = value } } -// Update '*var' according to the Ftrl-proximal scheme. +// CropAndResizeExtrapolationValue sets the optional extrapolation_value attribute to value. 
// -// accum_new = accum + grad * grad -// linear += grad - (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var -// quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2 -// var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0 -// accum = accum_new +// value: Value used for extrapolation, when applicable. +// If not specified, defaults to 0 +func CropAndResizeExtrapolationValue(value float32) CropAndResizeAttr { + return func(m optionalAttr) { + m["extrapolation_value"] = value + } +} + +// Extracts crops from the input image tensor and bilinearly resizes them (possibly +// +// with aspect ratio change) to a common output size specified by `crop_size`. This +// is more general than the `crop_to_bounding_box` op which extracts a fixed size +// slice from the input image and does not allow resizing or aspect ratio change. +// +// Returns a tensor with `crops` from the input `image` at positions defined at the +// bounding box locations in `boxes`. The cropped boxes are all resized (with +// bilinear interpolation) to a fixed `size = [crop_height, crop_width]`. The +// result is a 4-D tensor `[num_boxes, crop_height, crop_width, depth]`. The +// resizing is corner aligned. In particular, if `boxes = [[0, 0, 1, 1]]`, the +// method will give identical results to using `tf.image.resize_bilinear()` +// with `align_corners=True`. // // Arguments: -// var_: Should be from a Variable(). -// accum: Should be from a Variable(). -// linear: Should be from a Variable(). -// grad: The gradient. -// lr: Scaling factor. Must be a scalar. -// l1: L1 regulariation. Must be a scalar. -// l2: L2 regulariation. Must be a scalar. -// lr_power: Scaling factor. Must be a scalar. +// image: A 4-D tensor of shape `[batch, image_height, image_width, depth]`. +// Both `image_height` and `image_width` need to be positive. +// boxes: A 2-D tensor of shape `[num_boxes, 4]`. 
The `i`-th row of the tensor +// specifies the coordinates of a box in the `box_ind[i]` image and is specified +// in normalized coordinates `[y1, x1, y2, x2]`. A normalized coordinate value of +// `y` is mapped to the image coordinate at `y * (image_height - 1)`, so as the +// `[0, 1]` interval of normalized image height is mapped to +// `[0, image_height - 1]` in image height coordinates. We do allow `y1` > `y2`, in +// which case the sampled crop is an up-down flipped version of the original +// image. The width dimension is treated similarly. Normalized coordinates +// outside the `[0, 1]` range are allowed, in which case we use +// `extrapolation_value` to extrapolate the input image values. +// box_ind: A 1-D tensor of shape `[num_boxes]` with int32 values in `[0, batch)`. +// The value of `box_ind[i]` specifies the image that the `i`-th box refers to. +// crop_size: A 1-D tensor of 2 elements, `size = [crop_height, crop_width]`. All +// cropped image patches are resized to this size. The aspect ratio of the image +// content is not preserved. Both `crop_height` and `crop_width` need to be +// positive. // -// Returns the created operation. -func ResourceApplyFtrl(scope *Scope, var_ tf.Output, accum tf.Output, linear tf.Output, grad tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, lr_power tf.Output, optional ...ResourceApplyFtrlAttr) (o *tf.Operation) { +// Returns A 4-D tensor of shape `[num_boxes, crop_height, crop_width, depth]`. +func CropAndResize(scope *Scope, image tf.Output, boxes tf.Output, box_ind tf.Output, crop_size tf.Output, optional ...CropAndResizeAttr) (crops tf.Output) { if scope.Err() != nil { return } @@ -6185,93 +6243,175 @@ func ResourceApplyFtrl(scope *Scope, var_ tf.Output, accum tf.Output, linear tf. 
a(attrs) } opspec := tf.OpSpec{ - Type: "ResourceApplyFtrl", + Type: "CropAndResize", Input: []tf.Input{ - var_, accum, linear, grad, lr, l1, l2, lr_power, + image, boxes, box_ind, crop_size, }, Attrs: attrs, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0) } -// RandomUniformAttr is an optional argument to RandomUniform. -type RandomUniformAttr func(optionalAttr) - -// RandomUniformSeed sets the optional seed attribute to value. +// Fills empty rows in the input 2-D `SparseTensor` with a default value. // -// value: If either `seed` or `seed2` are set to be non-zero, the random number -// generator is seeded by the given seed. Otherwise, it is seeded by a -// random seed. -// If not specified, defaults to 0 -func RandomUniformSeed(value int64) RandomUniformAttr { - return func(m optionalAttr) { - m["seed"] = value - } -} - -// RandomUniformSeed2 sets the optional seed2 attribute to value. +// The input `SparseTensor` is represented via the tuple of inputs +// (`indices`, `values`, `dense_shape`). The output `SparseTensor` has the +// same `dense_shape` but with indices `output_indices` and values +// `output_values`. // -// value: A second seed to avoid seed collision. -// If not specified, defaults to 0 -func RandomUniformSeed2(value int64) RandomUniformAttr { - return func(m optionalAttr) { - m["seed2"] = value +// This op inserts a single entry for every row that doesn't have any values. +// The index is created as `[row, 0, ..., 0]` and the inserted value +// is `default_value`. 
+// +// For example, suppose `sp_input` has shape `[5, 6]` and non-empty values: +// +// [0, 1]: a +// [0, 3]: b +// [2, 0]: c +// [3, 1]: d +// +// Rows 1 and 4 are empty, so the output will be of shape `[5, 6]` with values: +// +// [0, 1]: a +// [0, 3]: b +// [1, 0]: default_value +// [2, 0]: c +// [3, 1]: d +// [4, 0]: default_value +// +// The output `SparseTensor` will be in row-major order and will have the +// same shape as the input. +// +// This op also returns an indicator vector shaped `[dense_shape[0]]` such that +// +// empty_row_indicator[i] = True iff row i was an empty row. +// +// And a reverse index map vector shaped `[indices.shape[0]]` that is used during +// backpropagation, +// +// reverse_index_map[j] = out_j s.t. indices[j, :] == output_indices[out_j, :] +// +// Arguments: +// indices: 2-D. the indices of the sparse tensor. +// values: 1-D. the values of the sparse tensor. +// dense_shape: 1-D. the shape of the sparse tensor. +// default_value: 0-D. default value to insert into location `[row, 0, ..., 0]` +// for rows missing from the input sparse tensor. +// output indices: 2-D. the indices of the filled sparse tensor. +// +// Returns 1-D. the values of the filled sparse tensor.1-D. whether the dense row was missing in the +// input sparse tensor.1-D. a map from the input indices to the output indices. +func SparseFillEmptyRows(scope *Scope, indices tf.Output, values tf.Output, dense_shape tf.Output, default_value tf.Output) (output_indices tf.Output, output_values tf.Output, empty_row_indicator tf.Output, reverse_index_map tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "SparseFillEmptyRows", + Input: []tf.Input{ + indices, values, dense_shape, default_value, + }, } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2), op.Output(3) } -// Outputs random values from a uniform distribution. +// Reverses specific dimensions of a tensor. 
// -// The generated values follow a uniform distribution in the range `[0, 1)`. The -// lower bound 0 is included in the range, while the upper bound 1 is excluded. +// Given a `tensor`, and a `bool` tensor `dims` representing the dimensions +// of `tensor`, this operation reverses each dimension i of `tensor` where +// `dims[i]` is `True`. +// +// `tensor` can have up to 8 dimensions. The number of dimensions +// of `tensor` must equal the number of elements in `dims`. In other words: +// +// `rank(tensor) = size(dims)` +// +// For example: +// +// ``` +// # tensor 't' is [[[[ 0, 1, 2, 3], +// # [ 4, 5, 6, 7], +// # [ 8, 9, 10, 11]], +// # [[12, 13, 14, 15], +// # [16, 17, 18, 19], +// # [20, 21, 22, 23]]]] +// # tensor 't' shape is [1, 2, 3, 4] +// +// # 'dims' is [False, False, False, True] +// reverse(t, dims) ==> [[[[ 3, 2, 1, 0], +// [ 7, 6, 5, 4], +// [ 11, 10, 9, 8]], +// [[15, 14, 13, 12], +// [19, 18, 17, 16], +// [23, 22, 21, 20]]]] +// +// # 'dims' is [False, True, False, False] +// reverse(t, dims) ==> [[[[12, 13, 14, 15], +// [16, 17, 18, 19], +// [20, 21, 22, 23] +// [[ 0, 1, 2, 3], +// [ 4, 5, 6, 7], +// [ 8, 9, 10, 11]]]] +// +// # 'dims' is [False, False, True, False] +// reverse(t, dims) ==> [[[[8, 9, 10, 11], +// [4, 5, 6, 7], +// [0, 1, 2, 3]] +// [[20, 21, 22, 23], +// [16, 17, 18, 19], +// [12, 13, 14, 15]]]] +// ``` // // Arguments: -// shape: The shape of the output tensor. -// dtype: The type of the output. +// tensor: Up to 8-D. +// dims: 1-D. The dimensions to reverse. // -// Returns A tensor of the specified shape filled with uniform random values. -func RandomUniform(scope *Scope, shape tf.Output, dtype tf.DataType, optional ...RandomUniformAttr) (output tf.Output) { +// Returns The same shape as `tensor`. 
+func Reverse(scope *Scope, tensor tf.Output, dims tf.Output) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"dtype": dtype} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "RandomUniform", + Type: "Reverse", Input: []tf.Input{ - shape, + tensor, dims, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// AssertAttr is an optional argument to Assert. -type AssertAttr func(optionalAttr) +// BiasAddGradAttr is an optional argument to BiasAddGrad. +type BiasAddGradAttr func(optionalAttr) -// AssertSummarize sets the optional summarize attribute to value. +// BiasAddGradDataFormat sets the optional data_format attribute to value. // -// value: Print this many entries of each tensor. -// If not specified, defaults to 3 -func AssertSummarize(value int64) AssertAttr { +// value: Specify the data format of the input and output data. With the +// default format "NHWC", the bias tensor will be added to the last dimension +// of the value tensor. +// Alternatively, the format could be "NCHW", the data storage order of: +// [batch, in_channels, in_height, in_width]. +// The tensor will be added to "in_channels", the third-to-the-last +// dimension. +// If not specified, defaults to "NHWC" +func BiasAddGradDataFormat(value string) BiasAddGradAttr { return func(m optionalAttr) { - m["summarize"] = value + m["data_format"] = value } } -// Asserts that the given condition is true. +// The backward operation for "BiasAdd" on the "bias" tensor. // -// If `condition` evaluates to false, print the list of tensors in `data`. -// `summarize` determines how many entries of the tensors to print. +// It accumulates all the values from out_backprop into the feature dimension. +// For NHWC data format, the feature dimension is the last. For NCHW data format, +// the feature dimension is the third-to-last. // // Arguments: -// condition: The condition to evaluate. 
-// data: The tensors to print out when condition is false. +// out_backprop: Any number of dimensions. // -// Returns the created operation. -func Assert(scope *Scope, condition tf.Output, data []tf.Output, optional ...AssertAttr) (o *tf.Operation) { +// Returns 1-D with size the feature dimension of `out_backprop`. +func BiasAddGrad(scope *Scope, out_backprop tf.Output, optional ...BiasAddGradAttr) (output tf.Output) { if scope.Err() != nil { return } @@ -6280,184 +6420,70 @@ func Assert(scope *Scope, condition tf.Output, data []tf.Output, optional ...Ass a(attrs) } opspec := tf.OpSpec{ - Type: "Assert", + Type: "BiasAddGrad", Input: []tf.Input{ - condition, tf.OutputList(data), + out_backprop, }, Attrs: attrs, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0) } -// Computes element-wise population count (a.k.a. popcount, bitsum, bitcount). -// -// For each entry in `x`, calculates the number of `1` (on) bits in the binary -// representation of that entry. +// FusedBatchNormV2Attr is an optional argument to FusedBatchNormV2. +type FusedBatchNormV2Attr func(optionalAttr) + +// FusedBatchNormV2Epsilon sets the optional epsilon attribute to value. // -// **NOTE**: It is more efficient to first `tf.bitcast` your tensors into -// `int32` or `int64` and perform the bitcount on the result, than to feed in -// 8- or 16-bit inputs and then aggregate the resulting counts. -func PopulationCount(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "PopulationCount", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Split a `SparseTensor` into `num_split` tensors along one dimension. -// -// If the `shape[split_dim]` is not an integer multiple of `num_split`. Slices -// `[0 : shape[split_dim] % num_split]` gets one extra dimension. 
-// For example, if `split_dim = 1` and `num_split = 2` and the input is -// -// input_tensor = shape = [2, 7] -// [ a d e ] -// [b c ] -// -// Graphically the output tensors are: -// -// output_tensor[0] = shape = [2, 4] -// [ a ] -// [b c ] -// -// output_tensor[1] = shape = [2, 3] -// [ d e ] -// [ ] -// -// Arguments: -// split_dim: 0-D. The dimension along which to split. Must be in the range -// `[0, rank(shape))`. -// indices: 2-D tensor represents the indices of the sparse tensor. -// values: 1-D tensor represents the values of the sparse tensor. -// shape: 1-D. tensor represents the shape of the sparse tensor. -// output indices: A list of 1-D tensors represents the indices of the output -// sparse tensors. -// num_split: The number of ways to split. -// -// Returns A list of 1-D tensors represents the values of the output sparse -// tensors.A list of 1-D tensors represents the shape of the output sparse -// tensors. -func SparseSplit(scope *Scope, split_dim tf.Output, indices tf.Output, values tf.Output, shape tf.Output, num_split int64) (output_indices []tf.Output, output_values []tf.Output, output_shape []tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_split": num_split} - opspec := tf.OpSpec{ - Type: "SparseSplit", - Input: []tf.Input{ - split_dim, indices, values, shape, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if output_indices, idx, err = makeOutputList(op, idx, "output_indices"); err != nil { - scope.UpdateErr("SparseSplit", err) - return - } - if output_values, idx, err = makeOutputList(op, idx, "output_values"); err != nil { - scope.UpdateErr("SparseSplit", err) - return - } - if output_shape, idx, err = makeOutputList(op, idx, "output_shape"); err != nil { - scope.UpdateErr("SparseSplit", err) - return - } - return output_indices, output_values, output_shape -} - -// RandomPoissonAttr is an optional argument to 
RandomPoisson. -type RandomPoissonAttr func(optionalAttr) - -// RandomPoissonSeed sets the optional seed attribute to value. -// If not specified, defaults to 0 -func RandomPoissonSeed(value int64) RandomPoissonAttr { - return func(m optionalAttr) { - m["seed"] = value - } -} - -// RandomPoissonSeed2 sets the optional seed2 attribute to value. -// If not specified, defaults to 0 -func RandomPoissonSeed2(value int64) RandomPoissonAttr { +// value: A small float number added to the variance of x. +// If not specified, defaults to 0.0001 +func FusedBatchNormV2Epsilon(value float32) FusedBatchNormV2Attr { return func(m optionalAttr) { - m["seed2"] = value + m["epsilon"] = value } } -// Use RandomPoissonV2 instead. +// FusedBatchNormV2DataFormat sets the optional data_format attribute to value. // -// DEPRECATED at GraphDef version 25: Replaced by RandomPoissonV2 -func RandomPoisson(scope *Scope, shape tf.Output, rate tf.Output, optional ...RandomPoissonAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "RandomPoisson", - Input: []tf.Input{ - shape, rate, - }, - Attrs: attrs, +// value: The data format for x and y. Either "NHWC" (default) or "NCHW". +// If not specified, defaults to "NHWC" +func FusedBatchNormV2DataFormat(value string) FusedBatchNormV2Attr { + return func(m optionalAttr) { + m["data_format"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// ResourceSparseApplyFtrlV2Attr is an optional argument to ResourceSparseApplyFtrlV2. -type ResourceSparseApplyFtrlV2Attr func(optionalAttr) - -// ResourceSparseApplyFtrlV2UseLocking sets the optional use_locking attribute to value. +// FusedBatchNormV2IsTraining sets the optional is_training attribute to value. 
// -// value: If `True`, updating of the var and accum tensors will be protected -// by a lock; otherwise the behavior is undefined, but may exhibit less -// contention. -// If not specified, defaults to false -func ResourceSparseApplyFtrlV2UseLocking(value bool) ResourceSparseApplyFtrlV2Attr { +// value: A bool value to indicate the operation is for training (default) +// or inference. +// If not specified, defaults to true +func FusedBatchNormV2IsTraining(value bool) FusedBatchNormV2Attr { return func(m optionalAttr) { - m["use_locking"] = value + m["is_training"] = value } } -// Update relevant entries in '*var' according to the Ftrl-proximal scheme. +// Batch normalization. // -// That is for rows we have grad for, we update var, accum and linear as follows: -// grad_with_shrinkage = grad + 2 * l2_shrinkage * var -// accum_new = accum + grad_with_shrinkage * grad_with_shrinkage -// linear += grad_with_shrinkage + -// (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var -// quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2 -// var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0 -// accum = accum_new +// Note that the size of 4D Tensors are defined by either "NHWC" or "NCHW". +// The size of 1D Tensors matches the dimension C of the 4D Tensors. // // Arguments: -// var_: Should be from a Variable(). -// accum: Should be from a Variable(). -// linear: Should be from a Variable(). -// grad: The gradient. -// indices: A vector of indices into the first dimension of var and accum. -// lr: Scaling factor. Must be a scalar. -// l1: L1 regularization. Must be a scalar. -// l2: L2 shrinkage regulariation. Must be a scalar. -// -// lr_power: Scaling factor. Must be a scalar. +// x: A 4D Tensor for input data. +// scale: A 1D Tensor for scaling factor, to scale the normalized x. +// offset: A 1D Tensor for offset, to shift to the normalized x. +// mean: A 1D Tensor for population mean. Used for inference only; +// must be empty for training. 
+// variance: A 1D Tensor for population variance. Used for inference only; +// must be empty for training. // -// Returns the created operation. -func ResourceSparseApplyFtrlV2(scope *Scope, var_ tf.Output, accum tf.Output, linear tf.Output, grad tf.Output, indices tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, l2_shrinkage tf.Output, lr_power tf.Output, optional ...ResourceSparseApplyFtrlV2Attr) (o *tf.Operation) { +// Returns A 4D Tensor for output data.A 1D Tensor for the computed batch mean, to be used by TensorFlow +// to compute the running mean.A 1D Tensor for the computed batch variance, to be used by +// TensorFlow to compute the running variance.A 1D Tensor for the computed batch mean, to be reused +// in the gradient computation.A 1D Tensor for the computed batch variance (inverted variance +// in the cuDNN case), to be reused in the gradient computation. +func FusedBatchNormV2(scope *Scope, x tf.Output, scale tf.Output, offset tf.Output, mean tf.Output, variance tf.Output, optional ...FusedBatchNormV2Attr) (y tf.Output, batch_mean tf.Output, batch_variance tf.Output, reserve_space_1 tf.Output, reserve_space_2 tf.Output) { if scope.Err() != nil { return } @@ -6466,63 +6492,65 @@ func ResourceSparseApplyFtrlV2(scope *Scope, var_ tf.Output, accum tf.Output, li a(attrs) } opspec := tf.OpSpec{ - Type: "ResourceSparseApplyFtrlV2", + Type: "FusedBatchNormV2", Input: []tf.Input{ - var_, accum, linear, grad, indices, lr, l1, l2, l2_shrinkage, lr_power, + x, scale, offset, mean, variance, }, Attrs: attrs, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4) } -// Associates the given iterator with the given statistics aggregator. +// Transforms a Tensor into a serialized TensorProto proto. // -// Returns the created operation. 
-func IteratorSetStatsAggregator(scope *Scope, iterator_handle tf.Output, stats_aggregator_handle tf.Output) (o *tf.Operation) { +// Arguments: +// tensor: A Tensor of type `T`. +// +// Returns A serialized TensorProto proto of the input tensor. +func SerializeTensor(scope *Scope, tensor tf.Output) (serialized tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "IteratorSetStatsAggregator", + Type: "SerializeTensor", Input: []tf.Input{ - iterator_handle, stats_aggregator_handle, + tensor, }, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0) } -// DataFormatVecPermuteAttr is an optional argument to DataFormatVecPermute. -type DataFormatVecPermuteAttr func(optionalAttr) - -// DataFormatVecPermuteSrcFormat sets the optional src_format attribute to value. -// -// value: source data format. -// If not specified, defaults to "NHWC" -func DataFormatVecPermuteSrcFormat(value string) DataFormatVecPermuteAttr { - return func(m optionalAttr) { - m["src_format"] = value - } -} +// MatrixSolveAttr is an optional argument to MatrixSolve. +type MatrixSolveAttr func(optionalAttr) -// DataFormatVecPermuteDstFormat sets the optional dst_format attribute to value. +// MatrixSolveAdjoint sets the optional adjoint attribute to value. // -// value: destination data format. -// If not specified, defaults to "NCHW" -func DataFormatVecPermuteDstFormat(value string) DataFormatVecPermuteAttr { +// value: Boolean indicating whether to solve with `matrix` or its (block-wise) +// adjoint. +// If not specified, defaults to false +func MatrixSolveAdjoint(value bool) MatrixSolveAttr { return func(m optionalAttr) { - m["dst_format"] = value + m["adjoint"] = value } } -// Returns the permuted vector/tensor in the destination data format given the +// Solves systems of linear equations. // -// one in the source data format. +// `Matrix` is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions +// form square matrices. 
`Rhs` is a tensor of shape `[..., M, K]`. The `output` is +// a tensor shape `[..., M, K]`. If `adjoint` is `False` then each output matrix +// satisfies `matrix[..., :, :] * output[..., :, :] = rhs[..., :, :]`. +// If `adjoint` is `True` then each output matrix satisfies +// `adjoint(matrix[..., :, :]) * output[..., :, :] = rhs[..., :, :]`. // // Arguments: -// x: Vector of size 4 or Tensor of shape (4, 2) in source data format. +// matrix: Shape is `[..., M, M]`. +// rhs: Shape is `[..., M, K]`. // -// Returns Vector of size 4 or Tensor of shape (4, 2) in destination data format. -func DataFormatVecPermute(scope *Scope, x tf.Output, optional ...DataFormatVecPermuteAttr) (y tf.Output) { +// Returns Shape is `[..., M, K]`. +func MatrixSolve(scope *Scope, matrix tf.Output, rhs tf.Output, optional ...MatrixSolveAttr) (output tf.Output) { if scope.Err() != nil { return } @@ -6531,9 +6559,9 @@ func DataFormatVecPermute(scope *Scope, x tf.Output, optional ...DataFormatVecPe a(attrs) } opspec := tf.OpSpec{ - Type: "DataFormatVecPermute", + Type: "MatrixSolve", Input: []tf.Input{ - x, + matrix, rhs, }, Attrs: attrs, } @@ -6541,13 +6569,13 @@ func DataFormatVecPermute(scope *Scope, x tf.Output, optional ...DataFormatVecPe return op.Output(0) } -// Computes tan of x element-wise. -func Tan(scope *Scope, x tf.Output) (y tf.Output) { +// Computes acos of x element-wise. +func Acos(scope *Scope, x tf.Output) (y tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Tan", + Type: "Acos", Input: []tf.Input{ x, }, @@ -6556,229 +6584,345 @@ func Tan(scope *Scope, x tf.Output) (y tf.Output) { return op.Output(0) } -// ResourceSparseApplyFtrlAttr is an optional argument to ResourceSparseApplyFtrl. -type ResourceSparseApplyFtrlAttr func(optionalAttr) +// QuantizeAndDequantizeAttr is an optional argument to QuantizeAndDequantize. 
+type QuantizeAndDequantizeAttr func(optionalAttr) -// ResourceSparseApplyFtrlUseLocking sets the optional use_locking attribute to value. -// -// value: If `True`, updating of the var and accum tensors will be protected -// by a lock; otherwise the behavior is undefined, but may exhibit less -// contention. -// If not specified, defaults to false -func ResourceSparseApplyFtrlUseLocking(value bool) ResourceSparseApplyFtrlAttr { +// QuantizeAndDequantizeSignedInput sets the optional signed_input attribute to value. +// If not specified, defaults to true +func QuantizeAndDequantizeSignedInput(value bool) QuantizeAndDequantizeAttr { return func(m optionalAttr) { - m["use_locking"] = value + m["signed_input"] = value } } -// Update relevant entries in '*var' according to the Ftrl-proximal scheme. -// -// That is for rows we have grad for, we update var, accum and linear as follows: -// accum_new = accum + grad * grad -// linear += grad + (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var -// quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2 -// var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0 -// accum = accum_new -// -// Arguments: -// var_: Should be from a Variable(). -// accum: Should be from a Variable(). -// linear: Should be from a Variable(). -// grad: The gradient. -// indices: A vector of indices into the first dimension of var and accum. -// lr: Scaling factor. Must be a scalar. -// l1: L1 regularization. Must be a scalar. -// l2: L2 regularization. Must be a scalar. -// lr_power: Scaling factor. Must be a scalar. -// -// Returns the created operation. -func ResourceSparseApplyFtrl(scope *Scope, var_ tf.Output, accum tf.Output, linear tf.Output, grad tf.Output, indices tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, lr_power tf.Output, optional ...ResourceSparseApplyFtrlAttr) (o *tf.Operation) { - if scope.Err() != nil { - return +// QuantizeAndDequantizeNumBits sets the optional num_bits attribute to value. 
+// If not specified, defaults to 8 +func QuantizeAndDequantizeNumBits(value int64) QuantizeAndDequantizeAttr { + return func(m optionalAttr) { + m["num_bits"] = value } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) +} + +// QuantizeAndDequantizeRangeGiven sets the optional range_given attribute to value. +// If not specified, defaults to false +func QuantizeAndDequantizeRangeGiven(value bool) QuantizeAndDequantizeAttr { + return func(m optionalAttr) { + m["range_given"] = value } - opspec := tf.OpSpec{ - Type: "ResourceSparseApplyFtrl", - Input: []tf.Input{ - var_, accum, linear, grad, indices, lr, l1, l2, lr_power, - }, - Attrs: attrs, +} + +// QuantizeAndDequantizeInputMin sets the optional input_min attribute to value. +// If not specified, defaults to 0 +func QuantizeAndDequantizeInputMin(value float32) QuantizeAndDequantizeAttr { + return func(m optionalAttr) { + m["input_min"] = value } - return scope.AddOperation(opspec) } -// Returns which elements of x are Inf. +// QuantizeAndDequantizeInputMax sets the optional input_max attribute to value. +// If not specified, defaults to 0 +func QuantizeAndDequantizeInputMax(value float32) QuantizeAndDequantizeAttr { + return func(m optionalAttr) { + m["input_max"] = value + } +} + +// Use QuantizeAndDequantizeV2 instead. 
// -// @compatibility(numpy) -// Equivalent to np.isinf -// @end_compatibility -func IsInf(scope *Scope, x tf.Output) (y tf.Output) { +// DEPRECATED at GraphDef version 22: Replaced by QuantizeAndDequantizeV2 +func QuantizeAndDequantize(scope *Scope, input tf.Output, optional ...QuantizeAndDequantizeAttr) (output tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "IsInf", + Type: "QuantizeAndDequantize", Input: []tf.Input{ - x, + input, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Computes the sum along sparse segments of a tensor divided by the sqrt of N. +// Returns locations of nonzero / true values in a tensor. // -// N is the size of the segment being reduced. +// This operation returns the coordinates of true elements in `condition`. The +// coordinates are returned in a 2-D tensor where the first dimension (rows) +// represents the number of true elements, and the second dimension (columns) +// represents the coordinates of the true elements. Keep in mind, the shape of +// the output tensor can vary depending on how many true values there are in +// `condition`. Indices are output in row-major order. // -// Read @{$math_ops#segmentation$the section on segmentation} for an explanation of -// segments. +// For example: // -// Arguments: +// ``` +// # 'input' tensor is [[True, False] +// # [True, False]] +// # 'input' has two true values, so output has two coordinates. +// # 'input' has rank of 2, so coordinates have two indices. +// where(input) ==> [[0, 0], +// [1, 0]] // -// indices: A 1-D tensor. Has same rank as `segment_ids`. -// segment_ids: A 1-D tensor. Values should be sorted and can be repeated. +// # `condition` tensor is [[[True, False] +// # [True, False]] +// # [[False, True] +// # [False, True]] +// # [[False, False] +// # [False, True]]] +// # 'input' has 5 true values, so output has 5 coordinates. 
+// # 'input' has rank of 3, so coordinates have three indices. +// where(input) ==> [[0, 0, 0], +// [0, 1, 0], +// [1, 0, 1], +// [1, 1, 1], +// [2, 1, 1]] // -// Returns Has same shape as data, except for dimension 0 which -// has size `k`, the number of segments. -func SparseSegmentSqrtN(scope *Scope, data tf.Output, indices tf.Output, segment_ids tf.Output) (output tf.Output) { +// # `condition` tensor is [[[1.5, 0.0] +// # [-0.5, 0.0]] +// # [[0.0, 0.25] +// # [0.0, 0.75]] +// # [[0.0, 0.0] +// # [0.0, 0.01]]] +// # 'input' has 5 nonzero values, so output has 5 coordinates. +// # 'input' has rank of 3, so coordinates have three indices. +// where(input) ==> [[0, 0, 0], +// [0, 1, 0], +// [1, 0, 1], +// [1, 1, 1], +// [2, 1, 1]] +// +// # `condition` tensor is [[[1.5 + 0.0j, 0.0 + 0.0j] +// # [0.0 + 0.5j, 0.0 + 0.0j]] +// # [[0.0 + 0.0j, 0.25 + 1.5j] +// # [0.0 + 0.0j, 0.75 + 0.0j]] +// # [[0.0 + 0.0j, 0.0 + 0.0j] +// # [0.0 + 0.0j, 0.01 + 0.0j]]] +// # 'input' has 5 nonzero magnitude values, so output has 5 coordinates. +// # 'input' has rank of 3, so coordinates have three indices. +// where(input) ==> [[0, 0, 0], +// [0, 1, 0], +// [1, 0, 1], +// [1, 1, 1], +// [2, 1, 1]] +// ``` +func Where(scope *Scope, condition tf.Output) (index tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "SparseSegmentSqrtN", + Type: "Where", Input: []tf.Input{ - data, indices, segment_ids, + condition, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// StatelessTruncatedNormalAttr is an optional argument to StatelessTruncatedNormal. -type StatelessTruncatedNormalAttr func(optionalAttr) +// QueueDequeueV2Attr is an optional argument to QueueDequeueV2. +type QueueDequeueV2Attr func(optionalAttr) -// StatelessTruncatedNormalDtype sets the optional dtype attribute to value. +// QueueDequeueV2TimeoutMs sets the optional timeout_ms attribute to value. // -// value: The type of the output. 
-// If not specified, defaults to DT_FLOAT -func StatelessTruncatedNormalDtype(value tf.DataType) StatelessTruncatedNormalAttr { +// value: If the queue is empty, this operation will block for up to +// timeout_ms milliseconds. +// Note: This option is not supported yet. +// If not specified, defaults to -1 +func QueueDequeueV2TimeoutMs(value int64) QueueDequeueV2Attr { return func(m optionalAttr) { - m["dtype"] = value + m["timeout_ms"] = value } } -// Outputs deterministic pseudorandom values from a truncated normal distribution. +// Dequeues a tuple of one or more tensors from the given queue. // -// The generated values follow a normal distribution with mean 0 and standard -// deviation 1, except that values whose magnitude is more than 2 standard -// deviations from the mean are dropped and re-picked. +// This operation has k outputs, where k is the number of components +// in the tuples stored in the given queue, and output i is the ith +// component of the dequeued tuple. // -// The outputs are a deterministic function of `shape` and `seed`. +// N.B. If the queue is empty, this operation will block until an element +// has been dequeued (or 'timeout_ms' elapses, if specified). // // Arguments: -// shape: The shape of the output tensor. -// seed: 2 seeds (shape [2]). +// handle: The handle to a queue. +// component_types: The type of each component in a tuple. // -// Returns Random values with specified shape. -func StatelessTruncatedNormal(scope *Scope, shape tf.Output, seed tf.Output, optional ...StatelessTruncatedNormalAttr) (output tf.Output) { +// Returns One or more tensors that were dequeued as a tuple. 
+func QueueDequeueV2(scope *Scope, handle tf.Output, component_types []tf.DataType, optional ...QueueDequeueV2Attr) (components []tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"component_types": component_types} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "StatelessTruncatedNormal", + Type: "QueueDequeueV2", Input: []tf.Input{ - shape, seed, + handle, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) -} - -// RestoreSliceAttr is an optional argument to RestoreSlice. -type RestoreSliceAttr func(optionalAttr) - -// RestoreSlicePreferredShard sets the optional preferred_shard attribute to value. -// -// value: Index of file to open first if multiple files match -// `file_pattern`. See the documentation for `Restore`. -// If not specified, defaults to -1 -func RestoreSlicePreferredShard(value int64) RestoreSliceAttr { - return func(m optionalAttr) { - m["preferred_shard"] = value + if scope.Err() != nil { + return + } + var idx int + var err error + if components, idx, err = makeOutputList(op, idx, "components"); err != nil { + scope.UpdateErr("QueueDequeueV2", err) + return } + return components } -// Restores a tensor from checkpoint files. -// -// This is like `Restore` except that restored tensor can be listed as filling -// only a slice of a larger tensor. `shape_and_slice` specifies the shape of the -// larger tensor and the slice that the restored tensor covers. -// -// The `shape_and_slice` input has the same format as the -// elements of the `shapes_and_slices` input of the `SaveSlices` op. -// -// Arguments: -// file_pattern: Must have a single element. The pattern of the files from -// which we read the tensor. -// tensor_name: Must have a single element. The name of the tensor to be -// restored. -// shape_and_slice: Scalar. The shapes and slice specifications to use when -// restoring a tensors. 
-// dt: The type of the tensor to be restored. -// -// Returns The restored tensor. -func RestoreSlice(scope *Scope, file_pattern tf.Output, tensor_name tf.Output, shape_and_slice tf.Output, dt tf.DataType, optional ...RestoreSliceAttr) (tensor tf.Output) { +// Computes the Gauss error function of `x` element-wise. +func Erf(scope *Scope, x tf.Output) (y tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"dt": dt} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "RestoreSlice", + Type: "Erf", Input: []tf.Input{ - file_pattern, tensor_name, shape_and_slice, + x, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// ImagAttr is an optional argument to Imag. -type ImagAttr func(optionalAttr) - -// ImagTout sets the optional Tout attribute to value. -// If not specified, defaults to DT_FLOAT -func ImagTout(value tf.DataType) ImagAttr { +// Returns element-wise largest integer not greater than x. +func Floor(scope *Scope, x tf.Output) (y tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Floor", + Input: []tf.Input{ + x, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// OneHotAttr is an optional argument to OneHot. +type OneHotAttr func(optionalAttr) + +// OneHotAxis sets the optional axis attribute to value. +// +// value: The axis to fill (default: -1, a new inner-most axis). +// If not specified, defaults to -1 +func OneHotAxis(value int64) OneHotAttr { return func(m optionalAttr) { - m["Tout"] = value + m["axis"] = value } } -// Returns the imaginary part of a complex number. +// Returns a one-hot tensor. // -// Given a tensor `input` of complex numbers, this operation returns a tensor of -// type `float` that is the imaginary part of each element in `input`. All -// elements in `input` must be complex numbers of the form \\(a + bj\\), where *a* -// is the real part and *b* is the imaginary part returned by this operation. 
+// The locations represented by indices in `indices` take value `on_value`, +// while all other locations take value `off_value`. // -// For example: +// If the input `indices` is rank `N`, the output will have rank `N+1`, +// The new axis is created at dimension `axis` (default: the new axis is +// appended at the end). +// +// If `indices` is a scalar the output shape will be a vector of length `depth`. // +// If `indices` is a vector of length `features`, the output shape will be: // ``` -// # tensor 'input' is [-2.25 + 4.75j, 3.25 + 5.75j] -// tf.imag(input) ==> [4.75, 5.75] +// features x depth if axis == -1 +// depth x features if axis == 0 // ``` -func Imag(scope *Scope, input tf.Output, optional ...ImagAttr) (output tf.Output) { +// +// If `indices` is a matrix (batch) with shape `[batch, features]`, +// the output shape will be: +// ``` +// batch x features x depth if axis == -1 +// batch x depth x features if axis == 1 +// depth x batch x features if axis == 0 +// ``` +// +// +// Examples +// ========= +// +// Suppose that +// +// ``` +// indices = [0, 2, -1, 1] +// depth = 3 +// on_value = 5.0 +// off_value = 0.0 +// axis = -1 +// ``` +// +// Then output is `[4 x 3]`: +// +// ```output = +// [5.0 0.0 0.0] // one_hot(0) +// [0.0 0.0 5.0] // one_hot(2) +// [0.0 0.0 0.0] // one_hot(-1) +// [0.0 5.0 0.0] // one_hot(1) +// ``` +// +// Suppose that +// +// ``` +// indices = [0, 2, -1, 1] +// depth = 3 +// on_value = 0.0 +// off_value = 3.0 +// axis = 0 +// ``` +// +// Then output is `[3 x 4]`: +// +// ```output = +// [0.0 3.0 3.0 3.0] +// [3.0 3.0 3.0 0.0] +// [3.0 3.0 3.0 3.0] +// [3.0 0.0 3.0 3.0] +// // ^ one_hot(0) +// // ^ one_hot(2) +// // ^ one_hot(-1) +// // ^ one_hot(1) +// ``` +// Suppose that +// +// ``` +// indices = [[0, 2], [1, -1]] +// depth = 3 +// on_value = 1.0 +// off_value = 0.0 +// axis = -1 +// ``` +// +// Then output is `[2 x 2 x 3]`: +// +// ```output = +// [ +// [1.0, 0.0, 0.0] // one_hot(0) +// [0.0, 0.0, 1.0] // one_hot(2) +// ][ 
+// [0.0, 1.0, 0.0] // one_hot(1) +// [0.0, 0.0, 0.0] // one_hot(-1) +// ]``` +// +// Arguments: +// indices: A tensor of indices. +// depth: A scalar defining the depth of the one hot dimension. +// on_value: A scalar defining the value to fill in output when `indices[j] = i`. +// off_value: A scalar defining the value to fill in output when `indices[j] != i`. +// +// Returns The one-hot tensor. +func OneHot(scope *Scope, indices tf.Output, depth tf.Output, on_value tf.Output, off_value tf.Output, optional ...OneHotAttr) (output tf.Output) { if scope.Err() != nil { return } @@ -6787,9 +6931,9 @@ func Imag(scope *Scope, input tf.Output, optional ...ImagAttr) (output tf.Output a(attrs) } opspec := tf.OpSpec{ - Type: "Imag", + Type: "OneHot", Input: []tf.Input{ - input, + indices, depth, on_value, off_value, }, Attrs: attrs, } @@ -6797,45 +6941,2135 @@ func Imag(scope *Scope, input tf.Output, optional ...ImagAttr) (output tf.Output return op.Output(0) } -// ComplexAttr is an optional argument to Complex. -type ComplexAttr func(optionalAttr) - -// ComplexTout sets the optional Tout attribute to value. -// If not specified, defaults to DT_COMPLEX64 -func ComplexTout(value tf.DataType) ComplexAttr { - return func(m optionalAttr) { - m["Tout"] = value +// Transforms a vector of brain.Example protos (as strings) into typed tensors. +// +// Arguments: +// serialized: A vector containing a batch of binary serialized Example protos. +// names: A vector containing the names of the serialized protos. +// May contain, for example, table key (descriptive) names for the +// corresponding serialized protos. These are purely useful for debugging +// purposes, and the presence of values here has no effect on the output. +// May also be an empty vector if no names are available. +// If non-empty, this vector must be the same length as "serialized". +// sparse_keys: A list of Nsparse string Tensors (scalars). 
+// The keys expected in the Examples' features associated with sparse values. +// dense_keys: A list of Ndense string Tensors (scalars). +// The keys expected in the Examples' features associated with dense values. +// dense_defaults: A list of Ndense Tensors (some may be empty). +// dense_defaults[j] provides default values +// when the example's feature_map lacks dense_key[j]. If an empty Tensor is +// provided for dense_defaults[j], then the Feature dense_keys[j] is required. +// The input type is inferred from dense_defaults[j], even when it's empty. +// If dense_defaults[j] is not empty, and dense_shapes[j] is fully defined, +// then the shape of dense_defaults[j] must match that of dense_shapes[j]. +// If dense_shapes[j] has an undefined major dimension (variable strides dense +// feature), dense_defaults[j] must contain a single element: +// the padding element. +// sparse_types: A list of Nsparse types; the data types of data in each Feature +// given in sparse_keys. +// Currently the ParseExample supports DT_FLOAT (FloatList), +// DT_INT64 (Int64List), and DT_STRING (BytesList). +// dense_shapes: A list of Ndense shapes; the shapes of data in each Feature +// given in dense_keys. +// The number of elements in the Feature corresponding to dense_key[j] +// must always equal dense_shapes[j].NumEntries(). +// If dense_shapes[j] == (D0, D1, ..., DN) then the shape of output +// Tensor dense_values[j] will be (|serialized|, D0, D1, ..., DN): +// The dense outputs are just the inputs row-stacked by batch. +// This works for dense_shapes[j] = (-1, D1, ..., DN). In this case +// the shape of the output Tensor dense_values[j] will be +// (|serialized|, M, D1, .., DN), where M is the maximum number of blocks +// of elements of length D1 * .... * DN, across all minibatch entries +// in the input. Any minibatch entry with less than M blocks of elements of +// length D1 * ... 
* DN will be padded with the corresponding default_value +// scalar element along the second dimension. +func ParseExample(scope *Scope, serialized tf.Output, names tf.Output, sparse_keys []tf.Output, dense_keys []tf.Output, dense_defaults []tf.Output, sparse_types []tf.DataType, dense_shapes []tf.Shape) (sparse_indices []tf.Output, sparse_values []tf.Output, sparse_shapes []tf.Output, dense_values []tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"sparse_types": sparse_types, "dense_shapes": dense_shapes} + opspec := tf.OpSpec{ + Type: "ParseExample", + Input: []tf.Input{ + serialized, names, tf.OutputList(sparse_keys), tf.OutputList(dense_keys), tf.OutputList(dense_defaults), + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + if scope.Err() != nil { + return + } + var idx int + var err error + if sparse_indices, idx, err = makeOutputList(op, idx, "sparse_indices"); err != nil { + scope.UpdateErr("ParseExample", err) + return + } + if sparse_values, idx, err = makeOutputList(op, idx, "sparse_values"); err != nil { + scope.UpdateErr("ParseExample", err) + return + } + if sparse_shapes, idx, err = makeOutputList(op, idx, "sparse_shapes"); err != nil { + scope.UpdateErr("ParseExample", err) + return + } + if dense_values, idx, err = makeOutputList(op, idx, "dense_values"); err != nil { + scope.UpdateErr("ParseExample", err) + return } + return sparse_indices, sparse_values, sparse_shapes, dense_values } -// Converts two real numbers to a complex number. +// Real-valued fast Fourier transform. // -// Given a tensor `real` representing the real part of a complex number, and a -// tensor `imag` representing the imaginary part of a complex number, this -// operation returns complex numbers elementwise of the form \\(a + bj\\), where -// *a* represents the `real` part and *b* represents the `imag` part. 
+// Computes the 1-dimensional discrete Fourier transform of a real-valued signal +// over the inner-most dimension of `input`. // -// The input tensors `real` and `imag` must have the same shape. +// Since the DFT of a real signal is Hermitian-symmetric, `RFFT` only returns the +// `fft_length / 2 + 1` unique components of the FFT: the zero-frequency term, +// followed by the `fft_length / 2` positive-frequency terms. // -// For example: +// Along the axis `RFFT` is computed on, if `fft_length` is smaller than the +// corresponding dimension of `input`, the dimension is cropped. If it is larger, +// the dimension is padded with zeros. // -// ``` -// # tensor 'real' is [2.25, 3.25] -// # tensor `imag` is [4.75, 5.75] -// tf.complex(real, imag) ==> [[2.25 + 4.75j], [3.25 + 5.75j]] -// ``` -func Complex(scope *Scope, real tf.Output, imag tf.Output, optional ...ComplexAttr) (out tf.Output) { +// Arguments: +// input: A float32 tensor. +// fft_length: An int32 tensor of shape [1]. The FFT length. +// +// Returns A complex64 tensor of the same rank as `input`. The inner-most +// dimension of `input` is replaced with the `fft_length / 2 + 1` unique +// frequency components of its 1D Fourier transform. +// +// @compatibility(numpy) +// Equivalent to np.fft.rfft +// @end_compatibility +func RFFT(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "RFFT", + Input: []tf.Input{ + input, fft_length, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// DepthwiseConv2dNativeBackpropFilterAttr is an optional argument to DepthwiseConv2dNativeBackpropFilter. +type DepthwiseConv2dNativeBackpropFilterAttr func(optionalAttr) + +// DepthwiseConv2dNativeBackpropFilterDataFormat sets the optional data_format attribute to value. +// +// value: Specify the data format of the input and output data. 
With the +// default format "NHWC", the data is stored in the order of: +// [batch, height, width, channels]. +// Alternatively, the format could be "NCHW", the data storage order of: +// [batch, channels, height, width]. +// If not specified, defaults to "NHWC" +func DepthwiseConv2dNativeBackpropFilterDataFormat(value string) DepthwiseConv2dNativeBackpropFilterAttr { + return func(m optionalAttr) { + m["data_format"] = value + } +} + +// DepthwiseConv2dNativeBackpropFilterDilations sets the optional dilations attribute to value. +// +// value: 1-D tensor of length 4. The dilation factor for each dimension of +// `input`. If set to k > 1, there will be k-1 skipped cells between each filter +// element on that dimension. The dimension order is determined by the value of +// `data_format`, see above for details. Dilations in the batch and depth +// dimensions must be 1. +// If not specified, defaults to +func DepthwiseConv2dNativeBackpropFilterDilations(value []int64) DepthwiseConv2dNativeBackpropFilterAttr { + return func(m optionalAttr) { + m["dilations"] = value + } +} + +// Computes the gradients of depthwise convolution with respect to the filter. +// +// Arguments: +// input: 4-D with shape based on `data_format`. For example, if +// `data_format` is 'NHWC' then `input` is a 4-D `[batch, in_height, +// in_width, in_channels]` tensor. +// filter_sizes: An integer vector representing the tensor shape of `filter`, +// where `filter` is a 4-D +// `[filter_height, filter_width, in_channels, depthwise_multiplier]` tensor. +// out_backprop: 4-D with shape based on `data_format`. +// For example, if `data_format` is 'NHWC' then +// out_backprop shape is `[batch, out_height, out_width, out_channels]`. +// Gradients w.r.t. the output of the convolution. +// strides: The stride of the sliding window for each dimension of the input +// of the convolution. +// padding: The type of padding algorithm to use. 
+// +// Returns 4-D with shape +// `[filter_height, filter_width, in_channels, out_channels]`. Gradient w.r.t. +// the `filter` input of the convolution. +func DepthwiseConv2dNativeBackpropFilter(scope *Scope, input tf.Output, filter_sizes tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...DepthwiseConv2dNativeBackpropFilterAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"strides": strides, "padding": padding} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "DepthwiseConv2dNativeBackpropFilter", + Input: []tf.Input{ + input, filter_sizes, out_backprop, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// LRNGradAttr is an optional argument to LRNGrad. +type LRNGradAttr func(optionalAttr) + +// LRNGradDepthRadius sets the optional depth_radius attribute to value. +// +// value: A depth radius. +// If not specified, defaults to 5 +func LRNGradDepthRadius(value int64) LRNGradAttr { + return func(m optionalAttr) { + m["depth_radius"] = value + } +} + +// LRNGradBias sets the optional bias attribute to value. +// +// value: An offset (usually > 0 to avoid dividing by 0). +// If not specified, defaults to 1 +func LRNGradBias(value float32) LRNGradAttr { + return func(m optionalAttr) { + m["bias"] = value + } +} + +// LRNGradAlpha sets the optional alpha attribute to value. +// +// value: A scale factor, usually positive. +// If not specified, defaults to 1 +func LRNGradAlpha(value float32) LRNGradAttr { + return func(m optionalAttr) { + m["alpha"] = value + } +} + +// LRNGradBeta sets the optional beta attribute to value. +// +// value: An exponent. +// If not specified, defaults to 0.5 +func LRNGradBeta(value float32) LRNGradAttr { + return func(m optionalAttr) { + m["beta"] = value + } +} + +// Gradients for Local Response Normalization. +// +// Arguments: +// input_grads: 4-D with shape `[batch, height, width, channels]`. 
+// input_image: 4-D with shape `[batch, height, width, channels]`. +// output_image: 4-D with shape `[batch, height, width, channels]`. +// +// Returns The gradients for LRN. +func LRNGrad(scope *Scope, input_grads tf.Output, input_image tf.Output, output_image tf.Output, optional ...LRNGradAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "LRNGrad", + Input: []tf.Input{ + input_grads, input_image, output_image, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// AnyAttr is an optional argument to Any. +type AnyAttr func(optionalAttr) + +// AnyKeepDims sets the optional keep_dims attribute to value. +// +// value: If true, retain reduced dimensions with length 1. +// If not specified, defaults to false +func AnyKeepDims(value bool) AnyAttr { + return func(m optionalAttr) { + m["keep_dims"] = value + } +} + +// Computes the "logical or" of elements across dimensions of a tensor. +// +// Reduces `input` along the dimensions given in `axis`. Unless +// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in +// `axis`. If `keep_dims` is true, the reduced dimensions are +// retained with length 1. +// +// Arguments: +// input: The tensor to reduce. +// axis: The dimensions to reduce. Must be in the range +// `[-rank(input), rank(input))`. +// +// Returns The reduced tensor. +func Any(scope *Scope, input tf.Output, axis tf.Output, optional ...AnyAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "Any", + Input: []tf.Input{ + input, axis, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// ResourceApplyFtrlAttr is an optional argument to ResourceApplyFtrl. 
+type ResourceApplyFtrlAttr func(optionalAttr) + +// ResourceApplyFtrlUseLocking sets the optional use_locking attribute to value. +// +// value: If `True`, updating of the var and accum tensors will be protected +// by a lock; otherwise the behavior is undefined, but may exhibit less +// contention. +// If not specified, defaults to false +func ResourceApplyFtrlUseLocking(value bool) ResourceApplyFtrlAttr { + return func(m optionalAttr) { + m["use_locking"] = value + } +} + +// Update '*var' according to the Ftrl-proximal scheme. +// +// accum_new = accum + grad * grad +// linear += grad - (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var +// quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2 +// var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0 +// accum = accum_new +// +// Arguments: +// var_: Should be from a Variable(). +// accum: Should be from a Variable(). +// linear: Should be from a Variable(). +// grad: The gradient. +// lr: Scaling factor. Must be a scalar. +// l1: L1 regulariation. Must be a scalar. +// l2: L2 regulariation. Must be a scalar. +// lr_power: Scaling factor. Must be a scalar. +// +// Returns the created operation. +func ResourceApplyFtrl(scope *Scope, var_ tf.Output, accum tf.Output, linear tf.Output, grad tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, lr_power tf.Output, optional ...ResourceApplyFtrlAttr) (o *tf.Operation) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "ResourceApplyFtrl", + Input: []tf.Input{ + var_, accum, linear, grad, lr, l1, l2, lr_power, + }, + Attrs: attrs, + } + return scope.AddOperation(opspec) +} + +// RandomUniformAttr is an optional argument to RandomUniform. +type RandomUniformAttr func(optionalAttr) + +// RandomUniformSeed sets the optional seed attribute to value. 
+// +// value: If either `seed` or `seed2` are set to be non-zero, the random number +// generator is seeded by the given seed. Otherwise, it is seeded by a +// random seed. +// If not specified, defaults to 0 +func RandomUniformSeed(value int64) RandomUniformAttr { + return func(m optionalAttr) { + m["seed"] = value + } +} + +// RandomUniformSeed2 sets the optional seed2 attribute to value. +// +// value: A second seed to avoid seed collision. +// If not specified, defaults to 0 +func RandomUniformSeed2(value int64) RandomUniformAttr { + return func(m optionalAttr) { + m["seed2"] = value + } +} + +// Outputs random values from a uniform distribution. +// +// The generated values follow a uniform distribution in the range `[0, 1)`. The +// lower bound 0 is included in the range, while the upper bound 1 is excluded. +// +// Arguments: +// shape: The shape of the output tensor. +// dtype: The type of the output. +// +// Returns A tensor of the specified shape filled with uniform random values. +func RandomUniform(scope *Scope, shape tf.Output, dtype tf.DataType, optional ...RandomUniformAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"dtype": dtype} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "RandomUniform", + Input: []tf.Input{ + shape, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// AssertAttr is an optional argument to Assert. +type AssertAttr func(optionalAttr) + +// AssertSummarize sets the optional summarize attribute to value. +// +// value: Print this many entries of each tensor. +// If not specified, defaults to 3 +func AssertSummarize(value int64) AssertAttr { + return func(m optionalAttr) { + m["summarize"] = value + } +} + +// Asserts that the given condition is true. +// +// If `condition` evaluates to false, print the list of tensors in `data`. +// `summarize` determines how many entries of the tensors to print. 
+// +// Arguments: +// condition: The condition to evaluate. +// data: The tensors to print out when condition is false. +// +// Returns the created operation. +func Assert(scope *Scope, condition tf.Output, data []tf.Output, optional ...AssertAttr) (o *tf.Operation) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "Assert", + Input: []tf.Input{ + condition, tf.OutputList(data), + }, + Attrs: attrs, + } + return scope.AddOperation(opspec) +} + +// Computes element-wise population count (a.k.a. popcount, bitsum, bitcount). +// +// For each entry in `x`, calculates the number of `1` (on) bits in the binary +// representation of that entry. +// +// **NOTE**: It is more efficient to first `tf.bitcast` your tensors into +// `int32` or `int64` and perform the bitcount on the result, than to feed in +// 8- or 16-bit inputs and then aggregate the resulting counts. +func PopulationCount(scope *Scope, x tf.Output) (y tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "PopulationCount", + Input: []tf.Input{ + x, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Broadcasts a tensor value to one or more other devices. +func CollectiveBcastSend(scope *Scope, input tf.Output, group_size int64, group_key int64, instance_key int64, shape tf.Shape) (data tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"group_size": group_size, "group_key": group_key, "instance_key": instance_key, "shape": shape} + opspec := tf.OpSpec{ + Type: "CollectiveBcastSend", + Input: []tf.Input{ + input, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Makes a copy of `x`. +// +// Arguments: +// x: The source tensor of type `T`. +// +// Returns y: A `Tensor` of type `T`. A copy of `x`. Guaranteed that `y` +// is not an alias of `x`. 
+func DeepCopy(scope *Scope, x tf.Output) (y tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "DeepCopy", + Input: []tf.Input{ + x, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Split a `SparseTensor` into `num_split` tensors along one dimension. +// +// If the `shape[split_dim]` is not an integer multiple of `num_split`. Slices +// `[0 : shape[split_dim] % num_split]` gets one extra dimension. +// For example, if `split_dim = 1` and `num_split = 2` and the input is +// +// input_tensor = shape = [2, 7] +// [ a d e ] +// [b c ] +// +// Graphically the output tensors are: +// +// output_tensor[0] = shape = [2, 4] +// [ a ] +// [b c ] +// +// output_tensor[1] = shape = [2, 3] +// [ d e ] +// [ ] +// +// Arguments: +// split_dim: 0-D. The dimension along which to split. Must be in the range +// `[0, rank(shape))`. +// indices: 2-D tensor represents the indices of the sparse tensor. +// values: 1-D tensor represents the values of the sparse tensor. +// shape: 1-D. tensor represents the shape of the sparse tensor. +// output indices: A list of 1-D tensors represents the indices of the output +// sparse tensors. +// num_split: The number of ways to split. +// +// Returns A list of 1-D tensors represents the values of the output sparse +// tensors.A list of 1-D tensors represents the shape of the output sparse +// tensors. 
+func SparseSplit(scope *Scope, split_dim tf.Output, indices tf.Output, values tf.Output, shape tf.Output, num_split int64) (output_indices []tf.Output, output_values []tf.Output, output_shape []tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"num_split": num_split} + opspec := tf.OpSpec{ + Type: "SparseSplit", + Input: []tf.Input{ + split_dim, indices, values, shape, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + if scope.Err() != nil { + return + } + var idx int + var err error + if output_indices, idx, err = makeOutputList(op, idx, "output_indices"); err != nil { + scope.UpdateErr("SparseSplit", err) + return + } + if output_values, idx, err = makeOutputList(op, idx, "output_values"); err != nil { + scope.UpdateErr("SparseSplit", err) + return + } + if output_shape, idx, err = makeOutputList(op, idx, "output_shape"); err != nil { + scope.UpdateErr("SparseSplit", err) + return + } + return output_indices, output_values, output_shape +} + +// RandomPoissonAttr is an optional argument to RandomPoisson. +type RandomPoissonAttr func(optionalAttr) + +// RandomPoissonSeed sets the optional seed attribute to value. +// If not specified, defaults to 0 +func RandomPoissonSeed(value int64) RandomPoissonAttr { + return func(m optionalAttr) { + m["seed"] = value + } +} + +// RandomPoissonSeed2 sets the optional seed2 attribute to value. +// If not specified, defaults to 0 +func RandomPoissonSeed2(value int64) RandomPoissonAttr { + return func(m optionalAttr) { + m["seed2"] = value + } +} + +// Use RandomPoissonV2 instead. 
+// +// DEPRECATED at GraphDef version 25: Replaced by RandomPoissonV2 +func RandomPoisson(scope *Scope, shape tf.Output, rate tf.Output, optional ...RandomPoissonAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "RandomPoisson", + Input: []tf.Input{ + shape, rate, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// ResourceSparseApplyFtrlV2Attr is an optional argument to ResourceSparseApplyFtrlV2. +type ResourceSparseApplyFtrlV2Attr func(optionalAttr) + +// ResourceSparseApplyFtrlV2UseLocking sets the optional use_locking attribute to value. +// +// value: If `True`, updating of the var and accum tensors will be protected +// by a lock; otherwise the behavior is undefined, but may exhibit less +// contention. +// If not specified, defaults to false +func ResourceSparseApplyFtrlV2UseLocking(value bool) ResourceSparseApplyFtrlV2Attr { + return func(m optionalAttr) { + m["use_locking"] = value + } +} + +// Update relevant entries in '*var' according to the Ftrl-proximal scheme. +// +// That is for rows we have grad for, we update var, accum and linear as follows: +// grad_with_shrinkage = grad + 2 * l2_shrinkage * var +// accum_new = accum + grad_with_shrinkage * grad_with_shrinkage +// linear += grad_with_shrinkage + +// (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var +// quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2 +// var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0 +// accum = accum_new +// +// Arguments: +// var_: Should be from a Variable(). +// accum: Should be from a Variable(). +// linear: Should be from a Variable(). +// grad: The gradient. +// indices: A vector of indices into the first dimension of var and accum. +// lr: Scaling factor. Must be a scalar. +// l1: L1 regularization. Must be a scalar. +// l2: L2 shrinkage regulariation. 
Must be a scalar. +// +// lr_power: Scaling factor. Must be a scalar. +// +// Returns the created operation. +func ResourceSparseApplyFtrlV2(scope *Scope, var_ tf.Output, accum tf.Output, linear tf.Output, grad tf.Output, indices tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, l2_shrinkage tf.Output, lr_power tf.Output, optional ...ResourceSparseApplyFtrlV2Attr) (o *tf.Operation) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "ResourceSparseApplyFtrlV2", + Input: []tf.Input{ + var_, accum, linear, grad, indices, lr, l1, l2, l2_shrinkage, lr_power, + }, + Attrs: attrs, + } + return scope.AddOperation(opspec) +} + +// DataFormatVecPermuteAttr is an optional argument to DataFormatVecPermute. +type DataFormatVecPermuteAttr func(optionalAttr) + +// DataFormatVecPermuteSrcFormat sets the optional src_format attribute to value. +// +// value: source data format. +// If not specified, defaults to "NHWC" +func DataFormatVecPermuteSrcFormat(value string) DataFormatVecPermuteAttr { + return func(m optionalAttr) { + m["src_format"] = value + } +} + +// DataFormatVecPermuteDstFormat sets the optional dst_format attribute to value. +// +// value: destination data format. +// If not specified, defaults to "NCHW" +func DataFormatVecPermuteDstFormat(value string) DataFormatVecPermuteAttr { + return func(m optionalAttr) { + m["dst_format"] = value + } +} + +// Returns the permuted vector/tensor in the destination data format given the +// +// one in the source data format. +// +// Arguments: +// x: Vector of size 4 or Tensor of shape (4, 2) in source data format. +// +// Returns Vector of size 4 or Tensor of shape (4, 2) in destination data format. 
+func DataFormatVecPermute(scope *Scope, x tf.Output, optional ...DataFormatVecPermuteAttr) (y tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "DataFormatVecPermute", + Input: []tf.Input{ + x, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Reads the value of a variable. +// +// The tensor returned by this operation is immutable. +// +// The value returned by this operation is guaranteed to be influenced by all the +// writes on which this operation depends directly or indirectly, and to not be +// influenced by any of the writes which depend directly or indirectly on this +// operation. +// +// Arguments: +// resource: handle to the resource in which to store the variable. +// dtype: the dtype of the value. +func ReadVariableOp(scope *Scope, resource tf.Output, dtype tf.DataType) (value tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"dtype": dtype} + opspec := tf.OpSpec{ + Type: "ReadVariableOp", + Input: []tf.Input{ + resource, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Computes tan of x element-wise. +func Tan(scope *Scope, x tf.Output) (y tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Tan", + Input: []tf.Input{ + x, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Updates the tree ensemble by either adding a layer to the last tree being grown +// +// or by starting a new tree. +// +// Arguments: +// tree_ensemble_handle: Handle to the ensemble variable. +// feature_ids: Rank 1 tensor with ids for each feature. This is the real id of +// the feature that will be used in the split. +// node_ids: List of rank 1 tensors representing the nodes for which this feature +// has a split. 
+// gains: List of rank 1 tensors representing the gains for each of the feature's +// split. +// thresholds: List of rank 1 tensors representing the thesholds for each of the +// feature's split. +// left_node_contribs: List of rank 2 tensors with left leaf contribs for each of +// the feature's splits. Will be added to the previous node values to constitute +// the values of the left nodes. +// right_node_contribs: List of rank 2 tensors with right leaf contribs for each +// of the feature's splits. Will be added to the previous node values to constitute +// the values of the right nodes. +// max_depth: Max depth of the tree to build. +// learning_rate: shrinkage const for each new tree. +// pruning_mode: 0-No pruning, 1-Pre-pruning, 2-Post-pruning. +// +// Returns the created operation. +func BoostedTreesUpdateEnsemble(scope *Scope, tree_ensemble_handle tf.Output, feature_ids tf.Output, node_ids []tf.Output, gains []tf.Output, thresholds []tf.Output, left_node_contribs []tf.Output, right_node_contribs []tf.Output, max_depth tf.Output, learning_rate tf.Output, pruning_mode int64) (o *tf.Operation) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"pruning_mode": pruning_mode} + opspec := tf.OpSpec{ + Type: "BoostedTreesUpdateEnsemble", + Input: []tf.Input{ + tree_ensemble_handle, feature_ids, tf.OutputList(node_ids), tf.OutputList(gains), tf.OutputList(thresholds), tf.OutputList(left_node_contribs), tf.OutputList(right_node_contribs), max_depth, learning_rate, + }, + Attrs: attrs, + } + return scope.AddOperation(opspec) +} + +// ResourceSparseApplyFtrlAttr is an optional argument to ResourceSparseApplyFtrl. +type ResourceSparseApplyFtrlAttr func(optionalAttr) + +// ResourceSparseApplyFtrlUseLocking sets the optional use_locking attribute to value. +// +// value: If `True`, updating of the var and accum tensors will be protected +// by a lock; otherwise the behavior is undefined, but may exhibit less +// contention. 
+// If not specified, defaults to false +func ResourceSparseApplyFtrlUseLocking(value bool) ResourceSparseApplyFtrlAttr { + return func(m optionalAttr) { + m["use_locking"] = value + } +} + +// Update relevant entries in '*var' according to the Ftrl-proximal scheme. +// +// That is for rows we have grad for, we update var, accum and linear as follows: +// accum_new = accum + grad * grad +// linear += grad + (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var +// quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2 +// var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0 +// accum = accum_new +// +// Arguments: +// var_: Should be from a Variable(). +// accum: Should be from a Variable(). +// linear: Should be from a Variable(). +// grad: The gradient. +// indices: A vector of indices into the first dimension of var and accum. +// lr: Scaling factor. Must be a scalar. +// l1: L1 regularization. Must be a scalar. +// l2: L2 regularization. Must be a scalar. +// lr_power: Scaling factor. Must be a scalar. +// +// Returns the created operation. +func ResourceSparseApplyFtrl(scope *Scope, var_ tf.Output, accum tf.Output, linear tf.Output, grad tf.Output, indices tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, lr_power tf.Output, optional ...ResourceSparseApplyFtrlAttr) (o *tf.Operation) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "ResourceSparseApplyFtrl", + Input: []tf.Input{ + var_, accum, linear, grad, indices, lr, l1, l2, lr_power, + }, + Attrs: attrs, + } + return scope.AddOperation(opspec) +} + +// Returns which elements of x are Inf. 
+// +// @compatibility(numpy) +// Equivalent to np.isinf +// @end_compatibility +func IsInf(scope *Scope, x tf.Output) (y tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "IsInf", + Input: []tf.Input{ + x, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Computes the sum along sparse segments of a tensor divided by the sqrt of N. +// +// N is the size of the segment being reduced. +// +// Read @{$math_ops#Segmentation$the section on segmentation} for an explanation of +// segments. +// +// Arguments: +// +// indices: A 1-D tensor. Has same rank as `segment_ids`. +// segment_ids: A 1-D tensor. Values should be sorted and can be repeated. +// +// Returns Has same shape as data, except for dimension 0 which +// has size `k`, the number of segments. +func SparseSegmentSqrtN(scope *Scope, data tf.Output, indices tf.Output, segment_ids tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "SparseSegmentSqrtN", + Input: []tf.Input{ + data, indices, segment_ids, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Adds up a `SparseTensor` and a dense `Tensor`, producing a dense `Tensor`. +// +// This Op does not require `a_indices` be sorted in standard lexicographic order. +// +// Arguments: +// a_indices: 2-D. The `indices` of the `SparseTensor`, with shape `[nnz, ndims]`. +// a_values: 1-D. The `values` of the `SparseTensor`, with shape `[nnz]`. +// a_shape: 1-D. The `shape` of the `SparseTensor`, with shape `[ndims]`. +// b: `ndims`-D Tensor. With shape `a_shape`. 
+func SparseTensorDenseAdd(scope *Scope, a_indices tf.Output, a_values tf.Output, a_shape tf.Output, b tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "SparseTensorDenseAdd", + Input: []tf.Input{ + a_indices, a_values, a_shape, b, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// StatelessTruncatedNormalAttr is an optional argument to StatelessTruncatedNormal. +type StatelessTruncatedNormalAttr func(optionalAttr) + +// StatelessTruncatedNormalDtype sets the optional dtype attribute to value. +// +// value: The type of the output. +// If not specified, defaults to DT_FLOAT +func StatelessTruncatedNormalDtype(value tf.DataType) StatelessTruncatedNormalAttr { + return func(m optionalAttr) { + m["dtype"] = value + } +} + +// Outputs deterministic pseudorandom values from a truncated normal distribution. +// +// The generated values follow a normal distribution with mean 0 and standard +// deviation 1, except that values whose magnitude is more than 2 standard +// deviations from the mean are dropped and re-picked. +// +// The outputs are a deterministic function of `shape` and `seed`. +// +// Arguments: +// shape: The shape of the output tensor. +// seed: 2 seeds (shape [2]). +// +// Returns Random values with specified shape. +func StatelessTruncatedNormal(scope *Scope, shape tf.Output, seed tf.Output, optional ...StatelessTruncatedNormalAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "StatelessTruncatedNormal", + Input: []tf.Input{ + shape, seed, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// RestoreSliceAttr is an optional argument to RestoreSlice. +type RestoreSliceAttr func(optionalAttr) + +// RestoreSlicePreferredShard sets the optional preferred_shard attribute to value. 
+// +// value: Index of file to open first if multiple files match +// `file_pattern`. See the documentation for `Restore`. +// If not specified, defaults to -1 +func RestoreSlicePreferredShard(value int64) RestoreSliceAttr { + return func(m optionalAttr) { + m["preferred_shard"] = value + } +} + +// Restores a tensor from checkpoint files. +// +// This is like `Restore` except that restored tensor can be listed as filling +// only a slice of a larger tensor. `shape_and_slice` specifies the shape of the +// larger tensor and the slice that the restored tensor covers. +// +// The `shape_and_slice` input has the same format as the +// elements of the `shapes_and_slices` input of the `SaveSlices` op. +// +// Arguments: +// file_pattern: Must have a single element. The pattern of the files from +// which we read the tensor. +// tensor_name: Must have a single element. The name of the tensor to be +// restored. +// shape_and_slice: Scalar. The shapes and slice specifications to use when +// restoring a tensors. +// dt: The type of the tensor to be restored. +// +// Returns The restored tensor. +func RestoreSlice(scope *Scope, file_pattern tf.Output, tensor_name tf.Output, shape_and_slice tf.Output, dt tf.DataType, optional ...RestoreSliceAttr) (tensor tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"dt": dt} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "RestoreSlice", + Input: []tf.Input{ + file_pattern, tensor_name, shape_and_slice, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// ImagAttr is an optional argument to Imag. +type ImagAttr func(optionalAttr) + +// ImagTout sets the optional Tout attribute to value. +// If not specified, defaults to DT_FLOAT +func ImagTout(value tf.DataType) ImagAttr { + return func(m optionalAttr) { + m["Tout"] = value + } +} + +// Returns the imaginary part of a complex number. 
+// +// Given a tensor `input` of complex numbers, this operation returns a tensor of +// type `float` that is the imaginary part of each element in `input`. All +// elements in `input` must be complex numbers of the form \\(a + bj\\), where *a* +// is the real part and *b* is the imaginary part returned by this operation. +// +// For example: +// +// ``` +// # tensor 'input' is [-2.25 + 4.75j, 3.25 + 5.75j] +// tf.imag(input) ==> [4.75, 5.75] +// ``` +func Imag(scope *Scope, input tf.Output, optional ...ImagAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "Imag", + Input: []tf.Input{ + input, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// ComplexAttr is an optional argument to Complex. +type ComplexAttr func(optionalAttr) + +// ComplexTout sets the optional Tout attribute to value. +// If not specified, defaults to DT_COMPLEX64 +func ComplexTout(value tf.DataType) ComplexAttr { + return func(m optionalAttr) { + m["Tout"] = value + } +} + +// Converts two real numbers to a complex number. +// +// Given a tensor `real` representing the real part of a complex number, and a +// tensor `imag` representing the imaginary part of a complex number, this +// operation returns complex numbers elementwise of the form \\(a + bj\\), where +// *a* represents the `real` part and *b* represents the `imag` part. +// +// The input tensors `real` and `imag` must have the same shape. 
+// +// For example: +// +// ``` +// # tensor 'real' is [2.25, 3.25] +// # tensor `imag` is [4.75, 5.75] +// tf.complex(real, imag) ==> [[2.25 + 4.75j], [3.25 + 5.75j]] +// ``` +func Complex(scope *Scope, real tf.Output, imag tf.Output, optional ...ComplexAttr) (out tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "Complex", + Input: []tf.Input{ + real, imag, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Divides sparse updates into the variable referenced by `resource`. +// +// This operation computes +// +// # Scalar indices +// ref[indices, ...] /= updates[...] +// +// # Vector indices (for each i) +// ref[indices[i], ...] /= updates[i, ...] +// +// # High rank indices (for each i, ..., j) +// ref[indices[i, ..., j], ...] /= updates[i, ..., j, ...] +// +// Duplicate entries are handled correctly: if multiple `indices` reference +// the same location, their contributions multiply. +// +// Requires `updates.shape = indices.shape + ref.shape[1:]` or `updates.shape = []`. +// +//
+// +//
+// +// Arguments: +// resource: Should be from a `Variable` node. +// indices: A tensor of indices into the first dimension of `ref`. +// updates: A tensor of updated values to add to `ref`. +// +// Returns the created operation. +func ResourceScatterDiv(scope *Scope, resource tf.Output, indices tf.Output, updates tf.Output) (o *tf.Operation) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "ResourceScatterDiv", + Input: []tf.Input{ + resource, indices, updates, + }, + } + return scope.AddOperation(opspec) +} + +// StatelessRandomNormalAttr is an optional argument to StatelessRandomNormal. +type StatelessRandomNormalAttr func(optionalAttr) + +// StatelessRandomNormalDtype sets the optional dtype attribute to value. +// +// value: The type of the output. +// If not specified, defaults to DT_FLOAT +func StatelessRandomNormalDtype(value tf.DataType) StatelessRandomNormalAttr { + return func(m optionalAttr) { + m["dtype"] = value + } +} + +// Outputs deterministic pseudorandom values from a normal distribution. +// +// The generated values will have mean 0 and standard deviation 1. +// +// The outputs are a deterministic function of `shape` and `seed`. +// +// Arguments: +// shape: The shape of the output tensor. +// seed: 2 seeds (shape [2]). +// +// Returns Random values with specified shape. +func StatelessRandomNormal(scope *Scope, shape tf.Output, seed tf.Output, optional ...StatelessRandomNormalAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "StatelessRandomNormal", + Input: []tf.Input{ + shape, seed, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Reduces sparse updates into the variable referenced by `resource` using the `min` operation. +// +// This operation computes +// +// # Scalar indices +// ref[indices, ...] 
= min(ref[indices, ...], updates[...]) +// +// # Vector indices (for each i) +// ref[indices[i], ...] = min(ref[indices[i], ...], updates[i, ...]) +// +// # High rank indices (for each i, ..., j) +// ref[indices[i, ..., j], ...] = min(ref[indices[i, ..., j], ...], updates[i, ..., j, ...]) +// +// Duplicate entries are handled correctly: if multiple `indices` reference +// the same location, their contributions are combined. +// +// Requires `updates.shape = indices.shape + ref.shape[1:]` or `updates.shape = []`. +// +//
+// +//
+// +// Arguments: +// resource: Should be from a `Variable` node. +// indices: A tensor of indices into the first dimension of `ref`. +// updates: A tensor of updated values to add to `ref`. +// +// Returns the created operation. +func ResourceScatterMin(scope *Scope, resource tf.Output, indices tf.Output, updates tf.Output) (o *tf.Operation) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "ResourceScatterMin", + Input: []tf.Input{ + resource, indices, updates, + }, + } + return scope.AddOperation(opspec) +} + +// Reshapes a quantized tensor as per the Reshape op. +// +// ``` +// +// Arguments: +// +// shape: Defines the shape of the output tensor. +// input_min: The minimum value of the input. +// input_max: The maximum value of the input. +// +// Returns This value is copied from input_min.This value is copied from input_max. +func QuantizedReshape(scope *Scope, tensor tf.Output, shape tf.Output, input_min tf.Output, input_max tf.Output) (output tf.Output, output_min tf.Output, output_max tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "QuantizedReshape", + Input: []tf.Input{ + tensor, shape, input_min, input_max, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2) +} + +// GatherAttr is an optional argument to Gather. +type GatherAttr func(optionalAttr) + +// GatherValidateIndices sets the optional validate_indices attribute to value. +// If not specified, defaults to true +func GatherValidateIndices(value bool) GatherAttr { + return func(m optionalAttr) { + m["validate_indices"] = value + } +} + +// Gather slices from `params` according to `indices`. +// +// `indices` must be an integer tensor of any dimension (usually 0-D or 1-D). +// Produces an output tensor with shape `indices.shape + params.shape[1:]` where: +// +// ```python +// # Scalar indices +// output[:, ..., :] = params[indices, :, ... 
:] +// +// # Vector indices +// output[i, :, ..., :] = params[indices[i], :, ... :] +// +// # Higher rank indices +// output[i, ..., j, :, ... :] = params[indices[i, ..., j], :, ..., :] +// ``` +// +// If `indices` is a permutation and `len(indices) == params.shape[0]` then +// this operation will permute `params` accordingly. +// +// `validate_indices`: DEPRECATED. If this operation is assigned to CPU, values in +// `indices` are always validated to be within range. If assigned to GPU, +// out-of-bound indices result in safe but unspecified behavior, which may include +// raising an error. +// +//
+// +//
+func Gather(scope *Scope, params tf.Output, indices tf.Output, optional ...GatherAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "Gather", + Input: []tf.Input{ + params, indices, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Returns the truth value of (x != y) element-wise. +// +// *NOTE*: `NotEqual` supports broadcasting. More about broadcasting +// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +func NotEqual(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "NotEqual", + Input: []tf.Input{ + x, y, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Inverse 3D real-valued fast Fourier transform. +// +// Computes the inverse 3-dimensional discrete Fourier transform of a real-valued +// signal over the inner-most 3 dimensions of `input`. +// +// The inner-most 3 dimensions of `input` are assumed to be the result of `RFFT3D`: +// The inner-most dimension contains the `fft_length / 2 + 1` unique components of +// the DFT of a real-valued signal. If `fft_length` is not provided, it is computed +// from the size of the inner-most 3 dimensions of `input`. If the FFT length used +// to compute `input` is odd, it should be provided since it cannot be inferred +// properly. +// +// Along each axis `IRFFT3D` is computed on, if `fft_length` (or +// `fft_length / 2 + 1` for the inner-most dimension) is smaller than the +// corresponding dimension of `input`, the dimension is cropped. If it is larger, +// the dimension is padded with zeros. +// +// Arguments: +// input: A complex64 tensor. +// fft_length: An int32 tensor of shape [3]. The FFT length for each dimension. +// +// Returns A float32 tensor of the same rank as `input`. 
The inner-most 3 +// dimensions of `input` are replaced with the `fft_length` samples of their +// inverse 3D real Fourier transform. +// +// @compatibility(numpy) +// Equivalent to np.irfftn with 3 dimensions. +// @end_compatibility +func IRFFT3D(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "IRFFT3D", + Input: []tf.Input{ + input, fft_length, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// StringSplitAttr is an optional argument to StringSplit. +type StringSplitAttr func(optionalAttr) + +// StringSplitSkipEmpty sets the optional skip_empty attribute to value. +// +// value: A `bool`. If `True`, skip the empty strings from the result. +// If not specified, defaults to true +func StringSplitSkipEmpty(value bool) StringSplitAttr { + return func(m optionalAttr) { + m["skip_empty"] = value + } +} + +// Split elements of `input` based on `delimiter` into a `SparseTensor`. +// +// Let N be the size of source (typically N will be the batch size). Split each +// element of `input` based on `delimiter` and return a `SparseTensor` +// containing the splitted tokens. Empty tokens are ignored. +// +// `delimiter` can be empty, or a string of split characters. If `delimiter` is an +// empty string, each element of `input` is split into individual single-byte +// character strings, including splitting of UTF-8 multibyte sequences. Otherwise +// every character of `delimiter` is a potential split point. +// +// For example: +// N = 2, input[0] is 'hello world' and input[1] is 'a b c', then the output +// will be +// +// indices = [0, 0; +// 0, 1; +// 1, 0; +// 1, 1; +// 1, 2] +// shape = [2, 3] +// values = ['hello', 'world', 'a', 'b', 'c'] +// +// Arguments: +// input: 1-D. Strings to split. +// delimiter: 0-D. Delimiter characters (bytes), or empty string. 
+// +// Returns A dense matrix of int64 representing the indices of the sparse tensor.A vector of strings corresponding to the splited values.a length-2 vector of int64 representing the shape of the sparse +// tensor, where the first value is N and the second value is the maximum number +// of tokens in a single input entry. +func StringSplit(scope *Scope, input tf.Output, delimiter tf.Output, optional ...StringSplitAttr) (indices tf.Output, values tf.Output, shape tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "StringSplit", + Input: []tf.Input{ + input, delimiter, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2) +} + +// ResizeBilinearAttr is an optional argument to ResizeBilinear. +type ResizeBilinearAttr func(optionalAttr) + +// ResizeBilinearAlignCorners sets the optional align_corners attribute to value. +// +// value: If true, the centers of the 4 corner pixels of the input and output tensors are +// aligned, preserving the values at the corner pixels. Defaults to false. +// If not specified, defaults to false +func ResizeBilinearAlignCorners(value bool) ResizeBilinearAttr { + return func(m optionalAttr) { + m["align_corners"] = value + } +} + +// Resize `images` to `size` using bilinear interpolation. +// +// Input images can be of different types but output images are always float. +// +// Arguments: +// images: 4-D with shape `[batch, height, width, channels]`. +// size: = A 1-D int32 Tensor of 2 elements: `new_height, new_width`. The +// new size for the images. +// +// Returns 4-D with shape +// `[batch, new_height, new_width, channels]`. 
+func ResizeBilinear(scope *Scope, images tf.Output, size tf.Output, optional ...ResizeBilinearAttr) (resized_images tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "ResizeBilinear", + Input: []tf.Input{ + images, size, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Computes softsign: `features / (abs(features) + 1)`. +func Softsign(scope *Scope, features tf.Output) (activations tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Softsign", + Input: []tf.Input{ + features, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Creates a TensorList which, when stacked, has the value of `tensor`. +// +// Each tensor in the result list corresponds to one row of the input tensor. +// +// tensor: The input tensor. +// output_handle: The list. +func TensorListFromTensor(scope *Scope, tensor tf.Output, element_shape tf.Output) (output_handle tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "TensorListFromTensor", + Input: []tf.Input{ + tensor, element_shape, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// GenerateVocabRemappingAttr is an optional argument to GenerateVocabRemapping. +type GenerateVocabRemappingAttr func(optionalAttr) + +// GenerateVocabRemappingOldVocabSize sets the optional old_vocab_size attribute to value. +// +// value: Number of entries in the old vocab file to consider. If -1, +// use the entire old vocabulary. 
+// If not specified, defaults to -1 +// +// REQUIRES: value >= -1 +func GenerateVocabRemappingOldVocabSize(value int64) GenerateVocabRemappingAttr { + return func(m optionalAttr) { + m["old_vocab_size"] = value + } +} + +// Given a path to new and old vocabulary files, returns a remapping Tensor of +// +// length `num_new_vocab`, where `remapping[i]` contains the row number in the old +// vocabulary that corresponds to row `i` in the new vocabulary (starting at line +// `new_vocab_offset` and up to `num_new_vocab` entities), or `-1` if entry `i` +// in the new vocabulary is not in the old vocabulary. The old vocabulary is +// constrained to the first `old_vocab_size` entries if `old_vocab_size` is not the +// default value of -1. +// +// `num_vocab_offset` enables +// use in the partitioned variable case, and should generally be set through +// examining partitioning info. The format of the files should be a text file, +// with each line containing a single entity within the vocabulary. +// +// For example, with `new_vocab_file` a text file containing each of the following +// elements on a single line: `[f0, f1, f2, f3]`, old_vocab_file = [f1, f0, f3], +// `num_new_vocab = 3, new_vocab_offset = 1`, the returned remapping would be +// `[0, -1, 2]`. +// +// The op also returns a count of how many entries in the new vocabulary +// were present in the old vocabulary, which is used to calculate the number of +// values to initialize in a weight matrix remapping +// +// This functionality can be used to remap both row vocabularies (typically, +// features) and column vocabularies (typically, classes) from TensorFlow +// checkpoints. Note that the partitioning logic relies on contiguous vocabularies +// corresponding to div-partitioned variables. 
Moreover, the underlying remapping +// uses an IndexTable (as opposed to an inexact CuckooTable), so client code should +// use the corresponding index_table_from_file() as the FeatureColumn framework +// does (as opposed to tf.feature_to_id(), which uses a CuckooTable). +// +// Arguments: +// new_vocab_file: Path to the new vocab file. +// old_vocab_file: Path to the old vocab file. +// new_vocab_offset: How many entries into the new vocab file to start reading. +// num_new_vocab: Number of entries in the new vocab file to remap. +// +// Returns A Tensor of length num_new_vocab where the element at index i +// is equal to the old ID that maps to the new ID i. This element is -1 for any +// new ID that is not found in the old vocabulary.Number of new vocab entries found in old vocab. +func GenerateVocabRemapping(scope *Scope, new_vocab_file tf.Output, old_vocab_file tf.Output, new_vocab_offset int64, num_new_vocab int64, optional ...GenerateVocabRemappingAttr) (remapping tf.Output, num_present tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"new_vocab_offset": new_vocab_offset, "num_new_vocab": num_new_vocab} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "GenerateVocabRemapping", + Input: []tf.Input{ + new_vocab_file, old_vocab_file, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1) +} + +// Assigns sparse updates to the variable referenced by `resource`. +// +// This operation computes +// +// # Scalar indices +// ref[indices, ...] = updates[...] +// +// # Vector indices (for each i) +// ref[indices[i], ...] = updates[i, ...] +// +// # High rank indices (for each i, ..., j) +// ref[indices[i, ..., j], ...] = updates[i, ..., j, ...] +// +// Arguments: +// resource: Should be from a `Variable` node. +// indices: A tensor of indices into the first dimension of `ref`. +// updates: A tensor of updated values to add to `ref`. 
+// +// Returns the created operation. +func ResourceScatterUpdate(scope *Scope, resource tf.Output, indices tf.Output, updates tf.Output) (o *tf.Operation) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "ResourceScatterUpdate", + Input: []tf.Input{ + resource, indices, updates, + }, + } + return scope.AddOperation(opspec) +} + +// Creates and returns an empty tensor list. +// +// All list elements must be tensors of dtype element_dtype and shape compatible +// with element_shape. +// +// handle: an empty tensor list. +// element_dtype: the type of elements in the list. +// element_shape: a shape compatible with that of elements in the list. +func EmptyTensorList(scope *Scope, element_shape tf.Output, element_dtype tf.DataType) (handle tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"element_dtype": element_dtype} + opspec := tf.OpSpec{ + Type: "EmptyTensorList", + Input: []tf.Input{ + element_shape, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// AvgPoolGradAttr is an optional argument to AvgPoolGrad. +type AvgPoolGradAttr func(optionalAttr) + +// AvgPoolGradDataFormat sets the optional data_format attribute to value. +// +// value: Specify the data format of the input and output data. With the +// default format "NHWC", the data is stored in the order of: +// [batch, in_height, in_width, in_channels]. +// Alternatively, the format could be "NCHW", the data storage order of: +// [batch, in_channels, in_height, in_width]. +// If not specified, defaults to "NHWC" +func AvgPoolGradDataFormat(value string) AvgPoolGradAttr { + return func(m optionalAttr) { + m["data_format"] = value + } +} + +// Computes gradients of the average pooling function. +// +// Arguments: +// orig_input_shape: 1-D. Shape of the original input to `avg_pool`. +// grad: 4-D with shape `[batch, height, width, channels]`. Gradients w.r.t. +// the output of `avg_pool`. 
+// ksize: The size of the sliding window for each dimension of the input. +// strides: The stride of the sliding window for each dimension of the input. +// padding: The type of padding algorithm to use. +// +// Returns 4-D. Gradients w.r.t. the input of `avg_pool`. +func AvgPoolGrad(scope *Scope, orig_input_shape tf.Output, grad tf.Output, ksize []int64, strides []int64, padding string, optional ...AvgPoolGradAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "AvgPoolGrad", + Input: []tf.Input{ + orig_input_shape, grad, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// StageClearAttr is an optional argument to StageClear. +type StageClearAttr func(optionalAttr) + +// StageClearCapacity sets the optional capacity attribute to value. +// If not specified, defaults to 0 +// +// REQUIRES: value >= 0 +func StageClearCapacity(value int64) StageClearAttr { + return func(m optionalAttr) { + m["capacity"] = value + } +} + +// StageClearMemoryLimit sets the optional memory_limit attribute to value. +// If not specified, defaults to 0 +// +// REQUIRES: value >= 0 +func StageClearMemoryLimit(value int64) StageClearAttr { + return func(m optionalAttr) { + m["memory_limit"] = value + } +} + +// StageClearContainer sets the optional container attribute to value. +// If not specified, defaults to "" +func StageClearContainer(value string) StageClearAttr { + return func(m optionalAttr) { + m["container"] = value + } +} + +// StageClearSharedName sets the optional shared_name attribute to value. +// If not specified, defaults to "" +func StageClearSharedName(value string) StageClearAttr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// Op removes all elements in the underlying container. +// +// Returns the created operation. 
+func StageClear(scope *Scope, dtypes []tf.DataType, optional ...StageClearAttr) (o *tf.Operation) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"dtypes": dtypes} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "StageClear", + + Attrs: attrs, + } + return scope.AddOperation(opspec) +} + +// ComputeAccidentalHitsAttr is an optional argument to ComputeAccidentalHits. +type ComputeAccidentalHitsAttr func(optionalAttr) + +// ComputeAccidentalHitsSeed sets the optional seed attribute to value. +// +// value: If either seed or seed2 are set to be non-zero, the random number +// generator is seeded by the given seed. Otherwise, it is seeded by a +// random seed. +// If not specified, defaults to 0 +func ComputeAccidentalHitsSeed(value int64) ComputeAccidentalHitsAttr { + return func(m optionalAttr) { + m["seed"] = value + } +} + +// ComputeAccidentalHitsSeed2 sets the optional seed2 attribute to value. +// +// value: An second seed to avoid seed collision. +// If not specified, defaults to 0 +func ComputeAccidentalHitsSeed2(value int64) ComputeAccidentalHitsAttr { + return func(m optionalAttr) { + m["seed2"] = value + } +} + +// Computes the ids of the positions in sampled_candidates that match true_labels. +// +// When doing log-odds NCE, the result of this op should be passed through a +// SparseToDense op, then added to the logits of the sampled candidates. This has +// the effect of 'removing' the sampled labels that match the true labels by +// making the classifier sure that they are sampled labels. +// +// Arguments: +// true_classes: The true_classes output of UnpackSparseLabels. +// sampled_candidates: The sampled_candidates output of CandidateSampler. +// num_true: Number of true labels per context. 
+// +// Returns A vector of indices corresponding to rows of true_candidates.A vector of IDs of positions in sampled_candidates that match a true_label +// for the row with the corresponding index in indices.A vector of the same length as indices and ids, in which each element +// is -FLOAT_MAX. +func ComputeAccidentalHits(scope *Scope, true_classes tf.Output, sampled_candidates tf.Output, num_true int64, optional ...ComputeAccidentalHitsAttr) (indices tf.Output, ids tf.Output, weights tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"num_true": num_true} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "ComputeAccidentalHits", + Input: []tf.Input{ + true_classes, sampled_candidates, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2) +} + +// QuantizedRelu6Attr is an optional argument to QuantizedRelu6. +type QuantizedRelu6Attr func(optionalAttr) + +// QuantizedRelu6OutType sets the optional out_type attribute to value. +// If not specified, defaults to DT_QUINT8 +func QuantizedRelu6OutType(value tf.DataType) QuantizedRelu6Attr { + return func(m optionalAttr) { + m["out_type"] = value + } +} + +// Computes Quantized Rectified Linear 6: `min(max(features, 0), 6)` +// +// Arguments: +// +// min_features: The float value that the lowest quantized value represents. +// max_features: The float value that the highest quantized value represents. +// +// Returns Has the same output shape as "features".The float value that the lowest quantized value represents.The float value that the highest quantized value represents. 
+func QuantizedRelu6(scope *Scope, features tf.Output, min_features tf.Output, max_features tf.Output, optional ...QuantizedRelu6Attr) (activations tf.Output, min_activations tf.Output, max_activations tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "QuantizedRelu6", + Input: []tf.Input{ + features, min_features, max_features, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2) +} + +// FixedLengthRecordReaderV2Attr is an optional argument to FixedLengthRecordReaderV2. +type FixedLengthRecordReaderV2Attr func(optionalAttr) + +// FixedLengthRecordReaderV2HeaderBytes sets the optional header_bytes attribute to value. +// +// value: Number of bytes in the header, defaults to 0. +// If not specified, defaults to 0 +func FixedLengthRecordReaderV2HeaderBytes(value int64) FixedLengthRecordReaderV2Attr { + return func(m optionalAttr) { + m["header_bytes"] = value + } +} + +// FixedLengthRecordReaderV2FooterBytes sets the optional footer_bytes attribute to value. +// +// value: Number of bytes in the footer, defaults to 0. +// If not specified, defaults to 0 +func FixedLengthRecordReaderV2FooterBytes(value int64) FixedLengthRecordReaderV2Attr { + return func(m optionalAttr) { + m["footer_bytes"] = value + } +} + +// FixedLengthRecordReaderV2HopBytes sets the optional hop_bytes attribute to value. +// +// value: Number of bytes to hop before each read. Default of 0 means using +// record_bytes. +// If not specified, defaults to 0 +func FixedLengthRecordReaderV2HopBytes(value int64) FixedLengthRecordReaderV2Attr { + return func(m optionalAttr) { + m["hop_bytes"] = value + } +} + +// FixedLengthRecordReaderV2Container sets the optional container attribute to value. +// +// value: If non-empty, this reader is placed in the given container. +// Otherwise, a default container is used. 
+// If not specified, defaults to "" +func FixedLengthRecordReaderV2Container(value string) FixedLengthRecordReaderV2Attr { + return func(m optionalAttr) { + m["container"] = value + } +} + +// FixedLengthRecordReaderV2SharedName sets the optional shared_name attribute to value. +// +// value: If non-empty, this reader is named in the given bucket +// with this shared_name. Otherwise, the node name is used instead. +// If not specified, defaults to "" +func FixedLengthRecordReaderV2SharedName(value string) FixedLengthRecordReaderV2Attr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// FixedLengthRecordReaderV2Encoding sets the optional encoding attribute to value. +// +// value: The type of encoding for the file. Currently ZLIB and GZIP +// are supported. Defaults to none. +// If not specified, defaults to "" +func FixedLengthRecordReaderV2Encoding(value string) FixedLengthRecordReaderV2Attr { + return func(m optionalAttr) { + m["encoding"] = value + } +} + +// A Reader that outputs fixed-length records from a file. +// +// Arguments: +// record_bytes: Number of bytes in the record. +// +// Returns The handle to reference the Reader. +func FixedLengthRecordReaderV2(scope *Scope, record_bytes int64, optional ...FixedLengthRecordReaderV2Attr) (reader_handle tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"record_bytes": record_bytes} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "FixedLengthRecordReaderV2", + + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Converts each string in the input Tensor to its hash mod by a number of buckets. +// +// The hash function is deterministic on the content of the string within the +// process. +// +// Note that the hash function may change from time to time. 
+// This functionality will be deprecated and it's recommended to use +// `tf.string_to_hash_bucket_fast()` or `tf.string_to_hash_bucket_strong()`. +// +// Arguments: +// +// num_buckets: The number of buckets. +// +// Returns A Tensor of the same shape as the input `string_tensor`. +func StringToHashBucket(scope *Scope, string_tensor tf.Output, num_buckets int64) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"num_buckets": num_buckets} + opspec := tf.OpSpec{ + Type: "StringToHashBucket", + Input: []tf.Input{ + string_tensor, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Computes gradients for the exponential linear (Elu) operation. +// +// Arguments: +// gradients: The backpropagated gradients to the corresponding Elu operation. +// outputs: The outputs of the corresponding Elu operation. +// +// Returns The gradients: `gradients * (outputs + 1)` if outputs < 0, +// `gradients` otherwise. +func EluGrad(scope *Scope, gradients tf.Output, outputs tf.Output) (backprops tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "EluGrad", + Input: []tf.Input{ + gradients, outputs, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Creates a dataset that contains `count` elements from the `input_dataset`. +// +// Arguments: +// +// count: A scalar representing the number of elements from the `input_dataset` +// that should be taken. A value of `-1` indicates that all of `input_dataset` +// is taken. 
+// +// +func TakeDataset(scope *Scope, input_dataset tf.Output, count tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} + opspec := tf.OpSpec{ + Type: "TakeDataset", + Input: []tf.Input{ + input_dataset, count, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// The gradient operator for the SparseAdd op. +// +// The SparseAdd op calculates A + B, where A, B, and the sum are all represented +// as `SparseTensor` objects. This op takes in the upstream gradient w.r.t. +// non-empty values of the sum, and outputs the gradients w.r.t. the non-empty +// values of A and B. +// +// Arguments: +// backprop_val_grad: 1-D with shape `[nnz(sum)]`. The gradient with respect to +// the non-empty values of the sum. +// a_indices: 2-D. The `indices` of the `SparseTensor` A, size `[nnz(A), ndims]`. +// b_indices: 2-D. The `indices` of the `SparseTensor` B, size `[nnz(B), ndims]`. +// sum_indices: 2-D. The `indices` of the sum `SparseTensor`, size +// `[nnz(sum), ndims]`. +// +// Returns 1-D with shape `[nnz(A)]`. The gradient with respect to the +// non-empty values of A.1-D with shape `[nnz(B)]`. The gradient with respect to the +// non-empty values of B. +func SparseAddGrad(scope *Scope, backprop_val_grad tf.Output, a_indices tf.Output, b_indices tf.Output, sum_indices tf.Output) (a_val_grad tf.Output, b_val_grad tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "SparseAddGrad", + Input: []tf.Input{ + backprop_val_grad, a_indices, b_indices, sum_indices, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1) +} + +// Computes atan of x element-wise. 
+func Atan(scope *Scope, x tf.Output) (y tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Atan", + Input: []tf.Input{ + x, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Encode audio data using the WAV file format. +// +// This operation will generate a string suitable to be saved out to create a .wav +// audio file. It will be encoded in the 16-bit PCM format. It takes in float +// values in the range -1.0f to 1.0f, and any outside that value will be clamped to +// that range. +// +// `audio` is a 2-D float Tensor of shape `[length, channels]`. +// `sample_rate` is a scalar Tensor holding the rate to use (e.g. 44100). +// +// Arguments: +// audio: 2-D with shape `[length, channels]`. +// sample_rate: Scalar containing the sample frequency. +// +// Returns 0-D. WAV-encoded file contents. +func EncodeWav(scope *Scope, audio tf.Output, sample_rate tf.Output) (contents tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "EncodeWav", + Input: []tf.Input{ + audio, sample_rate, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Converts each string in the input Tensor to its hash mod by a number of buckets. +// +// The hash function is deterministic on the content of the string within the +// process. The hash function is a keyed hash function, where attribute `key` +// defines the key of the hash function. `key` is an array of 2 elements. +// +// A strong hash is important when inputs may be malicious, e.g. URLs with +// additional components. Adversaries could try to make their inputs hash to the +// same bucket for a denial-of-service attack or to skew the results. A strong +// hash prevents this by making it difficult, if not infeasible, to compute inputs +// that hash to the same bucket. This comes at a cost of roughly 4x higher compute +// time than `tf.string_to_hash_bucket_fast`. +// +// Arguments: +// input: The strings to assign a hash bucket. 
+// num_buckets: The number of buckets. +// key: The key for the keyed hash function passed as a list of two uint64 +// elements. +// +// Returns A Tensor of the same shape as the input `string_tensor`. +func StringToHashBucketStrong(scope *Scope, input tf.Output, num_buckets int64, key []int64) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } + attrs := map[string]interface{}{"num_buckets": num_buckets, "key": key} opspec := tf.OpSpec{ - Type: "Complex", + Type: "StringToHashBucketStrong", Input: []tf.Input{ - real, imag, + input, }, Attrs: attrs, } @@ -6843,42 +9077,31 @@ func Complex(scope *Scope, real tf.Output, imag tf.Output, optional ...ComplexAt return op.Output(0) } -// UniqueWithCountsAttr is an optional argument to UniqueWithCounts. -type UniqueWithCountsAttr func(optionalAttr) +// RegexReplaceAttr is an optional argument to RegexReplace. +type RegexReplaceAttr func(optionalAttr) -// UniqueWithCountsOutIdx sets the optional out_idx attribute to value. -// If not specified, defaults to DT_INT32 -func UniqueWithCountsOutIdx(value tf.DataType) UniqueWithCountsAttr { +// RegexReplaceReplaceGlobal sets the optional replace_global attribute to value. +// +// value: If True, the replacement is global, otherwise the replacement +// is done only on the first match. +// If not specified, defaults to true +func RegexReplaceReplaceGlobal(value bool) RegexReplaceAttr { return func(m optionalAttr) { - m["out_idx"] = value + m["replace_global"] = value } } -// Finds unique elements in a 1-D tensor. -// -// This operation returns a tensor `y` containing all of the unique elements of `x` -// sorted in the same order that they occur in `x`. This operation also returns a -// tensor `idx` the same size as `x` that contains the index of each value of `x` -// in the unique output `y`. Finally, it returns a third tensor `count` that -// contains the count of each element of `y` in `x`. 
In other words: -// -// `y[idx[i]] = x[i] for i in [0, 1,...,rank(x) - 1]` -// -// For example: +// Replaces the match of pattern in input with rewrite. // -// ``` -// # tensor 'x' is [1, 1, 2, 4, 4, 4, 7, 8, 8] -// y, idx, count = unique_with_counts(x) -// y ==> [1, 2, 4, 7, 8] -// idx ==> [0, 0, 1, 2, 2, 2, 3, 4, 4] -// count ==> [2, 1, 3, 1, 2] -// ``` +// It follows the re2 syntax (https://github.com/google/re2/wiki/Syntax) // // Arguments: -// x: 1-D. +// input: The text to be processed. +// pattern: The regular expression to match the input. +// rewrite: The rewrite to be applied to the matched expresion. // -// Returns 1-D.1-D.1-D. -func UniqueWithCounts(scope *Scope, x tf.Output, optional ...UniqueWithCountsAttr) (y tf.Output, idx tf.Output, count tf.Output) { +// Returns The text after applying pattern and rewrite. +func RegexReplace(scope *Scope, input tf.Output, pattern tf.Output, rewrite tf.Output, optional ...RegexReplaceAttr) (output tf.Output) { if scope.Err() != nil { return } @@ -6887,134 +9110,117 @@ func UniqueWithCounts(scope *Scope, x tf.Output, optional ...UniqueWithCountsAtt a(attrs) } opspec := tf.OpSpec{ - Type: "UniqueWithCounts", + Type: "RegexReplace", Input: []tf.Input{ - x, + input, pattern, rewrite, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// StatelessRandomNormalAttr is an optional argument to StatelessRandomNormal. -type StatelessRandomNormalAttr func(optionalAttr) - -// StatelessRandomNormalDtype sets the optional dtype attribute to value. -// -// value: The type of the output. -// If not specified, defaults to DT_FLOAT -func StatelessRandomNormalDtype(value tf.DataType) StatelessRandomNormalAttr { - return func(m optionalAttr) { - m["dtype"] = value - } + return op.Output(0) } -// Outputs deterministic pseudorandom values from a normal distribution. -// -// The generated values will have mean 0 and standard deviation 1. 
-// -// The outputs are a deterministic function of `shape` and `seed`. -// -// Arguments: -// shape: The shape of the output tensor. -// seed: 2 seeds (shape [2]). +// Computes numerical negative value element-wise. // -// Returns Random values with specified shape. -func StatelessRandomNormal(scope *Scope, shape tf.Output, seed tf.Output, optional ...StatelessRandomNormalAttr) (output tf.Output) { +// I.e., \\(y = -x\\). +func Neg(scope *Scope, x tf.Output) (y tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "StatelessRandomNormal", + Type: "Neg", Input: []tf.Input{ - shape, seed, + x, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Reshapes a quantized tensor as per the Reshape op. +// Execute a sub graph on a remote processor. // -// ``` +// The graph specifications(such as graph itself, input tensors and output names) +// are stored as a serialized protocol buffer of RemoteFusedGraphExecuteInfo +// as serialized_remote_fused_graph_execute_info. +// The specifications will be passed to a dedicated registered +// remote fused graph executor. The executor will send the graph specifications +// to a remote processor and execute that graph. The execution results +// will be passed to consumer nodes as outputs of this node. // // Arguments: +// inputs: Arbitrary number of tensors with arbitrary data types // -// shape: Defines the shape of the output tensor. -// input_min: The minimum value of the input. -// input_max: The maximum value of the input. +// serialized_remote_fused_graph_execute_info: Serialized protocol buffer +// of RemoteFusedGraphExecuteInfo which contains graph specifications. // -// Returns This value is copied from input_min.This value is copied from input_max. 
-func QuantizedReshape(scope *Scope, tensor tf.Output, shape tf.Output, input_min tf.Output, input_max tf.Output) (output tf.Output, output_min tf.Output, output_max tf.Output) { +// Returns Arbitrary number of tensors with arbitrary data types +func RemoteFusedGraphExecute(scope *Scope, inputs []tf.Output, Toutputs []tf.DataType, serialized_remote_fused_graph_execute_info string) (outputs []tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"Toutputs": Toutputs, "serialized_remote_fused_graph_execute_info": serialized_remote_fused_graph_execute_info} opspec := tf.OpSpec{ - Type: "QuantizedReshape", + Type: "RemoteFusedGraphExecute", Input: []tf.Input{ - tensor, shape, input_min, input_max, + tf.OutputList(inputs), }, + Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + if scope.Err() != nil { + return + } + var idx int + var err error + if outputs, idx, err = makeOutputList(op, idx, "outputs"); err != nil { + scope.UpdateErr("RemoteFusedGraphExecute", err) + return + } + return outputs } -// GatherAttr is an optional argument to Gather. -type GatherAttr func(optionalAttr) +// MaxPool3DGradGradAttr is an optional argument to MaxPool3DGradGrad. +type MaxPool3DGradGradAttr func(optionalAttr) -// GatherValidateIndices sets the optional validate_indices attribute to value. -// If not specified, defaults to true -func GatherValidateIndices(value bool) GatherAttr { +// MaxPool3DGradGradDataFormat sets the optional data_format attribute to value. +// +// value: The data format of the input and output data. With the +// default format "NDHWC", the data is stored in the order of: +// [batch, in_depth, in_height, in_width, in_channels]. +// Alternatively, the format could be "NCDHW", the data storage order is: +// [batch, in_channels, in_depth, in_height, in_width]. 
+// If not specified, defaults to "NDHWC" +func MaxPool3DGradGradDataFormat(value string) MaxPool3DGradGradAttr { return func(m optionalAttr) { - m["validate_indices"] = value + m["data_format"] = value } } -// Gather slices from `params` according to `indices`. -// -// `indices` must be an integer tensor of any dimension (usually 0-D or 1-D). -// Produces an output tensor with shape `indices.shape + params.shape[1:]` where: -// -// ```python -// # Scalar indices -// output[:, ..., :] = params[indices, :, ... :] -// -// # Vector indices -// output[i, :, ..., :] = params[indices[i], :, ... :] -// -// # Higher rank indices -// output[i, ..., j, :, ... :] = params[indices[i, ..., j], :, ..., :] -// ``` -// -// If `indices` is a permutation and `len(indices) == params.shape[0]` then -// this operation will permute `params` accordingly. +// Computes second-order gradients of the maxpooling function. // -// `validate_indices`: DEPRECATED. If this operation is assigned to CPU, values in -// `indices` are always validated to be within range. If assigned to GPU, -// out-of-bound indices result in safe but unspecified behavior, which may include -// raising an error. +// Arguments: +// orig_input: The original input tensor. +// orig_output: The original output tensor. +// grad: Output backprop of shape `[batch, depth, rows, cols, channels]`. +// ksize: 1-D tensor of length 5. The size of the window for each dimension of +// the input tensor. Must have `ksize[0] = ksize[4] = 1`. +// strides: 1-D tensor of length 5. The stride of the sliding window for each +// dimension of `input`. Must have `strides[0] = strides[4] = 1`. +// padding: The type of padding algorithm to use. // -//
-// -//
-func Gather(scope *Scope, params tf.Output, indices tf.Output, optional ...GatherAttr) (output tf.Output) { +// Returns Gradients of gradients w.r.t. the input to `max_pool`. +func MaxPool3DGradGrad(scope *Scope, orig_input tf.Output, orig_output tf.Output, grad tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPool3DGradGradAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "Gather", + Type: "MaxPool3DGradGrad", Input: []tf.Input{ - params, indices, + orig_input, orig_output, grad, }, Attrs: attrs, } @@ -7022,110 +9228,101 @@ func Gather(scope *Scope, params tf.Output, indices tf.Output, optional ...Gathe return op.Output(0) } -// Returns the truth value of (x != y) element-wise. +// Conv3DBackpropFilterV2Attr is an optional argument to Conv3DBackpropFilterV2. +type Conv3DBackpropFilterV2Attr func(optionalAttr) + +// Conv3DBackpropFilterV2DataFormat sets the optional data_format attribute to value. // -// *NOTE*: `NotEqual` supports broadcasting. More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func NotEqual(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "NotEqual", - Input: []tf.Input{ - x, y, - }, +// value: The data format of the input and output data. With the +// default format "NDHWC", the data is stored in the order of: +// [batch, in_depth, in_height, in_width, in_channels]. +// Alternatively, the format could be "NCDHW", the data storage order is: +// [batch, in_channels, in_depth, in_height, in_width]. 
+// If not specified, defaults to "NDHWC" +func Conv3DBackpropFilterV2DataFormat(value string) Conv3DBackpropFilterV2Attr { + return func(m optionalAttr) { + m["data_format"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// Inverse 3D real-valued fast Fourier transform. -// -// Computes the inverse 3-dimensional discrete Fourier transform of a real-valued -// signal over the inner-most 3 dimensions of `input`. -// -// The inner-most 3 dimensions of `input` are assumed to be the result of `RFFT3D`: -// The inner-most dimension contains the `fft_length / 2 + 1` unique components of -// the DFT of a real-valued signal. If `fft_length` is not provided, it is computed -// from the size of the inner-most 3 dimensions of `input`. If the FFT length used -// to compute `input` is odd, it should be provided since it cannot be inferred -// properly. +// Conv3DBackpropFilterV2Dilations sets the optional dilations attribute to value. // -// Along each axis `IRFFT3D` is computed on, if `fft_length` (or -// `fft_length / 2 + 1` for the inner-most dimension) is smaller than the -// corresponding dimension of `input`, the dimension is cropped. If it is larger, -// the dimension is padded with zeros. +// value: 1-D tensor of length 5. The dilation factor for each dimension of +// `input`. If set to k > 1, there will be k-1 skipped cells between each +// filter element on that dimension. The dimension order is determined by the +// value of `data_format`, see above for details. Dilations in the batch and +// depth dimensions must be 1. +// If not specified, defaults to +func Conv3DBackpropFilterV2Dilations(value []int64) Conv3DBackpropFilterV2Attr { + return func(m optionalAttr) { + m["dilations"] = value + } +} + +// Computes the gradients of 3-D convolution with respect to the filter. // // Arguments: -// input: A complex64 tensor. -// fft_length: An int32 tensor of shape [3]. The FFT length for each dimension. 
-// -// Returns A float32 tensor of the same rank as `input`. The inner-most 3 -// dimensions of `input` are replaced with the `fft_length` samples of their -// inverse 3D real Fourier transform. -// -// @compatibility(numpy) -// Equivalent to np.irfftn with 3 dimensions. -// @end_compatibility -func IRFFT3D(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Output) { +// input: Shape `[batch, depth, rows, cols, in_channels]`. +// filter_sizes: An integer vector representing the tensor shape of `filter`, +// where `filter` is a 5-D +// `[filter_depth, filter_height, filter_width, in_channels, out_channels]` +// tensor. +// out_backprop: Backprop signal of shape `[batch, out_depth, out_rows, out_cols, +// out_channels]`. +// strides: 1-D tensor of length 5. The stride of the sliding window for each +// dimension of `input`. Must have `strides[0] = strides[4] = 1`. +// padding: The type of padding algorithm to use. +func Conv3DBackpropFilterV2(scope *Scope, input tf.Output, filter_sizes tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...Conv3DBackpropFilterV2Attr) (output tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"strides": strides, "padding": padding} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "IRFFT3D", + Type: "Conv3DBackpropFilterV2", Input: []tf.Input{ - input, fft_length, + input, filter_sizes, out_backprop, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// StringSplitAttr is an optional argument to StringSplit. -type StringSplitAttr func(optionalAttr) +// FakeQuantWithMinMaxVarsAttr is an optional argument to FakeQuantWithMinMaxVars. +type FakeQuantWithMinMaxVarsAttr func(optionalAttr) -// StringSplitSkipEmpty sets the optional skip_empty attribute to value. -// -// value: A `bool`. If `True`, skip the empty strings from the result. 
-// If not specified, defaults to true -func StringSplitSkipEmpty(value bool) StringSplitAttr { +// FakeQuantWithMinMaxVarsNumBits sets the optional num_bits attribute to value. +// If not specified, defaults to 8 +func FakeQuantWithMinMaxVarsNumBits(value int64) FakeQuantWithMinMaxVarsAttr { return func(m optionalAttr) { - m["skip_empty"] = value + m["num_bits"] = value } } -// Split elements of `input` based on `delimiter` into a `SparseTensor`. -// -// Let N be the size of source (typically N will be the batch size). Split each -// element of `input` based on `delimiter` and return a `SparseTensor` -// containing the splitted tokens. Empty tokens are ignored. -// -// `delimiter` can be empty, or a string of split characters. If `delimiter` is an -// empty string, each element of `input` is split into individual single-byte -// character strings, including splitting of UTF-8 multibyte sequences. Otherwise -// every character of `delimiter` is a potential split point. -// -// For example: -// N = 2, input[0] is 'hello world' and input[1] is 'a b c', then the output -// will be +// FakeQuantWithMinMaxVarsNarrowRange sets the optional narrow_range attribute to value. +// If not specified, defaults to false +func FakeQuantWithMinMaxVarsNarrowRange(value bool) FakeQuantWithMinMaxVarsAttr { + return func(m optionalAttr) { + m["narrow_range"] = value + } +} + +// Fake-quantize the 'inputs' tensor of type float via global float scalars `min` // -// indices = [0, 0; -// 0, 1; -// 1, 0; -// 1, 1; -// 1, 2] -// shape = [2, 3] -// values = ['hello', 'world', 'a', 'b', 'c'] +// and `max` to 'outputs' tensor of same shape as `inputs`. // -// Arguments: -// input: 1-D. Strings to split. -// delimiter: 0-D. Delimiter characters (bytes), or empty string. +// `[min; max]` define the clamping range for the `inputs` data. 
+// `inputs` values are quantized into the quantization range (`[0; 2^num_bits - 1]` +// when `narrow_range` is false and `[1; 2^num_bits - 1]` when it is true) and +// then de-quantized and output as floats in `[min; max]` interval. +// `num_bits` is the bitwidth of the quantization; between 2 and 16, inclusive. // -// Returns A dense matrix of int64 representing the indices of the sparse tensor.A vector of strings corresponding to the splited values.a length-2 vector of int64 representing the shape of the sparse -// tensor, where the first value is N and the second value is the maximum number -// of tokens in a single input entry. -func StringSplit(scope *Scope, input tf.Output, delimiter tf.Output, optional ...StringSplitAttr) (indices tf.Output, values tf.Output, shape tf.Output) { +// This operation has a gradient and thus allows for training `min` and `max` +// values. +func FakeQuantWithMinMaxVars(scope *Scope, inputs tf.Output, min tf.Output, max tf.Output, optional ...FakeQuantWithMinMaxVarsAttr) (outputs tf.Output) { if scope.Err() != nil { return } @@ -7134,223 +9331,231 @@ func StringSplit(scope *Scope, input tf.Output, delimiter tf.Output, optional .. a(attrs) } opspec := tf.OpSpec{ - Type: "StringSplit", + Type: "FakeQuantWithMinMaxVars", Input: []tf.Input{ - input, delimiter, + inputs, min, max, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return op.Output(0) } -// ResizeBilinearAttr is an optional argument to ResizeBilinear. -type ResizeBilinearAttr func(optionalAttr) - -// ResizeBilinearAlignCorners sets the optional align_corners attribute to value. +// Applies softmax to a batched N-D `SparseTensor`. // -// value: If true, rescale input by (new_height - 1) / (height - 1), which -// exactly aligns the 4 corners of images and resized images. If false, rescale -// by new_height / height. Treat similarly the width dimension. 
-// If not specified, defaults to false -func ResizeBilinearAlignCorners(value bool) ResizeBilinearAttr { - return func(m optionalAttr) { - m["align_corners"] = value - } -} - -// Resize `images` to `size` using bilinear interpolation. +// The inputs represent an N-D SparseTensor with logical shape `[..., B, C]` +// (where `N >= 2`), and with indices sorted in the canonical lexicographic order. // -// Input images can be of different types but output images are always float. +// This op is equivalent to applying the normal `tf.nn.softmax()` to each innermost +// logical submatrix with shape `[B, C]`, but with the catch that *the implicitly +// zero elements do not participate*. Specifically, the algorithm is equivalent +// to the following: +// +// (1) Applies `tf.nn.softmax()` to a densified view of each innermost submatrix +// with shape `[B, C]`, along the size-C dimension; +// (2) Masks out the original implicitly-zero locations; +// (3) Renormalizes the remaining elements. +// +// Hence, the `SparseTensor` result has exactly the same non-zero indices and +// shape. // // Arguments: -// images: 4-D with shape `[batch, height, width, channels]`. -// size: = A 1-D int32 Tensor of 2 elements: `new_height, new_width`. The -// new size for the images. +// sp_indices: 2-D. `NNZ x R` matrix with the indices of non-empty values in a +// SparseTensor, in canonical ordering. +// sp_values: 1-D. `NNZ` non-empty values corresponding to `sp_indices`. +// sp_shape: 1-D. Shape of the input SparseTensor. // -// Returns 4-D with shape -// `[batch, new_height, new_width, channels]`. -func ResizeBilinear(scope *Scope, images tf.Output, size tf.Output, optional ...ResizeBilinearAttr) (resized_images tf.Output) { +// Returns 1-D. The `NNZ` values for the result `SparseTensor`. 
+func SparseSoftmax(scope *Scope, sp_indices tf.Output, sp_values tf.Output, sp_shape tf.Output) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "ResizeBilinear", + Type: "SparseSoftmax", Input: []tf.Input{ - images, size, + sp_indices, sp_values, sp_shape, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Computes softsign: `features / (abs(features) + 1)`. -func Softsign(scope *Scope, features tf.Output) (activations tf.Output) { +// Partitions `data` into `num_partitions` tensors using indices from `partitions`. +// +// For each index tuple `js` of size `partitions.ndim`, the slice `data[js, ...]` +// becomes part of `outputs[partitions[js]]`. The slices with `partitions[js] = i` +// are placed in `outputs[i]` in lexicographic order of `js`, and the first +// dimension of `outputs[i]` is the number of entries in `partitions` equal to `i`. +// In detail, +// +// ```python +// outputs[i].shape = [sum(partitions == i)] + data.shape[partitions.ndim:] +// +// outputs[i] = pack([data[js, ...] for js if partitions[js] == i]) +// ``` +// +// `data.shape` must start with `partitions.shape`. +// +// For example: +// +// ```python +// # Scalar partitions. +// partitions = 1 +// num_partitions = 2 +// data = [10, 20] +// outputs[0] = [] # Empty with shape [0, 2] +// outputs[1] = [[10, 20]] +// +// # Vector partitions. +// partitions = [0, 0, 1, 1, 0] +// num_partitions = 2 +// data = [10, 20, 30, 40, 50] +// outputs[0] = [10, 20, 50] +// outputs[1] = [30, 40] +// ``` +// +// See `dynamic_stitch` for an example on how to merge partitions back. +// +//
+// +//
+// +// Arguments: +// +// partitions: Any shape. Indices in the range `[0, num_partitions)`. +// num_partitions: The number of partitions to output. +func DynamicPartition(scope *Scope, data tf.Output, partitions tf.Output, num_partitions int64) (outputs []tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"num_partitions": num_partitions} opspec := tf.OpSpec{ - Type: "Softsign", + Type: "DynamicPartition", Input: []tf.Input{ - features, + data, partitions, }, + Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + if scope.Err() != nil { + return + } + var idx int + var err error + if outputs, idx, err = makeOutputList(op, idx, "outputs"); err != nil { + scope.UpdateErr("DynamicPartition", err) + return + } + return outputs } -// GenerateVocabRemappingAttr is an optional argument to GenerateVocabRemapping. -type GenerateVocabRemappingAttr func(optionalAttr) +// ResourceApplyAdagradAttr is an optional argument to ResourceApplyAdagrad. +type ResourceApplyAdagradAttr func(optionalAttr) -// GenerateVocabRemappingOldVocabSize sets the optional old_vocab_size attribute to value. -// -// value: Number of entries in the old vocab file to consider. If -1, -// use the entire old vocabulary. -// If not specified, defaults to -1 +// ResourceApplyAdagradUseLocking sets the optional use_locking attribute to value. // -// REQUIRES: value >= -1 -func GenerateVocabRemappingOldVocabSize(value int64) GenerateVocabRemappingAttr { +// value: If `True`, updating of the var and accum tensors will be protected +// by a lock; otherwise the behavior is undefined, but may exhibit less +// contention. 
+// If not specified, defaults to false +func ResourceApplyAdagradUseLocking(value bool) ResourceApplyAdagradAttr { return func(m optionalAttr) { - m["old_vocab_size"] = value + m["use_locking"] = value } } -// Given a path to new and old vocabulary files, returns a remapping Tensor of -// -// length `num_new_vocab`, where `remapping[i]` contains the row number in the old -// vocabulary that corresponds to row `i` in the new vocabulary (starting at line -// `new_vocab_offset` and up to `num_new_vocab` entities), or `-1` if entry `i` -// in the new vocabulary is not in the old vocabulary. The old vocabulary is -// constrained to the first `old_vocab_size` entries if `old_vocab_size` is not the -// default value of -1. -// -// `num_vocab_offset` enables -// use in the partitioned variable case, and should generally be set through -// examining partitioning info. The format of the files should be a text file, -// with each line containing a single entity within the vocabulary. -// -// For example, with `new_vocab_file` a text file containing each of the following -// elements on a single line: `[f0, f1, f2, f3]`, old_vocab_file = [f1, f0, f3], -// `num_new_vocab = 3, new_vocab_offset = 1`, the returned remapping would be -// `[0, -1, 2]`. -// -// The op also returns a count of how many entries in the new vocabulary -// were present in the old vocabulary, which is used to calculate the number of -// values to initialize in a weight matrix remapping +// Update '*var' according to the adagrad scheme. // -// This functionality can be used to remap both row vocabularies (typically, -// features) and column vocabularies (typically, classes) from TensorFlow -// checkpoints. Note that the partitioning logic relies on contiguous vocabularies -// corresponding to div-partitioned variables. 
Moreover, the underlying remapping -// uses an IndexTable (as opposed to an inexact CuckooTable), so client code should -// use the corresponding index_table_from_file() as the FeatureColumn framework -// does (as opposed to tf.feature_to_id(), which uses a CuckooTable). +// accum += grad * grad +// var -= lr * grad * (1 / sqrt(accum)) // // Arguments: -// new_vocab_file: Path to the new vocab file. -// old_vocab_file: Path to the old vocab file. -// new_vocab_offset: How many entries into the new vocab file to start reading. -// num_new_vocab: Number of entries in the new vocab file to remap. +// var_: Should be from a Variable(). +// accum: Should be from a Variable(). +// lr: Scaling factor. Must be a scalar. +// grad: The gradient. // -// Returns A Tensor of length num_new_vocab where the element at index i -// is equal to the old ID that maps to the new ID i. This element is -1 for any -// new ID that is not found in the old vocabulary.Number of new vocab entries found in old vocab. -func GenerateVocabRemapping(scope *Scope, new_vocab_file tf.Output, old_vocab_file tf.Output, new_vocab_offset int64, num_new_vocab int64, optional ...GenerateVocabRemappingAttr) (remapping tf.Output, num_present tf.Output) { +// Returns the created operation. 
+func ResourceApplyAdagrad(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, grad tf.Output, optional ...ResourceApplyAdagradAttr) (o *tf.Operation) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"new_vocab_offset": new_vocab_offset, "num_new_vocab": num_new_vocab} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "GenerateVocabRemapping", + Type: "ResourceApplyAdagrad", Input: []tf.Input{ - new_vocab_file, old_vocab_file, + var_, accum, lr, grad, }, Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) + return scope.AddOperation(opspec) } -// Assigns sparse updates to the variable referenced by `resource`. -// -// This operation computes -// -// # Scalar indices -// ref[indices, ...] = updates[...] -// -// # Vector indices (for each i) -// ref[indices[i], ...] = updates[i, ...] -// -// # High rank indices (for each i, ..., j) -// ref[indices[i, ..., j], ...] = updates[i, ..., j, ...] -// -// Arguments: -// resource: Should be from a `Variable` node. -// indices: A tensor of indices into the first dimension of `ref`. -// updates: A tensor of updated values to add to `ref`. +// Return the shape of s0 op s1 with broadcast. // -// Returns the created operation. -func ResourceScatterUpdate(scope *Scope, resource tf.Output, indices tf.Output, updates tf.Output) (o *tf.Operation) { +// Given `s0` and `s1`, tensors that represent shapes, compute `r0`, the +// broadcasted shape. `s0`, `s1` and `r0` are all integer vectors. +func BroadcastArgs(scope *Scope, s0 tf.Output, s1 tf.Output) (r0 tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "ResourceScatterUpdate", + Type: "BroadcastArgs", Input: []tf.Input{ - resource, indices, updates, + s0, s1, }, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0) } -// AvgPoolGradAttr is an optional argument to AvgPoolGrad. 
-type AvgPoolGradAttr func(optionalAttr) +// DataFormatDimMapAttr is an optional argument to DataFormatDimMap. +type DataFormatDimMapAttr func(optionalAttr) -// AvgPoolGradDataFormat sets the optional data_format attribute to value. +// DataFormatDimMapSrcFormat sets the optional src_format attribute to value. // -// value: Specify the data format of the input and output data. With the -// default format "NHWC", the data is stored in the order of: -// [batch, in_height, in_width, in_channels]. -// Alternatively, the format could be "NCHW", the data storage order of: -// [batch, in_channels, in_height, in_width]. +// value: source data format. // If not specified, defaults to "NHWC" -func AvgPoolGradDataFormat(value string) AvgPoolGradAttr { +func DataFormatDimMapSrcFormat(value string) DataFormatDimMapAttr { return func(m optionalAttr) { - m["data_format"] = value + m["src_format"] = value } } -// Computes gradients of the average pooling function. +// DataFormatDimMapDstFormat sets the optional dst_format attribute to value. +// +// value: destination data format. +// If not specified, defaults to "NCHW" +func DataFormatDimMapDstFormat(value string) DataFormatDimMapAttr { + return func(m optionalAttr) { + m["dst_format"] = value + } +} + +// Returns the dimension index in the destination data format given the one in +// +// the source data format. // // Arguments: -// orig_input_shape: 1-D. Shape of the original input to `avg_pool`. -// grad: 4-D with shape `[batch, height, width, channels]`. Gradients w.r.t. -// the output of `avg_pool`. -// ksize: The size of the sliding window for each dimension of the input. -// strides: The stride of the sliding window for each dimension of the input. -// padding: The type of padding algorithm to use. +// x: A Tensor with each element as a dimension index in source data format. +// Must be in the range [-4, 4). // -// Returns 4-D. Gradients w.r.t. the input of `avg_pool`. 
-func AvgPoolGrad(scope *Scope, orig_input_shape tf.Output, grad tf.Output, ksize []int64, strides []int64, padding string, optional ...AvgPoolGradAttr) (output tf.Output) { +// Returns A Tensor with each element as a dimension index in destination data format. +func DataFormatDimMap(scope *Scope, x tf.Output, optional ...DataFormatDimMapAttr) (y tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "AvgPoolGrad", + Type: "DataFormatDimMap", Input: []tf.Input{ - orig_input_shape, grad, + x, }, Attrs: attrs, } @@ -7358,222 +9563,326 @@ func AvgPoolGrad(scope *Scope, orig_input_shape tf.Output, grad tf.Output, ksize return op.Output(0) } -// StageClearAttr is an optional argument to StageClear. -type StageClearAttr func(optionalAttr) +// ResourceApplyPowerSignAttr is an optional argument to ResourceApplyPowerSign. +type ResourceApplyPowerSignAttr func(optionalAttr) -// StageClearCapacity sets the optional capacity attribute to value. -// If not specified, defaults to 0 +// ResourceApplyPowerSignUseLocking sets the optional use_locking attribute to value. // -// REQUIRES: value >= 0 -func StageClearCapacity(value int64) StageClearAttr { +// value: If `True`, updating of the var and m tensors is +// protected by a lock; otherwise the behavior is undefined, but may exhibit less +// contention. +// If not specified, defaults to false +func ResourceApplyPowerSignUseLocking(value bool) ResourceApplyPowerSignAttr { return func(m optionalAttr) { - m["capacity"] = value + m["use_locking"] = value } } -// StageClearMemoryLimit sets the optional memory_limit attribute to value. -// If not specified, defaults to 0 +// Update '*var' according to the AddSign update. 
// -// REQUIRES: value >= 0 -func StageClearMemoryLimit(value int64) StageClearAttr { - return func(m optionalAttr) { - m["memory_limit"] = value - } -} - -// StageClearContainer sets the optional container attribute to value. -// If not specified, defaults to "" -func StageClearContainer(value string) StageClearAttr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// StageClearSharedName sets the optional shared_name attribute to value. -// If not specified, defaults to "" -func StageClearSharedName(value string) StageClearAttr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// Op removes all elements in the underlying container. +// m_t <- beta1 * m_{t-1} + (1 - beta1) * g +// update <- exp(logbase * sign_decay * sign(g) * sign(m_t)) * g +// variable <- variable - lr_t * update +// +// Arguments: +// var_: Should be from a Variable(). +// m: Should be from a Variable(). +// lr: Scaling factor. Must be a scalar. +// logbase: Must be a scalar. +// sign_decay: Must be a scalar. +// beta: Must be a scalar. +// grad: The gradient. // // Returns the created operation. -func StageClear(scope *Scope, dtypes []tf.DataType, optional ...StageClearAttr) (o *tf.Operation) { +func ResourceApplyPowerSign(scope *Scope, var_ tf.Output, m tf.Output, lr tf.Output, logbase tf.Output, sign_decay tf.Output, beta tf.Output, grad tf.Output, optional ...ResourceApplyPowerSignAttr) (o *tf.Operation) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"dtypes": dtypes} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "StageClear", - + Type: "ResourceApplyPowerSign", + Input: []tf.Input{ + var_, m, lr, logbase, sign_decay, beta, grad, + }, Attrs: attrs, } return scope.AddOperation(opspec) } -// ComputeAccidentalHitsAttr is an optional argument to ComputeAccidentalHits. -type ComputeAccidentalHitsAttr func(optionalAttr) +// Locks a mutex resource. The output is the lock. 
So long as the lock tensor +// +// is alive, any other request to use `MutexLock` with this mutex will wait. +// +// This is particularly useful for creating a critical section when used in +// conjunction with `MutexLockIdentity`: +// +// ```python +// +// mutex = mutex_v2( +// shared_name=handle_name, container=container, name=name) +// +// def execute_in_critical_section(fn, *args, **kwargs): +// lock = gen_resource_variable_ops.mutex_lock(mutex) +// +// with ops.control_dependencies([lock]): +// r = fn(*args, **kwargs) +// +// with ops.control_dependencies(nest.flatten(r)): +// with ops.colocate_with(mutex): +// ensure_lock_exists = mutex_lock_identity(lock) +// +// # Make sure that if any element of r is accessed, all of +// # them are executed together. +// r = nest.map_structure(tf.identity, r) +// +// with ops.control_dependencies([ensure_lock_exists]): +// return nest.map_structure(tf.identity, r) +// ``` +// +// While `fn` is running in the critical section, no other functions which wish to +// use this critical section may run. +// +// Often the use case is that two executions of the same graph, in parallel, +// wish to run `fn`; and we wish to ensure that only one of them executes +// at a time. This is especially important if `fn` modifies one or more +// variables at a time. +// +// It is also useful if two separate functions must share a resource, but we +// wish to ensure the usage is exclusive. +// +// Arguments: +// mutex: The mutex resource to lock. +// +// Returns A tensor that keeps a shared pointer to a lock on the mutex; +// when the Tensor is destroyed, the use count on the shared pointer is decreased +// by 1. When it reaches 0, the lock is released. 
+func MutexLock(scope *Scope, mutex tf.Output) (mutex_lock tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "MutexLock", + Input: []tf.Input{ + mutex, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} -// ComputeAccidentalHitsSeed sets the optional seed attribute to value. +// Computes the mean along segments of a tensor. // -// value: If either seed or seed2 are set to be non-zero, the random number -// generator is seeded by the given seed. Otherwise, it is seeded by a -// random seed. -// If not specified, defaults to 0 -func ComputeAccidentalHitsSeed(value int64) ComputeAccidentalHitsAttr { - return func(m optionalAttr) { - m["seed"] = value +// Read @{$math_ops#Segmentation$the section on segmentation} for an explanation of +// segments. +// +// Computes a tensor such that +// \\(output_i = \frac{\sum_j data_j}{N}\\) where `mean` is +// over `j` such that `segment_ids[j] == i` and `N` is the total number of +// values summed. +// +// If the mean is empty for a given segment ID `i`, `output[i] = 0`. +// +//
+// +//
+// +// Arguments: +// +// segment_ids: A 1-D tensor whose size is equal to the size of `data`'s +// first dimension. Values should be sorted and can be repeated. +// +// Returns Has same shape as data, except for dimension 0 which +// has size `k`, the number of segments. +func SegmentMean(scope *Scope, data tf.Output, segment_ids tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "SegmentMean", + Input: []tf.Input{ + data, segment_ids, + }, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// ComputeAccidentalHitsSeed2 sets the optional seed2 attribute to value. +// ResourceSparseApplyCenteredRMSPropAttr is an optional argument to ResourceSparseApplyCenteredRMSProp. +type ResourceSparseApplyCenteredRMSPropAttr func(optionalAttr) + +// ResourceSparseApplyCenteredRMSPropUseLocking sets the optional use_locking attribute to value. // -// value: An second seed to avoid seed collision. -// If not specified, defaults to 0 -func ComputeAccidentalHitsSeed2(value int64) ComputeAccidentalHitsAttr { +// value: If `True`, updating of the var, mg, ms, and mom tensors is +// protected by a lock; otherwise the behavior is undefined, but may exhibit less +// contention. +// If not specified, defaults to false +func ResourceSparseApplyCenteredRMSPropUseLocking(value bool) ResourceSparseApplyCenteredRMSPropAttr { return func(m optionalAttr) { - m["seed2"] = value + m["use_locking"] = value } } -// Computes the ids of the positions in sampled_candidates that match true_labels. +// Update '*var' according to the centered RMSProp algorithm. // -// When doing log-odds NCE, the result of this op should be passed through a -// SparseToDense op, then added to the logits of the sampled candidates. This has -// the effect of 'removing' the sampled labels that match the true labels by -// making the classifier sure that they are sampled labels. 
+// The centered RMSProp algorithm uses an estimate of the centered second moment +// (i.e., the variance) for normalization, as opposed to regular RMSProp, which +// uses the (uncentered) second moment. This often helps with training, but is +// slightly more expensive in terms of computation and memory. +// +// Note that in dense implementation of this algorithm, mg, ms, and mom will +// update even if the grad is zero, but in this sparse implementation, mg, ms, +// and mom will not update in iterations during which the grad is zero. +// +// mean_square = decay * mean_square + (1-decay) * gradient ** 2 +// mean_grad = decay * mean_grad + (1-decay) * gradient +// Delta = learning_rate * gradient / sqrt(mean_square + epsilon - mean_grad ** 2) +// +// ms <- rho * ms_{t-1} + (1-rho) * grad * grad +// mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon) +// var <- var - mom // // Arguments: -// true_classes: The true_classes output of UnpackSparseLabels. -// sampled_candidates: The sampled_candidates output of CandidateSampler. -// num_true: Number of true labels per context. +// var_: Should be from a Variable(). +// mg: Should be from a Variable(). +// ms: Should be from a Variable(). +// mom: Should be from a Variable(). +// lr: Scaling factor. Must be a scalar. +// rho: Decay rate. Must be a scalar. // -// Returns A vector of indices corresponding to rows of true_candidates.A vector of IDs of positions in sampled_candidates that match a true_label -// for the row with the corresponding index in indices.A vector of the same length as indices and ids, in which each element -// is -FLOAT_MAX. -func ComputeAccidentalHits(scope *Scope, true_classes tf.Output, sampled_candidates tf.Output, num_true int64, optional ...ComputeAccidentalHitsAttr) (indices tf.Output, ids tf.Output, weights tf.Output) { +// epsilon: Ridge term. Must be a scalar. +// grad: The gradient. +// indices: A vector of indices into the first dimension of var, ms and mom. 
+// +// Returns the created operation. +func ResourceSparseApplyCenteredRMSProp(scope *Scope, var_ tf.Output, mg tf.Output, ms tf.Output, mom tf.Output, lr tf.Output, rho tf.Output, momentum tf.Output, epsilon tf.Output, grad tf.Output, indices tf.Output, optional ...ResourceSparseApplyCenteredRMSPropAttr) (o *tf.Operation) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"num_true": num_true} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "ComputeAccidentalHits", + Type: "ResourceSparseApplyCenteredRMSProp", Input: []tf.Input{ - true_classes, sampled_candidates, + var_, mg, ms, mom, lr, rho, momentum, epsilon, grad, indices, }, Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return scope.AddOperation(opspec) } -// CumsumAttr is an optional argument to Cumsum. -type CumsumAttr func(optionalAttr) - -// CumsumExclusive sets the optional exclusive attribute to value. +// Creates a dataset that batches `batch_size` elements from `input_dataset`. // -// value: If `True`, perform exclusive cumsum. -// If not specified, defaults to false -func CumsumExclusive(value bool) CumsumAttr { - return func(m optionalAttr) { - m["exclusive"] = value - } -} - -// CumsumReverse sets the optional reverse attribute to value. +// Arguments: // -// value: A `bool` (default: False). -// If not specified, defaults to false -func CumsumReverse(value bool) CumsumAttr { - return func(m optionalAttr) { - m["reverse"] = value +// batch_size: A scalar representing the number of elements to accumulate in a +// batch. 
+// +// +func BatchDataset(scope *Scope, input_dataset tf.Output, batch_size tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} + opspec := tf.OpSpec{ + Type: "BatchDataset", + Input: []tf.Input{ + input_dataset, batch_size, + }, + Attrs: attrs, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// Compute the cumulative sum of the tensor `x` along `axis`. -// -// By default, this op performs an inclusive cumsum, which means that the first -// element of the input is identical to the first element of the output: -// -// ```python -// tf.cumsum([a, b, c]) # => [a, a + b, a + b + c] -// ``` +// Inverse fast Fourier transform. // -// By setting the `exclusive` kwarg to `True`, an exclusive cumsum is -// performed instead: +// Computes the inverse 1-dimensional discrete Fourier transform over the +// inner-most dimension of `input`. // -// ```python -// tf.cumsum([a, b, c], exclusive=True) # => [0, a, a + b] -// ``` +// Arguments: +// input: A complex64 tensor. // -// By setting the `reverse` kwarg to `True`, the cumsum is performed in the -// opposite direction: +// Returns A complex64 tensor of the same shape as `input`. The inner-most +// dimension of `input` is replaced with its inverse 1D Fourier transform. // -// ```python -// tf.cumsum([a, b, c], reverse=True) # => [a + b + c, b + c, c] -// ``` +// @compatibility(numpy) +// Equivalent to np.fft.ifft +// @end_compatibility +func IFFT(scope *Scope, input tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "IFFT", + Input: []tf.Input{ + input, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Generates values in an interval. // -// This is more efficient than using separate `tf.reverse` ops. 
+// A sequence of `num` evenly-spaced values are generated beginning at `start`. +// If `num > 1`, the values in the sequence increase by `(stop - start) / (num - 1)`, +// so that the last one is exactly `stop`. // -// The `reverse` and `exclusive` kwargs can also be combined: +// For example: // -// ```python -// tf.cumsum([a, b, c], exclusive=True, reverse=True) # => [b + c, c, 0] +// ``` +// tf.linspace(10.0, 12.0, 3, name="linspace") => [ 10.0 11.0 12.0] // ``` // // Arguments: -// x: A `Tensor`. Must be one of the following types: `float32`, `float64`, -// `int64`, `int32`, `uint8`, `uint16`, `int16`, `int8`, `complex64`, -// `complex128`, `qint8`, `quint8`, `qint32`, `half`. -// axis: A `Tensor` of type `int32` (default: 0). Must be in the range -// `[-rank(x), rank(x))`. -func Cumsum(scope *Scope, x tf.Output, axis tf.Output, optional ...CumsumAttr) (out tf.Output) { +// start: First entry in the range. +// stop: Last entry in the range. +// num: Number of values to generate. +// +// Returns 1-D. The generated values. +func LinSpace(scope *Scope, start tf.Output, stop tf.Output, num tf.Output) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "Cumsum", + Type: "LinSpace", Input: []tf.Input{ - x, axis, + start, stop, num, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// QuantizedRelu6Attr is an optional argument to QuantizedRelu6. -type QuantizedRelu6Attr func(optionalAttr) +// DestroyResourceOpAttr is an optional argument to DestroyResourceOp. +type DestroyResourceOpAttr func(optionalAttr) -// QuantizedRelu6OutType sets the optional out_type attribute to value. -// If not specified, defaults to DT_QUINT8 -func QuantizedRelu6OutType(value tf.DataType) QuantizedRelu6Attr { +// DestroyResourceOpIgnoreLookupError sets the optional ignore_lookup_error attribute to value. 
+// +// value: whether to ignore the error when the resource +// doesn't exist. +// If not specified, defaults to true +func DestroyResourceOpIgnoreLookupError(value bool) DestroyResourceOpAttr { return func(m optionalAttr) { - m["out_type"] = value + m["ignore_lookup_error"] = value } } -// Computes Quantized Rectified Linear 6: `min(max(features, 0), 6)` +// Deletes the resource specified by the handle. // -// Arguments: +// All subsequent operations using the resource will result in a NotFound +// error status. // -// min_features: The float value that the lowest quantized value represents. -// max_features: The float value that the highest quantized value represents. +// Arguments: +// resource: handle to the resource to delete. // -// Returns Has the same output shape as "features".The float value that the lowest quantized value represents.The float value that the highest quantized value represents. -func QuantizedRelu6(scope *Scope, features tf.Output, min_features tf.Output, max_features tf.Output, optional ...QuantizedRelu6Attr) (activations tf.Output, min_activations tf.Output, max_activations tf.Output) { +// Returns the created operation. +func DestroyResourceOp(scope *Scope, resource tf.Output, optional ...DestroyResourceOpAttr) (o *tf.Operation) { if scope.Err() != nil { return } @@ -7582,129 +9891,103 @@ func QuantizedRelu6(scope *Scope, features tf.Output, min_features tf.Output, ma a(attrs) } opspec := tf.OpSpec{ - Type: "QuantizedRelu6", + Type: "DestroyResourceOp", Input: []tf.Input{ - features, min_features, max_features, + resource, }, Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return scope.AddOperation(opspec) } -// FixedLengthRecordReaderV2Attr is an optional argument to FixedLengthRecordReaderV2. -type FixedLengthRecordReaderV2Attr func(optionalAttr) - -// FixedLengthRecordReaderV2HeaderBytes sets the optional header_bytes attribute to value. 
-// -// value: Number of bytes in the header, defaults to 0. -// If not specified, defaults to 0 -func FixedLengthRecordReaderV2HeaderBytes(value int64) FixedLengthRecordReaderV2Attr { - return func(m optionalAttr) { - m["header_bytes"] = value - } -} +// LRNAttr is an optional argument to LRN. +type LRNAttr func(optionalAttr) -// FixedLengthRecordReaderV2FooterBytes sets the optional footer_bytes attribute to value. +// LRNDepthRadius sets the optional depth_radius attribute to value. // -// value: Number of bytes in the footer, defaults to 0. -// If not specified, defaults to 0 -func FixedLengthRecordReaderV2FooterBytes(value int64) FixedLengthRecordReaderV2Attr { +// value: 0-D. Half-width of the 1-D normalization window. +// If not specified, defaults to 5 +func LRNDepthRadius(value int64) LRNAttr { return func(m optionalAttr) { - m["footer_bytes"] = value + m["depth_radius"] = value } } -// FixedLengthRecordReaderV2HopBytes sets the optional hop_bytes attribute to value. +// LRNBias sets the optional bias attribute to value. // -// value: Number of bytes to hop before each read. Default of 0 means using -// record_bytes. -// If not specified, defaults to 0 -func FixedLengthRecordReaderV2HopBytes(value int64) FixedLengthRecordReaderV2Attr { +// value: An offset (usually positive to avoid dividing by 0). +// If not specified, defaults to 1 +func LRNBias(value float32) LRNAttr { return func(m optionalAttr) { - m["hop_bytes"] = value + m["bias"] = value } } -// FixedLengthRecordReaderV2Container sets the optional container attribute to value. +// LRNAlpha sets the optional alpha attribute to value. // -// value: If non-empty, this reader is placed in the given container. -// Otherwise, a default container is used. -// If not specified, defaults to "" -func FixedLengthRecordReaderV2Container(value string) FixedLengthRecordReaderV2Attr { +// value: A scale factor, usually positive. 
+// If not specified, defaults to 1 +func LRNAlpha(value float32) LRNAttr { return func(m optionalAttr) { - m["container"] = value + m["alpha"] = value } } -// FixedLengthRecordReaderV2SharedName sets the optional shared_name attribute to value. +// LRNBeta sets the optional beta attribute to value. // -// value: If non-empty, this reader is named in the given bucket -// with this shared_name. Otherwise, the node name is used instead. -// If not specified, defaults to "" -func FixedLengthRecordReaderV2SharedName(value string) FixedLengthRecordReaderV2Attr { +// value: An exponent. +// If not specified, defaults to 0.5 +func LRNBeta(value float32) LRNAttr { return func(m optionalAttr) { - m["shared_name"] = value + m["beta"] = value } } -// FixedLengthRecordReaderV2Encoding sets the optional encoding attribute to value. +// Local Response Normalization. // -// value: The type of encoding for the file. Currently ZLIB and GZIP -// are supported. Defaults to none. -// If not specified, defaults to "" -func FixedLengthRecordReaderV2Encoding(value string) FixedLengthRecordReaderV2Attr { - return func(m optionalAttr) { - m["encoding"] = value - } -} - -// A Reader that outputs fixed-length records from a file. +// The 4-D `input` tensor is treated as a 3-D array of 1-D vectors (along the last +// dimension), and each vector is normalized independently. Within a given vector, +// each component is divided by the weighted, squared sum of inputs within +// `depth_radius`. In detail, // -// Arguments: -// record_bytes: Number of bytes in the record. +// sqr_sum[a, b, c, d] = +// sum(input[a, b, c, d - depth_radius : d + depth_radius + 1] ** 2) +// output = input / (bias + alpha * sqr_sum) ** beta // -// Returns The handle to reference the Reader. 
-func FixedLengthRecordReaderV2(scope *Scope, record_bytes int64, optional ...FixedLengthRecordReaderV2Attr) (reader_handle tf.Output) { +// For details, see [Krizhevsky et al., ImageNet classification with deep +// convolutional neural networks (NIPS 2012)](http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks). +// +// Arguments: +// input: 4-D. +func LRN(scope *Scope, input tf.Output, optional ...LRNAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"record_bytes": record_bytes} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "FixedLengthRecordReaderV2", - + Type: "LRN", + Input: []tf.Input{ + input, + }, Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Converts each string in the input Tensor to its hash mod by a number of buckets. -// -// The hash function is deterministic on the content of the string within the -// process. -// -// Note that the hash function may change from time to time. -// This functionality will be deprecated and it's recommended to use -// `tf.string_to_hash_bucket_fast()` or `tf.string_to_hash_bucket_strong()`. -// -// Arguments: -// -// num_buckets: The number of buckets. -// -// Returns A Tensor of the same shape as the input `string_tensor`. -func StringToHashBucket(scope *Scope, string_tensor tf.Output, num_buckets int64) (output tf.Output) { +// Creates a dataset that zips together `input_datasets`. 
+func ZipDataset(scope *Scope, input_datasets []tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"num_buckets": num_buckets} + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} opspec := tf.OpSpec{ - Type: "StringToHashBucket", + Type: "ZipDataset", Input: []tf.Input{ - string_tensor, + tf.OutputList(input_datasets), }, Attrs: attrs, } @@ -7712,158 +9995,243 @@ func StringToHashBucket(scope *Scope, string_tensor tf.Output, num_buckets int64 return op.Output(0) } -// Computes gradients for the exponential linear (Elu) operation. +// ResourceSparseApplyAdagradAttr is an optional argument to ResourceSparseApplyAdagrad. +type ResourceSparseApplyAdagradAttr func(optionalAttr) + +// ResourceSparseApplyAdagradUseLocking sets the optional use_locking attribute to value. +// +// value: If `True`, updating of the var and accum tensors will be protected +// by a lock; otherwise the behavior is undefined, but may exhibit less +// contention. +// If not specified, defaults to false +func ResourceSparseApplyAdagradUseLocking(value bool) ResourceSparseApplyAdagradAttr { + return func(m optionalAttr) { + m["use_locking"] = value + } +} + +// Update relevant entries in '*var' and '*accum' according to the adagrad scheme. +// +// That is for rows we have grad for, we update var and accum as follows: +// accum += grad * grad +// var -= lr * grad * (1 / sqrt(accum)) // // Arguments: -// gradients: The backpropagated gradients to the corresponding Elu operation. -// outputs: The outputs of the corresponding Elu operation. +// var_: Should be from a Variable(). +// accum: Should be from a Variable(). +// lr: Learning rate. Must be a scalar. +// grad: The gradient. +// indices: A vector of indices into the first dimension of var and accum. // -// Returns The gradients: `gradients * (outputs + 1)` if outputs < 0, -// `gradients` otherwise. 
-func EluGrad(scope *Scope, gradients tf.Output, outputs tf.Output) (backprops tf.Output) { +// Returns the created operation. +func ResourceSparseApplyAdagrad(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, grad tf.Output, indices tf.Output, optional ...ResourceSparseApplyAdagradAttr) (o *tf.Operation) { if scope.Err() != nil { return } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "EluGrad", + Type: "ResourceSparseApplyAdagrad", Input: []tf.Input{ - gradients, outputs, + var_, accum, lr, grad, indices, }, + Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } -// Creates a dataset that contains `count` elements from the `input_dataset`. +// 2D real-valued fast Fourier transform. // -// Arguments: +// Computes the 2-dimensional discrete Fourier transform of a real-valued signal +// over the inner-most 2 dimensions of `input`. // -// count: A scalar representing the number of elements from the `input_dataset` -// that should be taken. A value of `-1` indicates that all of `input_dataset` -// is taken. +// Since the DFT of a real signal is Hermitian-symmetric, `RFFT2D` only returns the +// `fft_length / 2 + 1` unique components of the FFT for the inner-most dimension +// of `output`: the zero-frequency term, followed by the `fft_length / 2` +// positive-frequency terms. // +// Along each axis `RFFT2D` is computed on, if `fft_length` is smaller than the +// corresponding dimension of `input`, the dimension is cropped. If it is larger, +// the dimension is padded with zeros. // -func TakeDataset(scope *Scope, input_dataset tf.Output, count tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { +// Arguments: +// input: A float32 tensor. +// fft_length: An int32 tensor of shape [2]. The FFT length for each dimension. +// +// Returns A complex64 tensor of the same rank as `input`. 
The inner-most 2 +// dimensions of `input` are replaced with their 2D Fourier transform. The +// inner-most dimension contains `fft_length / 2 + 1` unique frequency +// components. +// +// @compatibility(numpy) +// Equivalent to np.fft.rfft2 +// @end_compatibility +func RFFT2D(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} opspec := tf.OpSpec{ - Type: "TakeDataset", + Type: "RFFT2D", Input: []tf.Input{ - input_dataset, count, + input, fft_length, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// The gradient operator for the SparseAdd op. +// ResizeAreaAttr is an optional argument to ResizeArea. +type ResizeAreaAttr func(optionalAttr) + +// ResizeAreaAlignCorners sets the optional align_corners attribute to value. // -// The SparseAdd op calculates A + B, where A, B, and the sum are all represented -// as `SparseTensor` objects. This op takes in the upstream gradient w.r.t. -// non-empty values of the sum, and outputs the gradients w.r.t. the non-empty -// values of A and B. +// value: If true, the centers of the 4 corner pixels of the input and output tensors are +// aligned, preserving the values at the corner pixels. Defaults to false. +// If not specified, defaults to false +func ResizeAreaAlignCorners(value bool) ResizeAreaAttr { + return func(m optionalAttr) { + m["align_corners"] = value + } +} + +// Resize `images` to `size` using area interpolation. +// +// Input images can be of different types but output images are always float. +// +// The range of pixel values for the output image might be slightly different +// from the range for the input image because of limited numerical precision. +// To guarantee an output range, for example `[0.0, 1.0]`, apply +// `tf.clip_by_value` to the output. 
+// +// Each output pixel is computed by first transforming the pixel's footprint into +// the input tensor and then averaging the pixels that intersect the footprint. An +// input pixel's contribution to the average is weighted by the fraction of its +// area that intersects the footprint. This is the same as OpenCV's INTER_AREA. // // Arguments: -// backprop_val_grad: 1-D with shape `[nnz(sum)]`. The gradient with respect to -// the non-empty values of the sum. -// a_indices: 2-D. The `indices` of the `SparseTensor` A, size `[nnz(A), ndims]`. -// b_indices: 2-D. The `indices` of the `SparseTensor` B, size `[nnz(B), ndims]`. -// sum_indices: 2-D. The `indices` of the sum `SparseTensor`, size -// `[nnz(sum), ndims]`. +// images: 4-D with shape `[batch, height, width, channels]`. +// size: A 1-D int32 Tensor of 2 elements: `new_height, new_width`. The +// new size for the images. // -// Returns 1-D with shape `[nnz(A)]`. The gradient with respect to the -// non-empty values of A.1-D with shape `[nnz(B)]`. The gradient with respect to the -// non-empty values of B. -func SparseAddGrad(scope *Scope, backprop_val_grad tf.Output, a_indices tf.Output, b_indices tf.Output, sum_indices tf.Output) (a_val_grad tf.Output, b_val_grad tf.Output) { +// Returns 4-D with shape +// `[batch, new_height, new_width, channels]`. +func ResizeArea(scope *Scope, images tf.Output, size tf.Output, optional ...ResizeAreaAttr) (resized_images tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "SparseAddGrad", + Type: "ResizeArea", Input: []tf.Input{ - backprop_val_grad, a_indices, b_indices, sum_indices, + images, size, }, + Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) + return op.Output(0) } -// Computes atan of x element-wise. -func Atan(scope *Scope, x tf.Output) (y tf.Output) { +// Pads a tensor with zeros. 
+// +// This operation pads an `input` with zeros according to the `paddings` you +// specify. `paddings` is an integer tensor with shape `[n, 2]`, where n is the +// rank of `input`. For each dimension D of `input`, `paddings[D, 0]` indicates +// how many zeros to add before the contents of `input` in that dimension, and +// `paddings[D, 1]` indicates how many zeros to add after the contents of `input` +// in that dimension. +// +// The padded size of each dimension D of the output is: +// +// `paddings(D, 0) + input.dim_size(D) + paddings(D, 1)` +// +// For example: +// +// ``` +// # 't' is [[1, 1], [2, 2]] +// # 'paddings' is [[1, 1], [2, 2]] +// # rank of 't' is 2 +// pad(t, paddings) ==> [[0, 0, 0, 0, 0, 0] +// [0, 0, 1, 1, 0, 0] +// [0, 0, 2, 2, 0, 0] +// [0, 0, 0, 0, 0, 0]] +// ``` +func Pad(scope *Scope, input tf.Output, paddings tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Atan", + Type: "Pad", Input: []tf.Input{ - x, + input, paddings, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Encode audio data using the WAV file format. -// -// This operation will generate a string suitable to be saved out to create a .wav -// audio file. It will be encoded in the 16-bit PCM format. It takes in float -// values in the range -1.0f to 1.0f, and any outside that value will be clamped to -// that range. -// -// `audio` is a 2-D float Tensor of shape `[length, channels]`. -// `sample_rate` is a scalar Tensor holding the rate to use (e.g. 44100). +// Checks whether a resource handle-based variable has been initialized. // // Arguments: -// audio: 2-D with shape `[length, channels]`. -// sample_rate: Scalar containing the sample frequency. +// resource: the input resource handle. // -// Returns 0-D. WAV-encoded file contents. 
-func EncodeWav(scope *Scope, audio tf.Output, sample_rate tf.Output) (contents tf.Output) { +// Returns a scalar boolean which is true if the variable has been +// initialized. 
+func VarIsInitializedOp(scope *Scope, resource tf.Output) (is_initialized tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "EncodeWav", + Type: "VarIsInitializedOp", Input: []tf.Input{ - audio, sample_rate, + resource, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Converts each string in the input Tensor to its hash mod by a number of buckets. +// StatelessRandomUniformAttr is an optional argument to StatelessRandomUniform. +type StatelessRandomUniformAttr func(optionalAttr) + +// StatelessRandomUniformDtype sets the optional dtype attribute to value. // -// The hash function is deterministic on the content of the string within the -// process. The hash function is a keyed hash function, where attribute `key` -// defines the key of the hash function. `key` is an array of 2 elements. +// value: The type of the output. +// If not specified, defaults to DT_FLOAT +func StatelessRandomUniformDtype(value tf.DataType) StatelessRandomUniformAttr { + return func(m optionalAttr) { + m["dtype"] = value + } +} + +// Outputs deterministic pseudorandom values from a uniform distribution. // -// A strong hash is important when inputs may be malicious, e.g. URLs with -// additional components. Adversaries could try to make their inputs hash to the -// same bucket for a denial-of-service attack or to skew the results. A strong -// hash prevents this by making it difficult, if not infeasible, to compute inputs -// that hash to the same bucket. This comes at a cost of roughly 4x higher compute -// time than `tf.string_to_hash_bucket_fast`. +// The generated values follow a uniform distribution in the range `[0, 1)`. The +// lower bound 0 is included in the range, while the upper bound 1 is excluded. +// +// The outputs are a deterministic function of `shape` and `seed`. // // Arguments: -// input: The strings to assign a hash bucket. -// num_buckets: The number of buckets. 
-// key: The key for the keyed hash function passed as a list of two uint64 -// elements. +// shape: The shape of the output tensor. +// seed: 2 seeds (shape [2]). // -// Returns A Tensor of the same shape as the input `string_tensor`. -func StringToHashBucketStrong(scope *Scope, input tf.Output, num_buckets int64, key []int64) (output tf.Output) { +// Returns Random values with specified shape. +func StatelessRandomUniform(scope *Scope, shape tf.Output, seed tf.Output, optional ...StatelessRandomUniformAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"num_buckets": num_buckets, "key": key} + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "StringToHashBucketStrong", + Type: "StatelessRandomUniform", Input: []tf.Input{ - input, + shape, seed, }, Attrs: attrs, } @@ -7871,260 +10239,255 @@ func StringToHashBucketStrong(scope *Scope, input tf.Output, num_buckets int64, return op.Output(0) } -// Applies softmax to a batched N-D `SparseTensor`. -// -// The inputs represent an N-D SparseTensor with logical shape `[..., B, C]` -// (where `N >= 2`), and with indices sorted in the canonical lexicographic order. -// -// This op is equivalent to applying the normal `tf.nn.softmax()` to each innermost -// logical submatrix with shape `[B, C]`, but with the catch that *the implicitly -// zero elements do not participate*. Specifically, the algorithm is equivalent -// to the following: -// -// (1) Applies `tf.nn.softmax()` to a densified view of each innermost submatrix -// with shape `[B, C]`, along the size-C dimension; -// (2) Masks out the original implicitly-zero locations; -// (3) Renormalizes the remaining elements. -// -// Hence, the `SparseTensor` result has exactly the same non-zero indices and -// shape. +// Makes its input available to the next iteration. // // Arguments: -// sp_indices: 2-D. 
`NNZ x R` matrix with the indices of non-empty values in a -// SparseTensor, in canonical ordering. -// sp_values: 1-D. `NNZ` non-empty values corresponding to `sp_indices`. -// sp_shape: 1-D. Shape of the input SparseTensor. +// data: The tensor to be made available to the next iteration. // -// Returns 1-D. The `NNZ` values for the result `SparseTensor`. -func SparseSoftmax(scope *Scope, sp_indices tf.Output, sp_values tf.Output, sp_shape tf.Output) (output tf.Output) { +// Returns The same tensor as `data`. +func NextIteration(scope *Scope, data tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "SparseSoftmax", + Type: "NextIteration", Input: []tf.Input{ - sp_indices, sp_values, sp_shape, + data, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Partitions `data` into `num_partitions` tensors using indices from `partitions`. -// -// For each index tuple `js` of size `partitions.ndim`, the slice `data[js, ...]` -// becomes part of `outputs[partitions[js]]`. The slices with `partitions[js] = i` -// are placed in `outputs[i]` in lexicographic order of `js`, and the first -// dimension of `outputs[i]` is the number of entries in `partitions` equal to `i`. -// In detail, -// -// ```python -// outputs[i].shape = [sum(partitions == i)] + data.shape[partitions.ndim:] +// Output a fact about factorials. +func Fact(scope *Scope) (fact tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Fact", + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// AngleAttr is an optional argument to Angle. +type AngleAttr func(optionalAttr) + +// AngleTout sets the optional Tout attribute to value. +// If not specified, defaults to DT_FLOAT +func AngleTout(value tf.DataType) AngleAttr { + return func(m optionalAttr) { + m["Tout"] = value + } +} + +// Returns the argument of a complex number. // -// outputs[i] = pack([data[js, ...] 
for js if partitions[js] == i]) -// ``` +// Given a tensor `input` of complex numbers, this operation returns a tensor of +// type `float` that is the argument of each element in `input`. All elements in +// `input` must be complex numbers of the form \\(a + bj\\), where *a* +// is the real part and *b* is the imaginary part. // -// `data.shape` must start with `partitions.shape`. +// The argument returned by this operation is of the form \\(atan2(b, a)\\). // // For example: // -// ```python -// # Scalar partitions. -// partitions = 1 -// num_partitions = 2 -// data = [10, 20] -// outputs[0] = [] # Empty with shape [0, 2] -// outputs[1] = [[10, 20]] -// -// # Vector partitions. -// partitions = [0, 0, 1, 1, 0] -// num_partitions = 2 -// data = [10, 20, 30, 40, 50] -// outputs[0] = [10, 20, 50] -// outputs[1] = [30, 40] +// ``` +// # tensor 'input' is [-2.25 + 4.75j, 3.25 + 5.75j] +// tf.angle(input) ==> [2.0132, 1.056] // ``` // -// See `dynamic_stitch` for an example on how to merge partitions back. -// -//
-// -//
-// -// Arguments: -// -// partitions: Any shape. Indices in the range `[0, num_partitions)`. -// num_partitions: The number of partitions to output. -func DynamicPartition(scope *Scope, data tf.Output, partitions tf.Output, num_partitions int64) (outputs []tf.Output) { +// @compatibility(numpy) +// Equivalent to np.angle. +// @end_compatibility +func Angle(scope *Scope, input tf.Output, optional ...AngleAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"num_partitions": num_partitions} + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "DynamicPartition", + Type: "Angle", Input: []tf.Input{ - data, partitions, + input, }, Attrs: attrs, } op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if outputs, idx, err = makeOutputList(op, idx, "outputs"); err != nil { - scope.UpdateErr("DynamicPartition", err) - return - } - return outputs + return op.Output(0) } -// ResourceApplyAdagradAttr is an optional argument to ResourceApplyAdagrad. -type ResourceApplyAdagradAttr func(optionalAttr) +// VarHandleOpAttr is an optional argument to VarHandleOp. +type VarHandleOpAttr func(optionalAttr) -// ResourceApplyAdagradUseLocking sets the optional use_locking attribute to value. +// VarHandleOpContainer sets the optional container attribute to value. // -// value: If `True`, updating of the var and accum tensors will be protected -// by a lock; otherwise the behavior is undefined, but may exhibit less -// contention. -// If not specified, defaults to false -func ResourceApplyAdagradUseLocking(value bool) ResourceApplyAdagradAttr { +// value: the container this variable is placed in. +// If not specified, defaults to "" +func VarHandleOpContainer(value string) VarHandleOpAttr { return func(m optionalAttr) { - m["use_locking"] = value + m["container"] = value } } -// Update '*var' according to the adagrad scheme. 
+// VarHandleOpSharedName sets the optional shared_name attribute to value. // -// accum += grad * grad -// var -= lr * grad * (1 / sqrt(accum)) +// value: the name by which this variable is referred to. +// If not specified, defaults to "" +func VarHandleOpSharedName(value string) VarHandleOpAttr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// Creates a handle to a Variable resource. // // Arguments: -// var_: Should be from a Variable(). -// accum: Should be from a Variable(). -// lr: Scaling factor. Must be a scalar. -// grad: The gradient. -// -// Returns the created operation. -func ResourceApplyAdagrad(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, grad tf.Output, optional ...ResourceApplyAdagradAttr) (o *tf.Operation) { +// dtype: the type of this variable. Must agree with the dtypes +// of all ops using this variable. +// shape: The (possibly partially specified) shape of this variable. +func VarHandleOp(scope *Scope, dtype tf.DataType, shape tf.Shape, optional ...VarHandleOpAttr) (resource tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"dtype": dtype, "shape": shape} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "ResourceApplyAdagrad", - Input: []tf.Input{ - var_, accum, lr, grad, - }, + Type: "VarHandleOp", + Attrs: attrs, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0) } -// Return the shape of s0 op s1 with broadcast. +// Elementwise computes the bitwise XOR of `x` and `y`. // -// Given `s0` and `s1`, tensors that represent shapes, compute `r0`, the -// broadcasted shape. `s0`, `s1` and `r0` are all integer vectors. -func BroadcastArgs(scope *Scope, s0 tf.Output, s1 tf.Output) (r0 tf.Output) { +// The result will have those bits set, that are different in `x` and `y`. The +// computation is performed on the underlying representations of `x` and `y`. 
+func BitwiseXor(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "BroadcastArgs", + Type: "BitwiseXor", Input: []tf.Input{ - s0, s1, + x, y, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// DataFormatDimMapAttr is an optional argument to DataFormatDimMap. -type DataFormatDimMapAttr func(optionalAttr) - -// DataFormatDimMapSrcFormat sets the optional src_format attribute to value. +// Deserialize `SparseTensor` objects. // -// value: source data format. -// If not specified, defaults to "NHWC" -func DataFormatDimMapSrcFormat(value string) DataFormatDimMapAttr { - return func(m optionalAttr) { - m["src_format"] = value - } -} - -// DataFormatDimMapDstFormat sets the optional dst_format attribute to value. +// The input `serialized_sparse` must have the shape `[?, ?, ..., ?, 3]` where +// the last dimension stores serialized `SparseTensor` objects and the other N +// dimensions (N >= 0) correspond to a batch. The ranks of the original +// `SparseTensor` objects must all match. When the final `SparseTensor` is +// created, its rank is the rank of the incoming `SparseTensor` objects plus N; +// the sparse tensors have been concatenated along new dimensions, one for each +// batch. // -// value: destination data format. -// If not specified, defaults to "NCHW" -func DataFormatDimMapDstFormat(value string) DataFormatDimMapAttr { - return func(m optionalAttr) { - m["dst_format"] = value - } -} - -// Returns the dimension index in the destination data format given the one in +// The output `SparseTensor` object's shape values for the original dimensions +// are the max across the input `SparseTensor` objects' shape values for the +// corresponding dimensions. The new dimensions match the size of the batch. // -// the source data format. +// The input `SparseTensor` objects' indices are assumed ordered in +// standard lexicographic order. 
If this is not the case, after this +// step run `SparseReorder` to restore index ordering. +// +// For example, if the serialized input is a `[2 x 3]` matrix representing two +// original `SparseTensor` objects: +// +// index = [ 0] +// [10] +// [20] +// values = [1, 2, 3] +// shape = [50] +// +// and +// +// index = [ 2] +// [10] +// values = [4, 5] +// shape = [30] +// +// then the final deserialized `SparseTensor` will be: +// +// index = [0 0] +// [0 10] +// [0 20] +// [1 2] +// [1 10] +// values = [1, 2, 3, 4, 5] +// shape = [2 50] // // Arguments: -// x: A Tensor with each element as a dimension index in source data format. -// Must be in the range [-4, 4). -// -// Returns A Tensor with each element as a dimension index in destination data format. -func DataFormatDimMap(scope *Scope, x tf.Output, optional ...DataFormatDimMapAttr) (y tf.Output) { +// serialized_sparse: The serialized `SparseTensor` objects. The last dimension +// must have 3 columns. +// dtype: The `dtype` of the serialized `SparseTensor` objects. +func DeserializeSparse(scope *Scope, serialized_sparse tf.Output, dtype tf.DataType) (sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } + attrs := map[string]interface{}{"dtype": dtype} opspec := tf.OpSpec{ - Type: "DataFormatDimMap", + Type: "DeserializeSparse", Input: []tf.Input{ - x, + serialized_sparse, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1), op.Output(2) } -// ResourceApplyPowerSignAttr is an optional argument to ResourceApplyPowerSign. -type ResourceApplyPowerSignAttr func(optionalAttr) +// ResourceApplyRMSPropAttr is an optional argument to ResourceApplyRMSProp. +type ResourceApplyRMSPropAttr func(optionalAttr) -// ResourceApplyPowerSignUseLocking sets the optional use_locking attribute to value. 
+// ResourceApplyRMSPropUseLocking sets the optional use_locking attribute to value. // -// value: If `True`, updating of the var and m tensors is -// protected by a lock; otherwise the behavior is undefined, but may exhibit less +// value: If `True`, updating of the var, ms, and mom tensors is protected +// by a lock; otherwise the behavior is undefined, but may exhibit less // contention. // If not specified, defaults to false -func ResourceApplyPowerSignUseLocking(value bool) ResourceApplyPowerSignAttr { +func ResourceApplyRMSPropUseLocking(value bool) ResourceApplyRMSPropAttr { return func(m optionalAttr) { m["use_locking"] = value } } -// Update '*var' according to the AddSign update. +// Update '*var' according to the RMSProp algorithm. // -// m_t <- beta1 * m_{t-1} + (1 - beta1) * g -// update <- exp(logbase * sign_decay * sign(g) * sign(m_t)) * g -// variable <- variable - lr_t * update +// Note that in dense implementation of this algorithm, ms and mom will +// update even if the grad is zero, but in this sparse implementation, ms +// and mom will not update in iterations during which the grad is zero. +// +// mean_square = decay * mean_square + (1-decay) * gradient ** 2 +// Delta = learning_rate * gradient / sqrt(mean_square + epsilon) +// +// ms <- rho * ms_{t-1} + (1-rho) * grad * grad +// mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon) +// var <- var - mom // // Arguments: // var_: Should be from a Variable(). -// m: Should be from a Variable(). +// ms: Should be from a Variable(). +// mom: Should be from a Variable(). // lr: Scaling factor. Must be a scalar. -// logbase: Must be a scalar. -// sign_decay: Must be a scalar. -// beta: Must be a scalar. +// rho: Decay rate. Must be a scalar. +// +// epsilon: Ridge term. Must be a scalar. // grad: The gradient. // // Returns the created operation. 
-func ResourceApplyPowerSign(scope *Scope, var_ tf.Output, m tf.Output, lr tf.Output, logbase tf.Output, sign_decay tf.Output, beta tf.Output, grad tf.Output, optional ...ResourceApplyPowerSignAttr) (o *tf.Operation) { +func ResourceApplyRMSProp(scope *Scope, var_ tf.Output, ms tf.Output, mom tf.Output, lr tf.Output, rho tf.Output, momentum tf.Output, epsilon tf.Output, grad tf.Output, optional ...ResourceApplyRMSPropAttr) (o *tf.Operation) { if scope.Err() != nil { return } @@ -8133,76 +10496,77 @@ func ResourceApplyPowerSign(scope *Scope, var_ tf.Output, m tf.Output, lr tf.Out a(attrs) } opspec := tf.OpSpec{ - Type: "ResourceApplyPowerSign", + Type: "ResourceApplyRMSProp", Input: []tf.Input{ - var_, m, lr, logbase, sign_decay, beta, grad, + var_, ms, mom, lr, rho, momentum, epsilon, grad, }, Attrs: attrs, } return scope.AddOperation(opspec) } -// CumprodAttr is an optional argument to Cumprod. -type CumprodAttr func(optionalAttr) +// ResourceScatterNdUpdateAttr is an optional argument to ResourceScatterNdUpdate. +type ResourceScatterNdUpdateAttr func(optionalAttr) -// CumprodExclusive sets the optional exclusive attribute to value. +// ResourceScatterNdUpdateUseLocking sets the optional use_locking attribute to value. // -// value: If `True`, perform exclusive cumprod. -// If not specified, defaults to false -func CumprodExclusive(value bool) CumprodAttr { +// value: An optional bool. Defaults to True. If True, the assignment will +// be protected by a lock; otherwise the behavior is undefined, +// but may exhibit less contention. +// If not specified, defaults to true +func ResourceScatterNdUpdateUseLocking(value bool) ResourceScatterNdUpdateAttr { return func(m optionalAttr) { - m["exclusive"] = value + m["use_locking"] = value } } -// CumprodReverse sets the optional reverse attribute to value. +// Applies sparse `updates` to individual values or slices within a given // -// value: A `bool` (default: False). 
-// If not specified, defaults to false -func CumprodReverse(value bool) CumprodAttr { - return func(m optionalAttr) { - m["reverse"] = value - } -} - -// Compute the cumulative product of the tensor `x` along `axis`. +// variable according to `indices`. // -// By default, this op performs an inclusive cumprod, which means that the first -// element of the input is identical to the first element of the output: +// `ref` is a `Tensor` with rank `P` and `indices` is a `Tensor` of rank `Q`. // -// ```python -// tf.cumprod([a, b, c]) # => [a, a * b, a * b * c] -// ``` +// `indices` must be integer tensor, containing indices into `ref`. +// It must be shape `[d_0, ..., d_{Q-2}, K]` where `0 < K <= P`. // -// By setting the `exclusive` kwarg to `True`, an exclusive cumprod is -// performed instead: +// The innermost dimension of `indices` (with length `K`) corresponds to +// indices into elements (if `K = P`) or slices (if `K < P`) along the `K`th +// dimension of `ref`. +// +// `updates` is `Tensor` of rank `Q-1+P-K` with shape: // -// ```python -// tf.cumprod([a, b, c], exclusive=True) # => [1, a, a * b] +// ``` +// [d_0, ..., d_{Q-2}, ref.shape[K], ..., ref.shape[P-1]]. // ``` // -// By setting the `reverse` kwarg to `True`, the cumprod is performed in the -// opposite direction: +// For example, say we want to update 4 scattered elements to a rank-1 tensor to +// 8 elements. In Python, that update would look like this: // // ```python -// tf.cumprod([a, b, c], reverse=True) # => [a * b * c, b * c, c] +// ref = tfe.Variable([1, 2, 3, 4, 5, 6, 7, 8]) +// indices = tf.constant([[4], [3], [1] ,[7]]) +// updates = tf.constant([9, 10, 11, 12]) +// update = tf.scatter_nd_update(ref, indices, updates) +// with tf.Session() as sess: +// print sess.run(update) // ``` // -// This is more efficient than using separate `tf.reverse` ops. 
+// The resulting update to ref would look like this: // -// The `reverse` and `exclusive` kwargs can also be combined: +// [1, 11, 3, 10, 9, 6, 7, 12] // -// ```python -// tf.cumprod([a, b, c], exclusive=True, reverse=True) # => [b * c, c, 1] -// ``` +// See @{tf.scatter_nd} for more details about how to make updates to +// slices. // // Arguments: -// x: A `Tensor`. Must be one of the following types: `float32`, `float64`, -// `int64`, `int32`, `uint8`, `uint16`, `int16`, `int8`, `complex64`, -// `complex128`, `qint8`, `quint8`, `qint32`, `half`. -// axis: A `Tensor` of type `int32` (default: 0). Must be in the range -// `[-rank(x), rank(x))`. -func Cumprod(scope *Scope, x tf.Output, axis tf.Output, optional ...CumprodAttr) (out tf.Output) { +// ref: A resource handle. Must be from a VarHandleOp. +// indices: A Tensor. Must be one of the following types: int32, int64. +// A tensor of indices into ref. +// updates: A Tensor. Must have the same type as ref. A tensor of updated +// values to add to ref. +// +// Returns the created operation. +func ResourceScatterNdUpdate(scope *Scope, ref tf.Output, indices tf.Output, updates tf.Output, optional ...ResourceScatterNdUpdateAttr) (o *tf.Operation) { if scope.Err() != nil { return } @@ -8211,101 +10575,109 @@ func Cumprod(scope *Scope, x tf.Output, axis tf.Output, optional ...CumprodAttr) a(attrs) } opspec := tf.OpSpec{ - Type: "Cumprod", + Type: "ResourceScatterNdUpdate", Input: []tf.Input{ - x, axis, + ref, indices, updates, }, Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } -// Computes the mean along segments of a tensor. +// SqueezeAttr is an optional argument to Squeeze. +type SqueezeAttr func(optionalAttr) + +// SqueezeAxis sets the optional axis attribute to value. // -// Read @{$math_ops#segmentation$the section on segmentation} for an explanation of -// segments. +// value: If specified, only squeezes the dimensions listed. 
The dimension +// index starts at 0. It is an error to squeeze a dimension that is not 1. Must +// be in the range `[-rank(input), rank(input))`. +// If not specified, defaults to <> // -// Computes a tensor such that -// \\(output_i = \frac{\sum_j data_j}{N}\\) where `mean` is -// over `j` such that `segment_ids[j] == i` and `N` is the total number of -// values summed. +// REQUIRES: len(value) >= 0 +func SqueezeAxis(value []int64) SqueezeAttr { + return func(m optionalAttr) { + m["squeeze_dims"] = value + } +} + +// Removes dimensions of size 1 from the shape of a tensor. // -// If the mean is empty for a given segment ID `i`, `output[i] = 0`. +// Given a tensor `input`, this operation returns a tensor of the same type with +// all dimensions of size 1 removed. If you don't want to remove all size 1 +// dimensions, you can remove specific size 1 dimensions by specifying +// `axis`. // -//
-// -//
+// For example: // -// Arguments: +// ``` +// # 't' is a tensor of shape [1, 2, 1, 3, 1, 1] +// shape(squeeze(t)) ==> [2, 3] +// ``` // -// segment_ids: A 1-D tensor whose rank is equal to the rank of `data`'s -// first dimension. Values should be sorted and can be repeated. +// Or, to remove specific size 1 dimensions: // -// Returns Has same shape as data, except for dimension 0 which -// has size `k`, the number of segments. -func SegmentMean(scope *Scope, data tf.Output, segment_ids tf.Output) (output tf.Output) { +// ``` +// # 't' is a tensor of shape [1, 2, 1, 3, 1, 1] +// shape(squeeze(t, [2, 4])) ==> [1, 2, 3, 1] +// ``` +// +// Arguments: +// input: The `input` to squeeze. +// +// Returns Contains the same data as `input`, but has one or more dimensions of +// size 1 removed. +func Squeeze(scope *Scope, input tf.Output, optional ...SqueezeAttr) (output tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "SegmentMean", + Type: "Squeeze", Input: []tf.Input{ - data, segment_ids, + input, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// ResourceSparseApplyCenteredRMSPropAttr is an optional argument to ResourceSparseApplyCenteredRMSProp. -type ResourceSparseApplyCenteredRMSPropAttr func(optionalAttr) +// ResourceApplyAdadeltaAttr is an optional argument to ResourceApplyAdadelta. +type ResourceApplyAdadeltaAttr func(optionalAttr) -// ResourceSparseApplyCenteredRMSPropUseLocking sets the optional use_locking attribute to value. +// ResourceApplyAdadeltaUseLocking sets the optional use_locking attribute to value. // -// value: If `True`, updating of the var, mg, ms, and mom tensors is -// protected by a lock; otherwise the behavior is undefined, but may exhibit less -// contention. 
+// value: If True, updating of the var, accum and update_accum tensors will be protected by +// a lock; otherwise the behavior is undefined, but may exhibit less contention. // If not specified, defaults to false -func ResourceSparseApplyCenteredRMSPropUseLocking(value bool) ResourceSparseApplyCenteredRMSPropAttr { +func ResourceApplyAdadeltaUseLocking(value bool) ResourceApplyAdadeltaAttr { return func(m optionalAttr) { m["use_locking"] = value } } -// Update '*var' according to the centered RMSProp algorithm. -// -// The centered RMSProp algorithm uses an estimate of the centered second moment -// (i.e., the variance) for normalization, as opposed to regular RMSProp, which -// uses the (uncentered) second moment. This often helps with training, but is -// slightly more expensive in terms of computation and memory. -// -// Note that in dense implementation of this algorithm, mg, ms, and mom will -// update even if the grad is zero, but in this sparse implementation, mg, ms, -// and mom will not update in iterations during which the grad is zero. -// -// mean_square = decay * mean_square + (1-decay) * gradient ** 2 -// mean_grad = decay * mean_grad + (1-decay) * gradient -// Delta = learning_rate * gradient / sqrt(mean_square + epsilon - mean_grad ** 2) +// Update '*var' according to the adadelta scheme. // -// ms <- rho * ms_{t-1} + (1-rho) * grad * grad -// mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon) -// var <- var - mom +// accum = rho() * accum + (1 - rho()) * grad.square(); +// update = (update_accum + epsilon).sqrt() * (accum + epsilon()).rsqrt() * grad; +// update_accum = rho() * update_accum + (1 - rho()) * update.square(); +// var -= update; // // Arguments: // var_: Should be from a Variable(). -// mg: Should be from a Variable(). -// ms: Should be from a Variable(). -// mom: Should be from a Variable(). +// accum: Should be from a Variable(). +// accum_update: Should be from a Variable(). // lr: Scaling factor. Must be a scalar. 
-// rho: Decay rate. Must be a scalar. -// -// epsilon: Ridge term. Must be a scalar. +// rho: Decay factor. Must be a scalar. +// epsilon: Constant factor. Must be a scalar. // grad: The gradient. -// indices: A vector of indices into the first dimension of var, ms and mom. // // Returns the created operation. -func ResourceSparseApplyCenteredRMSProp(scope *Scope, var_ tf.Output, mg tf.Output, ms tf.Output, mom tf.Output, lr tf.Output, rho tf.Output, momentum tf.Output, epsilon tf.Output, grad tf.Output, indices tf.Output, optional ...ResourceSparseApplyCenteredRMSPropAttr) (o *tf.Operation) { +func ResourceApplyAdadelta(scope *Scope, var_ tf.Output, accum tf.Output, accum_update tf.Output, lr tf.Output, rho tf.Output, epsilon tf.Output, grad tf.Output, optional ...ResourceApplyAdadeltaAttr) (o *tf.Operation) { if scope.Err() != nil { return } @@ -8314,32 +10686,69 @@ func ResourceSparseApplyCenteredRMSProp(scope *Scope, var_ tf.Output, mg tf.Outp a(attrs) } opspec := tf.OpSpec{ - Type: "ResourceSparseApplyCenteredRMSProp", + Type: "ResourceApplyAdadelta", Input: []tf.Input{ - var_, mg, ms, mom, lr, rho, momentum, epsilon, grad, indices, + var_, accum, accum_update, lr, rho, epsilon, grad, }, Attrs: attrs, } return scope.AddOperation(opspec) } -// Creates a dataset that batches `batch_size` elements from `input_dataset`. +// NonMaxSuppressionAttr is an optional argument to NonMaxSuppression. +type NonMaxSuppressionAttr func(optionalAttr) + +// NonMaxSuppressionIouThreshold sets the optional iou_threshold attribute to value. // -// Arguments: +// value: A float representing the threshold for deciding whether boxes +// overlap too much with respect to IOU. 
+// If not specified, defaults to 0.5 +func NonMaxSuppressionIouThreshold(value float32) NonMaxSuppressionAttr { + return func(m optionalAttr) { + m["iou_threshold"] = value + } +} + +// Greedily selects a subset of bounding boxes in descending order of score, // -// batch_size: A scalar representing the number of elements to accumulate in a -// batch. +// pruning away boxes that have high intersection-over-union (IOU) overlap +// with previously selected boxes. Bounding boxes are supplied as +// [y1, x1, y2, x2], where (y1, x1) and (y2, x2) are the coordinates of any +// diagonal pair of box corners and the coordinates can be provided as normalized +// (i.e., lying in the interval [0, 1]) or absolute. Note that this algorithm +// is agnostic to where the origin is in the coordinate system. Note that this +// algorithm is invariant to orthogonal transformations and translations +// of the coordinate system; thus translating or reflections of the coordinate +// system result in the same boxes being selected by the algorithm. +// The output of this operation is a set of integers indexing into the input +// collection of bounding boxes representing the selected boxes. The bounding +// box coordinates corresponding to the selected indices can then be obtained +// using the `tf.gather operation`. For example: +// selected_indices = tf.image.non_max_suppression( +// boxes, scores, max_output_size, iou_threshold) +// selected_boxes = tf.gather(boxes, selected_indices) // +// Arguments: +// boxes: A 2-D float tensor of shape `[num_boxes, 4]`. +// scores: A 1-D float tensor of shape `[num_boxes]` representing a single +// score corresponding to each box (each row of boxes). +// max_output_size: A scalar integer tensor representing the maximum number of +// boxes to be selected by non max suppression. 
// -func BatchDataset(scope *Scope, input_dataset tf.Output, batch_size tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { +// Returns A 1-D integer tensor of shape `[M]` representing the selected +// indices from the boxes tensor, where `M <= max_output_size`. +func NonMaxSuppression(scope *Scope, boxes tf.Output, scores tf.Output, max_output_size tf.Output, optional ...NonMaxSuppressionAttr) (selected_indices tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "BatchDataset", + Type: "NonMaxSuppression", Input: []tf.Input{ - input_dataset, batch_size, + boxes, scores, max_output_size, }, Attrs: attrs, } @@ -8347,90 +10756,95 @@ func BatchDataset(scope *Scope, input_dataset tf.Output, batch_size tf.Output, o return op.Output(0) } -// Inverse fast Fourier transform. -// -// Computes the inverse 1-dimensional discrete Fourier transform over the -// inner-most dimension of `input`. -// -// Arguments: -// input: A complex64 tensor. -// -// Returns A complex64 tensor of the same shape as `input`. The inner-most -// dimension of `input` is replaced with its inverse 1D Fourier transform. -// -// @compatibility(numpy) -// Equivalent to np.fft.ifft -// @end_compatibility -func IFFT(scope *Scope, input tf.Output) (output tf.Output) { +// Creates a dataset that emits `components` as a tuple of tensors once. +func TensorDataset(scope *Scope, components []tf.Output, output_shapes []tf.Shape) (handle tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"output_shapes": output_shapes} opspec := tf.OpSpec{ - Type: "IFFT", + Type: "TensorDataset", Input: []tf.Input{ - input, + tf.OutputList(components), }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Generates values in an interval. 
-// -// A sequence of `num` evenly-spaced values are generated beginning at `start`. -// If `num > 1`, the values in the sequence increase by `stop - start / num - 1`, -// so that the last one is exactly `stop`. +// Component-wise multiplies a SparseTensor by a dense Tensor. // -// For example: +// The output locations corresponding to the implicitly zero elements in the sparse +// tensor will be zero (i.e., will not take up storage space), regardless of the +// contents of the dense tensor (even if it's +/-INF and that INF*0 == NaN). // -// ``` -// tf.linspace(10.0, 12.0, 3, name="linspace") => [ 10.0 11.0 12.0] -// ``` +// *Limitation*: this Op only broadcasts the dense side to the sparse side, but not +// the other direction. // // Arguments: -// start: First entry in the range. -// stop: Last entry in the range. -// num: Number of values to generate. +// sp_indices: 2-D. `N x R` matrix with the indices of non-empty values in a +// SparseTensor, possibly not in canonical ordering. +// sp_values: 1-D. `N` non-empty values corresponding to `sp_indices`. +// sp_shape: 1-D. Shape of the input SparseTensor. +// dense: `R`-D. The dense Tensor operand. // -// Returns 1-D. The generated values. -func LinSpace(scope *Scope, start tf.Output, stop tf.Output, num tf.Output) (output tf.Output) { +// Returns 1-D. The `N` values that are operated on. +func SparseDenseCwiseMul(scope *Scope, sp_indices tf.Output, sp_values tf.Output, sp_shape tf.Output, dense tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "LinSpace", + Type: "SparseDenseCwiseMul", Input: []tf.Input{ - start, stop, num, + sp_indices, sp_values, sp_shape, dense, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// DestroyResourceOpAttr is an optional argument to DestroyResourceOp. -type DestroyResourceOpAttr func(optionalAttr) +// ResourceSparseApplyRMSPropAttr is an optional argument to ResourceSparseApplyRMSProp. 
+type ResourceSparseApplyRMSPropAttr func(optionalAttr) -// DestroyResourceOpIgnoreLookupError sets the optional ignore_lookup_error attribute to value. +// ResourceSparseApplyRMSPropUseLocking sets the optional use_locking attribute to value. // -// value: whether to ignore the error when the resource -// doesn't exist. -// If not specified, defaults to true -func DestroyResourceOpIgnoreLookupError(value bool) DestroyResourceOpAttr { +// value: If `True`, updating of the var, ms, and mom tensors is protected +// by a lock; otherwise the behavior is undefined, but may exhibit less +// contention. +// If not specified, defaults to false +func ResourceSparseApplyRMSPropUseLocking(value bool) ResourceSparseApplyRMSPropAttr { return func(m optionalAttr) { - m["ignore_lookup_error"] = value + m["use_locking"] = value } } -// Deletes the resource specified by the handle. +// Update '*var' according to the RMSProp algorithm. // -// All subsequent operations using the resource will result in a NotFound -// error status. +// Note that in dense implementation of this algorithm, ms and mom will +// update even if the grad is zero, but in this sparse implementation, ms +// and mom will not update in iterations during which the grad is zero. +// +// mean_square = decay * mean_square + (1-decay) * gradient ** 2 +// Delta = learning_rate * gradient / sqrt(mean_square + epsilon) +// +// ms <- rho * ms_{t-1} + (1-rho) * grad * grad +// mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon) +// var <- var - mom // // Arguments: -// resource: handle to the resource to delete. +// var_: Should be from a Variable(). +// ms: Should be from a Variable(). +// mom: Should be from a Variable(). +// lr: Scaling factor. Must be a scalar. +// rho: Decay rate. Must be a scalar. +// +// epsilon: Ridge term. Must be a scalar. +// grad: The gradient. +// indices: A vector of indices into the first dimension of var, ms and mom. // // Returns the created operation. 
-func DestroyResourceOp(scope *Scope, resource tf.Output, optional ...DestroyResourceOpAttr) (o *tf.Operation) { +func ResourceSparseApplyRMSProp(scope *Scope, var_ tf.Output, ms tf.Output, mom tf.Output, lr tf.Output, rho tf.Output, momentum tf.Output, epsilon tf.Output, grad tf.Output, indices tf.Output, optional ...ResourceSparseApplyRMSPropAttr) (o *tf.Operation) { if scope.Err() != nil { return } @@ -8439,140 +10853,168 @@ func DestroyResourceOp(scope *Scope, resource tf.Output, optional ...DestroyReso a(attrs) } opspec := tf.OpSpec{ - Type: "DestroyResourceOp", + Type: "ResourceSparseApplyRMSProp", Input: []tf.Input{ - resource, + var_, ms, mom, lr, rho, momentum, epsilon, grad, indices, }, Attrs: attrs, } return scope.AddOperation(opspec) } -// LRNAttr is an optional argument to LRN. -type LRNAttr func(optionalAttr) - -// LRNDepthRadius sets the optional depth_radius attribute to value. +// Returns the truth value of (x > y) element-wise. // -// value: 0-D. Half-width of the 1-D normalization window. -// If not specified, defaults to 5 -func LRNDepthRadius(value int64) LRNAttr { - return func(m optionalAttr) { - m["depth_radius"] = value +// *NOTE*: `Greater` supports broadcasting. More about broadcasting +// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +func Greater(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { + if scope.Err() != nil { + return } -} - -// LRNBias sets the optional bias attribute to value. -// -// value: An offset (usually positive to avoid dividing by 0). -// If not specified, defaults to 1 -func LRNBias(value float32) LRNAttr { - return func(m optionalAttr) { - m["bias"] = value + opspec := tf.OpSpec{ + Type: "Greater", + Input: []tf.Input{ + x, y, + }, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// LRNAlpha sets the optional alpha attribute to value. -// -// value: A scale factor, usually positive. 
-// If not specified, defaults to 1 -func LRNAlpha(value float32) LRNAttr { - return func(m optionalAttr) { - m["alpha"] = value - } -} +// SampleDistortedBoundingBoxAttr is an optional argument to SampleDistortedBoundingBox. +type SampleDistortedBoundingBoxAttr func(optionalAttr) -// LRNBeta sets the optional beta attribute to value. +// SampleDistortedBoundingBoxSeed sets the optional seed attribute to value. // -// value: An exponent. -// If not specified, defaults to 0.5 -func LRNBeta(value float32) LRNAttr { +// value: If either `seed` or `seed2` are set to non-zero, the random number +// generator is seeded by the given `seed`. Otherwise, it is seeded by a random +// seed. +// If not specified, defaults to 0 +func SampleDistortedBoundingBoxSeed(value int64) SampleDistortedBoundingBoxAttr { return func(m optionalAttr) { - m["beta"] = value + m["seed"] = value } } -// Local Response Normalization. -// -// The 4-D `input` tensor is treated as a 3-D array of 1-D vectors (along the last -// dimension), and each vector is normalized independently. Within a given vector, -// each component is divided by the weighted, squared sum of inputs within -// `depth_radius`. In detail, -// -// sqr_sum[a, b, c, d] = -// sum(input[a, b, c, d - depth_radius : d + depth_radius + 1] ** 2) -// output = input / (bias + alpha * sqr_sum) ** beta -// -// For details, see [Krizhevsky et al., ImageNet classification with deep -// convolutional neural networks (NIPS 2012)](http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks). +// SampleDistortedBoundingBoxSeed2 sets the optional seed2 attribute to value. // -// Arguments: -// input: 4-D. 
-func LRN(scope *Scope, input tf.Output, optional ...LRNAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "LRN", - Input: []tf.Input{ - input, - }, - Attrs: attrs, +// value: A second seed to avoid seed collision. +// If not specified, defaults to 0 +func SampleDistortedBoundingBoxSeed2(value int64) SampleDistortedBoundingBoxAttr { + return func(m optionalAttr) { + m["seed2"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// Creates a dataset that zips together `input_datasets`. -func ZipDataset(scope *Scope, input_datasets []tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { - if scope.Err() != nil { - return +// SampleDistortedBoundingBoxMinObjectCovered sets the optional min_object_covered attribute to value. +// +// value: The cropped area of the image must contain at least this +// fraction of any bounding box supplied. The value of this parameter should be +// non-negative. In the case of 0, the cropped area does not need to overlap +// any of the bounding boxes supplied. +// If not specified, defaults to 0.1 +func SampleDistortedBoundingBoxMinObjectCovered(value float32) SampleDistortedBoundingBoxAttr { + return func(m optionalAttr) { + m["min_object_covered"] = value } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "ZipDataset", - Input: []tf.Input{ - tf.OutputList(input_datasets), - }, - Attrs: attrs, +} + +// SampleDistortedBoundingBoxAspectRatioRange sets the optional aspect_ratio_range attribute to value. +// +// value: The cropped area of the image must have an aspect ratio = +// width / height within this range. 
+// If not specified, defaults to +func SampleDistortedBoundingBoxAspectRatioRange(value []float32) SampleDistortedBoundingBoxAttr { + return func(m optionalAttr) { + m["aspect_ratio_range"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// ResourceSparseApplyAdagradAttr is an optional argument to ResourceSparseApplyAdagrad. -type ResourceSparseApplyAdagradAttr func(optionalAttr) +// SampleDistortedBoundingBoxAreaRange sets the optional area_range attribute to value. +// +// value: The cropped area of the image must contain a fraction of the +// supplied image within in this range. +// If not specified, defaults to +func SampleDistortedBoundingBoxAreaRange(value []float32) SampleDistortedBoundingBoxAttr { + return func(m optionalAttr) { + m["area_range"] = value + } +} -// ResourceSparseApplyAdagradUseLocking sets the optional use_locking attribute to value. +// SampleDistortedBoundingBoxMaxAttempts sets the optional max_attempts attribute to value. // -// value: If `True`, updating of the var and accum tensors will be protected -// by a lock; otherwise the behavior is undefined, but may exhibit less -// contention. +// value: Number of attempts at generating a cropped region of the image +// of the specified constraints. After `max_attempts` failures, return the entire +// image. +// If not specified, defaults to 100 +func SampleDistortedBoundingBoxMaxAttempts(value int64) SampleDistortedBoundingBoxAttr { + return func(m optionalAttr) { + m["max_attempts"] = value + } +} + +// SampleDistortedBoundingBoxUseImageIfNoBoundingBoxes sets the optional use_image_if_no_bounding_boxes attribute to value. +// +// value: Controls behavior if no bounding boxes supplied. +// If true, assume an implicit bounding box covering the whole input. If false, +// raise an error. 
// If not specified, defaults to false -func ResourceSparseApplyAdagradUseLocking(value bool) ResourceSparseApplyAdagradAttr { +func SampleDistortedBoundingBoxUseImageIfNoBoundingBoxes(value bool) SampleDistortedBoundingBoxAttr { return func(m optionalAttr) { - m["use_locking"] = value + m["use_image_if_no_bounding_boxes"] = value } } -// Update relevant entries in '*var' and '*accum' according to the adagrad scheme. +// Generate a single randomly distorted bounding box for an image. // -// That is for rows we have grad for, we update var and accum as follows: -// accum += grad * grad -// var -= lr * grad * (1 / sqrt(accum)) +// Bounding box annotations are often supplied in addition to ground-truth labels +// in image recognition or object localization tasks. A common technique for +// training such a system is to randomly distort an image while preserving +// its content, i.e. *data augmentation*. This Op outputs a randomly distorted +// localization of an object, i.e. bounding box, given an `image_size`, +// `bounding_boxes` and a series of constraints. +// +// The output of this Op is a single bounding box that may be used to crop the +// original image. The output is returned as 3 tensors: `begin`, `size` and +// `bboxes`. The first 2 tensors can be fed directly into `tf.slice` to crop the +// image. The latter may be supplied to `tf.image.draw_bounding_boxes` to visualize +// what the bounding box looks like. +// +// Bounding boxes are supplied and returned as `[y_min, x_min, y_max, x_max]`. The +// bounding box coordinates are floats in `[0.0, 1.0]` relative to the width and +// height of the underlying image. +// +// For example, +// +// ```python +// # Generate a single distorted bounding box. +// begin, size, bbox_for_draw = tf.image.sample_distorted_bounding_box( +// tf.shape(image), +// bounding_boxes=bounding_boxes) +// +// # Draw the bounding box in an image summary. 
+// image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0), +// bbox_for_draw) +// tf.summary.image('images_with_box', image_with_box) +// +// # Employ the bounding box to distort the image. +// distorted_image = tf.slice(image, begin, size) +// ``` +// +// Note that if no bounding box information is available, setting +// `use_image_if_no_bounding_boxes = true` will assume there is a single implicit +// bounding box covering the whole image. If `use_image_if_no_bounding_boxes` is +// false and no bounding boxes are supplied, an error is raised. // // Arguments: -// var_: Should be from a Variable(). -// accum: Should be from a Variable(). -// lr: Learning rate. Must be a scalar. -// grad: The gradient. -// indices: A vector of indices into the first dimension of var and accum. +// image_size: 1-D, containing `[height, width, channels]`. +// bounding_boxes: 3-D with shape `[batch, N, 4]` describing the N bounding boxes +// associated with the image. // -// Returns the created operation. -func ResourceSparseApplyAdagrad(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, grad tf.Output, indices tf.Output, optional ...ResourceSparseApplyAdagradAttr) (o *tf.Operation) { +// Returns 1-D, containing `[offset_height, offset_width, 0]`. Provide as input to +// `tf.slice`.1-D, containing `[target_height, target_width, -1]`. Provide as input to +// `tf.slice`.3-D with shape `[1, 1, 4]` containing the distorted bounding box. +// Provide as input to `tf.image.draw_bounding_boxes`. 
+func SampleDistortedBoundingBox(scope *Scope, image_size tf.Output, bounding_boxes tf.Output, optional ...SampleDistortedBoundingBoxAttr) (begin tf.Output, size tf.Output, bboxes tf.Output) { if scope.Err() != nil { return } @@ -8581,282 +11023,227 @@ func ResourceSparseApplyAdagrad(scope *Scope, var_ tf.Output, accum tf.Output, l a(attrs) } opspec := tf.OpSpec{ - Type: "ResourceSparseApplyAdagrad", + Type: "SampleDistortedBoundingBox", Input: []tf.Input{ - var_, accum, lr, grad, indices, + image_size, bounding_boxes, }, Attrs: attrs, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2) } -// 2D real-valued fast Fourier transform. -// -// Computes the 2-dimensional discrete Fourier transform of a real-valued signal -// over the inner-most 2 dimensions of `input`. -// -// Since the DFT of a real signal is Hermitian-symmetric, `RFFT2D` only returns the -// `fft_length / 2 + 1` unique components of the FFT for the inner-most dimension -// of `output`: the zero-frequency term, followed by the `fft_length / 2` -// positive-frequency terms. +// Converts each string in the input Tensor to its hash mod by a number of buckets. // -// Along each axis `RFFT2D` is computed on, if `fft_length` is smaller than the -// corresponding dimension of `input`, the dimension is cropped. If it is larger, -// the dimension is padded with zeros. +// The hash function is deterministic on the content of the string within the +// process and will never change. However, it is not suitable for cryptography. +// This function may be used when CPU time is scarce and inputs are trusted or +// unimportant. There is a risk of adversaries constructing inputs that all hash +// to the same bucket. To prevent this problem, use a strong hash function with +// `tf.string_to_hash_bucket_strong`. // // Arguments: -// input: A float32 tensor. -// fft_length: An int32 tensor of shape [2]. The FFT length for each dimension. 
-// -// Returns A complex64 tensor of the same rank as `input`. The inner-most 2 -// dimensions of `input` are replaced with their 2D Fourier transform. The -// inner-most dimension contains `fft_length / 2 + 1` unique frequency -// components. +// input: The strings to assign a hash bucket. +// num_buckets: The number of buckets. // -// @compatibility(numpy) -// Equivalent to np.fft.rfft2 -// @end_compatibility -func RFFT2D(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Output) { +// Returns A Tensor of the same shape as the input `string_tensor`. +func StringToHashBucketFast(scope *Scope, input tf.Output, num_buckets int64) (output tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"num_buckets": num_buckets} opspec := tf.OpSpec{ - Type: "RFFT2D", + Type: "StringToHashBucketFast", Input: []tf.Input{ - input, fft_length, + input, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// ResizeAreaAttr is an optional argument to ResizeArea. -type ResizeAreaAttr func(optionalAttr) +// TensorArrayGatherV3Attr is an optional argument to TensorArrayGatherV3. +type TensorArrayGatherV3Attr func(optionalAttr) -// ResizeAreaAlignCorners sets the optional align_corners attribute to value. +// TensorArrayGatherV3ElementShape sets the optional element_shape attribute to value. // -// value: If true, rescale input by (new_height - 1) / (height - 1), which -// exactly aligns the 4 corners of images and resized images. If false, rescale -// by new_height / height. Treat similarly the width dimension. -// If not specified, defaults to false -func ResizeAreaAlignCorners(value bool) ResizeAreaAttr { +// value: The expected shape of an element, if known. Used to +// validate the shapes of TensorArray elements. If this shape is not +// fully specified, gathering zero-size TensorArrays is an error. 
+// If not specified, defaults to +func TensorArrayGatherV3ElementShape(value tf.Shape) TensorArrayGatherV3Attr { return func(m optionalAttr) { - m["align_corners"] = value + m["element_shape"] = value } } -// Resize `images` to `size` using area interpolation. -// -// Input images can be of different types but output images are always float. +// Gather specific elements from the TensorArray into output `value`. // -// Each output pixel is computed by first transforming the pixel's footprint into -// the input tensor and then averaging the pixels that intersect the footprint. An -// input pixel's contribution to the average is weighted by the fraction of its -// area that intersects the footprint. This is the same as OpenCV's INTER_AREA. +// All elements selected by `indices` must have the same shape. // // Arguments: -// images: 4-D with shape `[batch, height, width, channels]`. -// size: = A 1-D int32 Tensor of 2 elements: `new_height, new_width`. The -// new size for the images. +// handle: The handle to a TensorArray. +// indices: The locations in the TensorArray from which to read tensor elements. +// flow_in: A float scalar that enforces proper chaining of operations. +// dtype: The type of the elem that is returned. // -// Returns 4-D with shape -// `[batch, new_height, new_width, channels]`. -func ResizeArea(scope *Scope, images tf.Output, size tf.Output, optional ...ResizeAreaAttr) (resized_images tf.Output) { +// Returns All of the elements in the TensorArray, concatenated along a new +// axis (the new dimension 0). 
+func TensorArrayGatherV3(scope *Scope, handle tf.Output, indices tf.Output, flow_in tf.Output, dtype tf.DataType, optional ...TensorArrayGatherV3Attr) (value tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"dtype": dtype} for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ResizeArea", - Input: []tf.Input{ - images, size, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Pads a tensor with zeros. -// -// This operation pads a `input` with zeros according to the `paddings` you -// specify. `paddings` is an integer tensor with shape `[Dn, 2]`, where n is the -// rank of `input`. For each dimension D of `input`, `paddings[D, 0]` indicates -// how many zeros to add before the contents of `input` in that dimension, and -// `paddings[D, 1]` indicates how many zeros to add after the contents of `input` -// in that dimension. -// -// The padded size of each dimension D of the output is: -// -// `paddings(D, 0) + input.dim_size(D) + paddings(D, 1)` -// -// For example: -// -// ``` -// # 't' is [[1, 1], [2, 2]] -// # 'paddings' is [[1, 1], [2, 2]] -// # rank of 't' is 2 -// pad(t, paddings) ==> [[0, 0, 0, 0, 0, 0] -// [0, 0, 1, 1, 0, 0] -// [0, 0, 2, 2, 0, 0] -// [0, 0, 0, 0, 0, 0]] -// ``` -func Pad(scope *Scope, input tf.Output, paddings tf.Output) (output tf.Output) { - if scope.Err() != nil { - return + a(attrs) } opspec := tf.OpSpec{ - Type: "Pad", + Type: "TensorArrayGatherV3", Input: []tf.Input{ - input, paddings, + handle, indices, flow_in, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Checks whether a resource handle-based variable has been initialized. -// -// Arguments: -// resource: the input resource handle. -// -// Returns a scalar boolean which is true if the variable has been -// initialized. 
-func VarIsInitializedOp(scope *Scope, resource tf.Output) (is_initialized tf.Output) { +// Mutually reduces multiple tensors of identical type and shape. +func CollectiveReduce(scope *Scope, input tf.Output, group_size int64, group_key int64, instance_key int64, merge_op string, final_op string, subdiv_offsets []int64) (data tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"group_size": group_size, "group_key": group_key, "instance_key": instance_key, "merge_op": merge_op, "final_op": final_op, "subdiv_offsets": subdiv_offsets} opspec := tf.OpSpec{ - Type: "VarIsInitializedOp", + Type: "CollectiveReduce", Input: []tf.Input{ - resource, + input, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// StatelessRandomUniformAttr is an optional argument to StatelessRandomUniform. -type StatelessRandomUniformAttr func(optionalAttr) - -// StatelessRandomUniformDtype sets the optional dtype attribute to value. -// -// value: The type of the output. -// If not specified, defaults to DT_FLOAT -func StatelessRandomUniformDtype(value tf.DataType) StatelessRandomUniformAttr { - return func(m optionalAttr) { - m["dtype"] = value - } -} - -// Outputs deterministic pseudorandom random values from a uniform distribution. +// This op consumes a lock created by `MutexLock`. // -// The generated values follow a uniform distribution in the range `[0, 1)`. The -// lower bound 0 is included in the range, while the upper bound 1 is excluded. +// This op exists to consume a tensor created by `MutexLock` (other than +// direct control dependencies). It should be the only that consumes the tensor, +// and will raise an error if it is not. Its only purpose is to keep the +// mutex lock tensor alive until it is consumed by this op. // -// The outputs are a deterministic function of `shape` and `seed`. +// **NOTE**: This operation must run on the same device as its input. This may +// be enforced via the `colocate_with` mechanism. 
// // Arguments: -// shape: The shape of the output tensor. -// seed: 2 seeds (shape [2]). +// mutex_lock: A tensor returned by `MutexLock`. // -// Returns Random values with specified shape. -func StatelessRandomUniform(scope *Scope, shape tf.Output, seed tf.Output, optional ...StatelessRandomUniformAttr) (output tf.Output) { +// Returns the created operation. +func ConsumeMutexLock(scope *Scope, mutex_lock tf.Output) (o *tf.Operation) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "StatelessRandomUniform", + Type: "ConsumeMutexLock", Input: []tf.Input{ - shape, seed, + mutex_lock, }, - Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } -// Makes its input available to the next iteration. +// Returns x / y element-wise for integer types. // -// Arguments: -// data: The tensor to be made available to the next iteration. +// Truncation designates that negative numbers will round fractional quantities +// toward zero. I.e. -7 / 5 = -1. This matches C semantics but it is different +// than Python semantics. See `FloorDiv` for a division function that matches +// Python Semantics. // -// Returns The same tensor as `data`. -func NextIteration(scope *Scope, data tf.Output) (output tf.Output) { +// *NOTE*: `TruncateDiv` supports broadcasting. More about broadcasting +// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +func TruncateDiv(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "NextIteration", + Type: "TruncateDiv", Input: []tf.Input{ - data, + x, y, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Output a fact about factorials. -func Fact(scope *Scope) (fact tf.Output) { +// Restores tensors from a V2 checkpoint. 
+// +// For backward compatibility with the V1 format, this Op currently allows +// restoring from a V1 checkpoint as well: +// - This Op first attempts to find the V2 index file pointed to by "prefix", and +// if found proceed to read it as a V2 checkpoint; +// - Otherwise the V1 read path is invoked. +// Relying on this behavior is not recommended, as the ability to fall back to read +// V1 might be deprecated and eventually removed. +// +// By default, restores the named tensors in full. If the caller wishes to restore +// specific slices of stored tensors, "shape_and_slices" should be non-empty +// strings and correspondingly well-formed. +// +// Callers must ensure all the named tensors are indeed stored in the checkpoint. +// +// Arguments: +// prefix: Must have a single element. The prefix of a V2 checkpoint. +// tensor_names: shape {N}. The names of the tensors to be restored. +// shape_and_slices: shape {N}. The slice specs of the tensors to be restored. +// Empty strings indicate that they are non-partitioned tensors. +// dtypes: shape {N}. The list of expected dtype for the tensors. Must match +// those stored in the checkpoint. +// +// Returns shape {N}. The restored tensors, whose shapes are read from the +// checkpoint directly. +func RestoreV2(scope *Scope, prefix tf.Output, tensor_names tf.Output, shape_and_slices tf.Output, dtypes []tf.DataType) (tensors []tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"dtypes": dtypes} opspec := tf.OpSpec{ - Type: "Fact", + Type: "RestoreV2", + Input: []tf.Input{ + prefix, tensor_names, shape_and_slices, + }, + Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) -} - -// AngleAttr is an optional argument to Angle. -type AngleAttr func(optionalAttr) - -// AngleTout sets the optional Tout attribute to value. 
-// If not specified, defaults to DT_FLOAT -func AngleTout(value tf.DataType) AngleAttr { - return func(m optionalAttr) { - m["Tout"] = value + if scope.Err() != nil { + return + } + var idx int + var err error + if tensors, idx, err = makeOutputList(op, idx, "tensors"); err != nil { + scope.UpdateErr("RestoreV2", err) + return } + return tensors } -// Returns the argument of a complex number. -// -// Given a tensor `input` of complex numbers, this operation returns a tensor of -// type `float` that is the argument of each element in `input`. All elements in -// `input` must be complex numbers of the form \\(a + bj\\), where *a* -// is the real part and *b* is the imaginary part. +// Creates a dataset that skips `count` elements from the `input_dataset`. // -// The argument returned by this operation is of the form \\(atan2(b, a)\\). +// Arguments: // -// For example: +// count: A scalar representing the number of elements from the `input_dataset` +// that should be skipped. If count is -1, skips everything. // -// ``` -// # tensor 'input' is [-2.25 + 4.75j, 3.25 + 5.75j] -// tf.angle(input) ==> [2.0132, 1.056] -// ``` // -// @compatibility(numpy) -// Equivalent to np.angle. -// @end_compatibility -func Angle(scope *Scope, input tf.Output, optional ...AngleAttr) (output tf.Output) { +func SkipDataset(scope *Scope, input_dataset tf.Output, count tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} opspec := tf.OpSpec{ - Type: "Angle", + Type: "SkipDataset", Input: []tf.Input{ - input, + input_dataset, count, }, Attrs: attrs, } @@ -8864,174 +11251,149 @@ func Angle(scope *Scope, input tf.Output, optional ...AngleAttr) (output tf.Outp return op.Output(0) } -// VarHandleOpAttr is an optional argument to VarHandleOp. 
-type VarHandleOpAttr func(optionalAttr) - -// VarHandleOpContainer sets the optional container attribute to value. +// Computes the maximum along segments of a tensor. // -// value: the container this variable is placed in. -// If not specified, defaults to "" -func VarHandleOpContainer(value string) VarHandleOpAttr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// VarHandleOpSharedName sets the optional shared_name attribute to value. +// Read @{$math_ops#Segmentation$the section on segmentation} for an explanation of +// segments. // -// value: the name by which this variable is referred to. -// If not specified, defaults to "" -func VarHandleOpSharedName(value string) VarHandleOpAttr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// Creates a handle to a Variable resource. +// Computes a tensor such that +// \\(output_i = \max_j(data_j)\\) where `max` is over `j` such +// that `segment_ids[j] == i`. +// +// If the max is empty for a given segment ID `i`, `output[i] = 0`. +// +//
+// +//
// // Arguments: -// dtype: the type of this variable. Must agree with the dtypes -// of all ops using this variable. -// shape: The (possibly partially specified) shape of this variable. -func VarHandleOp(scope *Scope, dtype tf.DataType, shape tf.Shape, optional ...VarHandleOpAttr) (resource tf.Output) { +// +// segment_ids: A 1-D tensor whose rank is equal to the rank of `data`'s +// first dimension. Values should be sorted and can be repeated. +// +// Returns Has same shape as data, except for dimension 0 which +// has size `k`, the number of segments. +func SegmentMax(scope *Scope, data tf.Output, segment_ids tf.Output) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"dtype": dtype, "shape": shape} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "VarHandleOp", - - Attrs: attrs, + Type: "SegmentMax", + Input: []tf.Input{ + data, segment_ids, + }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Elementwise computes the bitwise XOR of `x` and `y`. -// -// The result will have those bits set, that are different in `x` and `y`. The -// computation is performed on the underlying representations of `x` and `y`. -func BitwiseXor(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { +// Computes hyperbolic tangent of `x` element-wise. +func Tanh(scope *Scope, x tf.Output) (y tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "BitwiseXor", + Type: "Tanh", Input: []tf.Input{ - x, y, + x, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Deserialize `SparseTensor` objects. -// -// The input `serialized_sparse` must have the shape `[?, ?, ..., ?, 3]` where -// the last dimension stores serialized `SparseTensor` objects and the other N -// dimensions (N >= 0) correspond to a batch. The ranks of the original -// `SparseTensor` objects must all match. 
When the final `SparseTensor` is -// created, its rank is the rank of the incoming `SparseTensor` objects plus N; -// the sparse tensors have been concatenated along new dimensions, one for each -// batch. -// -// The output `SparseTensor` object's shape values for the original dimensions -// are the max across the input `SparseTensor` objects' shape values for the -// corresponding dimensions. The new dimensions match the size of the batch. -// -// The input `SparseTensor` objects' indices are assumed ordered in -// standard lexicographic order. If this is not the case, after this -// step run `SparseReorder` to restore index ordering. -// -// For example, if the serialized input is a `[2 x 3]` matrix representing two -// original `SparseTensor` objects: -// -// index = [ 0] -// [10] -// [20] -// values = [1, 2, 3] -// shape = [50] -// -// and +// Receives a tensor value broadcast from another device. +func CollectiveBcastRecv(scope *Scope, T tf.DataType, group_size int64, group_key int64, instance_key int64, shape tf.Shape) (data tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"T": T, "group_size": group_size, "group_key": group_key, "instance_key": instance_key, "shape": shape} + opspec := tf.OpSpec{ + Type: "CollectiveBcastRecv", + + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Decode web-safe base64-encoded strings. // -// index = [ 2] -// [10] -// values = [4, 5] -// shape = [30] +// Input may or may not have padding at the end. See EncodeBase64 for padding. +// Web-safe means that input must use - and _ instead of + and /. // -// then the final deserialized `SparseTensor` will be: +// Arguments: +// input: Base64 strings to decode. // -// index = [0 0] -// [0 10] -// [0 20] -// [1 2] -// [1 10] -// values = [1, 2, 3, 4, 5] -// shape = [2 50] +// Returns Decoded strings. 
+func DecodeBase64(scope *Scope, input tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "DecodeBase64", + Input: []tf.Input{ + input, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Store the input tensor in the state of the current session. // // Arguments: -// serialized_sparse: The serialized `SparseTensor` objects. The last dimension -// must have 3 columns. -// dtype: The `dtype` of the serialized `SparseTensor` objects. -func DeserializeSparse(scope *Scope, serialized_sparse tf.Output, dtype tf.DataType) (sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output) { +// value: The tensor to be stored. +// +// Returns The handle for the tensor stored in the session state, represented +// as a string. +func GetSessionHandle(scope *Scope, value tf.Output) (handle tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"dtype": dtype} opspec := tf.OpSpec{ - Type: "DeserializeSparse", + Type: "GetSessionHandle", Input: []tf.Input{ - serialized_sparse, + value, }, - Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return op.Output(0) } -// ResourceApplyRMSPropAttr is an optional argument to ResourceApplyRMSProp. -type ResourceApplyRMSPropAttr func(optionalAttr) +// ResourceSparseApplyProximalAdagradAttr is an optional argument to ResourceSparseApplyProximalAdagrad. +type ResourceSparseApplyProximalAdagradAttr func(optionalAttr) -// ResourceApplyRMSPropUseLocking sets the optional use_locking attribute to value. +// ResourceSparseApplyProximalAdagradUseLocking sets the optional use_locking attribute to value. // -// value: If `True`, updating of the var, ms, and mom tensors is protected -// by a lock; otherwise the behavior is undefined, but may exhibit less -// contention. 
+// value: If True, updating of the var and accum tensors will be protected by +// a lock; otherwise the behavior is undefined, but may exhibit less contention. // If not specified, defaults to false -func ResourceApplyRMSPropUseLocking(value bool) ResourceApplyRMSPropAttr { +func ResourceSparseApplyProximalAdagradUseLocking(value bool) ResourceSparseApplyProximalAdagradAttr { return func(m optionalAttr) { m["use_locking"] = value } } -// Update '*var' according to the RMSProp algorithm. -// -// Note that in dense implementation of this algorithm, ms and mom will -// update even if the grad is zero, but in this sparse implementation, ms -// and mom will not update in iterations during which the grad is zero. -// -// mean_square = decay * mean_square + (1-decay) * gradient ** 2 -// Delta = learning_rate * gradient / sqrt(mean_square + epsilon) +// Sparse update entries in '*var' and '*accum' according to FOBOS algorithm. // -// ms <- rho * ms_{t-1} + (1-rho) * grad * grad -// mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon) -// var <- var - mom +// That is for rows we have grad for, we update var and accum as follows: +// accum += grad * grad +// prox_v = var +// prox_v -= lr * grad * (1 / sqrt(accum)) +// var = sign(prox_v)/(1+lr*l2) * max{|prox_v|-lr*l1,0} // // Arguments: // var_: Should be from a Variable(). -// ms: Should be from a Variable(). -// mom: Should be from a Variable(). -// lr: Scaling factor. Must be a scalar. -// rho: Decay rate. Must be a scalar. -// -// epsilon: Ridge term. Must be a scalar. +// accum: Should be from a Variable(). +// lr: Learning rate. Must be a scalar. +// l1: L1 regularization. Must be a scalar. +// l2: L2 regularization. Must be a scalar. // grad: The gradient. +// indices: A vector of indices into the first dimension of var and accum. // // Returns the created operation. 
-func ResourceApplyRMSProp(scope *Scope, var_ tf.Output, ms tf.Output, mom tf.Output, lr tf.Output, rho tf.Output, momentum tf.Output, epsilon tf.Output, grad tf.Output, optional ...ResourceApplyRMSPropAttr) (o *tf.Operation) { +func ResourceSparseApplyProximalAdagrad(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, grad tf.Output, indices tf.Output, optional ...ResourceSparseApplyProximalAdagradAttr) (o *tf.Operation) { if scope.Err() != nil { return } @@ -9040,77 +11402,99 @@ func ResourceApplyRMSProp(scope *Scope, var_ tf.Output, ms tf.Output, mom tf.Out a(attrs) } opspec := tf.OpSpec{ - Type: "ResourceApplyRMSProp", + Type: "ResourceSparseApplyProximalAdagrad", Input: []tf.Input{ - var_, ms, mom, lr, rho, momentum, epsilon, grad, + var_, accum, lr, l1, l2, grad, indices, }, Attrs: attrs, } return scope.AddOperation(opspec) } -// ResourceScatterNdUpdateAttr is an optional argument to ResourceScatterNdUpdate. -type ResourceScatterNdUpdateAttr func(optionalAttr) +// MaxPool3DGradAttr is an optional argument to MaxPool3DGrad. +type MaxPool3DGradAttr func(optionalAttr) -// ResourceScatterNdUpdateUseLocking sets the optional use_locking attribute to value. +// MaxPool3DGradDataFormat sets the optional data_format attribute to value. // -// value: An optional bool. Defaults to True. If True, the assignment will -// be protected by a lock; otherwise the behavior is undefined, -// but may exhibit less contention. -// If not specified, defaults to true -func ResourceScatterNdUpdateUseLocking(value bool) ResourceScatterNdUpdateAttr { +// value: The data format of the input and output data. With the +// default format "NDHWC", the data is stored in the order of: +// [batch, in_depth, in_height, in_width, in_channels]. +// Alternatively, the format could be "NCDHW", the data storage order is: +// [batch, in_channels, in_depth, in_height, in_width]. 
+// If not specified, defaults to "NDHWC" +func MaxPool3DGradDataFormat(value string) MaxPool3DGradAttr { return func(m optionalAttr) { - m["use_locking"] = value + m["data_format"] = value } } -// Applies sparse `updates` to individual values or slices within a given -// -// variable according to `indices`. -// -// `ref` is a `Tensor` with rank `P` and `indices` is a `Tensor` of rank `Q`. -// -// `indices` must be integer tensor, containing indices into `ref`. -// It must be shape `[d_0, ..., d_{Q-2}, K]` where `0 < K <= P`. -// -// The innermost dimension of `indices` (with length `K`) corresponds to -// indices into elements (if `K = P`) or slices (if `K < P`) along the `K`th -// dimension of `ref`. -// -// `updates` is `Tensor` of rank `Q-1+P-K` with shape: -// -// ``` -// [d_0, ..., d_{Q-2}, ref.shape[K], ..., ref.shape[P-1]]. -// ``` +// Computes gradients of max pooling function. // -// For example, say we want to update 4 scattered elements to a rank-1 tensor to -// 8 elements. In Python, that update would look like this: +// Arguments: +// orig_input: The original input tensor. +// orig_output: The original output tensor. +// grad: Output backprop of shape `[batch, depth, rows, cols, channels]`. +// ksize: 1-D tensor of length 5. The size of the window for each dimension of +// the input tensor. Must have `ksize[0] = ksize[4] = 1`. +// strides: 1-D tensor of length 5. The stride of the sliding window for each +// dimension of `input`. Must have `strides[0] = strides[4] = 1`. +// padding: The type of padding algorithm to use. 
+func MaxPool3DGrad(scope *Scope, orig_input tf.Output, orig_output tf.Output, grad tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPool3DGradAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "MaxPool3DGrad", + Input: []tf.Input{ + orig_input, orig_output, grad, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// SparseReduceSumAttr is an optional argument to SparseReduceSum. +type SparseReduceSumAttr func(optionalAttr) + +// SparseReduceSumKeepDims sets the optional keep_dims attribute to value. // -// ```python -// ref = tfe.Variable([1, 2, 3, 4, 5, 6, 7, 8]) -// indices = tf.constant([[4], [3], [1] ,[7]]) -// updates = tf.constant([9, 10, 11, 12]) -// update = tf.scatter_nd_update(ref, indices, updates) -// with tf.Session() as sess: -// print sess.run(update) -// ``` +// value: If true, retain reduced dimensions with length 1. +// If not specified, defaults to false +func SparseReduceSumKeepDims(value bool) SparseReduceSumAttr { + return func(m optionalAttr) { + m["keep_dims"] = value + } +} + +// Computes the sum of elements across dimensions of a SparseTensor. // -// The resulting update to ref would look like this: +// This Op takes a SparseTensor and is the sparse counterpart to +// `tf.reduce_sum()`. In particular, this Op also returns a dense `Tensor` +// instead of a sparse one. // -// [1, 11, 3, 10, 9, 6, 7, 12] +// Reduces `sp_input` along the dimensions given in `reduction_axes`. Unless +// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in +// `reduction_axes`. If `keep_dims` is true, the reduced dimensions are retained +// with length 1. // -// See @{tf.scatter_nd} for more details about how to make updates to -// slices. 
+// If `reduction_axes` has no entries, all dimensions are reduced, and a tensor +// with a single element is returned. Additionally, the axes can be negative, +// which are interpreted according to the indexing rules in Python. // // Arguments: -// ref: A resource handle. Must be from a VarHandleOp. -// indices: A Tensor. Must be one of the following types: int32, int64. -// A tensor of indices into ref. -// updates: A Tensor. Must have the same type as ref. A tensor of updated -// values to add to ref. +// input_indices: 2-D. `N x R` matrix with the indices of non-empty values in a +// SparseTensor, possibly not in canonical ordering. +// input_values: 1-D. `N` non-empty values corresponding to `input_indices`. +// input_shape: 1-D. Shape of the input SparseTensor. +// reduction_axes: 1-D. Length-`K` vector containing the reduction axes. // -// Returns the created operation. -func ResourceScatterNdUpdate(scope *Scope, ref tf.Output, indices tf.Output, updates tf.Output, optional ...ResourceScatterNdUpdateAttr) (o *tf.Operation) { +// Returns `R-K`-D. The reduced Tensor. +func SparseReduceSum(scope *Scope, input_indices tf.Output, input_values tf.Output, input_shape tf.Output, reduction_axes tf.Output, optional ...SparseReduceSumAttr) (output tf.Output) { if scope.Err() != nil { return } @@ -9119,59 +11503,38 @@ func ResourceScatterNdUpdate(scope *Scope, ref tf.Output, indices tf.Output, upd a(attrs) } opspec := tf.OpSpec{ - Type: "ResourceScatterNdUpdate", + Type: "SparseReduceSum", Input: []tf.Input{ - ref, indices, updates, + input_indices, input_values, input_shape, reduction_axes, }, Attrs: attrs, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0) } -// SqueezeAttr is an optional argument to Squeeze. -type SqueezeAttr func(optionalAttr) +// VariableShapeAttr is an optional argument to VariableShape. +type VariableShapeAttr func(optionalAttr) -// SqueezeAxis sets the optional axis attribute to value. 
-// -// value: If specified, only squeezes the dimensions listed. The dimension -// index starts at 0. It is an error to squeeze a dimension that is not 1. Must -// be in the range `[-rank(input), rank(input))`. -// If not specified, defaults to <> -// -// REQUIRES: len(value) >= 0 -func SqueezeAxis(value []int64) SqueezeAttr { +// VariableShapeOutType sets the optional out_type attribute to value. +// If not specified, defaults to DT_INT32 +func VariableShapeOutType(value tf.DataType) VariableShapeAttr { return func(m optionalAttr) { - m["squeeze_dims"] = value + m["out_type"] = value } } -// Removes dimensions of size 1 from the shape of a tensor. -// -// Given a tensor `input`, this operation returns a tensor of the same type with -// all dimensions of size 1 removed. If you don't want to remove all size 1 -// dimensions, you can remove specific size 1 dimensions by specifying -// `axis`. -// -// For example: -// -// ``` -// # 't' is a tensor of shape [1, 2, 1, 3, 1, 1] -// shape(squeeze(t)) ==> [2, 3] -// ``` +// Returns the shape of the variable pointed to by `resource`. // -// Or, to remove specific size 1 dimensions: +// This operation returns a 1-D integer tensor representing the shape of `input`. +// +// For example: // // ``` -// # 't' is a tensor of shape [1, 2, 1, 3, 1, 1] -// shape(squeeze(t, [2, 4])) ==> [1, 2, 3, 1] +// # 't' is [[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]]] +// shape(t) ==> [2, 2, 3] // ``` -// -// Arguments: -// input: The `input` to squeeze. -// -// Returns Contains the same data as `input`, but has one or more dimensions of -// size 1 removed. -func Squeeze(scope *Scope, input tf.Output, optional ...SqueezeAttr) (output tf.Output) { +func VariableShape(scope *Scope, input tf.Output, optional ...VariableShapeAttr) (output tf.Output) { if scope.Err() != nil { return } @@ -9180,7 +11543,7 @@ func Squeeze(scope *Scope, input tf.Output, optional ...SqueezeAttr) (output tf. 
a(attrs) } opspec := tf.OpSpec{ - Type: "Squeeze", + Type: "VariableShape", Input: []tf.Input{ input, }, @@ -9190,126 +11553,156 @@ func Squeeze(scope *Scope, input tf.Output, optional ...SqueezeAttr) (output tf. return op.Output(0) } -// ResourceApplyAdadeltaAttr is an optional argument to ResourceApplyAdadelta. -type ResourceApplyAdadeltaAttr func(optionalAttr) +// SparseToSparseSetOperationAttr is an optional argument to SparseToSparseSetOperation. +type SparseToSparseSetOperationAttr func(optionalAttr) -// ResourceApplyAdadeltaUseLocking sets the optional use_locking attribute to value. -// -// value: If True, updating of the var, accum and update_accum tensors will be protected by -// a lock; otherwise the behavior is undefined, but may exhibit less contention. -// If not specified, defaults to false -func ResourceApplyAdadeltaUseLocking(value bool) ResourceApplyAdadeltaAttr { +// SparseToSparseSetOperationValidateIndices sets the optional validate_indices attribute to value. +// If not specified, defaults to true +func SparseToSparseSetOperationValidateIndices(value bool) SparseToSparseSetOperationAttr { return func(m optionalAttr) { - m["use_locking"] = value + m["validate_indices"] = value } } -// Update '*var' according to the adadelta scheme. +// Applies set operation along last dimension of 2 `SparseTensor` inputs. // -// accum = rho() * accum + (1 - rho()) * grad.square(); -// update = (update_accum + epsilon).sqrt() * (accum + epsilon()).rsqrt() * grad; -// update_accum = rho() * update_accum + (1 - rho()) * update.square(); -// var -= update; +// See SetOperationOp::SetOperationFromContext for values of `set_operation`. +// +// If `validate_indices` is `True`, `SparseToSparseSetOperation` validates the +// order and range of `set1` and `set2` indices. +// +// Input `set1` is a `SparseTensor` represented by `set1_indices`, `set1_values`, +// and `set1_shape`. For `set1` ranked `n`, 1st `n-1` dimensions must be the same +// as `set2`. 
Dimension `n` contains values in a set, duplicates are allowed but +// ignored. +// +// Input `set2` is a `SparseTensor` represented by `set2_indices`, `set2_values`, +// and `set2_shape`. For `set2` ranked `n`, 1st `n-1` dimensions must be the same +// as `set1`. Dimension `n` contains values in a set, duplicates are allowed but +// ignored. +// +// If `validate_indices` is `True`, this op validates the order and range of `set1` +// and `set2` indices. +// +// Output `result` is a `SparseTensor` represented by `result_indices`, +// `result_values`, and `result_shape`. For `set1` and `set2` ranked `n`, this +// has rank `n` and the same 1st `n-1` dimensions as `set1` and `set2`. The `nth` +// dimension contains the result of `set_operation` applied to the corresponding +// `[0...n-1]` dimension of `set`. // // Arguments: -// var_: Should be from a Variable(). -// accum: Should be from a Variable(). -// accum_update: Should be from a Variable(). -// lr: Scaling factor. Must be a scalar. -// rho: Decay factor. Must be a scalar. -// epsilon: Constant factor. Must be a scalar. -// grad: The gradient. +// set1_indices: 2D `Tensor`, indices of a `SparseTensor`. Must be in row-major +// order. +// set1_values: 1D `Tensor`, values of a `SparseTensor`. Must be in row-major +// order. +// set1_shape: 1D `Tensor`, shape of a `SparseTensor`. `set1_shape[0...n-1]` must +// be the same as `set2_shape[0...n-1]`, `set1_shape[n]` is the +// max set size across `0...n-1` dimensions. +// set2_indices: 2D `Tensor`, indices of a `SparseTensor`. Must be in row-major +// order. +// set2_values: 1D `Tensor`, values of a `SparseTensor`. Must be in row-major +// order. +// set2_shape: 1D `Tensor`, shape of a `SparseTensor`. `set2_shape[0...n-1]` must +// be the same as `set1_shape[0...n-1]`, `set2_shape[n]` is the +// max set size across `0...n-1` dimensions. // -// Returns the created operation. 
-func ResourceApplyAdadelta(scope *Scope, var_ tf.Output, accum tf.Output, accum_update tf.Output, lr tf.Output, rho tf.Output, epsilon tf.Output, grad tf.Output, optional ...ResourceApplyAdadeltaAttr) (o *tf.Operation) { +// +// Returns 2D indices of a `SparseTensor`.1D values of a `SparseTensor`.1D `Tensor` shape of a `SparseTensor`. `result_shape[0...n-1]` is +// the same as the 1st `n-1` dimensions of `set1` and `set2`, `result_shape[n]` +// is the max result set size across all `0...n-1` dimensions. +func SparseToSparseSetOperation(scope *Scope, set1_indices tf.Output, set1_values tf.Output, set1_shape tf.Output, set2_indices tf.Output, set2_values tf.Output, set2_shape tf.Output, set_operation string, optional ...SparseToSparseSetOperationAttr) (result_indices tf.Output, result_values tf.Output, result_shape tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"set_operation": set_operation} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "ResourceApplyAdadelta", + Type: "SparseToSparseSetOperation", Input: []tf.Input{ - var_, accum, accum_update, lr, rho, epsilon, grad, + set1_indices, set1_values, set1_shape, set2_indices, set2_values, set2_shape, }, Attrs: attrs, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2) } -// NonMaxSuppressionAttr is an optional argument to NonMaxSuppression. -type NonMaxSuppressionAttr func(optionalAttr) - -// NonMaxSuppressionIouThreshold sets the optional iou_threshold attribute to value. +// Computes softmax cross entropy cost and gradients to backpropagate. // -// value: A float representing the threshold for deciding whether boxes -// overlap too much with respect to IOU. 
-// If not specified, defaults to 0.5 -func NonMaxSuppressionIouThreshold(value float32) NonMaxSuppressionAttr { - return func(m optionalAttr) { - m["iou_threshold"] = value +// Unlike `SoftmaxCrossEntropyWithLogits`, this operation does not accept +// a matrix of label probabilities, but rather a single label per row +// of features. This label is considered to have probability 1.0 for the +// given row. +// +// Inputs are the logits, not probabilities. +// +// Arguments: +// features: batch_size x num_classes matrix +// labels: batch_size vector with values in [0, num_classes). +// This is the label for the given minibatch entry. +// +// Returns Per example loss (batch_size vector).backpropagated gradients (batch_size x num_classes matrix). +func SparseSoftmaxCrossEntropyWithLogits(scope *Scope, features tf.Output, labels tf.Output) (loss tf.Output, backprop tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "SparseSoftmaxCrossEntropyWithLogits", + Input: []tf.Input{ + features, labels, + }, } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1) } -// Greedily selects a subset of bounding boxes in descending order of score, +// Fast Fourier transform. // -// pruning away boxes that have high intersection-over-union (IOU) overlap -// with previously selected boxes. Bounding boxes are supplied as -// [y1, x1, y2, x2], where (y1, x1) and (y2, x2) are the coordinates of any -// diagonal pair of box corners and the coordinates can be provided as normalized -// (i.e., lying in the interval [0, 1]) or absolute. Note that this algorithm -// is agnostic to where the origin is in the coordinate system. Note that this -// algorithm is invariant to orthogonal transformations and translations -// of the coordinate system; thus translating or reflections of the coordinate -// system result in the same boxes being selected by the algorithm. 
-// The output of this operation is a set of integers indexing into the input -// collection of bounding boxes representing the selected boxes. The bounding -// box coordinates corresponding to the selected indices can then be obtained -// using the `tf.gather operation`. For example: -// selected_indices = tf.image.non_max_suppression( -// boxes, scores, max_output_size, iou_threshold) -// selected_boxes = tf.gather(boxes, selected_indices) +// Computes the 1-dimensional discrete Fourier transform over the inner-most +// dimension of `input`. // // Arguments: -// boxes: A 2-D float tensor of shape `[num_boxes, 4]`. -// scores: A 1-D float tensor of shape `[num_boxes]` representing a single -// score corresponding to each box (each row of boxes). -// max_output_size: A scalar integer tensor representing the maximum number of -// boxes to be selected by non max suppression. +// input: A complex64 tensor. // -// Returns A 1-D integer tensor of shape `[M]` representing the selected -// indices from the boxes tensor, where `M <= max_output_size`. -func NonMaxSuppression(scope *Scope, boxes tf.Output, scores tf.Output, max_output_size tf.Output, optional ...NonMaxSuppressionAttr) (selected_indices tf.Output) { +// Returns A complex64 tensor of the same shape as `input`. The inner-most +// dimension of `input` is replaced with its 1D Fourier transform. +// +// @compatibility(numpy) +// Equivalent to np.fft.fft +// @end_compatibility +func FFT(scope *Scope, input tf.Output) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "NonMaxSuppression", + Type: "FFT", Input: []tf.Input{ - boxes, scores, max_output_size, + input, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Creates a dataset that emits `components` as a tuple of tensors once. 
-func TensorDataset(scope *Scope, components []tf.Output, output_shapes []tf.Shape) (handle tf.Output) { +// Transforms a serialized tensorflow.TensorProto proto into a Tensor. +// +// Arguments: +// serialized: A scalar string containing a serialized TensorProto proto. +// out_type: The type of the serialized tensor. The provided type must match the +// type of the serialized tensor and no implicit conversion will take place. +// +// Returns A Tensor of type `out_type`. +func ParseTensor(scope *Scope, serialized tf.Output, out_type tf.DataType) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"output_shapes": output_shapes} + attrs := map[string]interface{}{"out_type": out_type} opspec := tf.OpSpec{ - Type: "TensorDataset", + Type: "ParseTensor", Input: []tf.Input{ - tf.OutputList(components), + serialized, }, Attrs: attrs, } @@ -9317,78 +11710,84 @@ func TensorDataset(scope *Scope, components []tf.Output, output_shapes []tf.Shap return op.Output(0) } -// Component-wise multiplies a SparseTensor by a dense Tensor. +// MaxPoolWithArgmaxAttr is an optional argument to MaxPoolWithArgmax. +type MaxPoolWithArgmaxAttr func(optionalAttr) + +// MaxPoolWithArgmaxTargmax sets the optional Targmax attribute to value. +// If not specified, defaults to DT_INT64 +func MaxPoolWithArgmaxTargmax(value tf.DataType) MaxPoolWithArgmaxAttr { + return func(m optionalAttr) { + m["Targmax"] = value + } +} + +// Performs max pooling on the input and outputs both max values and indices. // -// The output locations corresponding to the implicitly zero elements in the sparse -// tensor will be zero (i.e., will not take up storage space), regardless of the -// contents of the dense tensor (even if it's +/-INF and that INF*0 == NaN). +// The indices in `argmax` are flattened, so that a maximum value at position +// `[b, y, x, c]` becomes flattened index +// `((b * height + y) * width + x) * channels + c`. 
// -// *Limitation*: this Op only broadcasts the dense side to the sparse side, but not -// the other direction. +// The indices returned are always in `[0, height) x [0, width)` before flattening, +// even if padding is involved and the mathematically correct answer is outside +// (either negative or too large). This is a bug, but fixing it is difficult to do +// in a safe backwards compatible way, especially due to flattening. // // Arguments: -// sp_indices: 2-D. `N x R` matrix with the indices of non-empty values in a -// SparseTensor, possibly not in canonical ordering. -// sp_values: 1-D. `N` non-empty values corresponding to `sp_indices`. -// sp_shape: 1-D. Shape of the input SparseTensor. -// dense: `R`-D. The dense Tensor operand. +// input: 4-D with shape `[batch, height, width, channels]`. Input to pool over. +// ksize: The size of the window for each dimension of the input tensor. +// strides: The stride of the sliding window for each dimension of the +// input tensor. +// padding: The type of padding algorithm to use. // -// Returns 1-D. The `N` values that are operated on. -func SparseDenseCwiseMul(scope *Scope, sp_indices tf.Output, sp_values tf.Output, sp_shape tf.Output, dense tf.Output) (output tf.Output) { +// Returns The max pooled output tensor.4-D. The flattened indices of the max values chosen for each output. 
+func MaxPoolWithArgmax(scope *Scope, input tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPoolWithArgmaxAttr) (output tf.Output, argmax tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "SparseDenseCwiseMul", + Type: "MaxPoolWithArgmax", Input: []tf.Input{ - sp_indices, sp_values, sp_shape, dense, + input, }, + Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1) } -// ResourceSparseApplyRMSPropAttr is an optional argument to ResourceSparseApplyRMSProp. -type ResourceSparseApplyRMSPropAttr func(optionalAttr) +// ResourceSparseApplyAdagradDAAttr is an optional argument to ResourceSparseApplyAdagradDA. +type ResourceSparseApplyAdagradDAAttr func(optionalAttr) -// ResourceSparseApplyRMSPropUseLocking sets the optional use_locking attribute to value. +// ResourceSparseApplyAdagradDAUseLocking sets the optional use_locking attribute to value. // -// value: If `True`, updating of the var, ms, and mom tensors is protected -// by a lock; otherwise the behavior is undefined, but may exhibit less -// contention. +// value: If True, updating of the var and accum tensors will be protected by +// a lock; otherwise the behavior is undefined, but may exhibit less contention. // If not specified, defaults to false -func ResourceSparseApplyRMSPropUseLocking(value bool) ResourceSparseApplyRMSPropAttr { +func ResourceSparseApplyAdagradDAUseLocking(value bool) ResourceSparseApplyAdagradDAAttr { return func(m optionalAttr) { m["use_locking"] = value } } -// Update '*var' according to the RMSProp algorithm. -// -// Note that in dense implementation of this algorithm, ms and mom will -// update even if the grad is zero, but in this sparse implementation, ms -// and mom will not update in iterations during which the grad is zero. 
-// -// mean_square = decay * mean_square + (1-decay) * gradient ** 2 -// Delta = learning_rate * gradient / sqrt(mean_square + epsilon) -// -// ms <- rho * ms_{t-1} + (1-rho) * grad * grad -// mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon) -// var <- var - mom +// Update entries in '*var' and '*accum' according to the proximal adagrad scheme. // // Arguments: // var_: Should be from a Variable(). -// ms: Should be from a Variable(). -// mom: Should be from a Variable(). -// lr: Scaling factor. Must be a scalar. -// rho: Decay rate. Must be a scalar. -// -// epsilon: Ridge term. Must be a scalar. +// gradient_accumulator: Should be from a Variable(). +// gradient_squared_accumulator: Should be from a Variable(). // grad: The gradient. -// indices: A vector of indices into the first dimension of var, ms and mom. +// indices: A vector of indices into the first dimension of var and accum. +// lr: Learning rate. Must be a scalar. +// l1: L1 regularization. Must be a scalar. +// l2: L2 regularization. Must be a scalar. +// global_step: Training step number. Must be a scalar. // // Returns the created operation. 
-func ResourceSparseApplyRMSProp(scope *Scope, var_ tf.Output, ms tf.Output, mom tf.Output, lr tf.Output, rho tf.Output, momentum tf.Output, epsilon tf.Output, grad tf.Output, indices tf.Output, optional ...ResourceSparseApplyRMSPropAttr) (o *tf.Operation) { +func ResourceSparseApplyAdagradDA(scope *Scope, var_ tf.Output, gradient_accumulator tf.Output, gradient_squared_accumulator tf.Output, grad tf.Output, indices tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, global_step tf.Output, optional ...ResourceSparseApplyAdagradDAAttr) (o *tf.Operation) { if scope.Err() != nil { return } @@ -9397,273 +11796,204 @@ func ResourceSparseApplyRMSProp(scope *Scope, var_ tf.Output, ms tf.Output, mom a(attrs) } opspec := tf.OpSpec{ - Type: "ResourceSparseApplyRMSProp", + Type: "ResourceSparseApplyAdagradDA", Input: []tf.Input{ - var_, ms, mom, lr, rho, momentum, epsilon, grad, indices, + var_, gradient_accumulator, gradient_squared_accumulator, grad, indices, lr, l1, l2, global_step, }, Attrs: attrs, } return scope.AddOperation(opspec) } -// Returns the truth value of (x > y) element-wise. +// EncodeJpegAttr is an optional argument to EncodeJpeg. +type EncodeJpegAttr func(optionalAttr) + +// EncodeJpegFormat sets the optional format attribute to value. // -// *NOTE*: `Greater` supports broadcasting. More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func Greater(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Greater", - Input: []tf.Input{ - x, y, - }, +// value: Per pixel image format. +// If not specified, defaults to "" +func EncodeJpegFormat(value string) EncodeJpegAttr { + return func(m optionalAttr) { + m["format"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// SampleDistortedBoundingBoxAttr is an optional argument to SampleDistortedBoundingBox. 
-type SampleDistortedBoundingBoxAttr func(optionalAttr) - -// SampleDistortedBoundingBoxSeed sets the optional seed attribute to value. +// EncodeJpegQuality sets the optional quality attribute to value. // -// value: If either `seed` or `seed2` are set to non-zero, the random number -// generator is seeded by the given `seed`. Otherwise, it is seeded by a random -// seed. -// If not specified, defaults to 0 -func SampleDistortedBoundingBoxSeed(value int64) SampleDistortedBoundingBoxAttr { +// value: Quality of the compression from 0 to 100 (higher is better and slower). +// If not specified, defaults to 95 +func EncodeJpegQuality(value int64) EncodeJpegAttr { return func(m optionalAttr) { - m["seed"] = value + m["quality"] = value } } -// SampleDistortedBoundingBoxSeed2 sets the optional seed2 attribute to value. +// EncodeJpegProgressive sets the optional progressive attribute to value. // -// value: A second seed to avoid seed collision. -// If not specified, defaults to 0 -func SampleDistortedBoundingBoxSeed2(value int64) SampleDistortedBoundingBoxAttr { +// value: If True, create a JPEG that loads progressively (coarse to fine). +// If not specified, defaults to false +func EncodeJpegProgressive(value bool) EncodeJpegAttr { return func(m optionalAttr) { - m["seed2"] = value + m["progressive"] = value } } -// SampleDistortedBoundingBoxMinObjectCovered sets the optional min_object_covered attribute to value. +// EncodeJpegOptimizeSize sets the optional optimize_size attribute to value. // -// value: The cropped area of the image must contain at least this -// fraction of any bounding box supplied. The value of this parameter should be -// non-negative. In the case of 0, the cropped area does not need to overlap -// any of the bounding boxes supplied. -// If not specified, defaults to 0.1 -func SampleDistortedBoundingBoxMinObjectCovered(value float32) SampleDistortedBoundingBoxAttr { +// value: If True, spend CPU/RAM to reduce size with no quality change. 
+// If not specified, defaults to false +func EncodeJpegOptimizeSize(value bool) EncodeJpegAttr { return func(m optionalAttr) { - m["min_object_covered"] = value + m["optimize_size"] = value } } -// SampleDistortedBoundingBoxAspectRatioRange sets the optional aspect_ratio_range attribute to value. +// EncodeJpegChromaDownsampling sets the optional chroma_downsampling attribute to value. // -// value: The cropped area of the image must have an aspect ratio = -// width / height within this range. -// If not specified, defaults to -func SampleDistortedBoundingBoxAspectRatioRange(value []float32) SampleDistortedBoundingBoxAttr { +// value: See http://en.wikipedia.org/wiki/Chroma_subsampling. +// If not specified, defaults to true +func EncodeJpegChromaDownsampling(value bool) EncodeJpegAttr { return func(m optionalAttr) { - m["aspect_ratio_range"] = value + m["chroma_downsampling"] = value } } -// SampleDistortedBoundingBoxAreaRange sets the optional area_range attribute to value. +// EncodeJpegDensityUnit sets the optional density_unit attribute to value. // -// value: The cropped area of the image must contain a fraction of the -// supplied image within in this range. -// If not specified, defaults to -func SampleDistortedBoundingBoxAreaRange(value []float32) SampleDistortedBoundingBoxAttr { +// value: Unit used to specify `x_density` and `y_density`: +// pixels per inch (`'in'`) or centimeter (`'cm'`). +// If not specified, defaults to "in" +func EncodeJpegDensityUnit(value string) EncodeJpegAttr { return func(m optionalAttr) { - m["area_range"] = value + m["density_unit"] = value } } -// SampleDistortedBoundingBoxMaxAttempts sets the optional max_attempts attribute to value. +// EncodeJpegXDensity sets the optional x_density attribute to value. // -// value: Number of attempts at generating a cropped region of the image -// of the specified constraints. After `max_attempts` failures, return the entire -// image. 
-// If not specified, defaults to 100 -func SampleDistortedBoundingBoxMaxAttempts(value int64) SampleDistortedBoundingBoxAttr { +// value: Horizontal pixels per density unit. +// If not specified, defaults to 300 +func EncodeJpegXDensity(value int64) EncodeJpegAttr { return func(m optionalAttr) { - m["max_attempts"] = value + m["x_density"] = value } } -// SampleDistortedBoundingBoxUseImageIfNoBoundingBoxes sets the optional use_image_if_no_bounding_boxes attribute to value. +// EncodeJpegYDensity sets the optional y_density attribute to value. // -// value: Controls behavior if no bounding boxes supplied. -// If true, assume an implicit bounding box covering the whole input. If false, -// raise an error. -// If not specified, defaults to false -func SampleDistortedBoundingBoxUseImageIfNoBoundingBoxes(value bool) SampleDistortedBoundingBoxAttr { +// value: Vertical pixels per density unit. +// If not specified, defaults to 300 +func EncodeJpegYDensity(value int64) EncodeJpegAttr { return func(m optionalAttr) { - m["use_image_if_no_bounding_boxes"] = value + m["y_density"] = value } } -// Generate a single randomly distorted bounding box for an image. -// -// Bounding box annotations are often supplied in addition to ground-truth labels -// in image recognition or object localization tasks. A common technique for -// training such a system is to randomly distort an image while preserving -// its content, i.e. *data augmentation*. This Op outputs a randomly distorted -// localization of an object, i.e. bounding box, given an `image_size`, -// `bounding_boxes` and a series of constraints. -// -// The output of this Op is a single bounding box that may be used to crop the -// original image. The output is returned as 3 tensors: `begin`, `size` and -// `bboxes`. The first 2 tensors can be fed directly into `tf.slice` to crop the -// image. The latter may be supplied to `tf.image.draw_bounding_boxes` to visualize -// what the bounding box looks like. 
-// -// Bounding boxes are supplied and returned as `[y_min, x_min, y_max, x_max]`. The -// bounding box coordinates are floats in `[0.0, 1.0]` relative to the width and -// height of the underlying image. -// -// For example, -// -// ```python -// # Generate a single distorted bounding box. -// begin, size, bbox_for_draw = tf.image.sample_distorted_bounding_box( -// tf.shape(image), -// bounding_boxes=bounding_boxes) -// -// # Draw the bounding box in an image summary. -// image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0), -// bbox_for_draw) -// tf.summary.image('images_with_box', image_with_box) -// -// # Employ the bounding box to distort the image. -// distorted_image = tf.slice(image, begin, size) -// ``` -// -// Note that if no bounding box information is available, setting -// `use_image_if_no_bounding_boxes = true` will assume there is a single implicit -// bounding box covering the whole image. If `use_image_if_no_bounding_boxes` is -// false and no bounding boxes are supplied, an error is raised. -// -// Arguments: -// image_size: 1-D, containing `[height, width, channels]`. -// bounding_boxes: 3-D with shape `[batch, N, 4]` describing the N bounding boxes -// associated with the image. +// EncodeJpegXmpMetadata sets the optional xmp_metadata attribute to value. // -// Returns 1-D, containing `[offset_height, offset_width, 0]`. Provide as input to -// `tf.slice`.1-D, containing `[target_height, target_width, -1]`. Provide as input to -// `tf.slice`.3-D with shape `[1, 1, 4]` containing the distorted bounding box. -// Provide as input to `tf.image.draw_bounding_boxes`. 
-func SampleDistortedBoundingBox(scope *Scope, image_size tf.Output, bounding_boxes tf.Output, optional ...SampleDistortedBoundingBoxAttr) (begin tf.Output, size tf.Output, bboxes tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "SampleDistortedBoundingBox", - Input: []tf.Input{ - image_size, bounding_boxes, - }, - Attrs: attrs, +// value: If not empty, embed this XMP metadata in the image header. +// If not specified, defaults to "" +func EncodeJpegXmpMetadata(value string) EncodeJpegAttr { + return func(m optionalAttr) { + m["xmp_metadata"] = value } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) } -// Converts each string in the input Tensor to its hash mod by a number of buckets. +// JPEG-encode an image. // -// The hash function is deterministic on the content of the string within the -// process and will never change. However, it is not suitable for cryptography. -// This function may be used when CPU time is scarce and inputs are trusted or -// unimportant. There is a risk of adversaries constructing inputs that all hash -// to the same bucket. To prevent this problem, use a strong hash function with -// `tf.string_to_hash_bucket_strong`. +// `image` is a 3-D uint8 Tensor of shape `[height, width, channels]`. // -// Arguments: -// input: The strings to assign a hash bucket. -// num_buckets: The number of buckets. +// The attr `format` can be used to override the color format of the encoded +// output. Values can be: // -// Returns A Tensor of the same shape as the input `string_tensor`. 
-func StringToHashBucketFast(scope *Scope, input tf.Output, num_buckets int64) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_buckets": num_buckets} - opspec := tf.OpSpec{ - Type: "StringToHashBucketFast", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns the max of x and y (i.e. x > y ? x : y) element-wise. +// * `''`: Use a default format based on the number of channels in the image. +// * `grayscale`: Output a grayscale JPEG image. The `channels` dimension +// of `image` must be 1. +// * `rgb`: Output an RGB JPEG image. The `channels` dimension +// of `image` must be 3. +// +// If `format` is not specified or is the empty string, a default format is picked +// in function of the number of channels in `image`: +// +// * 1: Output a grayscale image. +// * 3: Output an RGB image. // -// *NOTE*: `Maximum` supports broadcasting. More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func Maximum(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { +// Arguments: +// image: 3-D with shape `[height, width, channels]`. +// +// Returns 0-D. JPEG-encoded image. +func EncodeJpeg(scope *Scope, image tf.Output, optional ...EncodeJpegAttr) (contents tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "Maximum", + Type: "EncodeJpeg", Input: []tf.Input{ - x, y, + image, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// TensorArrayGatherV3Attr is an optional argument to TensorArrayGatherV3. -type TensorArrayGatherV3Attr func(optionalAttr) +// MultinomialAttr is an optional argument to Multinomial. +type MultinomialAttr func(optionalAttr) -// TensorArrayGatherV3ElementShape sets the optional element_shape attribute to value. 
+// MultinomialSeed sets the optional seed attribute to value. // -// value: The expected shape of an element, if known. Used to -// validate the shapes of TensorArray elements. If this shape is not -// fully specified, gathering zero-size TensorArrays is an error. -// If not specified, defaults to -func TensorArrayGatherV3ElementShape(value tf.Shape) TensorArrayGatherV3Attr { +// value: If either seed or seed2 is set to be non-zero, the internal random number +// generator is seeded by the given seed. Otherwise, a random seed is used. +// If not specified, defaults to 0 +func MultinomialSeed(value int64) MultinomialAttr { return func(m optionalAttr) { - m["element_shape"] = value + m["seed"] = value } } -// Gather specific elements from the TensorArray into output `value`. +// MultinomialSeed2 sets the optional seed2 attribute to value. // -// All elements selected by `indices` must have the same shape. +// value: A second seed to avoid seed collision. +// If not specified, defaults to 0 +func MultinomialSeed2(value int64) MultinomialAttr { + return func(m optionalAttr) { + m["seed2"] = value + } +} + +// MultinomialOutputDtype sets the optional output_dtype attribute to value. +// If not specified, defaults to DT_INT64 +func MultinomialOutputDtype(value tf.DataType) MultinomialAttr { + return func(m optionalAttr) { + m["output_dtype"] = value + } +} + +// Draws samples from a multinomial distribution. // // Arguments: -// handle: The handle to a TensorArray. -// indices: The locations in the TensorArray from which to read tensor elements. -// flow_in: A float scalar that enforces proper chaining of operations. -// dtype: The type of the elem that is returned. +// logits: 2-D Tensor with shape `[batch_size, num_classes]`. Each slice `[i, :]` +// represents the unnormalized log probabilities for all classes. +// num_samples: 0-D. Number of independent samples to draw for each row slice. 
// -// Returns All of the elements in the TensorArray, concatenated along a new -// axis (the new dimension 0). -func TensorArrayGatherV3(scope *Scope, handle tf.Output, indices tf.Output, flow_in tf.Output, dtype tf.DataType, optional ...TensorArrayGatherV3Attr) (value tf.Output) { +// Returns 2-D Tensor with shape `[batch_size, num_samples]`. Each slice `[i, :]` +// contains the drawn class labels with range `[0, num_classes)`. +func Multinomial(scope *Scope, logits tf.Output, num_samples tf.Output, optional ...MultinomialAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"dtype": dtype} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "TensorArrayGatherV3", + Type: "Multinomial", Input: []tf.Input{ - handle, indices, flow_in, + logits, num_samples, }, Attrs: attrs, } @@ -9671,232 +12001,242 @@ func TensorArrayGatherV3(scope *Scope, handle tf.Output, indices tf.Output, flow return op.Output(0) } -// Returns x / y element-wise for integer types. -// -// Truncation designates that negative numbers will round fractional quantities -// toward zero. I.e. -7 / 5 = -1. This matches C semantics but it is different -// than Python semantics. See `FloorDiv` for a division function that matches -// Python Semantics. -// -// *NOTE*: `TruncateDiv` supports broadcasting. More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func TruncateDiv(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { +// Returns the truth value of NOT x element-wise. +func LogicalNot(scope *Scope, x tf.Output) (y tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "TruncateDiv", + Type: "LogicalNot", Input: []tf.Input{ - x, y, + x, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Restores tensors from a V2 checkpoint. +// 3D real-valued fast Fourier transform. 
// -// For backward compatibility with the V1 format, this Op currently allows -// restoring from a V1 checkpoint as well: -// - This Op first attempts to find the V2 index file pointed to by "prefix", and -// if found proceed to read it as a V2 checkpoint; -// - Otherwise the V1 read path is invoked. -// Relying on this behavior is not recommended, as the ability to fall back to read -// V1 might be deprecated and eventually removed. +// Computes the 3-dimensional discrete Fourier transform of a real-valued signal +// over the inner-most 3 dimensions of `input`. // -// By default, restores the named tensors in full. If the caller wishes to restore -// specific slices of stored tensors, "shape_and_slices" should be non-empty -// strings and correspondingly well-formed. +// Since the DFT of a real signal is Hermitian-symmetric, `RFFT3D` only returns the +// `fft_length / 2 + 1` unique components of the FFT for the inner-most dimension +// of `output`: the zero-frequency term, followed by the `fft_length / 2` +// positive-frequency terms. // -// Callers must ensure all the named tensors are indeed stored in the checkpoint. +// Along each axis `RFFT3D` is computed on, if `fft_length` is smaller than the +// corresponding dimension of `input`, the dimension is cropped. If it is larger, +// the dimension is padded with zeros. // // Arguments: -// prefix: Must have a single element. The prefix of a V2 checkpoint. -// tensor_names: shape {N}. The names of the tensors to be restored. -// shape_and_slices: shape {N}. The slice specs of the tensors to be restored. -// Empty strings indicate that they are non-partitioned tensors. -// dtypes: shape {N}. The list of expected dtype for the tensors. Must match -// those stored in the checkpoint. +// input: A float32 tensor. +// fft_length: An int32 tensor of shape [3]. The FFT length for each dimension. // -// Returns shape {N}. The restored tensors, whose shapes are read from the -// checkpoint directly. 
-func RestoreV2(scope *Scope, prefix tf.Output, tensor_names tf.Output, shape_and_slices tf.Output, dtypes []tf.DataType) (tensors []tf.Output) { +// Returns A complex64 tensor of the same rank as `input`. The inner-most 3 +// dimensions of `input` are replaced with the their 3D Fourier transform. The +// inner-most dimension contains `fft_length / 2 + 1` unique frequency +// components. +// +// @compatibility(numpy) +// Equivalent to np.fft.rfftn with 3 dimensions. +// @end_compatibility +func RFFT3D(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"dtypes": dtypes} opspec := tf.OpSpec{ - Type: "RestoreV2", + Type: "RFFT3D", Input: []tf.Input{ - prefix, tensor_names, shape_and_slices, + input, fft_length, }, - Attrs: attrs, } op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if tensors, idx, err = makeOutputList(op, idx, "tensors"); err != nil { - scope.UpdateErr("RestoreV2", err) - return - } - return tensors + return op.Output(0) } -// Creates a dataset that skips `count` elements from the `input_dataset`. -// -// Arguments: -// -// count: A scalar representing the number of elements from the `input_dataset` -// that should be skipped. If count is -1, skips everything. -// +// TensorArrayV3Attr is an optional argument to TensorArrayV3. +type TensorArrayV3Attr func(optionalAttr) + +// TensorArrayV3ElementShape sets the optional element_shape attribute to value. // -func SkipDataset(scope *Scope, input_dataset tf.Output, count tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "SkipDataset", - Input: []tf.Input{ - input_dataset, count, - }, - Attrs: attrs, +// value: The expected shape of an element, if known. 
Used to +// validate the shapes of TensorArray elements. If this shape is not +// fully specified, gathering zero-size TensorArrays is an error. +// If not specified, defaults to +func TensorArrayV3ElementShape(value tf.Shape) TensorArrayV3Attr { + return func(m optionalAttr) { + m["element_shape"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// Computes the maximum along segments of a tensor. -// -// Read @{$math_ops#segmentation$the section on segmentation} for an explanation of -// segments. -// -// Computes a tensor such that -// \\(output_i = \max_j(data_j)\\) where `max` is over `j` such -// that `segment_ids[j] == i`. -// -// If the max is empty for a given segment ID `i`, `output[i] = 0`. -// -//
-// -//
-// -// Arguments: -// -// segment_ids: A 1-D tensor whose rank is equal to the rank of `data`'s -// first dimension. Values should be sorted and can be repeated. +// TensorArrayV3DynamicSize sets the optional dynamic_size attribute to value. // -// Returns Has same shape as data, except for dimension 0 which -// has size `k`, the number of segments. -func SegmentMax(scope *Scope, data tf.Output, segment_ids tf.Output) (output tf.Output) { - if scope.Err() != nil { - return +// value: A boolean that determines whether writes to the TensorArray +// are allowed to grow the size. By default, this is not allowed. +// If not specified, defaults to false +func TensorArrayV3DynamicSize(value bool) TensorArrayV3Attr { + return func(m optionalAttr) { + m["dynamic_size"] = value } - opspec := tf.OpSpec{ - Type: "SegmentMax", - Input: []tf.Input{ - data, segment_ids, - }, +} + +// TensorArrayV3ClearAfterRead sets the optional clear_after_read attribute to value. +// +// value: If true (default), Tensors in the TensorArray are cleared +// after being read. This disables multiple read semantics but allows early +// release of memory. +// If not specified, defaults to true +func TensorArrayV3ClearAfterRead(value bool) TensorArrayV3Attr { + return func(m optionalAttr) { + m["clear_after_read"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// Computes hyperbolic tangent of `x` element-wise. -func Tanh(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return +// TensorArrayV3IdenticalElementShapes sets the optional identical_element_shapes attribute to value. +// +// value: If true (default is false), then all +// elements in the TensorArray will be expected to have have identical shapes. +// This allows certain behaviors, like dynamically checking for +// consistent shapes on write, and being able to fill in properly +// shaped zero tensors on stack -- even if the element_shape attribute +// is not fully defined. 
+// If not specified, defaults to false +func TensorArrayV3IdenticalElementShapes(value bool) TensorArrayV3Attr { + return func(m optionalAttr) { + m["identical_element_shapes"] = value } - opspec := tf.OpSpec{ - Type: "Tanh", - Input: []tf.Input{ - x, - }, +} + +// TensorArrayV3TensorArrayName sets the optional tensor_array_name attribute to value. +// +// value: Overrides the name used for the temporary tensor_array +// resource. Default value is the name of the 'TensorArray' op (which +// is guaranteed unique). +// If not specified, defaults to "" +func TensorArrayV3TensorArrayName(value string) TensorArrayV3Attr { + return func(m optionalAttr) { + m["tensor_array_name"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// Decode web-safe base64-encoded strings. +// An array of Tensors of given size. // -// Input may or may not have padding at the end. See EncodeBase64 for padding. -// Web-safe means that input must use - and _ instead of + and /. +// Write data via Write and read via Read or Pack. // // Arguments: -// input: Base64 strings to decode. +// size: The size of the array. +// dtype: The type of the elements on the tensor_array. // -// Returns Decoded strings. -func DecodeBase64(scope *Scope, input tf.Output) (output tf.Output) { +// Returns The handle to the TensorArray.A scalar used to control gradient flow. +func TensorArrayV3(scope *Scope, size tf.Output, dtype tf.DataType, optional ...TensorArrayV3Attr) (handle tf.Output, flow tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"dtype": dtype} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "DecodeBase64", + Type: "TensorArrayV3", Input: []tf.Input{ - input, + size, }, + Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1) } -// Store the input tensor in the state of the current session. 
+// Runs multiple additive regression ensemble predictors on input instances and +// +// computes the logits. It is designed to be used during prediction. +// It traverses all the trees and calculates the final score for each instance. // // Arguments: -// value: The tensor to be stored. // -// Returns The handle for the tensor stored in the session state, represented -// as a string. -func GetSessionHandle(scope *Scope, value tf.Output) (handle tf.Output) { +// bucketized_features: A list of rank 1 Tensors containing bucket id for each +// feature. +// logits_dimension: scalar, dimension of the logits, to be used for partial logits +// shape. +// +// Returns Output rank 2 Tensor containing logits for each example. +func BoostedTreesPredict(scope *Scope, tree_ensemble_handle tf.Output, bucketized_features []tf.Output, logits_dimension int64) (logits tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"logits_dimension": logits_dimension} opspec := tf.OpSpec{ - Type: "GetSessionHandle", + Type: "BoostedTreesPredict", Input: []tf.Input{ - value, + tree_ensemble_handle, tf.OutputList(bucketized_features), }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// ResourceSparseApplyProximalAdagradAttr is an optional argument to ResourceSparseApplyProximalAdagrad. -type ResourceSparseApplyProximalAdagradAttr func(optionalAttr) +// MatrixSolveLsAttr is an optional argument to MatrixSolveLs. +type MatrixSolveLsAttr func(optionalAttr) -// ResourceSparseApplyProximalAdagradUseLocking sets the optional use_locking attribute to value. -// -// value: If True, updating of the var and accum tensors will be protected by -// a lock; otherwise the behavior is undefined, but may exhibit less contention. -// If not specified, defaults to false -func ResourceSparseApplyProximalAdagradUseLocking(value bool) ResourceSparseApplyProximalAdagradAttr { +// MatrixSolveLsFast sets the optional fast attribute to value. 
+// If not specified, defaults to true +func MatrixSolveLsFast(value bool) MatrixSolveLsAttr { return func(m optionalAttr) { - m["use_locking"] = value + m["fast"] = value } } -// Sparse update entries in '*var' and '*accum' according to FOBOS algorithm. +// Solves one or more linear least-squares problems. // -// That is for rows we have grad for, we update var and accum as follows: -// accum += grad * grad -// prox_v = var -// prox_v -= lr * grad * (1 / sqrt(accum)) -// var = sign(prox_v)/(1+lr*l2) * max{|prox_v|-lr*l1,0} +// `matrix` is a tensor of shape `[..., M, N]` whose inner-most 2 dimensions +// form real or complex matrices of size `[M, N]`. `Rhs` is a tensor of the same +// type as `matrix` and shape `[..., M, K]`. +// The output is a tensor shape `[..., N, K]` where each output matrix solves +// each of the equations +// `matrix[..., :, :]` * `output[..., :, :]` = `rhs[..., :, :]` +// in the least squares sense. +// +// We use the following notation for (complex) matrix and right-hand sides +// in the batch: +// +// `matrix`=\\(A \in \mathbb{C}^{m \times n}\\), +// `rhs`=\\(B \in \mathbb{C}^{m \times k}\\), +// `output`=\\(X \in \mathbb{C}^{n \times k}\\), +// `l2_regularizer`=\\(\lambda \in \mathbb{R}\\). +// +// If `fast` is `True`, then the solution is computed by solving the normal +// equations using Cholesky decomposition. Specifically, if \\(m \ge n\\) then +// \\(X = (A^H A + \lambda I)^{-1} A^H B\\), which solves the least-squares +// problem \\(X = \mathrm{argmin}_{Z \in \Re^{n \times k} } ||A Z - B||_F^2 + \lambda ||Z||_F^2\\). +// If \\(m \lt n\\) then `output` is computed as +// \\(X = A^H (A A^H + \lambda I)^{-1} B\\), which (for \\(\lambda = 0\\)) is the +// minimum-norm solution to the under-determined linear system, i.e. +// \\(X = \mathrm{argmin}_{Z \in \mathbb{C}^{n \times k} } ||Z||_F^2 \\), +// subject to \\(A Z = B\\). 
Notice that the fast path is only numerically stable +// when \\(A\\) is numerically full rank and has a condition number +// \\(\mathrm{cond}(A) \lt \frac{1}{\sqrt{\epsilon_{mach} } }\\) or \\(\lambda\\) is +// sufficiently large. +// +// If `fast` is `False` an algorithm based on the numerically robust complete +// orthogonal decomposition is used. This computes the minimum-norm +// least-squares solution, even when \\(A\\) is rank deficient. This path is +// typically 6-7 times slower than the fast path. If `fast` is `False` then +// `l2_regularizer` is ignored. // // Arguments: -// var_: Should be from a Variable(). -// accum: Should be from a Variable(). -// lr: Learning rate. Must be a scalar. -// l1: L1 regularization. Must be a scalar. -// l2: L2 regularization. Must be a scalar. -// grad: The gradient. -// indices: A vector of indices into the first dimension of var and accum. +// matrix: Shape is `[..., M, N]`. +// rhs: Shape is `[..., M, K]`. +// l2_regularizer: Scalar tensor. // -// Returns the created operation. -func ResourceSparseApplyProximalAdagrad(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, grad tf.Output, indices tf.Output, optional ...ResourceSparseApplyProximalAdagradAttr) (o *tf.Operation) { +// @compatibility(numpy) +// Equivalent to np.linalg.lstsq +// @end_compatibility +// +// Returns Shape is `[..., N, K]`. 
+func MatrixSolveLs(scope *Scope, matrix tf.Output, rhs tf.Output, l2_regularizer tf.Output, optional ...MatrixSolveLsAttr) (output tf.Output) { if scope.Err() != nil { return } @@ -9905,168 +12245,74 @@ func ResourceSparseApplyProximalAdagrad(scope *Scope, var_ tf.Output, accum tf.O a(attrs) } opspec := tf.OpSpec{ - Type: "ResourceSparseApplyProximalAdagrad", + Type: "MatrixSolveLs", Input: []tf.Input{ - var_, accum, lr, l1, l2, grad, indices, + matrix, rhs, l2_regularizer, }, Attrs: attrs, } - return scope.AddOperation(opspec) -} - -// Returns element-wise largest integer not greater than x. -func Floor(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Floor", - Input: []tf.Input{ - x, - }, - } op := scope.AddOperation(opspec) return op.Output(0) } -// Computes the Gauss error function of `x` element-wise. -func Erf(scope *Scope, x tf.Output) (y tf.Output) { +// Elementwise computes the bitwise OR of `x` and `y`. +// +// The result will have those bits set, that are set in `x`, `y` or both. The +// computation is performed on the underlying representations of `x` and `y`. +func BitwiseOr(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Erf", + Type: "BitwiseOr", Input: []tf.Input{ - x, + x, y, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// OneHotAttr is an optional argument to OneHot. -type OneHotAttr func(optionalAttr) +// MaxPool3DAttr is an optional argument to MaxPool3D. +type MaxPool3DAttr func(optionalAttr) -// OneHotAxis sets the optional axis attribute to value. +// MaxPool3DDataFormat sets the optional data_format attribute to value. // -// value: The axis to fill (default: -1, a new inner-most axis). -// If not specified, defaults to -1 -func OneHotAxis(value int64) OneHotAttr { +// value: The data format of the input and output data. 
With the +// default format "NDHWC", the data is stored in the order of: +// [batch, in_depth, in_height, in_width, in_channels]. +// Alternatively, the format could be "NCDHW", the data storage order is: +// [batch, in_channels, in_depth, in_height, in_width]. +// If not specified, defaults to "NDHWC" +func MaxPool3DDataFormat(value string) MaxPool3DAttr { return func(m optionalAttr) { - m["axis"] = value + m["data_format"] = value } } -// Returns a one-hot tensor. -// -// The locations represented by indices in `indices` take value `on_value`, -// while all other locations take value `off_value`. -// -// If the input `indices` is rank `N`, the output will have rank `N+1`, -// The new axis is created at dimension `axis` (default: the new axis is -// appended at the end). -// -// If `indices` is a scalar the output shape will be a vector of length `depth`. -// -// If `indices` is a vector of length `features`, the output shape will be: -// ``` -// features x depth if axis == -1 -// depth x features if axis == 0 -// ``` -// -// If `indices` is a matrix (batch) with shape `[batch, features]`, -// the output shape will be: -// ``` -// batch x features x depth if axis == -1 -// batch x depth x features if axis == 1 -// depth x batch x features if axis == 0 -// ``` -// -// -// Examples -// ========= -// -// Suppose that -// -// ``` -// indices = [0, 2, -1, 1] -// depth = 3 -// on_value = 5.0 -// off_value = 0.0 -// axis = -1 -// ``` -// -// Then output is `[4 x 3]`: -// -// ```output = -// [5.0 0.0 0.0] // one_hot(0) -// [0.0 0.0 5.0] // one_hot(2) -// [0.0 0.0 0.0] // one_hot(-1) -// [0.0 5.0 0.0] // one_hot(1) -// ``` -// -// Suppose that -// -// ``` -// indices = [0, 2, -1, 1] -// depth = 3 -// on_value = 0.0 -// off_value = 3.0 -// axis = 0 -// ``` -// -// Then output is `[3 x 4]`: -// -// ```output = -// [0.0 3.0 3.0 3.0] -// [3.0 3.0 3.0 0.0] -// [3.0 3.0 3.0 3.0] -// [3.0 0.0 3.0 3.0] -// // ^ one_hot(0) -// // ^ one_hot(2) -// // ^ one_hot(-1) -// // ^ 
one_hot(1) -// ``` -// Suppose that -// -// ``` -// indices = [[0, 2], [1, -1]] -// depth = 3 -// on_value = 1.0 -// off_value = 0.0 -// axis = -1 -// ``` -// -// Then output is `[2 x 2 x 3]`: -// -// ```output = -// [ -// [1.0, 0.0, 0.0] // one_hot(0) -// [0.0, 0.0, 1.0] // one_hot(2) -// ][ -// [0.0, 1.0, 0.0] // one_hot(1) -// [0.0, 0.0, 0.0] // one_hot(-1) -// ]``` +// Performs 3D max pooling on the input. // -// Arguments: -// indices: A tensor of indices. -// depth: A scalar defining the depth of the one hot dimension. -// on_value: A scalar defining the value to fill in output when `indices[j] = i`. -// off_value: A scalar defining the value to fill in output when `indices[j] != i`. +// Arguments: +// input: Shape `[batch, depth, rows, cols, channels]` tensor to pool over. +// ksize: 1-D tensor of length 5. The size of the window for each dimension of +// the input tensor. Must have `ksize[0] = ksize[4] = 1`. +// strides: 1-D tensor of length 5. The stride of the sliding window for each +// dimension of `input`. Must have `strides[0] = strides[4] = 1`. +// padding: The type of padding algorithm to use. // -// Returns The one-hot tensor. -func OneHot(scope *Scope, indices tf.Output, depth tf.Output, on_value tf.Output, off_value tf.Output, optional ...OneHotAttr) (output tf.Output) { +// Returns The max pooled output tensor. 
+func MaxPool3D(scope *Scope, input tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPool3DAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "OneHot", + Type: "MaxPool3D", Input: []tf.Input{ - indices, depth, on_value, off_value, + input, }, Attrs: attrs, } @@ -10074,27 +12320,42 @@ func OneHot(scope *Scope, indices tf.Output, depth tf.Output, on_value tf.Output return op.Output(0) } -// Reads the value of a variable. -// -// The tensor returned by this operation is immutable. +// Conv3DBackpropInputAttr is an optional argument to Conv3DBackpropInput. +type Conv3DBackpropInputAttr func(optionalAttr) + +// Conv3DBackpropInputDilations sets the optional dilations attribute to value. +// If not specified, defaults to +func Conv3DBackpropInputDilations(value []int64) Conv3DBackpropInputAttr { + return func(m optionalAttr) { + m["dilations"] = value + } +} + +// Computes the gradients of 3-D convolution with respect to the input. // -// The value returned by this operation is guaranteed to be influenced by all the -// writes on which this operation depends directly or indirectly, and to not be -// influenced by any of the writes which depend directly or indirectly on this -// operation. +// DEPRECATED at GraphDef version 10: Use Conv3DBackpropInputV2 // // Arguments: -// resource: handle to the resource in which to store the variable. -// dtype: the dtype of the value. -func ReadVariableOp(scope *Scope, resource tf.Output, dtype tf.DataType) (value tf.Output) { +// input: Shape `[batch, depth, rows, cols, in_channels]`. +// filter: Shape `[depth, rows, cols, in_channels, out_channels]`. +// `in_channels` must match between `input` and `filter`. 
+// out_backprop: Backprop signal of shape `[batch, out_depth, out_rows, out_cols, +// out_channels]`. +// strides: 1-D tensor of length 5. The stride of the sliding window for each +// dimension of `input`. Must have `strides[0] = strides[4] = 1`. +// padding: The type of padding algorithm to use. +func Conv3DBackpropInput(scope *Scope, input tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...Conv3DBackpropInputAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"dtype": dtype} + attrs := map[string]interface{}{"strides": strides, "padding": padding} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "ReadVariableOp", + Type: "Conv3DBackpropInput", Input: []tf.Input{ - resource, + input, filter, out_backprop, }, Attrs: attrs, } @@ -10102,268 +12363,280 @@ func ReadVariableOp(scope *Scope, resource tf.Output, dtype tf.DataType) (value return op.Output(0) } -// MaxPool3DGradAttr is an optional argument to MaxPool3DGrad. -type MaxPool3DGradAttr func(optionalAttr) +// ResourceApplyProximalAdagradAttr is an optional argument to ResourceApplyProximalAdagrad. +type ResourceApplyProximalAdagradAttr func(optionalAttr) -// MaxPool3DGradDataFormat sets the optional data_format attribute to value. +// ResourceApplyProximalAdagradUseLocking sets the optional use_locking attribute to value. // -// value: The data format of the input and output data. With the -// default format "NDHWC", the data is stored in the order of: -// [batch, in_depth, in_height, in_width, in_channels]. -// Alternatively, the format could be "NCDHW", the data storage order is: -// [batch, in_channels, in_depth, in_height, in_width]. 
-// If not specified, defaults to "NDHWC" -func MaxPool3DGradDataFormat(value string) MaxPool3DGradAttr { +// value: If True, updating of the var and accum tensors will be protected by +// a lock; otherwise the behavior is undefined, but may exhibit less contention. +// If not specified, defaults to false +func ResourceApplyProximalAdagradUseLocking(value bool) ResourceApplyProximalAdagradAttr { return func(m optionalAttr) { - m["data_format"] = value + m["use_locking"] = value } } -// Computes gradients of max pooling function. +// Update '*var' and '*accum' according to FOBOS with Adagrad learning rate. +// +// accum += grad * grad +// prox_v = var - lr * grad * (1 / sqrt(accum)) +// var = sign(prox_v)/(1+lr*l2) * max{|prox_v|-lr*l1,0} // // Arguments: -// orig_input: The original input tensor. -// orig_output: The original output tensor. -// grad: Output backprop of shape `[batch, depth, rows, cols, channels]`. -// ksize: 1-D tensor of length 5. The size of the window for each dimension of -// the input tensor. Must have `ksize[0] = ksize[4] = 1`. -// strides: 1-D tensor of length 5. The stride of the sliding window for each -// dimension of `input`. Must have `strides[0] = strides[4] = 1`. -// padding: The type of padding algorithm to use. -func MaxPool3DGrad(scope *Scope, orig_input tf.Output, orig_output tf.Output, grad tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPool3DGradAttr) (output tf.Output) { +// var_: Should be from a Variable(). +// accum: Should be from a Variable(). +// lr: Scaling factor. Must be a scalar. +// l1: L1 regularization. Must be a scalar. +// l2: L2 regularization. Must be a scalar. +// grad: The gradient. +// +// Returns the created operation. 
+func ResourceApplyProximalAdagrad(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, grad tf.Output, optional ...ResourceApplyProximalAdagradAttr) (o *tf.Operation) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "MaxPool3DGrad", + Type: "ResourceApplyProximalAdagrad", Input: []tf.Input{ - orig_input, orig_output, grad, + var_, accum, lr, l1, l2, grad, }, Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } -// SparseReduceSumAttr is an optional argument to SparseReduceSum. -type SparseReduceSumAttr func(optionalAttr) +// MutableHashTableOfTensorsV2Attr is an optional argument to MutableHashTableOfTensorsV2. +type MutableHashTableOfTensorsV2Attr func(optionalAttr) -// SparseReduceSumKeepDims sets the optional keep_dims attribute to value. +// MutableHashTableOfTensorsV2Container sets the optional container attribute to value. // -// value: If true, retain reduced dimensions with length 1. -// If not specified, defaults to false -func SparseReduceSumKeepDims(value bool) SparseReduceSumAttr { +// value: If non-empty, this table is placed in the given container. +// Otherwise, a default container is used. +// If not specified, defaults to "" +func MutableHashTableOfTensorsV2Container(value string) MutableHashTableOfTensorsV2Attr { return func(m optionalAttr) { - m["keep_dims"] = value + m["container"] = value } } -// Computes the sum of elements across dimensions of a SparseTensor. -// -// This Op takes a SparseTensor and is the sparse counterpart to -// `tf.reduce_sum()`. In particular, this Op also returns a dense `Tensor` -// instead of a sparse one. +// MutableHashTableOfTensorsV2SharedName sets the optional shared_name attribute to value. 
// -// Reduces `sp_input` along the dimensions given in `reduction_axes`. Unless -// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in -// `reduction_axes`. If `keep_dims` is true, the reduced dimensions are retained -// with length 1. +// value: If non-empty, this table is shared under the given name across +// multiple sessions. +// If not specified, defaults to "" +func MutableHashTableOfTensorsV2SharedName(value string) MutableHashTableOfTensorsV2Attr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// MutableHashTableOfTensorsV2UseNodeNameSharing sets the optional use_node_name_sharing attribute to value. +// If not specified, defaults to false +func MutableHashTableOfTensorsV2UseNodeNameSharing(value bool) MutableHashTableOfTensorsV2Attr { + return func(m optionalAttr) { + m["use_node_name_sharing"] = value + } +} + +// MutableHashTableOfTensorsV2ValueShape sets the optional value_shape attribute to value. +// If not specified, defaults to <> +func MutableHashTableOfTensorsV2ValueShape(value tf.Shape) MutableHashTableOfTensorsV2Attr { + return func(m optionalAttr) { + m["value_shape"] = value + } +} + +// Creates an empty hash table. // -// If `reduction_axes` has no entries, all dimensions are reduced, and a tensor -// with a single element is returned. Additionally, the axes can be negative, -// which are interpreted according to the indexing rules in Python. +// This op creates a mutable hash table, specifying the type of its keys and +// values. Each value must be a vector. Data can be inserted into the table using +// the insert operations. It does not support the initialization operation. // // Arguments: -// input_indices: 2-D. `N x R` matrix with the indices of non-empty values in a -// SparseTensor, possibly not in canonical ordering. -// input_values: 1-D. `N` non-empty values corresponding to `input_indices`. -// input_shape: 1-D. Shape of the input SparseTensor. -// reduction_axes: 1-D. 
Length-`K` vector containing the reduction axes. +// key_dtype: Type of the table keys. +// value_dtype: Type of the table values. // -// Returns `R-K`-D. The reduced Tensor. -func SparseReduceSum(scope *Scope, input_indices tf.Output, input_values tf.Output, input_shape tf.Output, reduction_axes tf.Output, optional ...SparseReduceSumAttr) (output tf.Output) { +// Returns Handle to a table. +func MutableHashTableOfTensorsV2(scope *Scope, key_dtype tf.DataType, value_dtype tf.DataType, optional ...MutableHashTableOfTensorsV2Attr) (table_handle tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"key_dtype": key_dtype, "value_dtype": value_dtype} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "SparseReduceSum", + Type: "MutableHashTableOfTensorsV2", + + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Subtracts sparse updates from the variable referenced by `resource`. +// +// This operation computes +// +// # Scalar indices +// ref[indices, ...] -= updates[...] +// +// # Vector indices (for each i) +// ref[indices[i], ...] -= updates[i, ...] +// +// # High rank indices (for each i, ..., j) +// ref[indices[i, ..., j], ...] -= updates[i, ..., j, ...] +// +// Duplicate entries are handled correctly: if multiple `indices` reference +// the same location, their contributions add. +// +// Requires `updates.shape = indices.shape + ref.shape[1:]` or `updates.shape = []`. +// +//
+// +//
+// +// Arguments: +// resource: Should be from a `Variable` node. +// indices: A tensor of indices into the first dimension of `ref`. +// updates: A tensor of updated values to add to `ref`. +// +// Returns the created operation. +func ResourceScatterSub(scope *Scope, resource tf.Output, indices tf.Output, updates tf.Output) (o *tf.Operation) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "ResourceScatterSub", Input: []tf.Input{ - input_indices, input_values, input_shape, reduction_axes, + resource, indices, updates, + }, + } + return scope.AddOperation(opspec) +} + +// Inverse 2D fast Fourier transform. +// +// Computes the inverse 2-dimensional discrete Fourier transform over the +// inner-most 2 dimensions of `input`. +// +// Arguments: +// input: A complex64 tensor. +// +// Returns A complex64 tensor of the same shape as `input`. The inner-most 2 +// dimensions of `input` are replaced with their inverse 2D Fourier transform. +// +// @compatibility(numpy) +// Equivalent to np.fft.ifft2 +// @end_compatibility +func IFFT2D(scope *Scope, input tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "IFFT2D", + Input: []tf.Input{ + input, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Returns element-wise remainder of division. This emulates C semantics in that +// Creates a tensor filled with a scalar value. // -// the result here is consistent with a truncating divide. E.g. `truncate(x / y) * -// y + truncate_mod(x, y) = x`. +// This operation creates a tensor of shape `dims` and fills it with `value`. +// +// For example: +// +// ``` +// # Output tensor has shape [2, 3]. +// fill([2, 3], 9) ==> [[9, 9, 9] +// [9, 9, 9]] +// ``` +// +// Arguments: +// dims: 1-D. Represents the shape of the output tensor. +// value: 0-D (scalar). Value to fill the returned tensor. // -// *NOTE*: `TruncateMod` supports broadcasting. 
More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func TruncateMod(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { +// @compatibility(numpy) +// Equivalent to np.full +// @end_compatibility +func Fill(scope *Scope, dims tf.Output, value tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "TruncateMod", + Type: "Fill", Input: []tf.Input{ - x, y, + dims, value, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Inverse 2D real-valued fast Fourier transform. -// -// Computes the inverse 2-dimensional discrete Fourier transform of a real-valued -// signal over the inner-most 2 dimensions of `input`. -// -// The inner-most 2 dimensions of `input` are assumed to be the result of `RFFT2D`: -// The inner-most dimension contains the `fft_length / 2 + 1` unique components of -// the DFT of a real-valued signal. If `fft_length` is not provided, it is computed -// from the size of the inner-most 2 dimensions of `input`. If the FFT length used -// to compute `input` is odd, it should be provided since it cannot be inferred -// properly. +// 2D fast Fourier transform. // -// Along each axis `IRFFT2D` is computed on, if `fft_length` (or -// `fft_length / 2 + 1` for the inner-most dimension) is smaller than the -// corresponding dimension of `input`, the dimension is cropped. If it is larger, -// the dimension is padded with zeros. +// Computes the 2-dimensional discrete Fourier transform over the inner-most +// 2 dimensions of `input`. // // Arguments: // input: A complex64 tensor. -// fft_length: An int32 tensor of shape [2]. The FFT length for each dimension. // -// Returns A float32 tensor of the same rank as `input`. The inner-most 2 -// dimensions of `input` are replaced with the `fft_length` samples of their -// inverse 2D Fourier transform. +// Returns A complex64 tensor of the same shape as `input`. 
The inner-most 2 +// dimensions of `input` are replaced with their 2D Fourier transform. // // @compatibility(numpy) -// Equivalent to np.fft.irfft2 +// Equivalent to np.fft.fft2 // @end_compatibility -func IRFFT2D(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Output) { +func FFT2D(scope *Scope, input tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "IRFFT2D", + Type: "FFT2D", Input: []tf.Input{ - input, fft_length, + input, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// DecodeJpegAttr is an optional argument to DecodeJpeg. -type DecodeJpegAttr func(optionalAttr) - -// DecodeJpegChannels sets the optional channels attribute to value. -// -// value: Number of color channels for the decoded image. -// If not specified, defaults to 0 -func DecodeJpegChannels(value int64) DecodeJpegAttr { - return func(m optionalAttr) { - m["channels"] = value - } -} - -// DecodeJpegRatio sets the optional ratio attribute to value. -// -// value: Downscaling ratio. -// If not specified, defaults to 1 -func DecodeJpegRatio(value int64) DecodeJpegAttr { - return func(m optionalAttr) { - m["ratio"] = value - } -} - -// DecodeJpegFancyUpscaling sets the optional fancy_upscaling attribute to value. -// -// value: If true use a slower but nicer upscaling of the -// chroma planes (yuv420/422 only). -// If not specified, defaults to true -func DecodeJpegFancyUpscaling(value bool) DecodeJpegAttr { - return func(m optionalAttr) { - m["fancy_upscaling"] = value - } -} +// ResourceApplyProximalGradientDescentAttr is an optional argument to ResourceApplyProximalGradientDescent. +type ResourceApplyProximalGradientDescentAttr func(optionalAttr) -// DecodeJpegTryRecoverTruncated sets the optional try_recover_truncated attribute to value. +// ResourceApplyProximalGradientDescentUseLocking sets the optional use_locking attribute to value. // -// value: If true try to recover an image from truncated input. 
+// value: If True, the subtraction will be protected by a lock; +// otherwise the behavior is undefined, but may exhibit less contention. // If not specified, defaults to false -func DecodeJpegTryRecoverTruncated(value bool) DecodeJpegAttr { - return func(m optionalAttr) { - m["try_recover_truncated"] = value - } -} - -// DecodeJpegAcceptableFraction sets the optional acceptable_fraction attribute to value. -// -// value: The minimum required fraction of lines before a truncated -// input is accepted. -// If not specified, defaults to 1 -func DecodeJpegAcceptableFraction(value float32) DecodeJpegAttr { - return func(m optionalAttr) { - m["acceptable_fraction"] = value - } -} - -// DecodeJpegDctMethod sets the optional dct_method attribute to value. -// -// value: string specifying a hint about the algorithm used for -// decompression. Defaults to "" which maps to a system-specific -// default. Currently valid values are ["INTEGER_FAST", -// "INTEGER_ACCURATE"]. The hint may be ignored (e.g., the internal -// jpeg library changes to a version that does not have that specific -// option.) -// If not specified, defaults to "" -func DecodeJpegDctMethod(value string) DecodeJpegAttr { +func ResourceApplyProximalGradientDescentUseLocking(value bool) ResourceApplyProximalGradientDescentAttr { return func(m optionalAttr) { - m["dct_method"] = value + m["use_locking"] = value } } -// Decode a JPEG-encoded image to a uint8 tensor. -// -// The attr `channels` indicates the desired number of color channels for the -// decoded image. -// -// Accepted values are: -// -// * 0: Use the number of channels in the JPEG-encoded image. -// * 1: output a grayscale image. -// * 3: output an RGB image. -// -// If needed, the JPEG-encoded image is transformed to match the requested number -// of color channels. -// -// The attr `ratio` allows downscaling the image by an integer factor during -// decoding. Allowed values are: 1, 2, 4, and 8. 
This is much faster than -// downscaling the image later. -// +// Update '*var' as FOBOS algorithm with fixed learning rate. // -// This op also supports decoding PNGs and non-animated GIFs since the interface is -// the same, though it is cleaner to use `tf.image.decode_image`. +// prox_v = var - alpha * delta +// var = sign(prox_v)/(1+alpha*l2) * max{|prox_v|-alpha*l1,0} // // Arguments: -// contents: 0-D. The JPEG-encoded image. +// var_: Should be from a Variable(). +// alpha: Scaling factor. Must be a scalar. +// l1: L1 regularization. Must be a scalar. +// l2: L2 regularization. Must be a scalar. +// delta: The change. // -// Returns 3-D with shape `[height, width, channels]`.. -func DecodeJpeg(scope *Scope, contents tf.Output, optional ...DecodeJpegAttr) (image tf.Output) { +// Returns the created operation. +func ResourceApplyProximalGradientDescent(scope *Scope, var_ tf.Output, alpha tf.Output, l1 tf.Output, l2 tf.Output, delta tf.Output, optional ...ResourceApplyProximalGradientDescentAttr) (o *tf.Operation) { if scope.Err() != nil { return } @@ -10372,156 +12645,182 @@ func DecodeJpeg(scope *Scope, contents tf.Output, optional ...DecodeJpegAttr) (i a(attrs) } opspec := tf.OpSpec{ - Type: "DecodeJpeg", + Type: "ResourceApplyProximalGradientDescent", Input: []tf.Input{ - contents, + var_, alpha, l1, l2, delta, }, Attrs: attrs, } + return scope.AddOperation(opspec) +} + +// Computes the gradient for the sqrt of `x` wrt its input. +// +// Specifically, `grad = dy * 0.5 / y`, where `y = sqrt(x)`, and `dy` +// is the corresponding input gradient. +func SqrtGrad(scope *Scope, y tf.Output, dy tf.Output) (z tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "SqrtGrad", + Input: []tf.Input{ + y, dy, + }, + } op := scope.AddOperation(opspec) return op.Output(0) } -// Transforms a vector of brain.Example protos (as strings) into typed tensors. +// Get the value of the tensor specified by its handle. 
// // Arguments: -// serialized: A vector containing a batch of binary serialized Example protos. -// names: A vector containing the names of the serialized protos. -// May contain, for example, table key (descriptive) names for the -// corresponding serialized protos. These are purely useful for debugging -// purposes, and the presence of values here has no effect on the output. -// May also be an empty vector if no names are available. -// If non-empty, this vector must be the same length as "serialized". -// sparse_keys: A list of Nsparse string Tensors (scalars). -// The keys expected in the Examples' features associated with sparse values. -// dense_keys: A list of Ndense string Tensors (scalars). -// The keys expected in the Examples' features associated with dense values. -// dense_defaults: A list of Ndense Tensors (some may be empty). -// dense_defaults[j] provides default values -// when the example's feature_map lacks dense_key[j]. If an empty Tensor is -// provided for dense_defaults[j], then the Feature dense_keys[j] is required. -// The input type is inferred from dense_defaults[j], even when it's empty. -// If dense_defaults[j] is not empty, and dense_shapes[j] is fully defined, -// then the shape of dense_defaults[j] must match that of dense_shapes[j]. -// If dense_shapes[j] has an undefined major dimension (variable strides dense -// feature), dense_defaults[j] must contain a single element: -// the padding element. -// sparse_types: A list of Nsparse types; the data types of data in each Feature -// given in sparse_keys. -// Currently the ParseExample supports DT_FLOAT (FloatList), -// DT_INT64 (Int64List), and DT_STRING (BytesList). -// dense_shapes: A list of Ndense shapes; the shapes of data in each Feature -// given in dense_keys. -// The number of elements in the Feature corresponding to dense_key[j] -// must always equal dense_shapes[j].NumEntries(). 
-// If dense_shapes[j] == (D0, D1, ..., DN) then the shape of output -// Tensor dense_values[j] will be (|serialized|, D0, D1, ..., DN): -// The dense outputs are just the inputs row-stacked by batch. -// This works for dense_shapes[j] = (-1, D1, ..., DN). In this case -// the shape of the output Tensor dense_values[j] will be -// (|serialized|, M, D1, .., DN), where M is the maximum number of blocks -// of elements of length D1 * .... * DN, across all minibatch entries -// in the input. Any minibatch entry with less than M blocks of elements of -// length D1 * ... * DN will be padded with the corresponding default_value -// scalar element along the second dimension. -func ParseExample(scope *Scope, serialized tf.Output, names tf.Output, sparse_keys []tf.Output, dense_keys []tf.Output, dense_defaults []tf.Output, sparse_types []tf.DataType, dense_shapes []tf.Shape) (sparse_indices []tf.Output, sparse_values []tf.Output, sparse_shapes []tf.Output, dense_values []tf.Output) { +// handle: The handle for a tensor stored in the session state. +// dtype: The type of the output value. +// +// Returns The tensor for the given handle. +func GetSessionTensor(scope *Scope, handle tf.Output, dtype tf.DataType) (value tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"sparse_types": sparse_types, "dense_shapes": dense_shapes} + attrs := map[string]interface{}{"dtype": dtype} opspec := tf.OpSpec{ - Type: "ParseExample", + Type: "GetSessionTensor", Input: []tf.Input{ - serialized, names, tf.OutputList(sparse_keys), tf.OutputList(dense_keys), tf.OutputList(dense_defaults), + handle, }, Attrs: attrs, } op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Returns x - y element-wise. +// +// *NOTE*: `Subtract` supports broadcasting. 
More about broadcasting +// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +func Sub(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { if scope.Err() != nil { return } - var idx int - var err error - if sparse_indices, idx, err = makeOutputList(op, idx, "sparse_indices"); err != nil { - scope.UpdateErr("ParseExample", err) - return - } - if sparse_values, idx, err = makeOutputList(op, idx, "sparse_values"); err != nil { - scope.UpdateErr("ParseExample", err) - return - } - if sparse_shapes, idx, err = makeOutputList(op, idx, "sparse_shapes"); err != nil { - scope.UpdateErr("ParseExample", err) - return + opspec := tf.OpSpec{ + Type: "Sub", + Input: []tf.Input{ + x, y, + }, } - if dense_values, idx, err = makeOutputList(op, idx, "dense_values"); err != nil { - scope.UpdateErr("ParseExample", err) - return + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// LogUniformCandidateSamplerAttr is an optional argument to LogUniformCandidateSampler. +type LogUniformCandidateSamplerAttr func(optionalAttr) + +// LogUniformCandidateSamplerSeed sets the optional seed attribute to value. +// +// value: If either seed or seed2 are set to be non-zero, the random number +// generator is seeded by the given seed. Otherwise, it is seeded by a +// random seed. +// If not specified, defaults to 0 +func LogUniformCandidateSamplerSeed(value int64) LogUniformCandidateSamplerAttr { + return func(m optionalAttr) { + m["seed"] = value } - return sparse_indices, sparse_values, sparse_shapes, dense_values } -// VariableShapeAttr is an optional argument to VariableShape. -type VariableShapeAttr func(optionalAttr) - -// VariableShapeOutType sets the optional out_type attribute to value. -// If not specified, defaults to DT_INT32 -func VariableShapeOutType(value tf.DataType) VariableShapeAttr { +// LogUniformCandidateSamplerSeed2 sets the optional seed2 attribute to value. +// +// value: An second seed to avoid seed collision. 
+// If not specified, defaults to 0 +func LogUniformCandidateSamplerSeed2(value int64) LogUniformCandidateSamplerAttr { return func(m optionalAttr) { - m["out_type"] = value + m["seed2"] = value } } -// Returns the shape of the variable pointed to by `resource`. +// Generates labels for candidate sampling with a log-uniform distribution. // -// This operation returns a 1-D integer tensor representing the shape of `input`. +// See explanations of candidate sampling and the data formats at +// go/candidate-sampling. // -// For example: +// For each batch, this op picks a single set of sampled candidate labels. // -// ``` -// # 't' is [[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]]] -// shape(t) ==> [2, 2, 3] -// ``` -func VariableShape(scope *Scope, input tf.Output, optional ...VariableShapeAttr) (output tf.Output) { +// The advantages of sampling candidates per-batch are simplicity and the +// possibility of efficient dense matrix multiplication. The disadvantage is that +// the sampled candidates must be chosen independently of the context and of the +// true labels. +// +// Arguments: +// true_classes: A batch_size * num_true matrix, in which each row contains the +// IDs of the num_true target_classes in the corresponding original label. +// num_true: Number of true labels per context. +// num_sampled: Number of candidates to randomly sample. +// unique: If unique is true, we sample with rejection, so that all sampled +// candidates in a batch are unique. This requires some approximation to +// estimate the post-rejection sampling probabilities. +// range_max: The sampler will sample integers from the interval [0, range_max). +// +// Returns A vector of length num_sampled, in which each element is +// the ID of a sampled candidate.A batch_size * num_true matrix, representing +// the number of times each candidate is expected to occur in a batch +// of sampled candidates. 
If unique=true, then this is a probability.A vector of length num_sampled, for each sampled +// candidate representing the number of times the candidate is expected +// to occur in a batch of sampled candidates. If unique=true, then this is a +// probability. +func LogUniformCandidateSampler(scope *Scope, true_classes tf.Output, num_true int64, num_sampled int64, unique bool, range_max int64, optional ...LogUniformCandidateSamplerAttr) (sampled_candidates tf.Output, true_expected_count tf.Output, sampled_expected_count tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"num_true": num_true, "num_sampled": num_sampled, "unique": unique, "range_max": range_max} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "VariableShape", + Type: "LogUniformCandidateSampler", Input: []tf.Input{ - input, + true_classes, }, Attrs: attrs, } op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2) +} + +// Returns the max of x and y (i.e. x > y ? x : y) element-wise. +// +// *NOTE*: `Maximum` supports broadcasting. More about broadcasting +// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +func Maximum(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Maximum", + Input: []tf.Input{ + x, y, + }, + } + op := scope.AddOperation(opspec) return op.Output(0) } // Computes softmax cross entropy cost and gradients to backpropagate. // -// Unlike `SoftmaxCrossEntropyWithLogits`, this operation does not accept -// a matrix of label probabilities, but rather a single label per row -// of features. This label is considered to have probability 1.0 for the -// given row. -// // Inputs are the logits, not probabilities. // // Arguments: // features: batch_size x num_classes matrix -// labels: batch_size vector with values in [0, num_classes). 
-// This is the label for the given minibatch entry. +// labels: batch_size x num_classes matrix +// The caller must ensure that each batch of labels represents a valid +// probability distribution. // // Returns Per example loss (batch_size vector).backpropagated gradients (batch_size x num_classes matrix). -func SparseSoftmaxCrossEntropyWithLogits(scope *Scope, features tf.Output, labels tf.Output) (loss tf.Output, backprop tf.Output) { +func SoftmaxCrossEntropyWithLogits(scope *Scope, features tf.Output, labels tf.Output) (loss tf.Output, backprop tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "SparseSoftmaxCrossEntropyWithLogits", + Type: "SoftmaxCrossEntropyWithLogits", Input: []tf.Input{ features, labels, }, @@ -10530,271 +12829,272 @@ func SparseSoftmaxCrossEntropyWithLogits(scope *Scope, features tf.Output, label return op.Output(0), op.Output(1) } -// Fast Fourier transform. +// ReduceJoinAttr is an optional argument to ReduceJoin. +type ReduceJoinAttr func(optionalAttr) + +// ReduceJoinKeepDims sets the optional keep_dims attribute to value. // -// Computes the 1-dimensional discrete Fourier transform over the inner-most -// dimension of `input`. +// value: If `True`, retain reduced dimensions with length `1`. +// If not specified, defaults to false +func ReduceJoinKeepDims(value bool) ReduceJoinAttr { + return func(m optionalAttr) { + m["keep_dims"] = value + } +} + +// ReduceJoinSeparator sets the optional separator attribute to value. // -// Arguments: -// input: A complex64 tensor. +// value: The separator to use when joining. +// If not specified, defaults to "" +func ReduceJoinSeparator(value string) ReduceJoinAttr { + return func(m optionalAttr) { + m["separator"] = value + } +} + +// Joins a string Tensor across the given dimensions. // -// Returns A complex64 tensor of the same shape as `input`. The inner-most -// dimension of `input` is replaced with its 1D Fourier transform. 
+// Computes the string join across dimensions in the given string Tensor of shape +// `[d_0, d_1, ..., d_n-1]`. Returns a new Tensor created by joining the input +// strings with the given separator (default: empty string). Negative indices are +// counted backwards from the end, with `-1` being equivalent to `n - 1`. // -// @compatibility(numpy) -// Equivalent to np.fft.fft -// @end_compatibility -func FFT(scope *Scope, input tf.Output) (output tf.Output) { +// For example: +// +// ```python +// # tensor `a` is [["a", "b"], ["c", "d"]] +// tf.reduce_join(a, 0) ==> ["ac", "bd"] +// tf.reduce_join(a, 1) ==> ["ab", "cd"] +// tf.reduce_join(a, -2) = tf.reduce_join(a, 0) ==> ["ac", "bd"] +// tf.reduce_join(a, -1) = tf.reduce_join(a, 1) ==> ["ab", "cd"] +// tf.reduce_join(a, 0, keep_dims=True) ==> [["ac", "bd"]] +// tf.reduce_join(a, 1, keep_dims=True) ==> [["ab"], ["cd"]] +// tf.reduce_join(a, 0, separator=".") ==> ["a.c", "b.d"] +// tf.reduce_join(a, [0, 1]) ==> ["acbd"] +// tf.reduce_join(a, [1, 0]) ==> ["abcd"] +// tf.reduce_join(a, []) ==> ["abcd"] +// ``` +// +// Arguments: +// inputs: The input to be joined. All reduced indices must have non-zero size. +// reduction_indices: The dimensions to reduce over. Dimensions are reduced in the +// order specified. Omitting `reduction_indices` is equivalent to passing +// `[n-1, n-2, ..., 0]`. Negative indices from `-n` to `-1` are supported. +// +// Returns Has shape equal to that of the input with reduced dimensions removed or +// set to `1` depending on `keep_dims`. 
+func ReduceJoin(scope *Scope, inputs tf.Output, reduction_indices tf.Output, optional ...ReduceJoinAttr) (output tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "FFT", + Type: "ReduceJoin", Input: []tf.Input{ - input, + inputs, reduction_indices, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Transforms a serialized tensorflow.TensorProto proto into a Tensor. -// -// Arguments: -// serialized: A scalar string containing a serialized TensorProto proto. -// out_type: The type of the serialized tensor. The provided type must match the -// type of the serialized tensor and no implicit conversion will take place. -// -// Returns A Tensor of type `out_type`. -func ParseTensor(scope *Scope, serialized tf.Output, out_type tf.DataType) (output tf.Output) { +// Computes cos of x element-wise. +func Cos(scope *Scope, x tf.Output) (y tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"out_type": out_type} opspec := tf.OpSpec{ - Type: "ParseTensor", + Type: "Cos", Input: []tf.Input{ - serialized, + x, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// MaxPoolWithArgmaxAttr is an optional argument to MaxPoolWithArgmax. -type MaxPoolWithArgmaxAttr func(optionalAttr) +// FusedBatchNormGradAttr is an optional argument to FusedBatchNormGrad. +type FusedBatchNormGradAttr func(optionalAttr) -// MaxPoolWithArgmaxTargmax sets the optional Targmax attribute to value. -// If not specified, defaults to DT_INT64 -func MaxPoolWithArgmaxTargmax(value tf.DataType) MaxPoolWithArgmaxAttr { +// FusedBatchNormGradEpsilon sets the optional epsilon attribute to value. +// +// value: A small float number added to the variance of x. 
+// If not specified, defaults to 0.0001 +func FusedBatchNormGradEpsilon(value float32) FusedBatchNormGradAttr { return func(m optionalAttr) { - m["Targmax"] = value + m["epsilon"] = value } } -// Performs max pooling on the input and outputs both max values and indices. +// FusedBatchNormGradDataFormat sets the optional data_format attribute to value. // -// The indices in `argmax` are flattened, so that a maximum value at position -// `[b, y, x, c]` becomes flattened index -// `((b * height + y) * width + x) * channels + c`. +// value: The data format for y_backprop, x, x_backprop. +// Either "NHWC" (default) or "NCHW". +// If not specified, defaults to "NHWC" +func FusedBatchNormGradDataFormat(value string) FusedBatchNormGradAttr { + return func(m optionalAttr) { + m["data_format"] = value + } +} + +// FusedBatchNormGradIsTraining sets the optional is_training attribute to value. // -// The indices returned are always in `[0, height) x [0, width)` before flattening, -// even if padding is involved and the mathematically correct answer is outside -// (either negative or too large). This is a bug, but fixing it is difficult to do -// in a safe backwards compatible way, especially due to flattening. +// value: A bool value to indicate the operation is for training (default) +// or inference. +// If not specified, defaults to true +func FusedBatchNormGradIsTraining(value bool) FusedBatchNormGradAttr { + return func(m optionalAttr) { + m["is_training"] = value + } +} + +// Gradient for batch normalization. +// +// Note that the size of 4D Tensors are defined by either "NHWC" or "NCHW". +// The size of 1D Tensors matches the dimension C of the 4D Tensors. // // Arguments: -// input: 4-D with shape `[batch, height, width, channels]`. Input to pool over. -// ksize: The size of the window for each dimension of the input tensor. -// strides: The stride of the sliding window for each dimension of the -// input tensor. -// padding: The type of padding algorithm to use. 
+// y_backprop: A 4D Tensor for the gradient with respect to y. +// x: A 4D Tensor for input data. +// scale: A 1D Tensor for scaling factor, to scale the normalized x. +// reserve_space_1: When is_training is True, a 1D Tensor for the computed batch +// mean to be reused in gradient computation. When is_training is +// False, a 1D Tensor for the population mean to be reused in both +// 1st and 2nd order gradient computation. +// reserve_space_2: When is_training is True, a 1D Tensor for the computed batch +// variance (inverted variance in the cuDNN case) to be reused in +// gradient computation. When is_training is False, a 1D Tensor +// for the population variance to be reused in both 1st and 2nd +// order gradient computation. // -// Returns The max pooled output tensor.4-D. The flattened indices of the max values chosen for each output. -func MaxPoolWithArgmax(scope *Scope, input tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPoolWithArgmaxAttr) (output tf.Output, argmax tf.Output) { +// Returns A 4D Tensor for the gradient with respect to x.A 1D Tensor for the gradient with respect to scale.A 1D Tensor for the gradient with respect to offset.Unused placeholder to match the mean input in FusedBatchNorm.Unused placeholder to match the variance input +// in FusedBatchNorm. 
+func FusedBatchNormGrad(scope *Scope, y_backprop tf.Output, x tf.Output, scale tf.Output, reserve_space_1 tf.Output, reserve_space_2 tf.Output, optional ...FusedBatchNormGradAttr) (x_backprop tf.Output, scale_backprop tf.Output, offset_backprop tf.Output, reserve_space_3 tf.Output, reserve_space_4 tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "MaxPoolWithArgmax", + Type: "FusedBatchNormGrad", Input: []tf.Input{ - input, + y_backprop, x, scale, reserve_space_1, reserve_space_2, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) + return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4) } -// ResourceSparseApplyAdagradDAAttr is an optional argument to ResourceSparseApplyAdagradDA. -type ResourceSparseApplyAdagradDAAttr func(optionalAttr) +// TopKAttr is an optional argument to TopK. +type TopKAttr func(optionalAttr) -// ResourceSparseApplyAdagradDAUseLocking sets the optional use_locking attribute to value. +// TopKSorted sets the optional sorted attribute to value. // -// value: If True, updating of the var and accum tensors will be protected by -// a lock; otherwise the behavior is undefined, but may exhibit less contention. -// If not specified, defaults to false -func ResourceSparseApplyAdagradDAUseLocking(value bool) ResourceSparseApplyAdagradDAAttr { +// value: If true the resulting `k` elements will be sorted by the values in +// descending order. +// If not specified, defaults to true +func TopKSorted(value bool) TopKAttr { return func(m optionalAttr) { - m["use_locking"] = value + m["sorted"] = value } } -// Update entries in '*var' and '*accum' according to the proximal adagrad scheme. +// Finds values and indices of the `k` largest elements for the last dimension. 
+// +// DEPRECATED at GraphDef version 7: Use TopKV2 instead +// +// If the input is a vector (rank-1), finds the `k` largest entries in the vector +// and outputs their values and indices as vectors. Thus `values[j]` is the +// `j`-th largest entry in `input`, and its index is `indices[j]`. +// +// For matrices (resp. higher rank input), computes the top `k` entries in each +// row (resp. vector along the last dimension). Thus, +// +// values.shape = indices.shape = input.shape[:-1] + [k] +// +// If two elements are equal, the lower-index element appears first. +// +// If `k` varies dynamically, use `TopKV2` below. // // Arguments: -// var_: Should be from a Variable(). -// gradient_accumulator: Should be from a Variable(). -// gradient_squared_accumulator: Should be from a Variable(). -// grad: The gradient. -// indices: A vector of indices into the first dimension of var and accum. -// lr: Learning rate. Must be a scalar. -// l1: L1 regularization. Must be a scalar. -// l2: L2 regularization. Must be a scalar. -// global_step: Training step number. Must be a scalar. +// input: 1-D or higher with last dimension at least `k`. +// k: Number of top elements to look for along the last dimension (along each +// row for matrices). // -// Returns the created operation. -func ResourceSparseApplyAdagradDA(scope *Scope, var_ tf.Output, gradient_accumulator tf.Output, gradient_squared_accumulator tf.Output, grad tf.Output, indices tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, global_step tf.Output, optional ...ResourceSparseApplyAdagradDAAttr) (o *tf.Operation) { +// Returns The `k` largest elements along each last dimensional slice.The indices of `values` within the last dimension of `input`. 
+func TopK(scope *Scope, input tf.Output, k int64, optional ...TopKAttr) (values tf.Output, indices tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"k": k} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "ResourceSparseApplyAdagradDA", + Type: "TopK", Input: []tf.Input{ - var_, gradient_accumulator, gradient_squared_accumulator, grad, indices, lr, l1, l2, global_step, + input, }, Attrs: attrs, } - return scope.AddOperation(opspec) -} - -// EncodeJpegAttr is an optional argument to EncodeJpeg. -type EncodeJpegAttr func(optionalAttr) - -// EncodeJpegFormat sets the optional format attribute to value. -// -// value: Per pixel image format. -// If not specified, defaults to "" -func EncodeJpegFormat(value string) EncodeJpegAttr { - return func(m optionalAttr) { - m["format"] = value - } -} - -// EncodeJpegQuality sets the optional quality attribute to value. -// -// value: Quality of the compression from 0 to 100 (higher is better and slower). -// If not specified, defaults to 95 -func EncodeJpegQuality(value int64) EncodeJpegAttr { - return func(m optionalAttr) { - m["quality"] = value - } -} - -// EncodeJpegProgressive sets the optional progressive attribute to value. -// -// value: If True, create a JPEG that loads progressively (coarse to fine). -// If not specified, defaults to false -func EncodeJpegProgressive(value bool) EncodeJpegAttr { - return func(m optionalAttr) { - m["progressive"] = value - } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1) } -// EncodeJpegOptimizeSize sets the optional optimize_size attribute to value. +// Compute the Hurwitz zeta function \\(\zeta(x, q)\\). // -// value: If True, spend CPU/RAM to reduce size with no quality change. 
-// If not specified, defaults to false -func EncodeJpegOptimizeSize(value bool) EncodeJpegAttr { - return func(m optionalAttr) { - m["optimize_size"] = value - } -} - -// EncodeJpegChromaDownsampling sets the optional chroma_downsampling attribute to value. +// The Hurwitz zeta function is defined as: // -// value: See http://en.wikipedia.org/wiki/Chroma_subsampling. -// If not specified, defaults to true -func EncodeJpegChromaDownsampling(value bool) EncodeJpegAttr { - return func(m optionalAttr) { - m["chroma_downsampling"] = value - } -} - -// EncodeJpegDensityUnit sets the optional density_unit attribute to value. // -// value: Unit used to specify `x_density` and `y_density`: -// pixels per inch (`'in'`) or centimeter (`'cm'`). -// If not specified, defaults to "in" -func EncodeJpegDensityUnit(value string) EncodeJpegAttr { - return func(m optionalAttr) { - m["density_unit"] = value +// \\(\zeta(x, q) = \sum_{n=0}^{\infty} (q + n)^{-x}\\) +func Zeta(scope *Scope, x tf.Output, q tf.Output) (z tf.Output) { + if scope.Err() != nil { + return } -} - -// EncodeJpegXDensity sets the optional x_density attribute to value. -// -// value: Horizontal pixels per density unit. -// If not specified, defaults to 300 -func EncodeJpegXDensity(value int64) EncodeJpegAttr { - return func(m optionalAttr) { - m["x_density"] = value + opspec := tf.OpSpec{ + Type: "Zeta", + Input: []tf.Input{ + x, q, + }, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// EncodeJpegYDensity sets the optional y_density attribute to value. -// -// value: Vertical pixels per density unit. -// If not specified, defaults to 300 -func EncodeJpegYDensity(value int64) EncodeJpegAttr { - return func(m optionalAttr) { - m["y_density"] = value - } -} +// ProdAttr is an optional argument to Prod. +type ProdAttr func(optionalAttr) -// EncodeJpegXmpMetadata sets the optional xmp_metadata attribute to value. +// ProdKeepDims sets the optional keep_dims attribute to value. 
// -// value: If not empty, embed this XMP metadata in the image header. -// If not specified, defaults to "" -func EncodeJpegXmpMetadata(value string) EncodeJpegAttr { +// value: If true, retain reduced dimensions with length 1. +// If not specified, defaults to false +func ProdKeepDims(value bool) ProdAttr { return func(m optionalAttr) { - m["xmp_metadata"] = value + m["keep_dims"] = value } } -// JPEG-encode an image. -// -// `image` is a 3-D uint8 Tensor of shape `[height, width, channels]`. -// -// The attr `format` can be used to override the color format of the encoded -// output. Values can be: -// -// * `''`: Use a default format based on the number of channels in the image. -// * `grayscale`: Output a grayscale JPEG image. The `channels` dimension -// of `image` must be 1. -// * `rgb`: Output an RGB JPEG image. The `channels` dimension -// of `image` must be 3. -// -// If `format` is not specified or is the empty string, a default format is picked -// in function of the number of channels in `image`: +// Computes the product of elements across dimensions of a tensor. // -// * 1: Output a grayscale image. -// * 3: Output an RGB image. +// Reduces `input` along the dimensions given in `axis`. Unless +// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in +// `axis`. If `keep_dims` is true, the reduced dimensions are +// retained with length 1. // // Arguments: -// image: 3-D with shape `[height, width, channels]`. +// input: The tensor to reduce. +// axis: The dimensions to reduce. Must be in the range +// `[-rank(input), rank(input))`. // -// Returns 0-D. JPEG-encoded image. -func EncodeJpeg(scope *Scope, image tf.Output, optional ...EncodeJpegAttr) (contents tf.Output) { +// Returns The reduced tensor. 
+func Prod(scope *Scope, input tf.Output, axis tf.Output, optional ...ProdAttr) (output tf.Output) { if scope.Err() != nil { return } @@ -10803,9 +13103,9 @@ func EncodeJpeg(scope *Scope, image tf.Output, optional ...EncodeJpegAttr) (cont a(attrs) } opspec := tf.OpSpec{ - Type: "EncodeJpeg", + Type: "Prod", Input: []tf.Input{ - image, + input, axis, }, Attrs: attrs, } @@ -10813,59 +13113,57 @@ func EncodeJpeg(scope *Scope, image tf.Output, optional ...EncodeJpegAttr) (cont return op.Output(0) } -// MultinomialAttr is an optional argument to Multinomial. -type MultinomialAttr func(optionalAttr) +// FusedResizeAndPadConv2DAttr is an optional argument to FusedResizeAndPadConv2D. +type FusedResizeAndPadConv2DAttr func(optionalAttr) -// MultinomialSeed sets the optional seed attribute to value. +// FusedResizeAndPadConv2DResizeAlignCorners sets the optional resize_align_corners attribute to value. // -// value: If either seed or seed2 is set to be non-zero, the internal random number -// generator is seeded by the given seed. Otherwise, a random seed is used. -// If not specified, defaults to 0 -func MultinomialSeed(value int64) MultinomialAttr { +// value: If true, the centers of the 4 corner pixels of the input and output tensors are +// aligned, preserving the values at the corner pixels. Defaults to false. +// If not specified, defaults to false +func FusedResizeAndPadConv2DResizeAlignCorners(value bool) FusedResizeAndPadConv2DAttr { return func(m optionalAttr) { - m["seed"] = value + m["resize_align_corners"] = value } } -// MultinomialSeed2 sets the optional seed2 attribute to value. +// Performs a resize and padding as a preprocess during a convolution. // -// value: A second seed to avoid seed collision. -// If not specified, defaults to 0 -func MultinomialSeed2(value int64) MultinomialAttr { - return func(m optionalAttr) { - m["seed2"] = value - } -} - -// MultinomialOutputDtype sets the optional output_dtype attribute to value. 
-// If not specified, defaults to DT_INT64 -func MultinomialOutputDtype(value tf.DataType) MultinomialAttr { - return func(m optionalAttr) { - m["output_dtype"] = value - } -} - -// Draws samples from a multinomial distribution. +// It's often possible to do spatial transformations more efficiently as part of +// the packing stage of a convolution, so this op allows for an optimized +// implementation where these stages are fused together. This prevents the need to +// write out the intermediate results as whole tensors, reducing memory pressure, +// and we can get some latency gains by merging the transformation calculations. +// The data_format attribute for Conv2D isn't supported by this op, and defaults to +// 'NHWC' order. +// Internally this op uses a single per-graph scratch buffer, which means that it +// will block if multiple versions are being run in parallel. This is because this +// operator is primarily an optimization to minimize memory usage. // // Arguments: -// logits: 2-D Tensor with shape `[batch_size, num_classes]`. Each slice `[i, :]` -// represents the unnormalized log probabilities for all classes. -// num_samples: 0-D. Number of independent samples to draw for each row slice. +// input: 4-D with shape `[batch, in_height, in_width, in_channels]`. +// size: A 1-D int32 Tensor of 2 elements: `new_height, new_width`. The +// new size for the images. +// paddings: A two-column matrix specifying the padding sizes. The number of +// rows must be the same as the rank of `input`. +// filter: 4-D with shape +// `[filter_height, filter_width, in_channels, out_channels]`. // -// Returns 2-D Tensor with shape `[batch_size, num_samples]`. Each slice `[i, :]` -// contains the drawn class labels with range `[0, num_classes)`. -func Multinomial(scope *Scope, logits tf.Output, num_samples tf.Output, optional ...MultinomialAttr) (output tf.Output) { +// strides: 1-D of length 4. The stride of the sliding window for each dimension +// of `input`. 
Must be in the same order as the dimension specified with format. +// padding: The type of padding algorithm to use. +func FusedResizeAndPadConv2D(scope *Scope, input tf.Output, size tf.Output, paddings tf.Output, filter tf.Output, mode string, strides []int64, padding string, optional ...FusedResizeAndPadConv2DAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"mode": mode, "strides": strides, "padding": padding} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "Multinomial", + Type: "FusedResizeAndPadConv2D", Input: []tf.Input{ - logits, num_samples, + input, size, paddings, filter, }, Attrs: attrs, } @@ -10873,260 +13171,232 @@ func Multinomial(scope *Scope, logits tf.Output, num_samples tf.Output, optional return op.Output(0) } -// Returns the truth value of NOT x element-wise. -func LogicalNot(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "LogicalNot", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// 3D real-valued fast Fourier transform. -// -// Computes the 3-dimensional discrete Fourier transform of a real-valued signal -// over the inner-most 3 dimensions of `input`. -// -// Since the DFT of a real signal is Hermitian-symmetric, `RFFT3D` only returns the -// `fft_length / 2 + 1` unique components of the FFT for the inner-most dimension -// of `output`: the zero-frequency term, followed by the `fft_length / 2` -// positive-frequency terms. +// Returns a list of tensors with the same shapes and contents as the input // -// Along each axis `RFFT3D` is computed on, if `fft_length` is smaller than the -// corresponding dimension of `input`, the dimension is cropped. If it is larger, -// the dimension is padded with zeros. +// tensors. // -// Arguments: -// input: A float32 tensor. -// fft_length: An int32 tensor of shape [3]. 
The FFT length for each dimension. +// This op can be used to override the gradient for complicated functions. For +// example, suppose y = f(x) and we wish to apply a custom function g for backprop +// such that dx = g(dy). In Python, // -// Returns A complex64 tensor of the same rank as `input`. The inner-most 3 -// dimensions of `input` are replaced with the their 3D Fourier transform. The -// inner-most dimension contains `fft_length / 2 + 1` unique frequency -// components. +// ```python +// with tf.get_default_graph().gradient_override_map( +// {'IdentityN': 'OverrideGradientWithG'}): +// y, _ = identity_n([f(x), x]) // -// @compatibility(numpy) -// Equivalent to np.fft.rfftn with 3 dimensions. -// @end_compatibility -func RFFT3D(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Output) { +// @tf.RegisterGradient('OverrideGradientWithG') +// def ApplyG(op, dy, _): +// return [None, g(dy)] # Do not backprop to f(x). +// ``` +func IdentityN(scope *Scope, input []tf.Output) (output []tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "RFFT3D", + Type: "IdentityN", Input: []tf.Input{ - input, fft_length, + tf.OutputList(input), }, } op := scope.AddOperation(opspec) - return op.Output(0) -} - -// TensorArrayV3Attr is an optional argument to TensorArrayV3. -type TensorArrayV3Attr func(optionalAttr) - -// TensorArrayV3ElementShape sets the optional element_shape attribute to value. -// -// value: The expected shape of an element, if known. Used to -// validate the shapes of TensorArray elements. If this shape is not -// fully specified, gathering zero-size TensorArrays is an error. -// If not specified, defaults to -func TensorArrayV3ElementShape(value tf.Shape) TensorArrayV3Attr { - return func(m optionalAttr) { - m["element_shape"] = value + if scope.Err() != nil { + return } -} - -// TensorArrayV3DynamicSize sets the optional dynamic_size attribute to value. 
-// -// value: A boolean that determines whether writes to the TensorArray -// are allowed to grow the size. By default, this is not allowed. -// If not specified, defaults to false -func TensorArrayV3DynamicSize(value bool) TensorArrayV3Attr { - return func(m optionalAttr) { - m["dynamic_size"] = value + var idx int + var err error + if output, idx, err = makeOutputList(op, idx, "output"); err != nil { + scope.UpdateErr("IdentityN", err) + return } + return output } -// TensorArrayV3ClearAfterRead sets the optional clear_after_read attribute to value. -// -// value: If true (default), Tensors in the TensorArray are cleared -// after being read. This disables multiple read semantics but allows early -// release of memory. -// If not specified, defaults to true -func TensorArrayV3ClearAfterRead(value bool) TensorArrayV3Attr { - return func(m optionalAttr) { - m["clear_after_read"] = value - } -} +// ResourceApplyCenteredRMSPropAttr is an optional argument to ResourceApplyCenteredRMSProp. +type ResourceApplyCenteredRMSPropAttr func(optionalAttr) -// TensorArrayV3IdenticalElementShapes sets the optional identical_element_shapes attribute to value. +// ResourceApplyCenteredRMSPropUseLocking sets the optional use_locking attribute to value. // -// value: If true (default is false), then all -// elements in the TensorArray will be expected to have have identical shapes. -// This allows certain behaviors, like dynamically checking for -// consistent shapes on write, and being able to fill in properly -// shaped zero tensors on stack -- even if the element_shape attribute -// is not fully defined. +// value: If `True`, updating of the var, mg, ms, and mom tensors is +// protected by a lock; otherwise the behavior is undefined, but may exhibit less +// contention. 
// If not specified, defaults to false -func TensorArrayV3IdenticalElementShapes(value bool) TensorArrayV3Attr { +func ResourceApplyCenteredRMSPropUseLocking(value bool) ResourceApplyCenteredRMSPropAttr { return func(m optionalAttr) { - m["identical_element_shapes"] = value + m["use_locking"] = value } } -// TensorArrayV3TensorArrayName sets the optional tensor_array_name attribute to value. +// Update '*var' according to the centered RMSProp algorithm. // -// value: Overrides the name used for the temporary tensor_array -// resource. Default value is the name of the 'TensorArray' op (which -// is guaranteed unique). -// If not specified, defaults to "" -func TensorArrayV3TensorArrayName(value string) TensorArrayV3Attr { - return func(m optionalAttr) { - m["tensor_array_name"] = value - } -} - -// An array of Tensors of given size. +// The centered RMSProp algorithm uses an estimate of the centered second moment +// (i.e., the variance) for normalization, as opposed to regular RMSProp, which +// uses the (uncentered) second moment. This often helps with training, but is +// slightly more expensive in terms of computation and memory. // -// Write data via Write and read via Read or Pack. +// Note that in dense implementation of this algorithm, mg, ms, and mom will +// update even if the grad is zero, but in this sparse implementation, mg, ms, +// and mom will not update in iterations during which the grad is zero. +// +// mean_square = decay * mean_square + (1-decay) * gradient ** 2 +// mean_grad = decay * mean_grad + (1-decay) * gradient +// +// Delta = learning_rate * gradient / sqrt(mean_square + epsilon - mean_grad ** 2) +// +// mg <- rho * mg_{t-1} + (1-rho) * grad +// ms <- rho * ms_{t-1} + (1-rho) * grad * grad +// mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms - mg * mg + epsilon) +// var <- var - mom // // Arguments: -// size: The size of the array. -// dtype: The type of the elements on the tensor_array. +// var_: Should be from a Variable(). 
+// mg: Should be from a Variable(). +// ms: Should be from a Variable(). +// mom: Should be from a Variable(). +// lr: Scaling factor. Must be a scalar. +// rho: Decay rate. Must be a scalar. // -// Returns The handle to the TensorArray.A scalar used to control gradient flow. -func TensorArrayV3(scope *Scope, size tf.Output, dtype tf.DataType, optional ...TensorArrayV3Attr) (handle tf.Output, flow tf.Output) { +// epsilon: Ridge term. Must be a scalar. +// grad: The gradient. +// +// Returns the created operation. +func ResourceApplyCenteredRMSProp(scope *Scope, var_ tf.Output, mg tf.Output, ms tf.Output, mom tf.Output, lr tf.Output, rho tf.Output, momentum tf.Output, epsilon tf.Output, grad tf.Output, optional ...ResourceApplyCenteredRMSPropAttr) (o *tf.Operation) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"dtype": dtype} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "TensorArrayV3", + Type: "ResourceApplyCenteredRMSProp", Input: []tf.Input{ - size, + var_, mg, ms, mom, lr, rho, momentum, epsilon, grad, }, Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) + return scope.AddOperation(opspec) } -// MaxPool3DAttr is an optional argument to MaxPool3D. -type MaxPool3DAttr func(optionalAttr) - -// MaxPool3DDataFormat sets the optional data_format attribute to value. +// Adds `bias` to `value`. // -// value: The data format of the input and output data. With the -// default format "NDHWC", the data is stored in the order of: -// [batch, in_depth, in_height, in_width, in_channels]. -// Alternatively, the format could be "NCDHW", the data storage order is: -// [batch, in_channels, in_depth, in_height, in_width]. -// If not specified, defaults to "NDHWC" -func MaxPool3DDataFormat(value string) MaxPool3DAttr { - return func(m optionalAttr) { - m["data_format"] = value - } -} - -// Performs 3D max pooling on the input. 
+// This is a deprecated version of BiasAdd and will be soon removed. +// +// This is a special case of `tf.add` where `bias` is restricted to be 1-D. +// Broadcasting is supported, so `value` may have any number of dimensions. // // Arguments: -// input: Shape `[batch, depth, rows, cols, channels]` tensor to pool over. -// ksize: 1-D tensor of length 5. The size of the window for each dimension of -// the input tensor. Must have `ksize[0] = ksize[4] = 1`. -// strides: 1-D tensor of length 5. The stride of the sliding window for each -// dimension of `input`. Must have `strides[0] = strides[4] = 1`. -// padding: The type of padding algorithm to use. +// value: Any number of dimensions. +// bias: 1-D with size the last dimension of `value`. // -// Returns The max pooled output tensor. -func MaxPool3D(scope *Scope, input tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPool3DAttr) (output tf.Output) { +// Returns Broadcasted sum of `value` and `bias`. +func BiasAddV1(scope *Scope, value tf.Output, bias tf.Output) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "MaxPool3D", + Type: "BiasAddV1", Input: []tf.Input{ - input, + value, bias, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Computes the gradients of 3-D convolution with respect to the input. +// Reverses specific dimensions of a tensor. +// +// NOTE `tf.reverse` has now changed behavior in preparation for 1.0. +// `tf.reverse_v2` is currently an alias that will be deprecated before TF 1.0. +// +// Given a `tensor`, and a `int32` tensor `axis` representing the set of +// dimensions of `tensor` to reverse. This operation reverses each dimension +// `i` for which there exists `j` s.t. `axis[j] == i`. +// +// `tensor` can have up to 8 dimensions. 
The number of dimensions specified +// in `axis` may be 0 or more entries. If an index is specified more than +// once, a InvalidArgument error is raised. +// +// For example: +// +// ``` +// # tensor 't' is [[[[ 0, 1, 2, 3], +// # [ 4, 5, 6, 7], +// # [ 8, 9, 10, 11]], +// # [[12, 13, 14, 15], +// # [16, 17, 18, 19], +// # [20, 21, 22, 23]]]] +// # tensor 't' shape is [1, 2, 3, 4] +// +// # 'dims' is [3] or 'dims' is [-1] +// reverse(t, dims) ==> [[[[ 3, 2, 1, 0], +// [ 7, 6, 5, 4], +// [ 11, 10, 9, 8]], +// [[15, 14, 13, 12], +// [19, 18, 17, 16], +// [23, 22, 21, 20]]]] // -// DEPRECATED at GraphDef version 10: Use Conv3DBackpropInputV2 +// # 'dims' is '[1]' (or 'dims' is '[-3]') +// reverse(t, dims) ==> [[[[12, 13, 14, 15], +// [16, 17, 18, 19], +// [20, 21, 22, 23] +// [[ 0, 1, 2, 3], +// [ 4, 5, 6, 7], +// [ 8, 9, 10, 11]]]] +// +// # 'dims' is '[2]' (or 'dims' is '[-2]') +// reverse(t, dims) ==> [[[[8, 9, 10, 11], +// [4, 5, 6, 7], +// [0, 1, 2, 3]] +// [[20, 21, 22, 23], +// [16, 17, 18, 19], +// [12, 13, 14, 15]]]] +// ``` // // Arguments: -// input: Shape `[batch, depth, rows, cols, in_channels]`. -// filter: Shape `[depth, rows, cols, in_channels, out_channels]`. -// `in_channels` must match between `input` and `filter`. -// out_backprop: Backprop signal of shape `[batch, out_depth, out_rows, out_cols, -// out_channels]`. -// strides: 1-D tensor of length 5. The stride of the sliding window for each -// dimension of `input`. Must have `strides[0] = strides[4] = 1`. -// padding: The type of padding algorithm to use. -func Conv3DBackpropInput(scope *Scope, input tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, padding string) (output tf.Output) { +// tensor: Up to 8-D. +// axis: 1-D. The indices of the dimensions to reverse. Must be in the range +// `[-rank(tensor), rank(tensor))`. +// +// Returns The same shape as `tensor`. 
+func ReverseV2(scope *Scope, tensor tf.Output, axis tf.Output) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"strides": strides, "padding": padding} opspec := tf.OpSpec{ - Type: "Conv3DBackpropInput", + Type: "ReverseV2", Input: []tf.Input{ - input, filter, out_backprop, + tensor, axis, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// ResourceApplyProximalAdagradAttr is an optional argument to ResourceApplyProximalAdagrad. -type ResourceApplyProximalAdagradAttr func(optionalAttr) +// RealAttr is an optional argument to Real. +type RealAttr func(optionalAttr) -// ResourceApplyProximalAdagradUseLocking sets the optional use_locking attribute to value. -// -// value: If True, updating of the var and accum tensors will be protected by -// a lock; otherwise the behavior is undefined, but may exhibit less contention. -// If not specified, defaults to false -func ResourceApplyProximalAdagradUseLocking(value bool) ResourceApplyProximalAdagradAttr { +// RealTout sets the optional Tout attribute to value. +// If not specified, defaults to DT_FLOAT +func RealTout(value tf.DataType) RealAttr { return func(m optionalAttr) { - m["use_locking"] = value + m["Tout"] = value } } -// Update '*var' and '*accum' according to FOBOS with Adagrad learning rate. +// Returns the real part of a complex number. // -// accum += grad * grad -// prox_v = var - lr * grad * (1 / sqrt(accum)) -// var = sign(prox_v)/(1+lr*l2) * max{|prox_v|-lr*l1,0} +// Given a tensor `input` of complex numbers, this operation returns a tensor of +// type `float` that is the real part of each element in `input`. All elements in +// `input` must be complex numbers of the form \\(a + bj\\), where *a* is the real +// part returned by this operation and *b* is the imaginary part. // -// Arguments: -// var_: Should be from a Variable(). -// accum: Should be from a Variable(). -// lr: Scaling factor. Must be a scalar. -// l1: L1 regularization. 
Must be a scalar. -// l2: L2 regularization. Must be a scalar. -// grad: The gradient. +// For example: // -// Returns the created operation. -func ResourceApplyProximalAdagrad(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, grad tf.Output, optional ...ResourceApplyProximalAdagradAttr) (o *tf.Operation) { +// ``` +// # tensor 'input' is [-2.25 + 4.75j, 3.25 + 5.75j] +// tf.real(input) ==> [-2.25, 3.25] +// ``` +func Real(scope *Scope, input tf.Output, optional ...RealAttr) (output tf.Output) { if scope.Err() != nil { return } @@ -11135,190 +13405,161 @@ func ResourceApplyProximalAdagrad(scope *Scope, var_ tf.Output, accum tf.Output, a(attrs) } opspec := tf.OpSpec{ - Type: "ResourceApplyProximalAdagrad", + Type: "Real", Input: []tf.Input{ - var_, accum, lr, l1, l2, grad, + input, }, Attrs: attrs, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0) } -// MutableHashTableOfTensorsV2Attr is an optional argument to MutableHashTableOfTensorsV2. -type MutableHashTableOfTensorsV2Attr func(optionalAttr) +// AudioSummaryAttr is an optional argument to AudioSummary. +type AudioSummaryAttr func(optionalAttr) -// MutableHashTableOfTensorsV2Container sets the optional container attribute to value. +// AudioSummaryMaxOutputs sets the optional max_outputs attribute to value. // -// value: If non-empty, this table is placed in the given container. -// Otherwise, a default container is used. -// If not specified, defaults to "" -func MutableHashTableOfTensorsV2Container(value string) MutableHashTableOfTensorsV2Attr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// MutableHashTableOfTensorsV2SharedName sets the optional shared_name attribute to value. +// value: Max number of batch elements to generate audio for. +// If not specified, defaults to 3 // -// value: If non-empty, this table is shared under the given name across -// multiple sessions. 
-// If not specified, defaults to "" -func MutableHashTableOfTensorsV2SharedName(value string) MutableHashTableOfTensorsV2Attr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// MutableHashTableOfTensorsV2UseNodeNameSharing sets the optional use_node_name_sharing attribute to value. -// If not specified, defaults to false -func MutableHashTableOfTensorsV2UseNodeNameSharing(value bool) MutableHashTableOfTensorsV2Attr { - return func(m optionalAttr) { - m["use_node_name_sharing"] = value - } -} - -// MutableHashTableOfTensorsV2ValueShape sets the optional value_shape attribute to value. -// If not specified, defaults to <> -func MutableHashTableOfTensorsV2ValueShape(value tf.Shape) MutableHashTableOfTensorsV2Attr { +// REQUIRES: value >= 1 +func AudioSummaryMaxOutputs(value int64) AudioSummaryAttr { return func(m optionalAttr) { - m["value_shape"] = value + m["max_outputs"] = value } } -// Creates an empty hash table. +// Outputs a `Summary` protocol buffer with audio. // -// This op creates a mutable hash table, specifying the type of its keys and -// values. Each value must be a vector. Data can be inserted into the table using -// the insert operations. It does not support the initialization operation. +// DEPRECATED at GraphDef version 15: Use AudioSummaryV2. +// +// The summary has up to `max_outputs` summary values containing audio. The +// audio is built from `tensor` which must be 3-D with shape `[batch_size, +// frames, channels]` or 2-D with shape `[batch_size, frames]`. The values are +// assumed to be in the range of `[-1.0, 1.0]` with a sample rate of `sample_rate`. +// +// The `tag` argument is a scalar `Tensor` of type `string`. It is used to +// build the `tag` of the summary values: +// +// * If `max_outputs` is 1, the summary value tag is '*tag*/audio'. +// * If `max_outputs` is greater than 1, the summary value tags are +// generated sequentially as '*tag*/audio/0', '*tag*/audio/1', etc. 
// // Arguments: -// key_dtype: Type of the table keys. -// value_dtype: Type of the table values. +// tag: Scalar. Used to build the `tag` attribute of the summary values. +// tensor: 2-D of shape `[batch_size, frames]`. +// sample_rate: The sample rate of the signal in hertz. // -// Returns Handle to a table. -func MutableHashTableOfTensorsV2(scope *Scope, key_dtype tf.DataType, value_dtype tf.DataType, optional ...MutableHashTableOfTensorsV2Attr) (table_handle tf.Output) { +// Returns Scalar. Serialized `Summary` protocol buffer. +func AudioSummary(scope *Scope, tag tf.Output, tensor tf.Output, sample_rate float32, optional ...AudioSummaryAttr) (summary tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"key_dtype": key_dtype, "value_dtype": value_dtype} + attrs := map[string]interface{}{"sample_rate": sample_rate} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "MutableHashTableOfTensorsV2", - + Type: "AudioSummary", + Input: []tf.Input{ + tag, tensor, + }, Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Inverse 2D fast Fourier transform. -// -// Computes the inverse 2-dimensional discrete Fourier transform over the -// inner-most 2 dimensions of `input`. -// -// Arguments: -// input: A complex64 tensor. -// -// Returns A complex64 tensor of the same shape as `input`. The inner-most 2 -// dimensions of `input` are replaced with their inverse 2D Fourier transform. +// QrAttr is an optional argument to Qr. +type QrAttr func(optionalAttr) + +// QrFullMatrices sets the optional full_matrices attribute to value. // -// @compatibility(numpy) -// Equivalent to np.fft.ifft2 -// @end_compatibility -func IFFT2D(scope *Scope, input tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "IFFT2D", - Input: []tf.Input{ - input, - }, +// value: If true, compute full-sized `q` and `r`. 
If false +// (the default), compute only the leading `P` columns of `q`. +// If not specified, defaults to false +func QrFullMatrices(value bool) QrAttr { + return func(m optionalAttr) { + m["full_matrices"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// Creates a tensor filled with a scalar value. -// -// This operation creates a tensor of shape `dims` and fills it with `value`. +// Computes the QR decompositions of one or more matrices. // -// For example: +// Computes the QR decomposition of each inner matrix in `tensor` such that +// `tensor[..., :, :] = q[..., :, :] * r[..., :,:])` // -// ``` -// # Output tensor has shape [2, 3]. -// fill([2, 3], 9) ==> [[9, 9, 9] -// [9, 9, 9]] +// ```python +// # a is a tensor. +// # q is a tensor of orthonormal matrices. +// # r is a tensor of upper triangular matrices. +// q, r = qr(a) +// q_full, r_full = qr(a, full_matrices=True) // ``` // // Arguments: -// dims: 1-D. Represents the shape of the output tensor. -// value: 0-D (scalar). Value to fill the returned tensor. +// input: A tensor of shape `[..., M, N]` whose inner-most 2 dimensions +// form matrices of size `[M, N]`. Let `P` be the minimum of `M` and `N`. // -// @compatibility(numpy) -// Equivalent to np.full -// @end_compatibility -func Fill(scope *Scope, dims tf.Output, value tf.Output) (output tf.Output) { +// Returns Orthonormal basis for range of `a`. If `full_matrices` is `False` then +// shape is `[..., M, P]`; if `full_matrices` is `True` then shape is +// `[..., M, M]`.Triangular factor. If `full_matrices` is `False` then shape is +// `[..., P, N]`. If `full_matrices` is `True` then shape is `[..., M, N]`. 
+func Qr(scope *Scope, input tf.Output, optional ...QrAttr) (q tf.Output, r tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "Fill", + Type: "Qr", Input: []tf.Input{ - dims, value, + input, }, + Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1) } -// 2D fast Fourier transform. -// -// Computes the 2-dimensional discrete Fourier transform over the inner-most -// 2 dimensions of `input`. -// -// Arguments: -// input: A complex64 tensor. -// -// Returns A complex64 tensor of the same shape as `input`. The inner-most 2 -// dimensions of `input` are replaced with their 2D Fourier transform. -// -// @compatibility(numpy) -// Equivalent to np.fft.fft2 -// @end_compatibility -func FFT2D(scope *Scope, input tf.Output) (output tf.Output) { +// Records the bytes size of each element of `input_dataset` in a StatsAggregator. +func BytesProducedStatsDataset(scope *Scope, input_dataset tf.Output, tag tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} opspec := tf.OpSpec{ - Type: "FFT2D", + Type: "BytesProducedStatsDataset", Input: []tf.Input{ - input, + input_dataset, tag, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// ResourceApplyProximalGradientDescentAttr is an optional argument to ResourceApplyProximalGradientDescent. -type ResourceApplyProximalGradientDescentAttr func(optionalAttr) +// ResourceSparseApplyProximalGradientDescentAttr is an optional argument to ResourceSparseApplyProximalGradientDescent. +type ResourceSparseApplyProximalGradientDescentAttr func(optionalAttr) -// ResourceApplyProximalGradientDescentUseLocking sets the optional use_locking attribute to value. 
+// ResourceSparseApplyProximalGradientDescentUseLocking sets the optional use_locking attribute to value. // // value: If True, the subtraction will be protected by a lock; // otherwise the behavior is undefined, but may exhibit less contention. // If not specified, defaults to false -func ResourceApplyProximalGradientDescentUseLocking(value bool) ResourceApplyProximalGradientDescentAttr { +func ResourceSparseApplyProximalGradientDescentUseLocking(value bool) ResourceSparseApplyProximalGradientDescentAttr { return func(m optionalAttr) { m["use_locking"] = value } } -// Update '*var' as FOBOS algorithm with fixed learning rate. +// Sparse update '*var' as FOBOS algorithm with fixed learning rate. // -// prox_v = var - alpha * delta +// That is for rows we have grad for, we update var as follows: +// prox_v = var - alpha * grad // var = sign(prox_v)/(1+alpha*l2) * max{|prox_v|-alpha*l1,0} // // Arguments: @@ -11326,10 +13567,11 @@ func ResourceApplyProximalGradientDescentUseLocking(value bool) ResourceApplyPro // alpha: Scaling factor. Must be a scalar. // l1: L1 regularization. Must be a scalar. // l2: L2 regularization. Must be a scalar. -// delta: The change. +// grad: The gradient. +// indices: A vector of indices into the first dimension of var and accum. // // Returns the created operation. 
-func ResourceApplyProximalGradientDescent(scope *Scope, var_ tf.Output, alpha tf.Output, l1 tf.Output, l2 tf.Output, delta tf.Output, optional ...ResourceApplyProximalGradientDescentAttr) (o *tf.Operation) { +func ResourceSparseApplyProximalGradientDescent(scope *Scope, var_ tf.Output, alpha tf.Output, l1 tf.Output, l2 tf.Output, grad tf.Output, indices tf.Output, optional ...ResourceSparseApplyProximalGradientDescentAttr) (o *tf.Operation) { if scope.Err() != nil { return } @@ -11338,154 +13580,262 @@ func ResourceApplyProximalGradientDescent(scope *Scope, var_ tf.Output, alpha tf a(attrs) } opspec := tf.OpSpec{ - Type: "ResourceApplyProximalGradientDescent", + Type: "ResourceSparseApplyProximalGradientDescent", Input: []tf.Input{ - var_, alpha, l1, l2, delta, + var_, alpha, l1, l2, grad, indices, }, Attrs: attrs, } return scope.AddOperation(opspec) } -// Computes the gradient for the sqrt of `x` wrt its input. +// MeanAttr is an optional argument to Mean. +type MeanAttr func(optionalAttr) + +// MeanKeepDims sets the optional keep_dims attribute to value. // -// Specifically, `grad = dy * 0.5 / y`, where `y = sqrt(x)`, and `dy` -// is the corresponding input gradient. -func SqrtGrad(scope *Scope, y tf.Output, dy tf.Output) (z tf.Output) { +// value: If true, retain reduced dimensions with length 1. +// If not specified, defaults to false +func MeanKeepDims(value bool) MeanAttr { + return func(m optionalAttr) { + m["keep_dims"] = value + } +} + +// Computes the mean of elements across dimensions of a tensor. +// +// Reduces `input` along the dimensions given in `axis`. Unless +// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in +// `axis`. If `keep_dims` is true, the reduced dimensions are +// retained with length 1. +// +// Arguments: +// input: The tensor to reduce. +// axis: The dimensions to reduce. Must be in the range +// `[-rank(input), rank(input))`. +// +// Returns The reduced tensor. 
+func Mean(scope *Scope, input tf.Output, axis tf.Output, optional ...MeanAttr) (output tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "SqrtGrad", + Type: "Mean", Input: []tf.Input{ - y, dy, + input, axis, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Get the value of the tensor specified by its handle. +// InitializeTableFromTextFileV2Attr is an optional argument to InitializeTableFromTextFileV2. +type InitializeTableFromTextFileV2Attr func(optionalAttr) + +// InitializeTableFromTextFileV2VocabSize sets the optional vocab_size attribute to value. +// +// value: Number of elements of the file, use -1 if unknown. +// If not specified, defaults to -1 +// +// REQUIRES: value >= -1 +func InitializeTableFromTextFileV2VocabSize(value int64) InitializeTableFromTextFileV2Attr { + return func(m optionalAttr) { + m["vocab_size"] = value + } +} + +// InitializeTableFromTextFileV2Delimiter sets the optional delimiter attribute to value. +// +// value: Delimiter to separate fields in a line. +// If not specified, defaults to "\t" +func InitializeTableFromTextFileV2Delimiter(value string) InitializeTableFromTextFileV2Attr { + return func(m optionalAttr) { + m["delimiter"] = value + } +} + +// Initializes a table from a text file. +// +// It inserts one key-value pair into the table for each line of the file. +// The key and value is extracted from the whole line content, elements from the +// split line based on `delimiter` or the line number (starting from zero). +// Where to extract the key and value from a line is specified by `key_index` and +// `value_index`. +// +// - A value of -1 means use the line number(starting from zero), expects `int64`. +// - A value of -2 means use the whole line content, expects `string`. +// - A value >= 0 means use the index (starting at zero) of the split line based +// on `delimiter`. 
// // Arguments: -// handle: The handle for a tensor stored in the session state. -// dtype: The type of the output value. +// table_handle: Handle to a table which will be initialized. +// filename: Filename of a vocabulary text file. +// key_index: Column index in a line to get the table `key` values from. +// value_index: Column index that represents information of a line to get the table +// `value` values from. // -// Returns The tensor for the given handle. -func GetSessionTensor(scope *Scope, handle tf.Output, dtype tf.DataType) (value tf.Output) { +// Returns the created operation. +func InitializeTableFromTextFileV2(scope *Scope, table_handle tf.Output, filename tf.Output, key_index int64, value_index int64, optional ...InitializeTableFromTextFileV2Attr) (o *tf.Operation) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"dtype": dtype} + attrs := map[string]interface{}{"key_index": key_index, "value_index": value_index} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "GetSessionTensor", + Type: "InitializeTableFromTextFileV2", Input: []tf.Input{ - handle, + table_handle, filename, }, Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } -// Returns x - y element-wise. +// QuantizedReluAttr is an optional argument to QuantizedRelu. +type QuantizedReluAttr func(optionalAttr) + +// QuantizedReluOutType sets the optional out_type attribute to value. +// If not specified, defaults to DT_QUINT8 +func QuantizedReluOutType(value tf.DataType) QuantizedReluAttr { + return func(m optionalAttr) { + m["out_type"] = value + } +} + +// Computes Quantized Rectified Linear: `max(features, 0)` // -// *NOTE*: `Subtract` supports broadcasting. 
More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func Sub(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { +// Arguments: +// +// min_features: The float value that the lowest quantized value represents. +// max_features: The float value that the highest quantized value represents. +// +// Returns Has the same output shape as "features".The float value that the lowest quantized value represents.The float value that the highest quantized value represents. +func QuantizedRelu(scope *Scope, features tf.Output, min_features tf.Output, max_features tf.Output, optional ...QuantizedReluAttr) (activations tf.Output, min_activations tf.Output, max_activations tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "Sub", + Type: "QuantizedRelu", Input: []tf.Input{ - x, y, + features, min_features, max_features, }, + Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1), op.Output(2) } -// Computes softmax cross entropy cost and gradients to backpropagate. +// Reshapes a SparseTensor to represent values in a new dense shape. // -// Inputs are the logits, not probabilities. +// This operation has the same semantics as reshape on the represented dense +// tensor. The `input_indices` are recomputed based on the requested `new_shape`. +// +// If one component of `new_shape` is the special value -1, the size of that +// dimension is computed so that the total dense size remains constant. At +// most one component of `new_shape` can be -1. The number of dense elements +// implied by `new_shape` must be the same as the number of dense elements +// originally implied by `input_shape`. +// +// Reshaping does not affect the order of values in the SparseTensor. 
+// +// If the input tensor has rank `R_in` and `N` non-empty values, and `new_shape` +// has length `R_out`, then `input_indices` has shape `[N, R_in]`, +// `input_shape` has length `R_in`, `output_indices` has shape `[N, R_out]`, and +// `output_shape` has length `R_out`. // // Arguments: -// features: batch_size x num_classes matrix -// labels: batch_size x num_classes matrix -// The caller must ensure that each batch of labels represents a valid -// probability distribution. +// input_indices: 2-D. `N x R_in` matrix with the indices of non-empty values in a +// SparseTensor. +// input_shape: 1-D. `R_in` vector with the input SparseTensor's dense shape. +// new_shape: 1-D. `R_out` vector with the requested new dense shape. // -// Returns Per example loss (batch_size vector).backpropagated gradients (batch_size x num_classes matrix). -func SoftmaxCrossEntropyWithLogits(scope *Scope, features tf.Output, labels tf.Output) (loss tf.Output, backprop tf.Output) { +// Returns 2-D. `N x R_out` matrix with the updated indices of non-empty +// values in the output SparseTensor.1-D. `R_out` vector with the full dense shape of the output +// SparseTensor. This is the same as `new_shape` but with any -1 dimensions +// filled in. +func SparseReshape(scope *Scope, input_indices tf.Output, input_shape tf.Output, new_shape tf.Output) (output_indices tf.Output, output_shape tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "SoftmaxCrossEntropyWithLogits", + Type: "SparseReshape", Input: []tf.Input{ - features, labels, + input_indices, input_shape, new_shape, }, } op := scope.AddOperation(opspec) return op.Output(0), op.Output(1) } -// ReduceJoinAttr is an optional argument to ReduceJoin. -type ReduceJoinAttr func(optionalAttr) - -// ReduceJoinKeepDims sets the optional keep_dims attribute to value. +// Deprecated. Use TensorArraySplitV3 // -// value: If `True`, retain reduced dimensions with length `1`. 
-// If not specified, defaults to false -func ReduceJoinKeepDims(value bool) ReduceJoinAttr { - return func(m optionalAttr) { - m["keep_dims"] = value +// DEPRECATED at GraphDef version 26: Use TensorArraySplitV3 +func TensorArraySplitV2(scope *Scope, handle tf.Output, value tf.Output, lengths tf.Output, flow_in tf.Output) (flow_out tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "TensorArraySplitV2", + Input: []tf.Input{ + handle, value, lengths, flow_in, + }, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// ReduceJoinSeparator sets the optional separator attribute to value. +// PackAttr is an optional argument to Pack. +type PackAttr func(optionalAttr) + +// PackAxis sets the optional axis attribute to value. // -// value: The separator to use when joining. -// If not specified, defaults to "" -func ReduceJoinSeparator(value string) ReduceJoinAttr { +// value: Dimension along which to pack. Negative values wrap around, so the +// valid range is `[-(R+1), R+1)`. +// If not specified, defaults to 0 +func PackAxis(value int64) PackAttr { return func(m optionalAttr) { - m["separator"] = value + m["axis"] = value } } -// Joins a string Tensor across the given dimensions. -// -// Computes the string join across dimensions in the given string Tensor of shape -// `[d_0, d_1, ..., d_n-1]`. Returns a new Tensor created by joining the input -// strings with the given separator (default: empty string). Negative indices are -// counted backwards from the end, with `-1` being equivalent to `n - 1`. +// Packs a list of `N` rank-`R` tensors into one rank-`(R+1)` tensor. // -// For example: +// Packs the `N` tensors in `values` into a tensor with rank one higher than each +// tensor in `values`, by packing them along the `axis` dimension. 
+// Given a list of tensors of shape `(A, B, C)`; // -// ```python -// # tensor `a` is [["a", "b"], ["c", "d"]] -// tf.reduce_join(a, 0) ==> ["ac", "bd"] -// tf.reduce_join(a, 1) ==> ["ab", "cd"] -// tf.reduce_join(a, -2) = tf.reduce_join(a, 0) ==> ["ac", "bd"] -// tf.reduce_join(a, -1) = tf.reduce_join(a, 1) ==> ["ab", "cd"] -// tf.reduce_join(a, 0, keep_dims=True) ==> [["ac", "bd"]] -// tf.reduce_join(a, 1, keep_dims=True) ==> [["ab"], ["cd"]] -// tf.reduce_join(a, 0, separator=".") ==> ["a.c", "b.d"] -// tf.reduce_join(a, [0, 1]) ==> ["acbd"] -// tf.reduce_join(a, [1, 0]) ==> ["abcd"] -// tf.reduce_join(a, []) ==> ["abcd"] +// if `axis == 0` then the `output` tensor will have the shape `(N, A, B, C)`. +// if `axis == 1` then the `output` tensor will have the shape `(A, N, B, C)`. +// Etc. +// +// For example: +// +// ``` +// # 'x' is [1, 4] +// # 'y' is [2, 5] +// # 'z' is [3, 6] +// pack([x, y, z]) => [[1, 4], [2, 5], [3, 6]] # Pack along first dim. +// pack([x, y, z], axis=1) => [[1, 2, 3], [4, 5, 6]] // ``` // +// This is the opposite of `unpack`. +// // Arguments: -// inputs: The input to be joined. All reduced indices must have non-zero size. -// reduction_indices: The dimensions to reduce over. Dimensions are reduced in the -// order specified. Omitting `reduction_indices` is equivalent to passing -// `[n-1, n-2, ..., 0]`. Negative indices from `-n` to `-1` are supported. +// values: Must be of same shape and type. // -// Returns Has shape equal to that of the input with reduced dimensions removed or -// set to `1` depending on `keep_dims`. -func ReduceJoin(scope *Scope, inputs tf.Output, reduction_indices tf.Output, optional ...ReduceJoinAttr) (output tf.Output) { +// Returns The packed tensor. 
+func Pack(scope *Scope, values []tf.Output, optional ...PackAttr) (output tf.Output) { if scope.Err() != nil { return } @@ -11494,9 +13844,9 @@ func ReduceJoin(scope *Scope, inputs tf.Output, reduction_indices tf.Output, opt a(attrs) } opspec := tf.OpSpec{ - Type: "ReduceJoin", + Type: "Pack", Input: []tf.Input{ - inputs, reduction_indices, + tf.OutputList(values), }, Attrs: attrs, } @@ -11504,78 +13854,86 @@ func ReduceJoin(scope *Scope, inputs tf.Output, reduction_indices tf.Output, opt return op.Output(0) } -// Computes cos of x element-wise. -func Cos(scope *Scope, x tf.Output) (y tf.Output) { +// Reorders a SparseTensor into the canonical, row-major ordering. +// +// Note that by convention, all sparse ops preserve the canonical ordering along +// increasing dimension number. The only time ordering can be violated is during +// manual manipulation of the indices and values vectors to add entries. +// +// Reordering does not affect the shape of the SparseTensor. +// +// If the tensor has rank `R` and `N` non-empty values, `input_indices` has +// shape `[N, R]`, input_values has length `N`, and input_shape has length `R`. +// +// Arguments: +// input_indices: 2-D. `N x R` matrix with the indices of non-empty values in a +// SparseTensor, possibly not in canonical ordering. +// input_values: 1-D. `N` non-empty values corresponding to `input_indices`. +// input_shape: 1-D. Shape of the input SparseTensor. +// +// Returns 2-D. `N x R` matrix with the same indices as input_indices, but +// in canonical row-major ordering.1-D. `N` non-empty values corresponding to `output_indices`. 
+func SparseReorder(scope *Scope, input_indices tf.Output, input_values tf.Output, input_shape tf.Output) (output_indices tf.Output, output_values tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Cos", + Type: "SparseReorder", Input: []tf.Input{ - x, + input_indices, input_values, input_shape, }, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1) } -// FusedBatchNormGradAttr is an optional argument to FusedBatchNormGrad. -type FusedBatchNormGradAttr func(optionalAttr) - -// FusedBatchNormGradEpsilon sets the optional epsilon attribute to value. -// -// value: A small float number added to the variance of x. -// If not specified, defaults to 0.0001 -func FusedBatchNormGradEpsilon(value float32) FusedBatchNormGradAttr { - return func(m optionalAttr) { - m["epsilon"] = value +// Computes rectified linear: `max(features, 0)`. +func Relu(scope *Scope, features tf.Output) (activations tf.Output) { + if scope.Err() != nil { + return } -} - -// FusedBatchNormGradDataFormat sets the optional data_format attribute to value. -// -// value: The data format for y_backprop, x, x_backprop. -// Either "NHWC" (default) or "NCHW". -// If not specified, defaults to "NHWC" -func FusedBatchNormGradDataFormat(value string) FusedBatchNormGradAttr { - return func(m optionalAttr) { - m["data_format"] = value + opspec := tf.OpSpec{ + Type: "Relu", + Input: []tf.Input{ + features, + }, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// FusedBatchNormGradIsTraining sets the optional is_training attribute to value. +// ResourceApplyAddSignAttr is an optional argument to ResourceApplyAddSign. +type ResourceApplyAddSignAttr func(optionalAttr) + +// ResourceApplyAddSignUseLocking sets the optional use_locking attribute to value. // -// value: A bool value to indicate the operation is for training (default) -// or inference. 
-// If not specified, defaults to true -func FusedBatchNormGradIsTraining(value bool) FusedBatchNormGradAttr { +// value: If `True`, updating of the var and m tensors is +// protected by a lock; otherwise the behavior is undefined, but may exhibit less +// contention. +// If not specified, defaults to false +func ResourceApplyAddSignUseLocking(value bool) ResourceApplyAddSignAttr { return func(m optionalAttr) { - m["is_training"] = value + m["use_locking"] = value } } -// Gradient for batch normalization. +// Update '*var' according to the AddSign update. // -// Note that the size of 4D Tensors are defined by either "NHWC" or "NCHW". -// The size of 1D Tensors matches the dimension C of the 4D Tensors. +// m_t <- beta1 * m_{t-1} + (1 - beta1) * g +// update <- (alpha + sign_decay * sign(g) *sign(m)) * g +// variable <- variable - lr_t * update // // Arguments: -// y_backprop: A 4D Tensor for the gradient with respect to y. -// x: A 4D Tensor for input data. -// scale: A 1D Tensor for scaling factor, to scale the normalized x. -// reserve_space_1: When is_training is True, a 1D Tensor for the computed batch -// mean to be reused in gradient computation. When is_training is -// False, a 1D Tensor for the population mean to be reused in both -// 1st and 2nd order gradient computation. -// reserve_space_2: When is_training is True, a 1D Tensor for the computed batch -// variance (inverted variance in the cuDNN case) to be reused in -// gradient computation. When is_training is False, a 1D Tensor -// for the population variance to be reused in both 1st and 2nd -// order gradient computation. +// var_: Should be from a Variable(). +// m: Should be from a Variable(). +// lr: Scaling factor. Must be a scalar. +// alpha: Must be a scalar. +// sign_decay: Must be a scalar. +// beta: Must be a scalar. +// grad: The gradient. 
// -// Returns A 4D Tensor for the gradient with respect to x.A 1D Tensor for the gradient with respect to scale.A 1D Tensor for the gradient with respect to offset.Unused placeholder to match the mean input in FusedBatchNorm.Unused placeholder to match the variance input -// in FusedBatchNorm. -func FusedBatchNormGrad(scope *Scope, y_backprop tf.Output, x tf.Output, scale tf.Output, reserve_space_1 tf.Output, reserve_space_2 tf.Output, optional ...FusedBatchNormGradAttr) (x_backprop tf.Output, scale_backprop tf.Output, offset_backprop tf.Output, reserve_space_3 tf.Output, reserve_space_4 tf.Output) { +// Returns the created operation. +func ResourceApplyAddSign(scope *Scope, var_ tf.Output, m tf.Output, lr tf.Output, alpha tf.Output, sign_decay tf.Output, beta tf.Output, grad tf.Output, optional ...ResourceApplyAddSignAttr) (o *tf.Operation) { if scope.Err() != nil { return } @@ -11584,119 +13942,161 @@ func FusedBatchNormGrad(scope *Scope, y_backprop tf.Output, x tf.Output, scale t a(attrs) } opspec := tf.OpSpec{ - Type: "FusedBatchNormGrad", + Type: "ResourceApplyAddSign", Input: []tf.Input{ - y_backprop, x, scale, reserve_space_1, reserve_space_2, + var_, m, lr, alpha, sign_decay, beta, grad, }, Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4) + return scope.AddOperation(opspec) } -// TopKAttr is an optional argument to TopK. -type TopKAttr func(optionalAttr) +// CudnnRNNBackpropAttr is an optional argument to CudnnRNNBackprop. +type CudnnRNNBackpropAttr func(optionalAttr) -// TopKSorted sets the optional sorted attribute to value. -// -// value: If true the resulting `k` elements will be sorted by the values in -// descending order. -// If not specified, defaults to true -func TopKSorted(value bool) TopKAttr { +// CudnnRNNBackpropRnnMode sets the optional rnn_mode attribute to value. 
+// If not specified, defaults to "lstm" +func CudnnRNNBackpropRnnMode(value string) CudnnRNNBackpropAttr { return func(m optionalAttr) { - m["sorted"] = value + m["rnn_mode"] = value } } -// Finds values and indices of the `k` largest elements for the last dimension. -// -// DEPRECATED at GraphDef version 7: Use TopKV2 instead -// -// If the input is a vector (rank-1), finds the `k` largest entries in the vector -// and outputs their values and indices as vectors. Thus `values[j]` is the -// `j`-th largest entry in `input`, and its index is `indices[j]`. -// -// For matrices (resp. higher rank input), computes the top `k` entries in each -// row (resp. vector along the last dimension). Thus, -// -// values.shape = indices.shape = input.shape[:-1] + [k] -// -// If two elements are equal, the lower-index element appears first. -// -// If `k` varies dynamically, use `TopKV2` below. -// -// Arguments: -// input: 1-D or higher with last dimension at least `k`. -// k: Number of top elements to look for along the last dimension (along each -// row for matrices). -// -// Returns The `k` largest elements along each last dimensional slice.The indices of `values` within the last dimension of `input`. -func TopK(scope *Scope, input tf.Output, k int64, optional ...TopKAttr) (values tf.Output, indices tf.Output) { +// CudnnRNNBackpropInputMode sets the optional input_mode attribute to value. +// If not specified, defaults to "linear_input" +func CudnnRNNBackpropInputMode(value string) CudnnRNNBackpropAttr { + return func(m optionalAttr) { + m["input_mode"] = value + } +} + +// CudnnRNNBackpropDirection sets the optional direction attribute to value. +// If not specified, defaults to "unidirectional" +func CudnnRNNBackpropDirection(value string) CudnnRNNBackpropAttr { + return func(m optionalAttr) { + m["direction"] = value + } +} + +// CudnnRNNBackpropDropout sets the optional dropout attribute to value. 
+// If not specified, defaults to 0 +func CudnnRNNBackpropDropout(value float32) CudnnRNNBackpropAttr { + return func(m optionalAttr) { + m["dropout"] = value + } +} + +// CudnnRNNBackpropSeed sets the optional seed attribute to value. +// If not specified, defaults to 0 +func CudnnRNNBackpropSeed(value int64) CudnnRNNBackpropAttr { + return func(m optionalAttr) { + m["seed"] = value + } +} + +// CudnnRNNBackpropSeed2 sets the optional seed2 attribute to value. +// If not specified, defaults to 0 +func CudnnRNNBackpropSeed2(value int64) CudnnRNNBackpropAttr { + return func(m optionalAttr) { + m["seed2"] = value + } +} + +// Backprop step of CudnnRNN. +// +// Compute the backprop of both data and weights in a RNN. +// +// rnn_mode: Indicates the type of the RNN model. +// input_mode: Indicate whether there is a linear projection between the input and +// The actual computation before the first layer. 'skip_input' is only allowed +// when input_size == num_units; 'auto_select' implies 'skip_input' when +// input_size == num_units; otherwise, it implies 'linear_input'. +// direction: Indicates whether a bidirectional model will be used. +// dir = (direction == bidirectional) ? 2 : 1 +// dropout: dropout probability. When set to 0., dropout is disabled. +// seed: the 1st part of a seed to initialize dropout. +// seed2: the 2nd part of a seed to initialize dropout. +// input: a 3-D tensor with the shape of [seq_length, batch_size, input_size]. +// input_h: a 3-D tensor with the shape of [num_layer * dir, batch_size, +// num_units]. +// input_c: For LSTM, a 3-D tensor with the shape of +// [num_layer * dir, batch, num_units]. For other models, it is ignored. +// params: a 1-D tensor that contains the weights and biases in an opaque layout. +// The size must be created through CudnnRNNParamsSize, and initialized +// separately. Note that they might not be compatible across different +// generations. 
So it is a good idea to save and restore +// output: a 3-D tensor with the shape of [seq_length, batch_size, +// dir * num_units]. +// output_h: the same shape has input_h. +// output_c: the same shape as input_c for LSTM. An empty tensor for other models. +// output_backprop: A 3-D tensor with the same shape as output in the forward pass. +// output_h_backprop: A 3-D tensor with the same shape as output_h in the forward +// pass. +// output_c_backprop: A 3-D tensor with the same shape as output_c in the forward +// pass. +// reserve_space: The same reserve_space produced in for forward operation. +// input_backprop: The backprop to input in the forward pass. Has the same shape +// as input. +// input_h_backprop: The backprop to input_h in the forward pass. Has the same +// shape as input_h. +// input_c_backprop: The backprop to input_c in the forward pass. Has the same +// shape as input_c. +// params_backprop: The backprop to the params buffer in the forward pass. Has the +// same shape as params. 
+func CudnnRNNBackprop(scope *Scope, input tf.Output, input_h tf.Output, input_c tf.Output, params tf.Output, output tf.Output, output_h tf.Output, output_c tf.Output, output_backprop tf.Output, output_h_backprop tf.Output, output_c_backprop tf.Output, reserve_space tf.Output, optional ...CudnnRNNBackpropAttr) (input_backprop tf.Output, input_h_backprop tf.Output, input_c_backprop tf.Output, params_backprop tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"k": k} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "TopK", + Type: "CudnnRNNBackprop", Input: []tf.Input{ - input, + input, input_h, input_c, params, output, output_h, output_c, output_backprop, output_h_backprop, output_c_backprop, reserve_space, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) + return op.Output(0), op.Output(1), op.Output(2), op.Output(3) } -// Compute the Hurwitz zeta function \\(\zeta(x, q)\\). +// FractionalMaxPoolGradAttr is an optional argument to FractionalMaxPoolGrad. +type FractionalMaxPoolGradAttr func(optionalAttr) + +// FractionalMaxPoolGradOverlapping sets the optional overlapping attribute to value. // -// The Hurwitz zeta function is defined as: +// value: When set to True, it means when pooling, the values at the boundary +// of adjacent pooling cells are used by both cells. For example: // +// `index 0 1 2 3 4` // -// \\(\zeta(x, q) = \sum_{n=0}^{\infty} (q + n)^{-x}\\) -func Zeta(scope *Scope, x tf.Output, q tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Zeta", - Input: []tf.Input{ - x, q, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// ProdAttr is an optional argument to Prod. -type ProdAttr func(optionalAttr) - -// ProdKeepDims sets the optional keep_dims attribute to value. +// `value 20 5 16 3 7` // -// value: If true, retain reduced dimensions with length 1. 
+// If the pooling sequence is [0, 2, 4], then 16, at index 2 will be used twice. +// The result would be [20, 16] for fractional max pooling. // If not specified, defaults to false -func ProdKeepDims(value bool) ProdAttr { +func FractionalMaxPoolGradOverlapping(value bool) FractionalMaxPoolGradAttr { return func(m optionalAttr) { - m["keep_dims"] = value + m["overlapping"] = value } } -// Computes the product of elements across dimensions of a tensor. -// -// Reduces `input` along the dimensions given in `axis`. Unless -// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in -// `axis`. If `keep_dims` is true, the reduced dimensions are -// retained with length 1. +// Computes gradient of the FractionalMaxPool function. // // Arguments: -// input: The tensor to reduce. -// axis: The dimensions to reduce. Must be in the range -// `[-rank(input), rank(input))`. +// orig_input: Original input for `fractional_max_pool` +// orig_output: Original output for `fractional_max_pool` +// out_backprop: 4-D with shape `[batch, height, width, channels]`. Gradients +// w.r.t. the output of `fractional_max_pool`. +// row_pooling_sequence: row pooling sequence, form pooling region with +// col_pooling_sequence. +// col_pooling_sequence: column pooling sequence, form pooling region with +// row_pooling sequence. // -// Returns The reduced tensor. -func Prod(scope *Scope, input tf.Output, axis tf.Output, optional ...ProdAttr) (output tf.Output) { +// Returns 4-D. Gradients w.r.t. the input of `fractional_max_pool`. 
+func FractionalMaxPoolGrad(scope *Scope, orig_input tf.Output, orig_output tf.Output, out_backprop tf.Output, row_pooling_sequence tf.Output, col_pooling_sequence tf.Output, optional ...FractionalMaxPoolGradAttr) (output tf.Output) { if scope.Err() != nil { return } @@ -11705,9 +14105,9 @@ func Prod(scope *Scope, input tf.Output, axis tf.Output, optional ...ProdAttr) ( a(attrs) } opspec := tf.OpSpec{ - Type: "Prod", + Type: "FractionalMaxPoolGrad", Input: []tf.Input{ - input, axis, + orig_input, orig_output, out_backprop, row_pooling_sequence, col_pooling_sequence, }, Attrs: attrs, } @@ -11715,224 +14115,222 @@ func Prod(scope *Scope, input tf.Output, axis tf.Output, optional ...ProdAttr) ( return op.Output(0) } -// FusedResizeAndPadConv2DAttr is an optional argument to FusedResizeAndPadConv2D. -type FusedResizeAndPadConv2DAttr func(optionalAttr) +// ResourceApplyAdagradDAAttr is an optional argument to ResourceApplyAdagradDA. +type ResourceApplyAdagradDAAttr func(optionalAttr) -// FusedResizeAndPadConv2DResizeAlignCorners sets the optional resize_align_corners attribute to value. +// ResourceApplyAdagradDAUseLocking sets the optional use_locking attribute to value. // -// value: If true, rescale input by (new_height - 1) / (height - 1), -// which exactly aligns the 4 corners of images and resized images. If false, rescale -// by new_height / height. Treat similarly the width dimension. +// value: If True, updating of the var and accum tensors will be protected by +// a lock; otherwise the behavior is undefined, but may exhibit less contention. // If not specified, defaults to false -func FusedResizeAndPadConv2DResizeAlignCorners(value bool) FusedResizeAndPadConv2DAttr { +func ResourceApplyAdagradDAUseLocking(value bool) ResourceApplyAdagradDAAttr { return func(m optionalAttr) { - m["resize_align_corners"] = value + m["use_locking"] = value } } -// Performs a resize and padding as a preprocess during a convolution. 
-// -// It's often possible to do spatial transformations more efficiently as part of -// the packing stage of a convolution, so this op allows for an optimized -// implementation where these stages are fused together. This prevents the need to -// write out the intermediate results as whole tensors, reducing memory pressure, -// and we can get some latency gains by merging the transformation calculations. -// The data_format attribute for Conv2D isn't supported by this op, and defaults to -// 'NHWC' order. -// Internally this op uses a single per-graph scratch buffer, which means that it -// will block if multiple versions are being run in parallel. This is because this -// operator is primarily an optimization to minimize memory usage. +// Update '*var' according to the proximal adagrad scheme. // // Arguments: -// input: 4-D with shape `[batch, in_height, in_width, in_channels]`. -// size: A 1-D int32 Tensor of 2 elements: `new_height, new_width`. The -// new size for the images. -// paddings: A two-column matrix specifying the padding sizes. The number of -// rows must be the same as the rank of `input`. -// filter: 4-D with shape -// `[filter_height, filter_width, in_channels, out_channels]`. +// var_: Should be from a Variable(). +// gradient_accumulator: Should be from a Variable(). +// gradient_squared_accumulator: Should be from a Variable(). +// grad: The gradient. +// lr: Scaling factor. Must be a scalar. +// l1: L1 regularization. Must be a scalar. +// l2: L2 regularization. Must be a scalar. +// global_step: Training step number. Must be a scalar. // -// strides: 1-D of length 4. The stride of the sliding window for each dimension -// of `input`. Must be in the same order as the dimension specified with format. -// padding: The type of padding algorithm to use. 
-func FusedResizeAndPadConv2D(scope *Scope, input tf.Output, size tf.Output, paddings tf.Output, filter tf.Output, mode string, strides []int64, padding string, optional ...FusedResizeAndPadConv2DAttr) (output tf.Output) { +// Returns the created operation. +func ResourceApplyAdagradDA(scope *Scope, var_ tf.Output, gradient_accumulator tf.Output, gradient_squared_accumulator tf.Output, grad tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, global_step tf.Output, optional ...ResourceApplyAdagradDAAttr) (o *tf.Operation) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"mode": mode, "strides": strides, "padding": padding} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "FusedResizeAndPadConv2D", + Type: "ResourceApplyAdagradDA", Input: []tf.Input{ - input, size, paddings, filter, + var_, gradient_accumulator, gradient_squared_accumulator, grad, lr, l1, l2, global_step, }, Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } -// Inverse 3D fast Fourier transform. -// -// Computes the inverse 3-dimensional discrete Fourier transform over the -// inner-most 3 dimensions of `input`. -// -// Arguments: -// input: A complex64 tensor. -// -// Returns A complex64 tensor of the same shape as `input`. The inner-most 3 -// dimensions of `input` are replaced with their inverse 3D Fourier transform. -// -// @compatibility(numpy) -// Equivalent to np.fft.ifftn with 3 dimensions. -// @end_compatibility -func IFFT3D(scope *Scope, input tf.Output) (output tf.Output) { - if scope.Err() != nil { - return +// CudnnRNNCanonicalToParamsAttr is an optional argument to CudnnRNNCanonicalToParams. +type CudnnRNNCanonicalToParamsAttr func(optionalAttr) + +// CudnnRNNCanonicalToParamsRnnMode sets the optional rnn_mode attribute to value. 
+// If not specified, defaults to "lstm" +func CudnnRNNCanonicalToParamsRnnMode(value string) CudnnRNNCanonicalToParamsAttr { + return func(m optionalAttr) { + m["rnn_mode"] = value } - opspec := tf.OpSpec{ - Type: "IFFT3D", - Input: []tf.Input{ - input, - }, +} + +// CudnnRNNCanonicalToParamsInputMode sets the optional input_mode attribute to value. +// If not specified, defaults to "linear_input" +func CudnnRNNCanonicalToParamsInputMode(value string) CudnnRNNCanonicalToParamsAttr { + return func(m optionalAttr) { + m["input_mode"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// Adds `bias` to `value`. -// -// This is a deprecated version of BiasAdd and will be soon removed. +// CudnnRNNCanonicalToParamsDirection sets the optional direction attribute to value. +// If not specified, defaults to "unidirectional" +func CudnnRNNCanonicalToParamsDirection(value string) CudnnRNNCanonicalToParamsAttr { + return func(m optionalAttr) { + m["direction"] = value + } +} + +// CudnnRNNCanonicalToParamsDropout sets the optional dropout attribute to value. +// If not specified, defaults to 0 +func CudnnRNNCanonicalToParamsDropout(value float32) CudnnRNNCanonicalToParamsAttr { + return func(m optionalAttr) { + m["dropout"] = value + } +} + +// CudnnRNNCanonicalToParamsSeed sets the optional seed attribute to value. +// If not specified, defaults to 0 +func CudnnRNNCanonicalToParamsSeed(value int64) CudnnRNNCanonicalToParamsAttr { + return func(m optionalAttr) { + m["seed"] = value + } +} + +// CudnnRNNCanonicalToParamsSeed2 sets the optional seed2 attribute to value. +// If not specified, defaults to 0 +func CudnnRNNCanonicalToParamsSeed2(value int64) CudnnRNNCanonicalToParamsAttr { + return func(m optionalAttr) { + m["seed2"] = value + } +} + +// Converts CudnnRNN params from canonical form to usable form. // -// This is a special case of `tf.add` where `bias` is restricted to be 1-D. 
-// Broadcasting is supported, so `value` may have any number of dimensions. +// Writes a set of weights into the opaque params buffer so they can be used in +// upcoming training or inferences. // -// Arguments: -// value: Any number of dimensions. -// bias: 1-D with size the last dimension of `value`. +// Note that the params buffer may not be compatible across different GPUs. So any +// save and restoration should be converted to and from the canonical weights and +// biases. // -// Returns Broadcasted sum of `value` and `bias`. -func BiasAddV1(scope *Scope, value tf.Output, bias tf.Output) (output tf.Output) { +// num_layers: Specifies the number of layers in the RNN model. +// num_units: Specifies the size of the hidden state. +// input_size: Specifies the size of the input state. +// weights: the canonical form of weights that can be used for saving +// and restoration. They are more likely to be compatible across different +// generations. +// biases: the canonical form of biases that can be used for saving +// and restoration. They are more likely to be compatible across different +// generations. +// num_params: number of parameter sets for all layers. +// Each layer may contain multiple parameter sets, with each set consisting of +// a weight matrix and a bias vector. +// rnn_mode: Indicates the type of the RNN model. +// input_mode: Indicate whether there is a linear projection between the input and +// The actual computation before the first layer. 'skip_input' is only allowed +// when input_size == num_units; 'auto_select' implies 'skip_input' when +// input_size == num_units; otherwise, it implies 'linear_input'. +// direction: Indicates whether a bidirectional model will be used. +// dir = (direction == bidirectional) ? 2 : 1 +// dropout: dropout probability. When set to 0., dropout is disabled. +// seed: the 1st part of a seed to initialize dropout. +// seed2: the 2nd part of a seed to initialize dropout. 
+func CudnnRNNCanonicalToParams(scope *Scope, num_layers tf.Output, num_units tf.Output, input_size tf.Output, weights []tf.Output, biases []tf.Output, optional ...CudnnRNNCanonicalToParamsAttr) (params tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "BiasAddV1", + Type: "CudnnRNNCanonicalToParams", Input: []tf.Input{ - value, bias, + num_layers, num_units, input_size, tf.OutputList(weights), tf.OutputList(biases), }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Reverses specific dimensions of a tensor. -// -// NOTE `tf.reverse` has now changed behavior in preparation for 1.0. -// `tf.reverse_v2` is currently an alias that will be deprecated before TF 1.0. -// -// Given a `tensor`, and a `int32` tensor `axis` representing the set of -// dimensions of `tensor` to reverse. This operation reverses each dimension -// `i` for which there exists `j` s.t. `axis[j] == i`. -// -// `tensor` can have up to 8 dimensions. The number of dimensions specified -// in `axis` may be 0 or more entries. If an index is specified more than -// once, a InvalidArgument error is raised. -// -// For example: +// SparseReduceMaxSparseAttr is an optional argument to SparseReduceMaxSparse. +type SparseReduceMaxSparseAttr func(optionalAttr) + +// SparseReduceMaxSparseKeepDims sets the optional keep_dims attribute to value. // -// ``` -// # tensor 't' is [[[[ 0, 1, 2, 3], -// # [ 4, 5, 6, 7], -// # [ 8, 9, 10, 11]], -// # [[12, 13, 14, 15], -// # [16, 17, 18, 19], -// # [20, 21, 22, 23]]]] -// # tensor 't' shape is [1, 2, 3, 4] +// value: If true, retain reduced dimensions with length 1. +// If not specified, defaults to false +func SparseReduceMaxSparseKeepDims(value bool) SparseReduceMaxSparseAttr { + return func(m optionalAttr) { + m["keep_dims"] = value + } +} + +// Computes the max of elements across dimensions of a SparseTensor. 
// -// # 'dims' is [3] or 'dims' is [-1] -// reverse(t, dims) ==> [[[[ 3, 2, 1, 0], -// [ 7, 6, 5, 4], -// [ 11, 10, 9, 8]], -// [[15, 14, 13, 12], -// [19, 18, 17, 16], -// [23, 22, 21, 20]]]] +// This Op takes a SparseTensor and is the sparse counterpart to +// `tf.reduce_max()`. In contrast to SparseReduceMax, this Op returns a +// SparseTensor. // -// # 'dims' is '[1]' (or 'dims' is '[-3]') -// reverse(t, dims) ==> [[[[12, 13, 14, 15], -// [16, 17, 18, 19], -// [20, 21, 22, 23] -// [[ 0, 1, 2, 3], -// [ 4, 5, 6, 7], -// [ 8, 9, 10, 11]]]] +// Reduces `sp_input` along the dimensions given in `reduction_axes`. Unless +// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in +// `reduction_axes`. If `keep_dims` is true, the reduced dimensions are retained +// with length 1. // -// # 'dims' is '[2]' (or 'dims' is '[-2]') -// reverse(t, dims) ==> [[[[8, 9, 10, 11], -// [4, 5, 6, 7], -// [0, 1, 2, 3]] -// [[20, 21, 22, 23], -// [16, 17, 18, 19], -// [12, 13, 14, 15]]]] -// ``` +// If `reduction_axes` has no entries, all dimensions are reduced, and a tensor +// with a single element is returned. Additionally, the axes can be negative, +// which are interpreted according to the indexing rules in Python. // // Arguments: -// tensor: Up to 8-D. -// axis: 1-D. The indices of the dimensions to reverse. Must be in the range -// `[-rank(tensor), rank(tensor))`. -// -// Returns The same shape as `tensor`. -func ReverseV2(scope *Scope, tensor tf.Output, axis tf.Output) (output tf.Output) { +// input_indices: 2-D. `N x R` matrix with the indices of non-empty values in a +// SparseTensor, possibly not in canonical ordering. +// input_values: 1-D. `N` non-empty values corresponding to `input_indices`. +// input_shape: 1-D. Shape of the input SparseTensor. +// reduction_axes: 1-D. Length-`K` vector containing the reduction axes. 
+func SparseReduceMaxSparse(scope *Scope, input_indices tf.Output, input_values tf.Output, input_shape tf.Output, reduction_axes tf.Output, optional ...SparseReduceMaxSparseAttr) (output_indices tf.Output, output_values tf.Output, output_shape tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "ReverseV2", + Type: "SparseReduceMaxSparse", Input: []tf.Input{ - tensor, axis, + input_indices, input_values, input_shape, reduction_axes, }, + Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) -} - -// RealAttr is an optional argument to Real. -type RealAttr func(optionalAttr) - -// RealTout sets the optional Tout attribute to value. -// If not specified, defaults to DT_FLOAT -func RealTout(value tf.DataType) RealAttr { - return func(m optionalAttr) { - m["Tout"] = value - } + return op.Output(0), op.Output(1), op.Output(2) } -// Returns the real part of a complex number. +// Creates a dataset that emits the outputs of `input_dataset` `count` times. // -// Given a tensor `input` of complex numbers, this operation returns a tensor of -// type `float` that is the real part of each element in `input`. All elements in -// `input` must be complex numbers of the form \\(a + bj\\), where *a* is the real -// part returned by this operation and *b* is the imaginary part. +// Arguments: // -// For example: +// count: A scalar representing the number of times that `input_dataset` should +// be repeated. A value of `-1` indicates that it should be repeated infinitely. 
// -// ``` -// # tensor 'input' is [-2.25 + 4.75j, 3.25 + 5.75j] -// tf.real(input) ==> [-2.25, 3.25] -// ``` -func Real(scope *Scope, input tf.Output, optional ...RealAttr) (output tf.Output) { +// +func RepeatDataset(scope *Scope, input_dataset tf.Output, count tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} opspec := tf.OpSpec{ - Type: "Real", + Type: "RepeatDataset", Input: []tf.Input{ - input, + input_dataset, count, }, Attrs: attrs, } @@ -11940,55 +14338,76 @@ func Real(scope *Scope, input tf.Output, optional ...RealAttr) (output tf.Output return op.Output(0) } -// AudioSummaryAttr is an optional argument to AudioSummary. -type AudioSummaryAttr func(optionalAttr) +// AddManySparseToTensorsMapAttr is an optional argument to AddManySparseToTensorsMap. +type AddManySparseToTensorsMapAttr func(optionalAttr) -// AudioSummaryMaxOutputs sets the optional max_outputs attribute to value. +// AddManySparseToTensorsMapContainer sets the optional container attribute to value. // -// value: Max number of batch elements to generate audio for. -// If not specified, defaults to 3 +// value: The container name for the `SparseTensorsMap` created by this op. +// If not specified, defaults to "" +func AddManySparseToTensorsMapContainer(value string) AddManySparseToTensorsMapAttr { + return func(m optionalAttr) { + m["container"] = value + } +} + +// AddManySparseToTensorsMapSharedName sets the optional shared_name attribute to value. // -// REQUIRES: value >= 1 -func AudioSummaryMaxOutputs(value int64) AudioSummaryAttr { +// value: The shared name for the `SparseTensorsMap` created by this op. +// If blank, the new Operation's unique name is used. 
+// If not specified, defaults to "" +func AddManySparseToTensorsMapSharedName(value string) AddManySparseToTensorsMapAttr { return func(m optionalAttr) { - m["max_outputs"] = value + m["shared_name"] = value } } -// Outputs a `Summary` protocol buffer with audio. +// Add an `N`-minibatch `SparseTensor` to a `SparseTensorsMap`, return `N` handles. // -// DEPRECATED at GraphDef version 15: Use AudioSummaryV2. +// A `SparseTensor` of rank `R` is represented by three tensors: `sparse_indices`, +// `sparse_values`, and `sparse_shape`, where // -// The summary has up to `max_outputs` summary values containing audio. The -// audio is built from `tensor` which must be 3-D with shape `[batch_size, -// frames, channels]` or 2-D with shape `[batch_size, frames]`. The values are -// assumed to be in the range of `[-1.0, 1.0]` with a sample rate of `sample_rate`. +// ```sparse_indices.shape[1] == sparse_shape.shape[0] == R``` // -// The `tag` argument is a scalar `Tensor` of type `string`. It is used to -// build the `tag` of the summary values: +// An `N`-minibatch of `SparseTensor` objects is represented as a `SparseTensor` +// having a first `sparse_indices` column taking values between `[0, N)`, where +// the minibatch size `N == sparse_shape[0]`. // -// * If `max_outputs` is 1, the summary value tag is '*tag*/audio'. -// * If `max_outputs` is greater than 1, the summary value tags are -// generated sequentially as '*tag*/audio/0', '*tag*/audio/1', etc. +// The input `SparseTensor` must have rank `R` greater than 1, and the first +// dimension is treated as the minibatch dimension. Elements of the `SparseTensor` +// must be sorted in increasing order of this first dimension. The stored +// `SparseTensor` objects pointed to by each row of the output `sparse_handles` +// will have rank `R-1`. +// +// The `SparseTensor` values can then be read out as part of a minibatch by passing +// the given keys as vector elements to `TakeManySparseFromTensorsMap`. 
To ensure +// the correct `SparseTensorsMap` is accessed, ensure that the same +// `container` and `shared_name` are passed to that Op. If no `shared_name` +// is provided here, instead use the *name* of the Operation created by calling +// `AddManySparseToTensorsMap` as the `shared_name` passed to +// `TakeManySparseFromTensorsMap`. Ensure the Operations are colocated. // // Arguments: -// tag: Scalar. Used to build the `tag` attribute of the summary values. -// tensor: 2-D of shape `[batch_size, frames]`. -// sample_rate: The sample rate of the signal in hertz. +// sparse_indices: 2-D. The `indices` of the minibatch `SparseTensor`. +// `sparse_indices[:, 0]` must be ordered values in `[0, N)`. +// sparse_values: 1-D. The `values` of the minibatch `SparseTensor`. +// sparse_shape: 1-D. The `shape` of the minibatch `SparseTensor`. +// The minibatch size `N == sparse_shape[0]`. // -// Returns Scalar. Serialized `Summary` protocol buffer. -func AudioSummary(scope *Scope, tag tf.Output, tensor tf.Output, sample_rate float32, optional ...AudioSummaryAttr) (summary tf.Output) { +// Returns 1-D. The handles of the `SparseTensor` now stored in the +// `SparseTensorsMap`. Shape: `[N]`. +func AddManySparseToTensorsMap(scope *Scope, sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output, optional ...AddManySparseToTensorsMapAttr) (sparse_handles tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"sample_rate": sample_rate} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "AudioSummary", + Type: "AddManySparseToTensorsMap", Input: []tf.Input{ - tag, tensor, + sparse_indices, sparse_values, sparse_shape, }, Attrs: attrs, } @@ -11996,151 +14415,182 @@ func AudioSummary(scope *Scope, tag tf.Output, tensor tf.Output, sample_rate flo return op.Output(0) } -// QrAttr is an optional argument to Qr. 
-type QrAttr func(optionalAttr) - -// QrFullMatrices sets the optional full_matrices attribute to value. -// -// value: If true, compute full-sized `q` and `r`. If false -// (the default), compute only the leading `P` columns of `q`. -// If not specified, defaults to false -func QrFullMatrices(value bool) QrAttr { - return func(m optionalAttr) { - m["full_matrices"] = value - } -} - -// Computes the QR decompositions of one or more matrices. -// -// Computes the QR decomposition of each inner matrix in `tensor` such that -// `tensor[..., :, :] = q[..., :, :] * r[..., :,:])` -// -// ```python -// # a is a tensor. -// # q is a tensor of orthonormal matrices. -// # r is a tensor of upper triangular matrices. -// q, r = qr(a) -// q_full, r_full = qr(a, full_matrices=True) -// ``` +// Concatenates tensors along one dimension. // // Arguments: -// input: A tensor of shape `[..., M, N]` whose inner-most 2 dimensions -// form matrices of size `[M, N]`. Let `P` be the minimum of `M` and `N`. +// values: List of `N` Tensors to concatenate. Their ranks and types must match, +// and their sizes must match in all dimensions except `concat_dim`. +// axis: 0-D. The dimension along which to concatenate. Must be in the +// range [-rank(values), rank(values)). // -// Returns Orthonormal basis for range of `a`. If `full_matrices` is `False` then -// shape is `[..., M, P]`; if `full_matrices` is `True` then shape is -// `[..., M, M]`.Triangular factor. If `full_matrices` is `False` then shape is -// `[..., P, N]`. If `full_matrices` is `True` then shape is `[..., M, N]`. -func Qr(scope *Scope, input tf.Output, optional ...QrAttr) (q tf.Output, r tf.Output) { +// Returns A `Tensor` with the concatenation of values stacked along the +// `concat_dim` dimension. This tensor's shape matches that of `values` except +// in `concat_dim` where it has the sum of the sizes. 
+func ConcatV2(scope *Scope, values []tf.Output, axis tf.Output) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "Qr", + Type: "ConcatV2", Input: []tf.Input{ - input, + tf.OutputList(values), axis, }, - Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) + return op.Output(0) } -// Records the bytes size of each element of `input_dataset` in a StatsAggregator. -func BytesProducedStatsDataset(scope *Scope, input_dataset tf.Output, tag tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { +// Reads and outputs the entire contents of the input filename. +func ReadFile(scope *Scope, filename tf.Output) (contents tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} opspec := tf.OpSpec{ - Type: "BytesProducedStatsDataset", + Type: "ReadFile", Input: []tf.Input{ - input_dataset, tag, + filename, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// ResourceSparseApplyProximalGradientDescentAttr is an optional argument to ResourceSparseApplyProximalGradientDescent. -type ResourceSparseApplyProximalGradientDescentAttr func(optionalAttr) +// Multiplies sparse updates into the variable referenced by `resource`. +// +// This operation computes +// +// # Scalar indices +// ref[indices, ...] *= updates[...] +// +// # Vector indices (for each i) +// ref[indices[i], ...] *= updates[i, ...] +// +// # High rank indices (for each i, ..., j) +// ref[indices[i, ..., j], ...] *= updates[i, ..., j, ...] +// +// Duplicate entries are handled correctly: if multiple `indices` reference +// the same location, their contributions multiply. +// +// Requires `updates.shape = indices.shape + ref.shape[1:]` or `updates.shape = []`. +// +//
+// +//
+// +// Arguments: +// resource: Should be from a `Variable` node. +// indices: A tensor of indices into the first dimension of `ref`. +// updates: A tensor of updated values to add to `ref`. +// +// Returns the created operation. +func ResourceScatterMul(scope *Scope, resource tf.Output, indices tf.Output, updates tf.Output) (o *tf.Operation) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "ResourceScatterMul", + Input: []tf.Input{ + resource, indices, updates, + }, + } + return scope.AddOperation(opspec) +} -// ResourceSparseApplyProximalGradientDescentUseLocking sets the optional use_locking attribute to value. +// Computes sigmoid of `x` element-wise. // -// value: If True, the subtraction will be protected by a lock; -// otherwise the behavior is undefined, but may exhibit less contention. -// If not specified, defaults to false -func ResourceSparseApplyProximalGradientDescentUseLocking(value bool) ResourceSparseApplyProximalGradientDescentAttr { - return func(m optionalAttr) { - m["use_locking"] = value +// Specifically, `y = 1 / (1 + exp(-x))`. +func Sigmoid(scope *Scope, x tf.Output) (y tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Sigmoid", + Input: []tf.Input{ + x, + }, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// Sparse update '*var' as FOBOS algorithm with fixed learning rate. +// Updates specified rows with values in `v`. // -// That is for rows we have grad for, we update var as follows: -// prox_v = var - alpha * grad -// var = sign(prox_v)/(1+alpha*l2) * max{|prox_v|-alpha*l1,0} +// Computes `x[i, :] = v; return x`. // // Arguments: -// var_: Should be from a Variable(). -// alpha: Scaling factor. Must be a scalar. -// l1: L1 regularization. Must be a scalar. -// l2: L2 regularization. Must be a scalar. -// grad: The gradient. -// indices: A vector of indices into the first dimension of var and accum. +// x: A tensor of type `T`. +// i: A vector. 
Indices into the left-most dimension of `x`. +// v: A `Tensor` of type T. Same dimension sizes as x except the first dimension, which must be the same as i's size. // -// Returns the created operation. -func ResourceSparseApplyProximalGradientDescent(scope *Scope, var_ tf.Output, alpha tf.Output, l1 tf.Output, l2 tf.Output, grad tf.Output, indices tf.Output, optional ...ResourceSparseApplyProximalGradientDescentAttr) (o *tf.Operation) { +// Returns A `Tensor` of type T. An alias of `x`. The content of `y` is undefined if there are duplicates in `i`. +func InplaceUpdate(scope *Scope, x tf.Output, i tf.Output, v tf.Output) (y tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "ResourceSparseApplyProximalGradientDescent", + Type: "InplaceUpdate", Input: []tf.Input{ - var_, alpha, l1, l2, grad, indices, + x, i, v, }, - Attrs: attrs, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0) } -// MeanAttr is an optional argument to Mean. -type MeanAttr func(optionalAttr) +// FusedBatchNormAttr is an optional argument to FusedBatchNorm. +type FusedBatchNormAttr func(optionalAttr) -// MeanKeepDims sets the optional keep_dims attribute to value. +// FusedBatchNormEpsilon sets the optional epsilon attribute to value. // -// value: If true, retain reduced dimensions with length 1. -// If not specified, defaults to false -func MeanKeepDims(value bool) MeanAttr { +// value: A small float number added to the variance of x. +// If not specified, defaults to 0.0001 +func FusedBatchNormEpsilon(value float32) FusedBatchNormAttr { return func(m optionalAttr) { - m["keep_dims"] = value + m["epsilon"] = value } } -// Computes the mean of elements across dimensions of a tensor. +// FusedBatchNormDataFormat sets the optional data_format attribute to value. // -// Reduces `input` along the dimensions given in `axis`. 
Unless -// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in -// `axis`. If `keep_dims` is true, the reduced dimensions are -// retained with length 1. +// value: The data format for x and y. Either "NHWC" (default) or "NCHW". +// If not specified, defaults to "NHWC" +func FusedBatchNormDataFormat(value string) FusedBatchNormAttr { + return func(m optionalAttr) { + m["data_format"] = value + } +} + +// FusedBatchNormIsTraining sets the optional is_training attribute to value. +// +// value: A bool value to indicate the operation is for training (default) +// or inference. +// If not specified, defaults to true +func FusedBatchNormIsTraining(value bool) FusedBatchNormAttr { + return func(m optionalAttr) { + m["is_training"] = value + } +} + +// Batch normalization. +// +// Note that the size of 4D Tensors are defined by either "NHWC" or "NCHW". +// The size of 1D Tensors matches the dimension C of the 4D Tensors. // // Arguments: -// input: The tensor to reduce. -// axis: The dimensions to reduce. Must be in the range -// `[-rank(input), rank(input))`. +// x: A 4D Tensor for input data. +// scale: A 1D Tensor for scaling factor, to scale the normalized x. +// offset: A 1D Tensor for offset, to shift to the normalized x. +// mean: A 1D Tensor for population mean. Used for inference only; +// must be empty for training. +// variance: A 1D Tensor for population variance. Used for inference only; +// must be empty for training. // -// Returns The reduced tensor. 
-func Mean(scope *Scope, input tf.Output, axis tf.Output, optional ...MeanAttr) (output tf.Output) { +// Returns A 4D Tensor for output data.A 1D Tensor for the computed batch mean, to be used by TensorFlow +// to compute the running mean.A 1D Tensor for the computed batch variance, to be used by +// TensorFlow to compute the running variance.A 1D Tensor for the computed batch mean, to be reused +// in the gradient computation.A 1D Tensor for the computed batch variance (inverted variance +// in the cuDNN case), to be reused in the gradient computation. +func FusedBatchNorm(scope *Scope, x tf.Output, scale tf.Output, offset tf.Output, mean tf.Output, variance tf.Output, optional ...FusedBatchNormAttr) (y tf.Output, batch_mean tf.Output, batch_variance tf.Output, reserve_space_1 tf.Output, reserve_space_2 tf.Output) { if scope.Err() != nil { return } @@ -12149,316 +14599,259 @@ func Mean(scope *Scope, input tf.Output, axis tf.Output, optional ...MeanAttr) ( a(attrs) } opspec := tf.OpSpec{ - Type: "Mean", + Type: "FusedBatchNorm", Input: []tf.Input{ - input, axis, + x, scale, offset, mean, variance, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4) } -// InitializeTableFromTextFileV2Attr is an optional argument to InitializeTableFromTextFileV2. -type InitializeTableFromTextFileV2Attr func(optionalAttr) +// RandomStandardNormalAttr is an optional argument to RandomStandardNormal. +type RandomStandardNormalAttr func(optionalAttr) -// InitializeTableFromTextFileV2VocabSize sets the optional vocab_size attribute to value. -// -// value: Number of elements of the file, use -1 if unknown. -// If not specified, defaults to -1 +// RandomStandardNormalSeed sets the optional seed attribute to value. 
// -// REQUIRES: value >= -1 -func InitializeTableFromTextFileV2VocabSize(value int64) InitializeTableFromTextFileV2Attr { +// value: If either `seed` or `seed2` are set to be non-zero, the random number +// generator is seeded by the given seed. Otherwise, it is seeded by a +// random seed. +// If not specified, defaults to 0 +func RandomStandardNormalSeed(value int64) RandomStandardNormalAttr { return func(m optionalAttr) { - m["vocab_size"] = value + m["seed"] = value } } -// InitializeTableFromTextFileV2Delimiter sets the optional delimiter attribute to value. +// RandomStandardNormalSeed2 sets the optional seed2 attribute to value. // -// value: Delimiter to separate fields in a line. -// If not specified, defaults to "\t" -func InitializeTableFromTextFileV2Delimiter(value string) InitializeTableFromTextFileV2Attr { +// value: A second seed to avoid seed collision. +// If not specified, defaults to 0 +func RandomStandardNormalSeed2(value int64) RandomStandardNormalAttr { return func(m optionalAttr) { - m["delimiter"] = value + m["seed2"] = value } } -// Initializes a table from a text file. -// -// It inserts one key-value pair into the table for each line of the file. -// The key and value is extracted from the whole line content, elements from the -// split line based on `delimiter` or the line number (starting from zero). -// Where to extract the key and value from a line is specified by `key_index` and -// `value_index`. +// Outputs random values from a normal distribution. // -// - A value of -1 means use the line number(starting from zero), expects `int64`. -// - A value of -2 means use the whole line content, expects `string`. -// - A value >= 0 means use the index (starting at zero) of the split line based -// on `delimiter`. +// The generated values will have mean 0 and standard deviation 1. // // Arguments: -// table_handle: Handle to a table which will be initialized. -// filename: Filename of a vocabulary text file. 
-// key_index: Column index in a line to get the table `key` values from. -// value_index: Column index that represents information of a line to get the table -// `value` values from. +// shape: The shape of the output tensor. +// dtype: The type of the output. // -// Returns the created operation. -func InitializeTableFromTextFileV2(scope *Scope, table_handle tf.Output, filename tf.Output, key_index int64, value_index int64, optional ...InitializeTableFromTextFileV2Attr) (o *tf.Operation) { +// Returns A tensor of the specified shape filled with random normal values. +func RandomStandardNormal(scope *Scope, shape tf.Output, dtype tf.DataType, optional ...RandomStandardNormalAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"key_index": key_index, "value_index": value_index} + attrs := map[string]interface{}{"dtype": dtype} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "InitializeTableFromTextFileV2", + Type: "RandomStandardNormal", Input: []tf.Input{ - table_handle, filename, + shape, }, Attrs: attrs, } - return scope.AddOperation(opspec) -} - -// QuantizedReluAttr is an optional argument to QuantizedRelu. -type QuantizedReluAttr func(optionalAttr) - -// QuantizedReluOutType sets the optional out_type attribute to value. -// If not specified, defaults to DT_QUINT8 -func QuantizedReluOutType(value tf.DataType) QuantizedReluAttr { - return func(m optionalAttr) { - m["out_type"] = value - } + op := scope.AddOperation(opspec) + return op.Output(0) } -// Computes Quantized Rectified Linear: `max(features, 0)` +// Component-wise divides a SparseTensor by a dense Tensor. // -// Arguments: +// *Limitation*: this Op only broadcasts the dense side to the sparse side, but not +// the other direction. // -// min_features: The float value that the lowest quantized value represents. -// max_features: The float value that the highest quantized value represents. +// Arguments: +// sp_indices: 2-D. 
`N x R` matrix with the indices of non-empty values in a +// SparseTensor, possibly not in canonical ordering. +// sp_values: 1-D. `N` non-empty values corresponding to `sp_indices`. +// sp_shape: 1-D. Shape of the input SparseTensor. +// dense: `R`-D. The dense Tensor operand. // -// Returns Has the same output shape as "features".The float value that the lowest quantized value represents.The float value that the highest quantized value represents. -func QuantizedRelu(scope *Scope, features tf.Output, min_features tf.Output, max_features tf.Output, optional ...QuantizedReluAttr) (activations tf.Output, min_activations tf.Output, max_activations tf.Output) { +// Returns 1-D. The `N` values that are operated on. +func SparseDenseCwiseDiv(scope *Scope, sp_indices tf.Output, sp_values tf.Output, sp_shape tf.Output, dense tf.Output) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "QuantizedRelu", + Type: "SparseDenseCwiseDiv", Input: []tf.Input{ - features, min_features, max_features, + sp_indices, sp_values, sp_shape, dense, }, - Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return op.Output(0) } -// Reshapes a SparseTensor to represent values in a new dense shape. -// -// This operation has the same semantics as reshape on the represented dense -// tensor. The `input_indices` are recomputed based on the requested `new_shape`. -// -// If one component of `new_shape` is the special value -1, the size of that -// dimension is computed so that the total dense size remains constant. At -// most one component of `new_shape` can be -1. The number of dense elements -// implied by `new_shape` must be the same as the number of dense elements -// originally implied by `input_shape`. +// FractionalAvgPoolGradAttr is an optional argument to FractionalAvgPoolGrad. 
+type FractionalAvgPoolGradAttr func(optionalAttr) + +// FractionalAvgPoolGradOverlapping sets the optional overlapping attribute to value. // -// Reshaping does not affect the order of values in the SparseTensor. +// value: When set to True, it means when pooling, the values at the boundary +// of adjacent pooling cells are used by both cells. For example: // -// If the input tensor has rank `R_in` and `N` non-empty values, and `new_shape` -// has length `R_out`, then `input_indices` has shape `[N, R_in]`, -// `input_shape` has length `R_in`, `output_indices` has shape `[N, R_out]`, and -// `output_shape` has length `R_out`. +// `index 0 1 2 3 4` // -// Arguments: -// input_indices: 2-D. `N x R_in` matrix with the indices of non-empty values in a -// SparseTensor. -// input_shape: 1-D. `R_in` vector with the input SparseTensor's dense shape. -// new_shape: 1-D. `R_out` vector with the requested new dense shape. +// `value 20 5 16 3 7` // -// Returns 2-D. `N x R_out` matrix with the updated indices of non-empty -// values in the output SparseTensor.1-D. `R_out` vector with the full dense shape of the output -// SparseTensor. This is the same as `new_shape` but with any -1 dimensions -// filled in. -func SparseReshape(scope *Scope, input_indices tf.Output, input_shape tf.Output, new_shape tf.Output) (output_indices tf.Output, output_shape tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SparseReshape", - Input: []tf.Input{ - input_indices, input_shape, new_shape, - }, +// If the pooling sequence is [0, 2, 4], then 16, at index 2 will be used twice. +// The result would be [41/3, 26/3] for fractional avg pooling. +// If not specified, defaults to false +func FractionalAvgPoolGradOverlapping(value bool) FractionalAvgPoolGradAttr { + return func(m optionalAttr) { + m["overlapping"] = value } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) } -// Deprecated. 
Use TensorArraySplitV3 +// Computes gradient of the FractionalAvgPool function. // -// DEPRECATED at GraphDef version 26: Use TensorArraySplitV3 -func TensorArraySplitV2(scope *Scope, handle tf.Output, value tf.Output, lengths tf.Output, flow_in tf.Output) (flow_out tf.Output) { +// Unlike FractionalMaxPoolGrad, we don't need to find arg_max for +// FractionalAvgPoolGrad, we just need to evenly back-propagate each element of +// out_backprop to those indices that form the same pooling cell. Therefore, we +// just need to know the shape of original input tensor, instead of the whole +// tensor. +// +// Arguments: +// orig_input_tensor_shape: Original input tensor shape for `fractional_avg_pool` +// out_backprop: 4-D with shape `[batch, height, width, channels]`. Gradients +// w.r.t. the output of `fractional_avg_pool`. +// row_pooling_sequence: row pooling sequence, form pooling region with +// col_pooling_sequence. +// col_pooling_sequence: column pooling sequence, form pooling region with +// row_pooling sequence. +// +// Returns 4-D. Gradients w.r.t. the input of `fractional_avg_pool`. +func FractionalAvgPoolGrad(scope *Scope, orig_input_tensor_shape tf.Output, out_backprop tf.Output, row_pooling_sequence tf.Output, col_pooling_sequence tf.Output, optional ...FractionalAvgPoolGradAttr) (output tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "TensorArraySplitV2", + Type: "FractionalAvgPoolGrad", Input: []tf.Input{ - handle, value, lengths, flow_in, + orig_input_tensor_shape, out_backprop, row_pooling_sequence, col_pooling_sequence, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// PackAttr is an optional argument to Pack. -type PackAttr func(optionalAttr) +// BoostedTreesEnsembleResourceHandleOpAttr is an optional argument to BoostedTreesEnsembleResourceHandleOp. 
+type BoostedTreesEnsembleResourceHandleOpAttr func(optionalAttr) -// PackAxis sets the optional axis attribute to value. -// -// value: Dimension along which to pack. Negative values wrap around, so the -// valid range is `[-(R+1), R+1)`. -// If not specified, defaults to 0 -func PackAxis(value int64) PackAttr { +// BoostedTreesEnsembleResourceHandleOpContainer sets the optional container attribute to value. +// If not specified, defaults to "" +func BoostedTreesEnsembleResourceHandleOpContainer(value string) BoostedTreesEnsembleResourceHandleOpAttr { return func(m optionalAttr) { - m["axis"] = value + m["container"] = value } } -// Packs a list of `N` rank-`R` tensors into one rank-`(R+1)` tensor. -// -// Packs the `N` tensors in `values` into a tensor with rank one higher than each -// tensor in `values`, by packing them along the `axis` dimension. -// Given a list of tensors of shape `(A, B, C)`; -// -// if `axis == 0` then the `output` tensor will have the shape `(N, A, B, C)`. -// if `axis == 1` then the `output` tensor will have the shape `(A, N, B, C)`. -// Etc. -// -// For example: -// -// ``` -// # 'x' is [1, 4] -// # 'y' is [2, 5] -// # 'z' is [3, 6] -// pack([x, y, z]) => [[1, 4], [2, 5], [3, 6]] # Pack along first dim. -// pack([x, y, z], axis=1) => [[1, 2, 3], [4, 5, 6]] -// ``` -// -// This is the opposite of `unpack`. -// -// Arguments: -// values: Must be of same shape and type. -// -// Returns The packed tensor. -func Pack(scope *Scope, values []tf.Output, optional ...PackAttr) (output tf.Output) { +// BoostedTreesEnsembleResourceHandleOpSharedName sets the optional shared_name attribute to value. 
+// If not specified, defaults to "" +func BoostedTreesEnsembleResourceHandleOpSharedName(value string) BoostedTreesEnsembleResourceHandleOpAttr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// Creates a handle to a BoostedTreesEnsembleResource +func BoostedTreesEnsembleResourceHandleOp(scope *Scope, optional ...BoostedTreesEnsembleResourceHandleOpAttr) (resource tf.Output) { if scope.Err() != nil { return } attrs := map[string]interface{}{} for _, a := range optional { a(attrs) - } - opspec := tf.OpSpec{ - Type: "Pack", - Input: []tf.Input{ - tf.OutputList(values), - }, + } + opspec := tf.OpSpec{ + Type: "BoostedTreesEnsembleResourceHandleOp", + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Reorders a SparseTensor into the canonical, row-major ordering. -// -// Note that by convention, all sparse ops preserve the canonical ordering along -// increasing dimension number. The only time ordering can be violated is during -// manual manipulation of the indices and values vectors to add entries. -// -// Reordering does not affect the shape of the SparseTensor. -// -// If the tensor has rank `R` and `N` non-empty values, `input_indices` has -// shape `[N, R]`, input_values has length `N`, and input_shape has length `R`. +// Concatenates tensors along one dimension. // // Arguments: -// input_indices: 2-D. `N x R` matrix with the indices of non-empty values in a -// SparseTensor, possibly not in canonical ordering. -// input_values: 1-D. `N` non-empty values corresponding to `input_indices`. -// input_shape: 1-D. Shape of the input SparseTensor. +// concat_dim: 0-D. The dimension along which to concatenate. Must be in the +// range [0, rank(values)). +// values: The `N` Tensors to concatenate. Their ranks and types must match, +// and their sizes must match in all dimensions except `concat_dim`. // -// Returns 2-D. `N x R` matrix with the same indices as input_indices, but -// in canonical row-major ordering.1-D. 
`N` non-empty values corresponding to `output_indices`. -func SparseReorder(scope *Scope, input_indices tf.Output, input_values tf.Output, input_shape tf.Output) (output_indices tf.Output, output_values tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SparseReorder", - Input: []tf.Input{ - input_indices, input_values, input_shape, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - -// Computes rectified linear: `max(features, 0)`. -func Relu(scope *Scope, features tf.Output) (activations tf.Output) { +// Returns A `Tensor` with the concatenation of values stacked along the +// `concat_dim` dimension. This tensor's shape matches that of `values` except +// in `concat_dim` where it has the sum of the sizes. +func Concat(scope *Scope, concat_dim tf.Output, values []tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Relu", + Type: "Concat", Input: []tf.Input{ - features, + concat_dim, tf.OutputList(values), }, } op := scope.AddOperation(opspec) return op.Output(0) } -// ResourceApplyAddSignAttr is an optional argument to ResourceApplyAddSign. -type ResourceApplyAddSignAttr func(optionalAttr) +// ResourceApplyMomentumAttr is an optional argument to ResourceApplyMomentum. +type ResourceApplyMomentumAttr func(optionalAttr) -// ResourceApplyAddSignUseLocking sets the optional use_locking attribute to value. +// ResourceApplyMomentumUseLocking sets the optional use_locking attribute to value. // -// value: If `True`, updating of the var and m tensors is -// protected by a lock; otherwise the behavior is undefined, but may exhibit less +// value: If `True`, updating of the var and accum tensors will be protected +// by a lock; otherwise the behavior is undefined, but may exhibit less // contention. 
// If not specified, defaults to false -func ResourceApplyAddSignUseLocking(value bool) ResourceApplyAddSignAttr { +func ResourceApplyMomentumUseLocking(value bool) ResourceApplyMomentumAttr { return func(m optionalAttr) { m["use_locking"] = value } } -// Update '*var' according to the AddSign update. +// ResourceApplyMomentumUseNesterov sets the optional use_nesterov attribute to value. // -// m_t <- beta1 * m_{t-1} + (1 - beta1) * g -// update <- (alpha + sign_decay * sign(g) *sign(m)) * g -// variable <- variable - lr_t * update +// value: If `True`, the tensor passed to compute grad will be +// var - lr * momentum * accum, so in the end, the var you get is actually +// var - lr * momentum * accum. +// If not specified, defaults to false +func ResourceApplyMomentumUseNesterov(value bool) ResourceApplyMomentumAttr { + return func(m optionalAttr) { + m["use_nesterov"] = value + } +} + +// Update '*var' according to the momentum scheme. Set use_nesterov = True if you +// +// want to use Nesterov momentum. +// +// accum = accum * momentum + grad +// var -= lr * accum // // Arguments: // var_: Should be from a Variable(). -// m: Should be from a Variable(). +// accum: Should be from a Variable(). // lr: Scaling factor. Must be a scalar. -// alpha: Must be a scalar. -// sign_decay: Must be a scalar. -// beta: Must be a scalar. // grad: The gradient. +// momentum: Momentum. Must be a scalar. // // Returns the created operation. 
-func ResourceApplyAddSign(scope *Scope, var_ tf.Output, m tf.Output, lr tf.Output, alpha tf.Output, sign_decay tf.Output, beta tf.Output, grad tf.Output, optional ...ResourceApplyAddSignAttr) (o *tf.Operation) { +func ResourceApplyMomentum(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, grad tf.Output, momentum tf.Output, optional ...ResourceApplyMomentumAttr) (o *tf.Operation) { if scope.Err() != nil { return } @@ -12467,61 +14860,56 @@ func ResourceApplyAddSign(scope *Scope, var_ tf.Output, m tf.Output, lr tf.Outpu a(attrs) } opspec := tf.OpSpec{ - Type: "ResourceApplyAddSign", + Type: "ResourceApplyMomentum", Input: []tf.Input{ - var_, m, lr, alpha, sign_decay, beta, grad, + var_, accum, lr, grad, momentum, }, Attrs: attrs, } return scope.AddOperation(opspec) } -// FractionalMaxPoolGradAttr is an optional argument to FractionalMaxPoolGrad. -type FractionalMaxPoolGradAttr func(optionalAttr) +// MaxPoolGradGradAttr is an optional argument to MaxPoolGradGrad. +type MaxPoolGradGradAttr func(optionalAttr) -// FractionalMaxPoolGradOverlapping sets the optional overlapping attribute to value. -// -// value: When set to True, it means when pooling, the values at the boundary -// of adjacent pooling cells are used by both cells. For example: -// -// `index 0 1 2 3 4` -// -// `value 20 5 16 3 7` +// MaxPoolGradGradDataFormat sets the optional data_format attribute to value. // -// If the pooling sequence is [0, 2, 4], then 16, at index 2 will be used twice. -// The result would be [20, 16] for fractional max pooling. -// If not specified, defaults to false -func FractionalMaxPoolGradOverlapping(value bool) FractionalMaxPoolGradAttr { +// value: Specify the data format of the input and output data. With the +// default format "NHWC", the data is stored in the order of: +// [batch, in_height, in_width, in_channels]. +// Alternatively, the format could be "NCHW", the data storage order of: +// [batch, in_channels, in_height, in_width]. 
+// If not specified, defaults to "NHWC" +func MaxPoolGradGradDataFormat(value string) MaxPoolGradGradAttr { return func(m optionalAttr) { - m["overlapping"] = value + m["data_format"] = value } } -// Computes gradient of the FractionalMaxPool function. +// Computes second-order gradients of the maxpooling function. // // Arguments: -// orig_input: Original input for `fractional_max_pool` -// orig_output: Original output for `fractional_max_pool` -// out_backprop: 4-D with shape `[batch, height, width, channels]`. Gradients -// w.r.t. the output of `fractional_max_pool`. -// row_pooling_sequence: row pooling sequence, form pooling region with -// col_pooling_sequence. -// col_pooling_sequence: column pooling sequence, form pooling region with -// row_pooling sequence. +// orig_input: The original input tensor. +// orig_output: The original output tensor. +// grad: 4-D. Gradients of gradients w.r.t. the input of `max_pool`. +// ksize: The size of the window for each dimension of the input tensor. +// strides: The stride of the sliding window for each dimension of the +// input tensor. +// padding: The type of padding algorithm to use. // -// Returns 4-D. Gradients w.r.t. the input of `fractional_max_pool`. -func FractionalMaxPoolGrad(scope *Scope, orig_input tf.Output, orig_output tf.Output, out_backprop tf.Output, row_pooling_sequence tf.Output, col_pooling_sequence tf.Output, optional ...FractionalMaxPoolGradAttr) (output tf.Output) { +// Returns Gradients of gradients w.r.t. the input to `max_pool`. 
+func MaxPoolGradGrad(scope *Scope, orig_input tf.Output, orig_output tf.Output, grad tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPoolGradGradAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "FractionalMaxPoolGrad", + Type: "MaxPoolGradGrad", Input: []tf.Input{ - orig_input, orig_output, out_backprop, row_pooling_sequence, col_pooling_sequence, + orig_input, orig_output, grad, }, Attrs: attrs, } @@ -12529,187 +14917,156 @@ func FractionalMaxPoolGrad(scope *Scope, orig_input tf.Output, orig_output tf.Ou return op.Output(0) } -// ResourceApplyAdagradDAAttr is an optional argument to ResourceApplyAdagradDA. -type ResourceApplyAdagradDAAttr func(optionalAttr) - -// ResourceApplyAdagradDAUseLocking sets the optional use_locking attribute to value. +// Returns the last element of the input list as well as a list with all but that element. // -// value: If True, updating of the var and accum tensors will be protected by -// a lock; otherwise the behavior is undefined, but may exhibit less contention. -// If not specified, defaults to false -func ResourceApplyAdagradDAUseLocking(value bool) ResourceApplyAdagradDAAttr { - return func(m optionalAttr) { - m["use_locking"] = value +// Fails if the list is empty. 
+// +// input_handle: the input list +// tensor: the withdrawn last element of the list +// element_dtype: the type of elements in the list +// element_shape: the shape of the output tensor +func TensorListPopBack(scope *Scope, input_handle tf.Output, element_dtype tf.DataType) (output_handle tf.Output, tensor tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"element_dtype": element_dtype} + opspec := tf.OpSpec{ + Type: "TensorListPopBack", + Input: []tf.Input{ + input_handle, + }, + Attrs: attrs, } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1) } -// Update '*var' according to the proximal adagrad scheme. +// Returns element-wise integer closest to x. // -// Arguments: -// var_: Should be from a Variable(). -// gradient_accumulator: Should be from a Variable(). -// gradient_squared_accumulator: Should be from a Variable(). -// grad: The gradient. -// lr: Scaling factor. Must be a scalar. -// l1: L1 regularization. Must be a scalar. -// l2: L2 regularization. Must be a scalar. -// global_step: Training step number. Must be a scalar. +// If the result is midway between two representable values, +// the even representable is chosen. +// For example: // -// Returns the created operation. -func ResourceApplyAdagradDA(scope *Scope, var_ tf.Output, gradient_accumulator tf.Output, gradient_squared_accumulator tf.Output, grad tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, global_step tf.Output, optional ...ResourceApplyAdagradDAAttr) (o *tf.Operation) { +// ``` +// rint(-1.5) ==> -2.0 +// rint(0.5000001) ==> 1.0 +// rint([-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0]) ==> [-2., -2., -0., 0., 2., 2., 2.] 
+// ``` +func Rint(scope *Scope, x tf.Output) (y tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "ResourceApplyAdagradDA", + Type: "Rint", Input: []tf.Input{ - var_, gradient_accumulator, gradient_squared_accumulator, grad, lr, l1, l2, global_step, + x, }, - Attrs: attrs, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0) } -// SparseReduceMaxSparseAttr is an optional argument to SparseReduceMaxSparse. -type SparseReduceMaxSparseAttr func(optionalAttr) +// OrderedMapUnstageNoKeyAttr is an optional argument to OrderedMapUnstageNoKey. +type OrderedMapUnstageNoKeyAttr func(optionalAttr) -// SparseReduceMaxSparseKeepDims sets the optional keep_dims attribute to value. +// OrderedMapUnstageNoKeyCapacity sets the optional capacity attribute to value. +// If not specified, defaults to 0 // -// value: If true, retain reduced dimensions with length 1. -// If not specified, defaults to false -func SparseReduceMaxSparseKeepDims(value bool) SparseReduceMaxSparseAttr { +// REQUIRES: value >= 0 +func OrderedMapUnstageNoKeyCapacity(value int64) OrderedMapUnstageNoKeyAttr { return func(m optionalAttr) { - m["keep_dims"] = value + m["capacity"] = value } } -// Computes the max of elements across dimensions of a SparseTensor. -// -// This Op takes a SparseTensor and is the sparse counterpart to -// `tf.reduce_max()`. In contrast to SparseReduceMax, this Op returns a -// SparseTensor. -// -// Reduces `sp_input` along the dimensions given in `reduction_axes`. Unless -// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in -// `reduction_axes`. If `keep_dims` is true, the reduced dimensions are retained -// with length 1. -// -// If `reduction_axes` has no entries, all dimensions are reduced, and a tensor -// with a single element is returned. 
Additionally, the axes can be negative, -// which are interpreted according to the indexing rules in Python. +// OrderedMapUnstageNoKeyMemoryLimit sets the optional memory_limit attribute to value. +// If not specified, defaults to 0 // -// Arguments: -// input_indices: 2-D. `N x R` matrix with the indices of non-empty values in a -// SparseTensor, possibly not in canonical ordering. -// input_values: 1-D. `N` non-empty values corresponding to `input_indices`. -// input_shape: 1-D. Shape of the input SparseTensor. -// reduction_axes: 1-D. Length-`K` vector containing the reduction axes. -func SparseReduceMaxSparse(scope *Scope, input_indices tf.Output, input_values tf.Output, input_shape tf.Output, reduction_axes tf.Output, optional ...SparseReduceMaxSparseAttr) (output_indices tf.Output, output_values tf.Output, output_shape tf.Output) { +// REQUIRES: value >= 0 +func OrderedMapUnstageNoKeyMemoryLimit(value int64) OrderedMapUnstageNoKeyAttr { + return func(m optionalAttr) { + m["memory_limit"] = value + } +} + +// OrderedMapUnstageNoKeyContainer sets the optional container attribute to value. +// If not specified, defaults to "" +func OrderedMapUnstageNoKeyContainer(value string) OrderedMapUnstageNoKeyAttr { + return func(m optionalAttr) { + m["container"] = value + } +} + +// OrderedMapUnstageNoKeySharedName sets the optional shared_name attribute to value. +// If not specified, defaults to "" +func OrderedMapUnstageNoKeySharedName(value string) OrderedMapUnstageNoKeyAttr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// Op removes and returns the (key, value) element with the smallest +// +// key from the underlying container. If the underlying container +// does not contain elements, the op will block until it does. 
+func OrderedMapUnstageNoKey(scope *Scope, indices tf.Output, dtypes []tf.DataType, optional ...OrderedMapUnstageNoKeyAttr) (key tf.Output, values []tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"dtypes": dtypes} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "SparseReduceMaxSparse", + Type: "OrderedMapUnstageNoKey", Input: []tf.Input{ - input_indices, input_values, input_shape, reduction_axes, + indices, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// Creates a dataset that emits the outputs of `input_dataset` `count` times. -// -// Arguments: -// -// count: A scalar representing the number of times that `input_dataset` should -// be repeated. A value of `-1` indicates that it should be repeated infinitely. -// -// -func RepeatDataset(scope *Scope, input_dataset tf.Output, count tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "RepeatDataset", - Input: []tf.Input{ - input_dataset, count, - }, - Attrs: attrs, + var idx int + var err error + key = op.Output(idx) + if values, idx, err = makeOutputList(op, idx, "values"); err != nil { + scope.UpdateErr("OrderedMapUnstageNoKey", err) + return } - op := scope.AddOperation(opspec) - return op.Output(0) + return key, values } -// AddManySparseToTensorsMapAttr is an optional argument to AddManySparseToTensorsMap. -type AddManySparseToTensorsMapAttr func(optionalAttr) - -// AddManySparseToTensorsMapContainer sets the optional container attribute to value. -// -// value: The container name for the `SparseTensorsMap` created by this op. 
-// If not specified, defaults to "" -func AddManySparseToTensorsMapContainer(value string) AddManySparseToTensorsMapAttr { - return func(m optionalAttr) { - m["container"] = value - } -} +// SerializeManySparseAttr is an optional argument to SerializeManySparse. +type SerializeManySparseAttr func(optionalAttr) -// AddManySparseToTensorsMapSharedName sets the optional shared_name attribute to value. +// SerializeManySparseOutType sets the optional out_type attribute to value. // -// value: The shared name for the `SparseTensorsMap` created by this op. -// If blank, the new Operation's unique name is used. -// If not specified, defaults to "" -func AddManySparseToTensorsMapSharedName(value string) AddManySparseToTensorsMapAttr { +// value: The `dtype` to use for serialization; the supported types are `string` +// (default) and `variant`. +// If not specified, defaults to DT_STRING +func SerializeManySparseOutType(value tf.DataType) SerializeManySparseAttr { return func(m optionalAttr) { - m["shared_name"] = value + m["out_type"] = value } } -// Add an `N`-minibatch `SparseTensor` to a `SparseTensorsMap`, return `N` handles. -// -// A `SparseTensor` of rank `R` is represented by three tensors: `sparse_indices`, -// `sparse_values`, and `sparse_shape`, where -// -// ```sparse_indices.shape[1] == sparse_shape.shape[0] == R``` -// -// An `N`-minibatch of `SparseTensor` objects is represented as a `SparseTensor` -// having a first `sparse_indices` column taking values between `[0, N)`, where -// the minibatch size `N == sparse_shape[0]`. +// Serialize an `N`-minibatch `SparseTensor` into an `[N, 3]` `Tensor` object. // -// The input `SparseTensor` must have rank `R` greater than 1, and the first -// dimension is treated as the minibatch dimension. Elements of the `SparseTensor` -// must be sorted in increasing order of this first dimension. The stored -// `SparseTensor` objects pointed to by each row of the output `sparse_handles` -// will have rank `R-1`. 
+// The `SparseTensor` must have rank `R` greater than 1, and the first dimension +// is treated as the minibatch dimension. Elements of the `SparseTensor` +// must be sorted in increasing order of this first dimension. The serialized +// `SparseTensor` objects going into each row of `serialized_sparse` will have +// rank `R-1`. // -// The `SparseTensor` values can then be read out as part of a minibatch by passing -// the given keys as vector elements to `TakeManySparseFromTensorsMap`. To ensure -// the correct `SparseTensorsMap` is accessed, ensure that the same -// `container` and `shared_name` are passed to that Op. If no `shared_name` -// is provided here, instead use the *name* of the Operation created by calling -// `AddManySparseToTensorsMap` as the `shared_name` passed to -// `TakeManySparseFromTensorsMap`. Ensure the Operations are colocated. +// The minibatch size `N` is extracted from `sparse_shape[0]`. // // Arguments: // sparse_indices: 2-D. The `indices` of the minibatch `SparseTensor`. -// `sparse_indices[:, 0]` must be ordered values in `[0, N)`. // sparse_values: 1-D. The `values` of the minibatch `SparseTensor`. // sparse_shape: 1-D. The `shape` of the minibatch `SparseTensor`. -// The minibatch size `N == sparse_shape[0]`. -// -// Returns 1-D. The handles of the `SparseTensor` now stored in the -// `SparseTensorsMap`. Shape: `[N]`. 
-func AddManySparseToTensorsMap(scope *Scope, sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output, optional ...AddManySparseToTensorsMapAttr) (sparse_handles tf.Output) { +func SerializeManySparse(scope *Scope, sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output, optional ...SerializeManySparseAttr) (serialized_sparse tf.Output) { if scope.Err() != nil { return } @@ -12718,7 +15075,7 @@ func AddManySparseToTensorsMap(scope *Scope, sparse_indices tf.Output, sparse_va a(attrs) } opspec := tf.OpSpec{ - Type: "AddManySparseToTensorsMap", + Type: "SerializeManySparse", Input: []tf.Input{ sparse_indices, sparse_values, sparse_shape, }, @@ -12728,84 +15085,71 @@ func AddManySparseToTensorsMap(scope *Scope, sparse_indices tf.Output, sparse_va return op.Output(0) } -// Concatenates tensors along one dimension. -// -// Arguments: -// values: List of `N` Tensors to concatenate. Their ranks and types must match, -// and their sizes must match in all dimensions except `concat_dim`. -// axis: 0-D. The dimension along which to concatenate. Must be in the -// range [-rank(values), rank(values)). -// -// Returns A `Tensor` with the concatenation of values stacked along the -// `concat_dim` dimension. This tensor's shape matches that of `values` except -// in `concat_dim` where it has the sum of the sizes. -func ConcatV2(scope *Scope, values []tf.Output, axis tf.Output) (output tf.Output) { +// Computes inverse hyperbolic cosine of x element-wise. +func Acosh(scope *Scope, x tf.Output) (y tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "ConcatV2", + Type: "Acosh", Input: []tf.Input{ - tf.OutputList(values), axis, + x, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Reads and outputs the entire contents of the input filename. -func ReadFile(scope *Scope, filename tf.Output) (contents tf.Output) { - if scope.Err() != nil { - return +// TensorArrayV2Attr is an optional argument to TensorArrayV2. 
+type TensorArrayV2Attr func(optionalAttr) + +// TensorArrayV2ElementShape sets the optional element_shape attribute to value. +// If not specified, defaults to +func TensorArrayV2ElementShape(value tf.Shape) TensorArrayV2Attr { + return func(m optionalAttr) { + m["element_shape"] = value } - opspec := tf.OpSpec{ - Type: "ReadFile", - Input: []tf.Input{ - filename, - }, +} + +// TensorArrayV2DynamicSize sets the optional dynamic_size attribute to value. +// If not specified, defaults to false +func TensorArrayV2DynamicSize(value bool) TensorArrayV2Attr { + return func(m optionalAttr) { + m["dynamic_size"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// MinAttr is an optional argument to Min. -type MinAttr func(optionalAttr) +// TensorArrayV2ClearAfterRead sets the optional clear_after_read attribute to value. +// If not specified, defaults to true +func TensorArrayV2ClearAfterRead(value bool) TensorArrayV2Attr { + return func(m optionalAttr) { + m["clear_after_read"] = value + } +} -// MinKeepDims sets the optional keep_dims attribute to value. -// -// value: If true, retain reduced dimensions with length 1. -// If not specified, defaults to false -func MinKeepDims(value bool) MinAttr { +// TensorArrayV2TensorArrayName sets the optional tensor_array_name attribute to value. +// If not specified, defaults to "" +func TensorArrayV2TensorArrayName(value string) TensorArrayV2Attr { return func(m optionalAttr) { - m["keep_dims"] = value + m["tensor_array_name"] = value } } -// Computes the minimum of elements across dimensions of a tensor. -// -// Reduces `input` along the dimensions given in `axis`. Unless -// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in -// `axis`. If `keep_dims` is true, the reduced dimensions are -// retained with length 1. -// -// Arguments: -// input: The tensor to reduce. -// axis: The dimensions to reduce. Must be in the range -// `[-rank(input), rank(input))`. +// Deprecated. 
Use TensorArrayV3 // -// Returns The reduced tensor. -func Min(scope *Scope, input tf.Output, axis tf.Output, optional ...MinAttr) (output tf.Output) { +// DEPRECATED at GraphDef version 26: Use TensorArrayV3 +func TensorArrayV2(scope *Scope, size tf.Output, dtype tf.DataType, optional ...TensorArrayV2Attr) (handle tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"dtype": dtype} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "Min", + Type: "TensorArrayV2", Input: []tf.Input{ - input, axis, + size, }, Attrs: attrs, } @@ -12813,244 +15157,359 @@ func Min(scope *Scope, input tf.Output, axis tf.Output, optional ...MinAttr) (ou return op.Output(0) } -// Shuffle dimensions of x according to a permutation. +// DecodeCSVAttr is an optional argument to DecodeCSV. +type DecodeCSVAttr func(optionalAttr) + +// DecodeCSVFieldDelim sets the optional field_delim attribute to value. // -// The output `y` has the same rank as `x`. The shapes of `x` and `y` satisfy: -// `y.shape[i] == x.shape[perm[i]] for i in [0, 1, ..., rank(x) - 1]` -func Transpose(scope *Scope, x tf.Output, perm tf.Output) (y tf.Output) { - if scope.Err() != nil { - return +// value: char delimiter to separate fields in a record. +// If not specified, defaults to "," +func DecodeCSVFieldDelim(value string) DecodeCSVAttr { + return func(m optionalAttr) { + m["field_delim"] = value } - opspec := tf.OpSpec{ - Type: "Transpose", - Input: []tf.Input{ - x, perm, - }, +} + +// DecodeCSVUseQuoteDelim sets the optional use_quote_delim attribute to value. +// +// value: If false, treats double quotation marks as regular +// characters inside of the string fields (ignoring RFC 4180, Section 2, +// Bullet 5). 
+// If not specified, defaults to true +func DecodeCSVUseQuoteDelim(value bool) DecodeCSVAttr { + return func(m optionalAttr) { + m["use_quote_delim"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// Computes sigmoid of `x` element-wise. +// DecodeCSVNaValue sets the optional na_value attribute to value. // -// Specifically, `y = 1 / (1 + exp(-x))`. -func Sigmoid(scope *Scope, x tf.Output) (y tf.Output) { +// value: Additional string to recognize as NA/NaN. +// If not specified, defaults to "" +func DecodeCSVNaValue(value string) DecodeCSVAttr { + return func(m optionalAttr) { + m["na_value"] = value + } +} + +// DecodeCSVSelectCols sets the optional select_cols attribute to value. +// If not specified, defaults to <> +func DecodeCSVSelectCols(value []int64) DecodeCSVAttr { + return func(m optionalAttr) { + m["select_cols"] = value + } +} + +// Convert CSV records to tensors. Each column maps to one tensor. +// +// RFC 4180 format is expected for the CSV records. +// (https://tools.ietf.org/html/rfc4180) +// Note that we allow leading and trailing spaces with int or float field. +// +// Arguments: +// records: Each string is a record/row in the csv and all records should have +// the same format. +// record_defaults: One tensor per column of the input record, with either a +// scalar default value for that column or empty if the column is required. +// +// Returns Each tensor will have the same shape as records. 
+func DecodeCSV(scope *Scope, records tf.Output, record_defaults []tf.Output, optional ...DecodeCSVAttr) (output []tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "Sigmoid", + Type: "DecodeCSV", Input: []tf.Input{ - x, + records, tf.OutputList(record_defaults), }, + Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + if scope.Err() != nil { + return + } + var idx int + var err error + if output, idx, err = makeOutputList(op, idx, "output"); err != nil { + scope.UpdateErr("DecodeCSV", err) + return + } + return output } -// FusedBatchNormAttr is an optional argument to FusedBatchNorm. -type FusedBatchNormAttr func(optionalAttr) +// MapClearAttr is an optional argument to MapClear. +type MapClearAttr func(optionalAttr) -// FusedBatchNormEpsilon sets the optional epsilon attribute to value. +// MapClearCapacity sets the optional capacity attribute to value. +// If not specified, defaults to 0 // -// value: A small float number added to the variance of x. -// If not specified, defaults to 0.0001 -func FusedBatchNormEpsilon(value float32) FusedBatchNormAttr { +// REQUIRES: value >= 0 +func MapClearCapacity(value int64) MapClearAttr { return func(m optionalAttr) { - m["epsilon"] = value + m["capacity"] = value } } -// FusedBatchNormDataFormat sets the optional data_format attribute to value. +// MapClearMemoryLimit sets the optional memory_limit attribute to value. +// If not specified, defaults to 0 // -// value: The data format for x and y. Either "NHWC" (default) or "NCHW". -// If not specified, defaults to "NHWC" -func FusedBatchNormDataFormat(value string) FusedBatchNormAttr { +// REQUIRES: value >= 0 +func MapClearMemoryLimit(value int64) MapClearAttr { return func(m optionalAttr) { - m["data_format"] = value + m["memory_limit"] = value } } -// FusedBatchNormIsTraining sets the optional is_training attribute to value. 
-// -// value: A bool value to indicate the operation is for training (default) -// or inference. -// If not specified, defaults to true -func FusedBatchNormIsTraining(value bool) FusedBatchNormAttr { +// MapClearContainer sets the optional container attribute to value. +// If not specified, defaults to "" +func MapClearContainer(value string) MapClearAttr { return func(m optionalAttr) { - m["is_training"] = value + m["container"] = value } } -// Batch normalization. -// -// Note that the size of 4D Tensors are defined by either "NHWC" or "NCHW". -// The size of 1D Tensors matches the dimension C of the 4D Tensors. -// -// Arguments: -// x: A 4D Tensor for input data. -// scale: A 1D Tensor for scaling factor, to scale the normalized x. -// offset: A 1D Tensor for offset, to shift to the normalized x. -// mean: A 1D Tensor for population mean. Used for inference only; -// must be empty for training. -// variance: A 1D Tensor for population variance. Used for inference only; -// must be empty for training. +// MapClearSharedName sets the optional shared_name attribute to value. +// If not specified, defaults to "" +func MapClearSharedName(value string) MapClearAttr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// Op removes all elements in the underlying container. // -// Returns A 4D Tensor for output data.A 1D Tensor for the computed batch mean, to be used by TensorFlow -// to compute the running mean.A 1D Tensor for the computed batch variance, to be used by -// TensorFlow to compute the running variance.A 1D Tensor for the computed batch mean, to be reused -// in the gradient computation.A 1D Tensor for the computed batch variance (inverted variance -// in the cuDNN case), to be reused in the gradient computation. 
-func FusedBatchNorm(scope *Scope, x tf.Output, scale tf.Output, offset tf.Output, mean tf.Output, variance tf.Output, optional ...FusedBatchNormAttr) (y tf.Output, batch_mean tf.Output, batch_variance tf.Output, reserve_space_1 tf.Output, reserve_space_2 tf.Output) { +// Returns the created operation. +func MapClear(scope *Scope, dtypes []tf.DataType, optional ...MapClearAttr) (o *tf.Operation) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"dtypes": dtypes} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "FusedBatchNorm", - Input: []tf.Input{ - x, scale, offset, mean, variance, - }, + Type: "MapClear", + Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4) + return scope.AddOperation(opspec) } -// RandomStandardNormalAttr is an optional argument to RandomStandardNormal. -type RandomStandardNormalAttr func(optionalAttr) +// ThreadUnsafeUnigramCandidateSamplerAttr is an optional argument to ThreadUnsafeUnigramCandidateSampler. +type ThreadUnsafeUnigramCandidateSamplerAttr func(optionalAttr) -// RandomStandardNormalSeed sets the optional seed attribute to value. +// ThreadUnsafeUnigramCandidateSamplerSeed sets the optional seed attribute to value. // -// value: If either `seed` or `seed2` are set to be non-zero, the random number +// value: If either seed or seed2 are set to be non-zero, the random number // generator is seeded by the given seed. Otherwise, it is seeded by a // random seed. // If not specified, defaults to 0 -func RandomStandardNormalSeed(value int64) RandomStandardNormalAttr { +func ThreadUnsafeUnigramCandidateSamplerSeed(value int64) ThreadUnsafeUnigramCandidateSamplerAttr { return func(m optionalAttr) { m["seed"] = value } } -// RandomStandardNormalSeed2 sets the optional seed2 attribute to value. +// ThreadUnsafeUnigramCandidateSamplerSeed2 sets the optional seed2 attribute to value. 
// -// value: A second seed to avoid seed collision. +// value: An second seed to avoid seed collision. // If not specified, defaults to 0 -func RandomStandardNormalSeed2(value int64) RandomStandardNormalAttr { +func ThreadUnsafeUnigramCandidateSamplerSeed2(value int64) ThreadUnsafeUnigramCandidateSamplerAttr { return func(m optionalAttr) { m["seed2"] = value } } -// Outputs random values from a normal distribution. +// Generates labels for candidate sampling with a learned unigram distribution. // -// The generated values will have mean 0 and standard deviation 1. +// See explanations of candidate sampling and the data formats at +// go/candidate-sampling. +// +// For each batch, this op picks a single set of sampled candidate labels. +// +// The advantages of sampling candidates per-batch are simplicity and the +// possibility of efficient dense matrix multiplication. The disadvantage is that +// the sampled candidates must be chosen independently of the context and of the +// true labels. // // Arguments: -// shape: The shape of the output tensor. -// dtype: The type of the output. +// true_classes: A batch_size * num_true matrix, in which each row contains the +// IDs of the num_true target_classes in the corresponding original label. +// num_true: Number of true labels per context. +// num_sampled: Number of candidates to randomly sample. +// unique: If unique is true, we sample with rejection, so that all sampled +// candidates in a batch are unique. This requires some approximation to +// estimate the post-rejection sampling probabilities. +// range_max: The sampler will sample integers from the interval [0, range_max). // -// Returns A tensor of the specified shape filled with random normal values. 
-func RandomStandardNormal(scope *Scope, shape tf.Output, dtype tf.DataType, optional ...RandomStandardNormalAttr) (output tf.Output) { +// Returns A vector of length num_sampled, in which each element is +// the ID of a sampled candidate.A batch_size * num_true matrix, representing +// the number of times each candidate is expected to occur in a batch +// of sampled candidates. If unique=true, then this is a probability.A vector of length num_sampled, for each sampled +// candidate representing the number of times the candidate is expected +// to occur in a batch of sampled candidates. If unique=true, then this is a +// probability. +func ThreadUnsafeUnigramCandidateSampler(scope *Scope, true_classes tf.Output, num_true int64, num_sampled int64, unique bool, range_max int64, optional ...ThreadUnsafeUnigramCandidateSamplerAttr) (sampled_candidates tf.Output, true_expected_count tf.Output, sampled_expected_count tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"dtype": dtype} + attrs := map[string]interface{}{"num_true": num_true, "num_sampled": num_sampled, "unique": unique, "range_max": range_max} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "RandomStandardNormal", + Type: "ThreadUnsafeUnigramCandidateSampler", Input: []tf.Input{ - shape, + true_classes, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1), op.Output(2) } -// Component-wise divides a SparseTensor by a dense Tensor. +// MaxPoolV2Attr is an optional argument to MaxPoolV2. +type MaxPoolV2Attr func(optionalAttr) + +// MaxPoolV2DataFormat sets the optional data_format attribute to value. // -// *Limitation*: this Op only broadcasts the dense side to the sparse side, but not -// the other direction. +// value: Specify the data format of the input and output data. With the +// default format "NHWC", the data is stored in the order of: +// [batch, in_height, in_width, in_channels]. 
+// Alternatively, the format could be "NCHW", the data storage order of: +// [batch, in_channels, in_height, in_width]. +// If not specified, defaults to "NHWC" +func MaxPoolV2DataFormat(value string) MaxPoolV2Attr { + return func(m optionalAttr) { + m["data_format"] = value + } +} + +// Performs max pooling on the input. // // Arguments: -// sp_indices: 2-D. `N x R` matrix with the indices of non-empty values in a -// SparseTensor, possibly not in canonical ordering. -// sp_values: 1-D. `N` non-empty values corresponding to `sp_indices`. -// sp_shape: 1-D. Shape of the input SparseTensor. -// dense: `R`-D. The dense Tensor operand. +// input: 4-D input to pool over. +// ksize: The size of the window for each dimension of the input tensor. +// strides: The stride of the sliding window for each dimension of the +// input tensor. +// padding: The type of padding algorithm to use. // -// Returns 1-D. The `N` values that are operated on. -func SparseDenseCwiseDiv(scope *Scope, sp_indices tf.Output, sp_values tf.Output, sp_shape tf.Output, dense tf.Output) (output tf.Output) { +// Returns The max pooled output tensor. +func MaxPoolV2(scope *Scope, input tf.Output, ksize tf.Output, strides tf.Output, padding string, optional ...MaxPoolV2Attr) (output tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"padding": padding} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "SparseDenseCwiseDiv", + Type: "MaxPoolV2", Input: []tf.Input{ - sp_indices, sp_values, sp_shape, dense, + input, ksize, strides, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// FractionalAvgPoolGradAttr is an optional argument to FractionalAvgPoolGrad. -type FractionalAvgPoolGradAttr func(optionalAttr) +// MutableDenseHashTableV2Attr is an optional argument to MutableDenseHashTableV2. 
+type MutableDenseHashTableV2Attr func(optionalAttr) -// FractionalAvgPoolGradOverlapping sets the optional overlapping attribute to value. +// MutableDenseHashTableV2Container sets the optional container attribute to value. // -// value: When set to True, it means when pooling, the values at the boundary -// of adjacent pooling cells are used by both cells. For example: +// value: If non-empty, this table is placed in the given container. +// Otherwise, a default container is used. +// If not specified, defaults to "" +func MutableDenseHashTableV2Container(value string) MutableDenseHashTableV2Attr { + return func(m optionalAttr) { + m["container"] = value + } +} + +// MutableDenseHashTableV2SharedName sets the optional shared_name attribute to value. // -// `index 0 1 2 3 4` +// value: If non-empty, this table is shared under the given name across +// multiple sessions. +// If not specified, defaults to "" +func MutableDenseHashTableV2SharedName(value string) MutableDenseHashTableV2Attr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// MutableDenseHashTableV2UseNodeNameSharing sets the optional use_node_name_sharing attribute to value. +// If not specified, defaults to false +func MutableDenseHashTableV2UseNodeNameSharing(value bool) MutableDenseHashTableV2Attr { + return func(m optionalAttr) { + m["use_node_name_sharing"] = value + } +} + +// MutableDenseHashTableV2ValueShape sets the optional value_shape attribute to value. // -// `value 20 5 16 3 7` +// value: The shape of each value. +// If not specified, defaults to <> +func MutableDenseHashTableV2ValueShape(value tf.Shape) MutableDenseHashTableV2Attr { + return func(m optionalAttr) { + m["value_shape"] = value + } +} + +// MutableDenseHashTableV2InitialNumBuckets sets the optional initial_num_buckets attribute to value. // -// If the pooling sequence is [0, 2, 4], then 16, at index 2 will be used twice. -// The result would be [41/3, 26/3] for fractional avg pooling. 
-// If not specified, defaults to false -func FractionalAvgPoolGradOverlapping(value bool) FractionalAvgPoolGradAttr { +// value: The initial number of hash table buckets. Must be a power +// to 2. +// If not specified, defaults to 131072 +func MutableDenseHashTableV2InitialNumBuckets(value int64) MutableDenseHashTableV2Attr { + return func(m optionalAttr) { + m["initial_num_buckets"] = value + } +} + +// MutableDenseHashTableV2MaxLoadFactor sets the optional max_load_factor attribute to value. +// +// value: The maximum ratio between number of entries and number of +// buckets before growing the table. Must be between 0 and 1. +// If not specified, defaults to 0.8 +func MutableDenseHashTableV2MaxLoadFactor(value float32) MutableDenseHashTableV2Attr { return func(m optionalAttr) { - m["overlapping"] = value + m["max_load_factor"] = value } } -// Computes gradient of the FractionalAvgPool function. +// Creates an empty hash table that uses tensors as the backing store. // -// Unlike FractionalMaxPoolGrad, we don't need to find arg_max for -// FractionalAvgPoolGrad, we just need to evenly back-propagate each element of -// out_backprop to those indices that form the same pooling cell. Therefore, we -// just need to know the shape of original input tensor, instead of the whole -// tensor. +// It uses "open addressing" with quadratic reprobing to resolve +// collisions. +// +// This op creates a mutable hash table, specifying the type of its keys and +// values. Each value must be a scalar. Data can be inserted into the table using +// the insert operations. It does not support the initialization operation. // // Arguments: -// orig_input_tensor_shape: Original input tensor shape for `fractional_avg_pool` -// out_backprop: 4-D with shape `[batch, height, width, channels]`. Gradients -// w.r.t. the output of `fractional_avg_pool`. -// row_pooling_sequence: row pooling sequence, form pooling region with -// col_pooling_sequence. 
-// col_pooling_sequence: column pooling sequence, form pooling region with -// row_pooling sequence. +// empty_key: The key used to represent empty key buckets internally. Must not +// be used in insert or lookup operations. +// value_dtype: Type of the table values. // -// Returns 4-D. Gradients w.r.t. the input of `fractional_avg_pool`. -func FractionalAvgPoolGrad(scope *Scope, orig_input_tensor_shape tf.Output, out_backprop tf.Output, row_pooling_sequence tf.Output, col_pooling_sequence tf.Output, optional ...FractionalAvgPoolGradAttr) (output tf.Output) { +// Returns Handle to a table. +func MutableDenseHashTableV2(scope *Scope, empty_key tf.Output, value_dtype tf.DataType, optional ...MutableDenseHashTableV2Attr) (table_handle tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"value_dtype": value_dtype} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "FractionalAvgPoolGrad", + Type: "MutableDenseHashTableV2", Input: []tf.Input{ - orig_input_tensor_shape, out_backprop, row_pooling_sequence, col_pooling_sequence, + empty_key, }, Attrs: attrs, } @@ -13058,132 +15517,177 @@ func FractionalAvgPoolGrad(scope *Scope, orig_input_tensor_shape tf.Output, out_ return op.Output(0) } -// Concatenates tensors along one dimension. +// Returns element-wise remainder of division. This emulates C semantics in that +// +// the result here is consistent with a truncating divide. E.g. `truncate(x / y) * +// y + truncate_mod(x, y) = x`. +// +// *NOTE*: `TruncateMod` supports broadcasting. More about broadcasting +// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +func TruncateMod(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "TruncateMod", + Input: []tf.Input{ + x, y, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Inverse 2D real-valued fast Fourier transform. 
+// +// Computes the inverse 2-dimensional discrete Fourier transform of a real-valued +// signal over the inner-most 2 dimensions of `input`. +// +// The inner-most 2 dimensions of `input` are assumed to be the result of `RFFT2D`: +// The inner-most dimension contains the `fft_length / 2 + 1` unique components of +// the DFT of a real-valued signal. If `fft_length` is not provided, it is computed +// from the size of the inner-most 2 dimensions of `input`. If the FFT length used +// to compute `input` is odd, it should be provided since it cannot be inferred +// properly. +// +// Along each axis `IRFFT2D` is computed on, if `fft_length` (or +// `fft_length / 2 + 1` for the inner-most dimension) is smaller than the +// corresponding dimension of `input`, the dimension is cropped. If it is larger, +// the dimension is padded with zeros. // // Arguments: -// concat_dim: 0-D. The dimension along which to concatenate. Must be in the -// range [0, rank(values)). -// values: The `N` Tensors to concatenate. Their ranks and types must match, -// and their sizes must match in all dimensions except `concat_dim`. +// input: A complex64 tensor. +// fft_length: An int32 tensor of shape [2]. The FFT length for each dimension. // -// Returns A `Tensor` with the concatenation of values stacked along the -// `concat_dim` dimension. This tensor's shape matches that of `values` except -// in `concat_dim` where it has the sum of the sizes. -func Concat(scope *Scope, concat_dim tf.Output, values []tf.Output) (output tf.Output) { +// Returns A float32 tensor of the same rank as `input`. The inner-most 2 +// dimensions of `input` are replaced with the `fft_length` samples of their +// inverse 2D Fourier transform. 
+// +// @compatibility(numpy) +// Equivalent to np.fft.irfft2 +// @end_compatibility +func IRFFT2D(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Concat", + Type: "IRFFT2D", Input: []tf.Input{ - concat_dim, tf.OutputList(values), + input, fft_length, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// ResourceApplyMomentumAttr is an optional argument to ResourceApplyMomentum. -type ResourceApplyMomentumAttr func(optionalAttr) +// DecodeJpegAttr is an optional argument to DecodeJpeg. +type DecodeJpegAttr func(optionalAttr) -// ResourceApplyMomentumUseLocking sets the optional use_locking attribute to value. +// DecodeJpegChannels sets the optional channels attribute to value. // -// value: If `True`, updating of the var and accum tensors will be protected -// by a lock; otherwise the behavior is undefined, but may exhibit less -// contention. -// If not specified, defaults to false -func ResourceApplyMomentumUseLocking(value bool) ResourceApplyMomentumAttr { +// value: Number of color channels for the decoded image. +// If not specified, defaults to 0 +func DecodeJpegChannels(value int64) DecodeJpegAttr { return func(m optionalAttr) { - m["use_locking"] = value + m["channels"] = value } } -// ResourceApplyMomentumUseNesterov sets the optional use_nesterov attribute to value. +// DecodeJpegRatio sets the optional ratio attribute to value. // -// value: If `True`, the tensor passed to compute grad will be -// var - lr * momentum * accum, so in the end, the var you get is actually -// var - lr * momentum * accum. -// If not specified, defaults to false -func ResourceApplyMomentumUseNesterov(value bool) ResourceApplyMomentumAttr { +// value: Downscaling ratio. 
+// If not specified, defaults to 1 +func DecodeJpegRatio(value int64) DecodeJpegAttr { return func(m optionalAttr) { - m["use_nesterov"] = value + m["ratio"] = value } } -// Update '*var' according to the momentum scheme. Set use_nesterov = True if you -// -// want to use Nesterov momentum. -// -// accum = accum * momentum + grad -// var -= lr * accum -// -// Arguments: -// var_: Should be from a Variable(). -// accum: Should be from a Variable(). -// lr: Scaling factor. Must be a scalar. -// grad: The gradient. -// momentum: Momentum. Must be a scalar. +// DecodeJpegFancyUpscaling sets the optional fancy_upscaling attribute to value. // -// Returns the created operation. -func ResourceApplyMomentum(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, grad tf.Output, momentum tf.Output, optional ...ResourceApplyMomentumAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) +// value: If true use a slower but nicer upscaling of the +// chroma planes (yuv420/422 only). +// If not specified, defaults to true +func DecodeJpegFancyUpscaling(value bool) DecodeJpegAttr { + return func(m optionalAttr) { + m["fancy_upscaling"] = value } - opspec := tf.OpSpec{ - Type: "ResourceApplyMomentum", - Input: []tf.Input{ - var_, accum, lr, grad, momentum, - }, - Attrs: attrs, +} + +// DecodeJpegTryRecoverTruncated sets the optional try_recover_truncated attribute to value. +// +// value: If true try to recover an image from truncated input. +// If not specified, defaults to false +func DecodeJpegTryRecoverTruncated(value bool) DecodeJpegAttr { + return func(m optionalAttr) { + m["try_recover_truncated"] = value } - return scope.AddOperation(opspec) } -// MaxPoolGradGradAttr is an optional argument to MaxPoolGradGrad. -type MaxPoolGradGradAttr func(optionalAttr) +// DecodeJpegAcceptableFraction sets the optional acceptable_fraction attribute to value. 
+// +// value: The minimum required fraction of lines before a truncated +// input is accepted. +// If not specified, defaults to 1 +func DecodeJpegAcceptableFraction(value float32) DecodeJpegAttr { + return func(m optionalAttr) { + m["acceptable_fraction"] = value + } +} -// MaxPoolGradGradDataFormat sets the optional data_format attribute to value. +// DecodeJpegDctMethod sets the optional dct_method attribute to value. // -// value: Specify the data format of the input and output data. With the -// default format "NHWC", the data is stored in the order of: -// [batch, in_height, in_width, in_channels]. -// Alternatively, the format could be "NCHW", the data storage order of: -// [batch, in_channels, in_height, in_width]. -// If not specified, defaults to "NHWC" -func MaxPoolGradGradDataFormat(value string) MaxPoolGradGradAttr { +// value: string specifying a hint about the algorithm used for +// decompression. Defaults to "" which maps to a system-specific +// default. Currently valid values are ["INTEGER_FAST", +// "INTEGER_ACCURATE"]. The hint may be ignored (e.g., the internal +// jpeg library changes to a version that does not have that specific +// option.) +// If not specified, defaults to "" +func DecodeJpegDctMethod(value string) DecodeJpegAttr { return func(m optionalAttr) { - m["data_format"] = value + m["dct_method"] = value } } -// Computes second-order gradients of the maxpooling function. +// Decode a JPEG-encoded image to a uint8 tensor. +// +// The attr `channels` indicates the desired number of color channels for the +// decoded image. +// +// Accepted values are: +// +// * 0: Use the number of channels in the JPEG-encoded image. +// * 1: output a grayscale image. +// * 3: output an RGB image. +// +// If needed, the JPEG-encoded image is transformed to match the requested number +// of color channels. +// +// The attr `ratio` allows downscaling the image by an integer factor during +// decoding. Allowed values are: 1, 2, 4, and 8. 
This is much faster than +// downscaling the image later. +// +// +// This op also supports decoding PNGs and non-animated GIFs since the interface is +// the same, though it is cleaner to use `tf.image.decode_image`. // // Arguments: -// orig_input: The original input tensor. -// orig_output: The original output tensor. -// grad: 4-D. Gradients of gradients w.r.t. the input of `max_pool`. -// ksize: The size of the window for each dimension of the input tensor. -// strides: The stride of the sliding window for each dimension of the -// input tensor. -// padding: The type of padding algorithm to use. +// contents: 0-D. The JPEG-encoded image. // -// Returns Gradients of gradients w.r.t. the input to `max_pool`. -func MaxPoolGradGrad(scope *Scope, orig_input tf.Output, orig_output tf.Output, grad tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPoolGradGradAttr) (output tf.Output) { +// Returns 3-D with shape `[height, width, channels]`.. +func DecodeJpeg(scope *Scope, contents tf.Output, optional ...DecodeJpegAttr) (image tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "MaxPoolGradGrad", + Type: "DecodeJpeg", Input: []tf.Input{ - orig_input, orig_output, grad, + contents, }, Attrs: attrs, } @@ -13191,75 +15695,67 @@ func MaxPoolGradGrad(scope *Scope, orig_input tf.Output, orig_output tf.Output, return op.Output(0) } -// Returns element-wise integer closest to x. +// Serializes the tree ensemble to a proto. // -// If the result is midway between two representable values, -// the even representable is chosen. -// For example: +// Arguments: +// tree_ensemble_handle: Handle to the tree ensemble. // -// ``` -// rint(-1.5) ==> -2.0 -// rint(0.5000001) ==> 1.0 -// rint([-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0]) ==> [-2., -2., -0., 0., 2., 2., 2.] 
-// ``` -func Rint(scope *Scope, x tf.Output) (y tf.Output) { +// Returns Stamp token of the tree ensemble resource.Serialized proto of the ensemble. +func BoostedTreesSerializeEnsemble(scope *Scope, tree_ensemble_handle tf.Output) (stamp_token tf.Output, tree_ensemble_serialized tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Rint", + Type: "BoostedTreesSerializeEnsemble", Input: []tf.Input{ - x, + tree_ensemble_handle, }, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1) } -// OrderedMapUnstageNoKeyAttr is an optional argument to OrderedMapUnstageNoKey. -type OrderedMapUnstageNoKeyAttr func(optionalAttr) +// StageSizeAttr is an optional argument to StageSize. +type StageSizeAttr func(optionalAttr) -// OrderedMapUnstageNoKeyCapacity sets the optional capacity attribute to value. +// StageSizeCapacity sets the optional capacity attribute to value. // If not specified, defaults to 0 // // REQUIRES: value >= 0 -func OrderedMapUnstageNoKeyCapacity(value int64) OrderedMapUnstageNoKeyAttr { +func StageSizeCapacity(value int64) StageSizeAttr { return func(m optionalAttr) { m["capacity"] = value } } -// OrderedMapUnstageNoKeyMemoryLimit sets the optional memory_limit attribute to value. +// StageSizeMemoryLimit sets the optional memory_limit attribute to value. // If not specified, defaults to 0 // // REQUIRES: value >= 0 -func OrderedMapUnstageNoKeyMemoryLimit(value int64) OrderedMapUnstageNoKeyAttr { +func StageSizeMemoryLimit(value int64) StageSizeAttr { return func(m optionalAttr) { m["memory_limit"] = value } } -// OrderedMapUnstageNoKeyContainer sets the optional container attribute to value. +// StageSizeContainer sets the optional container attribute to value. 
// If not specified, defaults to "" -func OrderedMapUnstageNoKeyContainer(value string) OrderedMapUnstageNoKeyAttr { +func StageSizeContainer(value string) StageSizeAttr { return func(m optionalAttr) { m["container"] = value } } -// OrderedMapUnstageNoKeySharedName sets the optional shared_name attribute to value. +// StageSizeSharedName sets the optional shared_name attribute to value. // If not specified, defaults to "" -func OrderedMapUnstageNoKeySharedName(value string) OrderedMapUnstageNoKeyAttr { +func StageSizeSharedName(value string) StageSizeAttr { return func(m optionalAttr) { m["shared_name"] = value } } -// Op removes and returns the (key, value) element with the smallest -// -// key from the underlying container. If the underlying container -// does not contain elements, the op will block until it does. -func OrderedMapUnstageNoKey(scope *Scope, indices tf.Output, dtypes []tf.DataType, optional ...OrderedMapUnstageNoKeyAttr) (key tf.Output, values []tf.Output) { +// Op returns the number of elements in the underlying container. +func StageSize(scope *Scope, dtypes []tf.DataType, optional ...StageSizeAttr) (size tf.Output) { if scope.Err() != nil { return } @@ -13268,292 +15764,240 @@ func OrderedMapUnstageNoKey(scope *Scope, indices tf.Output, dtypes []tf.DataTyp a(attrs) } opspec := tf.OpSpec{ - Type: "OrderedMapUnstageNoKey", - Input: []tf.Input{ - indices, - }, + Type: "StageSize", + Attrs: attrs, } op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - key = op.Output(idx) - if values, idx, err = makeOutputList(op, idx, "values"); err != nil { - scope.UpdateErr("OrderedMapUnstageNoKey", err) - return - } - return key, values -} - -// MaxPool3DGradGradAttr is an optional argument to MaxPool3DGradGrad. -type MaxPool3DGradGradAttr func(optionalAttr) - -// MaxPool3DGradGradDataFormat sets the optional data_format attribute to value. -// -// value: The data format of the input and output data. 
With the -// default format "NDHWC", the data is stored in the order of: -// [batch, in_depth, in_height, in_width, in_channels]. -// Alternatively, the format could be "NCDHW", the data storage order is: -// [batch, in_channels, in_depth, in_height, in_width]. -// If not specified, defaults to "NDHWC" -func MaxPool3DGradGradDataFormat(value string) MaxPool3DGradGradAttr { - return func(m optionalAttr) { - m["data_format"] = value - } + return op.Output(0) } -// Computes second-order gradients of the maxpooling function. +// Produces the max pool of the input tensor for quantized types. // // Arguments: -// orig_input: The original input tensor. -// orig_output: The original output tensor. -// grad: Output backprop of shape `[batch, depth, rows, cols, channels]`. -// ksize: 1-D tensor of length 5. The size of the window for each dimension of -// the input tensor. Must have `ksize[0] = ksize[4] = 1`. -// strides: 1-D tensor of length 5. The stride of the sliding window for each -// dimension of `input`. Must have `strides[0] = strides[4] = 1`. +// input: The 4D (batch x rows x cols x depth) Tensor to MaxReduce over. +// min_input: The float value that the lowest quantized input value represents. +// max_input: The float value that the highest quantized input value represents. +// ksize: The size of the window for each dimension of the input tensor. +// The length must be 4 to match the number of dimensions of the input. +// strides: The stride of the sliding window for each dimension of the input +// tensor. The length must be 4 to match the number of dimensions of the input. // padding: The type of padding algorithm to use. // -// Returns Gradients of gradients w.r.t. the input to `max_pool`. 
-func MaxPool3DGradGrad(scope *Scope, orig_input tf.Output, orig_output tf.Output, grad tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPool3DGradGradAttr) (output tf.Output) { +// Returns The float value that the lowest quantized output value represents.The float value that the highest quantized output value represents. +func QuantizedMaxPool(scope *Scope, input tf.Output, min_input tf.Output, max_input tf.Output, ksize []int64, strides []int64, padding string) (output tf.Output, min_output tf.Output, max_output tf.Output) { if scope.Err() != nil { return } attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "MaxPool3DGradGrad", + Type: "QuantizedMaxPool", Input: []tf.Input{ - orig_input, orig_output, grad, + input, min_input, max_input, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1), op.Output(2) } -// Conv3DBackpropFilterV2Attr is an optional argument to Conv3DBackpropFilterV2. -type Conv3DBackpropFilterV2Attr func(optionalAttr) - -// Conv3DBackpropFilterV2DataFormat sets the optional data_format attribute to value. -// -// value: The data format of the input and output data. With the -// default format "NDHWC", the data is stored in the order of: -// [batch, in_depth, in_height, in_width, in_channels]. -// Alternatively, the format could be "NCDHW", the data storage order is: -// [batch, in_channels, in_depth, in_height, in_width]. -// If not specified, defaults to "NDHWC" -func Conv3DBackpropFilterV2DataFormat(value string) Conv3DBackpropFilterV2Attr { - return func(m optionalAttr) { - m["data_format"] = value +// Computes softplus: `log(exp(features) + 1)`. +func Softplus(scope *Scope, features tf.Output) (activations tf.Output) { + if scope.Err() != nil { + return } -} - -// Conv3DBackpropFilterV2Dilations sets the optional dilations attribute to value. 
-// -// value: 1-D tensor of length 5. The dilation factor for each dimension of -// `input`. If set to k > 1, there will be k-1 skipped cells between each -// filter element on that dimension. The dimension order is determined by the -// value of `data_format`, see above for details. Dilations in the batch and -// depth dimensions must be 1. -// If not specified, defaults to -func Conv3DBackpropFilterV2Dilations(value []int64) Conv3DBackpropFilterV2Attr { - return func(m optionalAttr) { - m["dilations"] = value + opspec := tf.OpSpec{ + Type: "Softplus", + Input: []tf.Input{ + features, + }, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// Computes the gradients of 3-D convolution with respect to the filter. +// Computes exponential of x - 1 element-wise. // -// Arguments: -// input: Shape `[batch, depth, rows, cols, in_channels]`. -// filter_sizes: An integer vector representing the tensor shape of `filter`, -// where `filter` is a 5-D -// `[filter_depth, filter_height, filter_width, in_channels, out_channels]` -// tensor. -// out_backprop: Backprop signal of shape `[batch, out_depth, out_rows, out_cols, -// out_channels]`. -// strides: 1-D tensor of length 5. The stride of the sliding window for each -// dimension of `input`. Must have `strides[0] = strides[4] = 1`. -// padding: The type of padding algorithm to use. -func Conv3DBackpropFilterV2(scope *Scope, input tf.Output, filter_sizes tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...Conv3DBackpropFilterV2Attr) (output tf.Output) { +// I.e., \\(y = (\exp x) - 1\\). 
+func Expm1(scope *Scope, x tf.Output) (y tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"strides": strides, "padding": padding} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "Conv3DBackpropFilterV2", + Type: "Expm1", Input: []tf.Input{ - input, filter_sizes, out_backprop, + x, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Execute a sub graph on a remote processor. +// Returns the number of records this Reader has produced. // -// The graph specifications(such as graph itself, input tensors and output names) -// are stored as a serialized protocol buffer of RemoteFusedGraphExecuteInfo -// as serialized_remote_fused_graph_execute_info. -// The specifications will be passed to a dedicated registered -// remote fused graph executor. The executor will send the graph specifications -// to a remote processor and execute that graph. The execution results -// will be passed to consumer nodes as outputs of this node. +// This is the same as the number of ReaderRead executions that have +// succeeded. // // Arguments: -// inputs: Arbitrary number of tensors with arbitrary data types -// -// serialized_remote_fused_graph_execute_info: Serialized protocol buffer -// of RemoteFusedGraphExecuteInfo which contains graph specifications. -// -// Returns Arbitrary number of tensors with arbitrary data types -func RemoteFusedGraphExecute(scope *Scope, inputs []tf.Output, Toutputs []tf.DataType, serialized_remote_fused_graph_execute_info string) (outputs []tf.Output) { +// reader_handle: Handle to a Reader. 
+func ReaderNumRecordsProducedV2(scope *Scope, reader_handle tf.Output) (records_produced tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"Toutputs": Toutputs, "serialized_remote_fused_graph_execute_info": serialized_remote_fused_graph_execute_info} opspec := tf.OpSpec{ - Type: "RemoteFusedGraphExecute", + Type: "ReaderNumRecordsProducedV2", Input: []tf.Input{ - tf.OutputList(inputs), + reader_handle, }, - Attrs: attrs, } op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if outputs, idx, err = makeOutputList(op, idx, "outputs"); err != nil { - scope.UpdateErr("RemoteFusedGraphExecute", err) - return - } - return outputs + return op.Output(0) } -// SerializeManySparseAttr is an optional argument to SerializeManySparse. -type SerializeManySparseAttr func(optionalAttr) - -// SerializeManySparseOutType sets the optional out_type attribute to value. +// Computes the sum along segments of a tensor. // -// value: The `dtype` to use for serialization; the supported types are `string` -// (default) and `variant`. -// If not specified, defaults to DT_STRING -func SerializeManySparseOutType(value tf.DataType) SerializeManySparseAttr { - return func(m optionalAttr) { - m["out_type"] = value - } -} - -// Serialize an `N`-minibatch `SparseTensor` into an `[N, 3]` `Tensor` object. +// Read @{$math_ops#Segmentation$the section on segmentation} for an explanation of +// segments. // -// The `SparseTensor` must have rank `R` greater than 1, and the first dimension -// is treated as the minibatch dimension. Elements of the `SparseTensor` -// must be sorted in increasing order of this first dimension. The serialized -// `SparseTensor` objects going into each row of `serialized_sparse` will have -// rank `R-1`. +// Computes a tensor such that +// \\(output_i = \sum_j data_j\\) where sum is over `j` such +// that `segment_ids[j] == i`. // -// The minibatch size `N` is extracted from `sparse_shape[0]`. 
+// If the sum is empty for a given segment ID `i`, `output[i] = 0`. +// +//
+// +//
// // Arguments: -// sparse_indices: 2-D. The `indices` of the minibatch `SparseTensor`. -// sparse_values: 1-D. The `values` of the minibatch `SparseTensor`. -// sparse_shape: 1-D. The `shape` of the minibatch `SparseTensor`. -func SerializeManySparse(scope *Scope, sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output, optional ...SerializeManySparseAttr) (serialized_sparse tf.Output) { +// +// segment_ids: A 1-D tensor whose rank is equal to the rank of `data`'s +// first dimension. Values should be sorted and can be repeated. +// +// Returns Has same shape as data, except for dimension 0 which +// has size `k`, the number of segments. +func SegmentSum(scope *Scope, data tf.Output, segment_ids tf.Output) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "SerializeManySparse", + Type: "SegmentSum", Input: []tf.Input{ - sparse_indices, sparse_values, sparse_shape, + data, segment_ids, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Computes inverse hyperbolic cosine of x element-wise. -func Acosh(scope *Scope, x tf.Output) (y tf.Output) { +// Creates a dataset that emits the lines of one or more text files. +// +// Arguments: +// filenames: A scalar or a vector containing the name(s) of the file(s) to be +// read. +// compression_type: A scalar containing either (i) the empty string (no +// compression), (ii) "ZLIB", or (iii) "GZIP". +// buffer_size: A scalar containing the number of bytes to buffer. 
+func TextLineDataset(scope *Scope, filenames tf.Output, compression_type tf.Output, buffer_size tf.Output) (handle tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Acosh", + Type: "TextLineDataset", Input: []tf.Input{ - x, + filenames, compression_type, buffer_size, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// TensorArrayV2Attr is an optional argument to TensorArrayV2. -type TensorArrayV2Attr func(optionalAttr) +// CudnnRNNParamsSizeAttr is an optional argument to CudnnRNNParamsSize. +type CudnnRNNParamsSizeAttr func(optionalAttr) -// TensorArrayV2ElementShape sets the optional element_shape attribute to value. -// If not specified, defaults to -func TensorArrayV2ElementShape(value tf.Shape) TensorArrayV2Attr { +// CudnnRNNParamsSizeRnnMode sets the optional rnn_mode attribute to value. +// If not specified, defaults to "lstm" +func CudnnRNNParamsSizeRnnMode(value string) CudnnRNNParamsSizeAttr { return func(m optionalAttr) { - m["element_shape"] = value + m["rnn_mode"] = value } } -// TensorArrayV2DynamicSize sets the optional dynamic_size attribute to value. -// If not specified, defaults to false -func TensorArrayV2DynamicSize(value bool) TensorArrayV2Attr { +// CudnnRNNParamsSizeInputMode sets the optional input_mode attribute to value. +// If not specified, defaults to "linear_input" +func CudnnRNNParamsSizeInputMode(value string) CudnnRNNParamsSizeAttr { return func(m optionalAttr) { - m["dynamic_size"] = value + m["input_mode"] = value } } -// TensorArrayV2ClearAfterRead sets the optional clear_after_read attribute to value. -// If not specified, defaults to true -func TensorArrayV2ClearAfterRead(value bool) TensorArrayV2Attr { +// CudnnRNNParamsSizeDirection sets the optional direction attribute to value. 
+// If not specified, defaults to "unidirectional" +func CudnnRNNParamsSizeDirection(value string) CudnnRNNParamsSizeAttr { return func(m optionalAttr) { - m["clear_after_read"] = value + m["direction"] = value } } -// TensorArrayV2TensorArrayName sets the optional tensor_array_name attribute to value. -// If not specified, defaults to "" -func TensorArrayV2TensorArrayName(value string) TensorArrayV2Attr { +// CudnnRNNParamsSizeDropout sets the optional dropout attribute to value. +// If not specified, defaults to 0 +func CudnnRNNParamsSizeDropout(value float32) CudnnRNNParamsSizeAttr { return func(m optionalAttr) { - m["tensor_array_name"] = value + m["dropout"] = value } } -// Deprecated. Use TensorArrayV3 +// CudnnRNNParamsSizeSeed sets the optional seed attribute to value. +// If not specified, defaults to 0 +func CudnnRNNParamsSizeSeed(value int64) CudnnRNNParamsSizeAttr { + return func(m optionalAttr) { + m["seed"] = value + } +} + +// CudnnRNNParamsSizeSeed2 sets the optional seed2 attribute to value. +// If not specified, defaults to 0 +func CudnnRNNParamsSizeSeed2(value int64) CudnnRNNParamsSizeAttr { + return func(m optionalAttr) { + m["seed2"] = value + } +} + +// Computes size of weights that can be used by a Cudnn RNN model. // -// DEPRECATED at GraphDef version 26: Use TensorArrayV3 -func TensorArrayV2(scope *Scope, size tf.Output, dtype tf.DataType, optional ...TensorArrayV2Attr) (handle tf.Output) { +// Return the params size that can be used by the Cudnn RNN model. Subsequent +// weight allocation and initialization should use this size. +// +// num_layers: Specifies the number of layers in the RNN model. +// num_units: Specifies the size of the hidden state. +// input_size: Specifies the size of the input state. +// rnn_mode: Indicates the type of the RNN model. +// input_mode: Indicate whether there is a linear projection between the input and +// The actual computation before the first layer. 
'skip_input' is only allowed +// when input_size == num_units; 'auto_select' implies 'skip_input' when +// input_size == num_units; otherwise, it implies 'linear_input'. +// direction: Indicates whether a bidirectional model will be used. +// dir = (direction == bidirectional) ? 2 : 1 +// dropout: dropout probability. When set to 0., dropout is disabled. +// seed: the 1st part of a seed to initialize dropout. +// seed2: the 2nd part of a seed to initialize dropout. +// params_size: The size of the params buffer that should be allocated and +// initialized for this RNN model. Note that this params buffer may not be +// compatible across GPUs. Please use CudnnRNNParamsWeights and +// CudnnRNNParamsBiases to save and restore them in a way that is compatible +// across different runs. +func CudnnRNNParamsSize(scope *Scope, num_layers tf.Output, num_units tf.Output, input_size tf.Output, T tf.DataType, S tf.DataType, optional ...CudnnRNNParamsSizeAttr) (params_size tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"dtype": dtype} + attrs := map[string]interface{}{"T": T, "S": S} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "TensorArrayV2", + Type: "CudnnRNNParamsSize", Input: []tf.Input{ - size, + num_layers, num_units, input_size, }, Attrs: attrs, } @@ -13561,206 +16005,328 @@ func TensorArrayV2(scope *Scope, size tf.Output, dtype tf.DataType, optional ... return op.Output(0) } -// DecodeCSVAttr is an optional argument to DecodeCSV. -type DecodeCSVAttr func(optionalAttr) - -// DecodeCSVFieldDelim sets the optional field_delim attribute to value. +// Computes gradients for SparseSegmentMean. // -// value: char delimiter to separate fields in a record. -// If not specified, defaults to "," -func DecodeCSVFieldDelim(value string) DecodeCSVAttr { - return func(m optionalAttr) { - m["field_delim"] = value - } -} - -// DecodeCSVUseQuoteDelim sets the optional use_quote_delim attribute to value. 
+// Returns tensor "output" with same shape as grad, except for dimension 0 whose +// value is output_dim0. // -// value: If false, treats double quotation marks as regular -// characters inside of the string fields (ignoring RFC 4180, Section 2, -// Bullet 5). -// If not specified, defaults to true -func DecodeCSVUseQuoteDelim(value bool) DecodeCSVAttr { - return func(m optionalAttr) { - m["use_quote_delim"] = value +// Arguments: +// grad: gradient propagated to the SparseSegmentMean op. +// indices: indices passed to the corresponding SparseSegmentMean op. +// segment_ids: segment_ids passed to the corresponding SparseSegmentMean op. +// output_dim0: dimension 0 of "data" passed to SparseSegmentMean op. +func SparseSegmentMeanGrad(scope *Scope, grad tf.Output, indices tf.Output, segment_ids tf.Output, output_dim0 tf.Output) (output tf.Output) { + if scope.Err() != nil { + return } -} - -// DecodeCSVNaValue sets the optional na_value attribute to value. -// -// value: Additional string to recognize as NA/NaN. -// If not specified, defaults to "" -func DecodeCSVNaValue(value string) DecodeCSVAttr { - return func(m optionalAttr) { - m["na_value"] = value + opspec := tf.OpSpec{ + Type: "SparseSegmentMeanGrad", + Input: []tf.Input{ + grad, indices, segment_ids, output_dim0, + }, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// Convert CSV records to tensors. Each column maps to one tensor. +// Returns the set of files matching one or more glob patterns. // -// RFC 4180 format is expected for the CSV records. -// (https://tools.ietf.org/html/rfc4180) -// Note that we allow leading and trailing spaces with int or float field. +// Note that this routine only supports wildcard characters in the +// basename portion of the pattern, not in the directory portion. +// Note also that the order of filenames returned can be non-deterministic. // // Arguments: -// records: Each string is a record/row in the csv and all records should have -// the same format. 
-// record_defaults: One tensor per column of the input record, with either a -// scalar default value for that column or empty if the column is required. +// pattern: Shell wildcard pattern(s). Scalar or vector of type string. // -// Returns Each tensor will have the same shape as records. -func DecodeCSV(scope *Scope, records tf.Output, record_defaults []tf.Output, optional ...DecodeCSVAttr) (output []tf.Output) { +// Returns A vector of matching filenames. +func MatchingFiles(scope *Scope, pattern tf.Output) (filenames tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "DecodeCSV", + Type: "MatchingFiles", Input: []tf.Input{ - records, tf.OutputList(record_defaults), + pattern, }, - Attrs: attrs, } op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Returns the truth value of (x >= y) element-wise. +// +// *NOTE*: `GreaterEqual` supports broadcasting. More about broadcasting +// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +func GreaterEqual(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { if scope.Err() != nil { return } - var idx int - var err error - if output, idx, err = makeOutputList(op, idx, "output"); err != nil { - scope.UpdateErr("DecodeCSV", err) - return + opspec := tf.OpSpec{ + Type: "GreaterEqual", + Input: []tf.Input{ + x, y, + }, } - return output + op := scope.AddOperation(opspec) + return op.Output(0) } -// MapClearAttr is an optional argument to MapClear. -type MapClearAttr func(optionalAttr) +// Conv3DAttr is an optional argument to Conv3D. +type Conv3DAttr func(optionalAttr) -// MapClearCapacity sets the optional capacity attribute to value. -// If not specified, defaults to 0 +// Conv3DDataFormat sets the optional data_format attribute to value. // -// REQUIRES: value >= 0 -func MapClearCapacity(value int64) MapClearAttr { +// value: The data format of the input and output data. 
With the +// default format "NDHWC", the data is stored in the order of: +// [batch, in_depth, in_height, in_width, in_channels]. +// Alternatively, the format could be "NCDHW", the data storage order is: +// [batch, in_channels, in_depth, in_height, in_width]. +// If not specified, defaults to "NDHWC" +func Conv3DDataFormat(value string) Conv3DAttr { return func(m optionalAttr) { - m["capacity"] = value + m["data_format"] = value } } -// MapClearMemoryLimit sets the optional memory_limit attribute to value. -// If not specified, defaults to 0 +// Conv3DDilations sets the optional dilations attribute to value. // -// REQUIRES: value >= 0 -func MapClearMemoryLimit(value int64) MapClearAttr { +// value: 1-D tensor of length 5. The dilation factor for each dimension of +// `input`. If set to k > 1, there will be k-1 skipped cells between each +// filter element on that dimension. The dimension order is determined by the +// value of `data_format`, see above for details. Dilations in the batch and +// depth dimensions must be 1. +// If not specified, defaults to +func Conv3DDilations(value []int64) Conv3DAttr { return func(m optionalAttr) { - m["memory_limit"] = value + m["dilations"] = value } } -// MapClearContainer sets the optional container attribute to value. -// If not specified, defaults to "" -func MapClearContainer(value string) MapClearAttr { - return func(m optionalAttr) { - m["container"] = value +// Computes a 3-D convolution given 5-D `input` and `filter` tensors. +// +// In signal processing, cross-correlation is a measure of similarity of +// two waveforms as a function of a time-lag applied to one of them. This +// is also known as a sliding dot product or sliding inner-product. +// +// Our Conv3D implements a form of cross-correlation. +// +// Arguments: +// input: Shape `[batch, in_depth, in_height, in_width, in_channels]`. +// filter: Shape `[filter_depth, filter_height, filter_width, in_channels, +// out_channels]`. 
`in_channels` must match between `input` and `filter`. +// strides: 1-D tensor of length 5. The stride of the sliding window for each +// dimension of `input`. Must have `strides[0] = strides[4] = 1`. +// padding: The type of padding algorithm to use. +func Conv3D(scope *Scope, input tf.Output, filter tf.Output, strides []int64, padding string, optional ...Conv3DAttr) (output tf.Output) { + if scope.Err() != nil { + return } -} - -// MapClearSharedName sets the optional shared_name attribute to value. -// If not specified, defaults to "" -func MapClearSharedName(value string) MapClearAttr { - return func(m optionalAttr) { - m["shared_name"] = value + attrs := map[string]interface{}{"strides": strides, "padding": padding} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "Conv3D", + Input: []tf.Input{ + input, filter, + }, + Attrs: attrs, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// Op removes all elements in the underlying container. +// Adds up a SparseTensor and a dense Tensor, using these special rules: // -// Returns the created operation. -func MapClear(scope *Scope, dtypes []tf.DataType, optional ...MapClearAttr) (o *tf.Operation) { +// (1) Broadcasts the dense side to have the same shape as the sparse side, if +// eligible; +// (2) Then, only the dense values pointed to by the indices of the SparseTensor +// participate in the cwise addition. +// +// By these rules, the result is a logical SparseTensor with exactly the same +// indices and shape, but possibly with different non-zero values. The output of +// this Op is the resultant non-zero values. +// +// Arguments: +// sp_indices: 2-D. `N x R` matrix with the indices of non-empty values in a +// SparseTensor, possibly not in canonical ordering. +// sp_values: 1-D. `N` non-empty values corresponding to `sp_indices`. +// sp_shape: 1-D. Shape of the input SparseTensor. +// dense: `R`-D. The dense Tensor operand. +// +// Returns 1-D. 
The `N` values that are operated on. +func SparseDenseCwiseAdd(scope *Scope, sp_indices tf.Output, sp_values tf.Output, sp_shape tf.Output, dense tf.Output) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"dtypes": dtypes} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "MapClear", + Type: "SparseDenseCwiseAdd", + Input: []tf.Input{ + sp_indices, sp_values, sp_shape, dense, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} +// Read an element from the TensorArray into output `value`. +// +// Arguments: +// handle: The handle to a TensorArray. +// +// flow_in: A float scalar that enforces proper chaining of operations. +// dtype: The type of the elem that is returned. +// +// Returns The tensor that is read from the TensorArray. +func TensorArrayReadV3(scope *Scope, handle tf.Output, index tf.Output, flow_in tf.Output, dtype tf.DataType) (value tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"dtype": dtype} + opspec := tf.OpSpec{ + Type: "TensorArrayReadV3", + Input: []tf.Input{ + handle, index, flow_in, + }, Attrs: attrs, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0) } -// ThreadUnsafeUnigramCandidateSamplerAttr is an optional argument to ThreadUnsafeUnigramCandidateSampler. -type ThreadUnsafeUnigramCandidateSamplerAttr func(optionalAttr) +// QuantizeV2Attr is an optional argument to QuantizeV2. +type QuantizeV2Attr func(optionalAttr) -// ThreadUnsafeUnigramCandidateSamplerSeed sets the optional seed attribute to value. -// -// value: If either seed or seed2 are set to be non-zero, the random number -// generator is seeded by the given seed. Otherwise, it is seeded by a -// random seed. -// If not specified, defaults to 0 -func ThreadUnsafeUnigramCandidateSamplerSeed(value int64) ThreadUnsafeUnigramCandidateSamplerAttr { +// QuantizeV2Mode sets the optional mode attribute to value. 
+// If not specified, defaults to "MIN_COMBINED" +func QuantizeV2Mode(value string) QuantizeV2Attr { return func(m optionalAttr) { - m["seed"] = value + m["mode"] = value } } -// ThreadUnsafeUnigramCandidateSamplerSeed2 sets the optional seed2 attribute to value. -// -// value: An second seed to avoid seed collision. -// If not specified, defaults to 0 -func ThreadUnsafeUnigramCandidateSamplerSeed2(value int64) ThreadUnsafeUnigramCandidateSamplerAttr { +// QuantizeV2RoundMode sets the optional round_mode attribute to value. +// If not specified, defaults to "HALF_AWAY_FROM_ZERO" +func QuantizeV2RoundMode(value string) QuantizeV2Attr { return func(m optionalAttr) { - m["seed2"] = value + m["round_mode"] = value } } -// Generates labels for candidate sampling with a learned unigram distribution. +// Quantize the 'input' tensor of type float to 'output' tensor of type 'T'. // -// See explanations of candidate sampling and the data formats at -// go/candidate-sampling. +// [min_range, max_range] are scalar floats that specify the range for +// the 'input' data. The 'mode' attribute controls exactly which calculations are +// used to convert the float values to their quantized equivalents. The +// 'round_mode' attribute controls which rounding tie-breaking algorithm is used +// when rounding float values to their quantized equivalents. // -// For each batch, this op picks a single set of sampled candidate labels. +// In 'MIN_COMBINED' mode, each value of the tensor will undergo the following: // -// The advantages of sampling candidates per-batch are simplicity and the -// possibility of efficient dense matrix multiplication. The disadvantage is that -// the sampled candidates must be chosen independently of the context and of the -// true labels. 
+// ``` +// out[i] = (in[i] - min_range) * range(T) / (max_range - min_range) +// if T == qint8, out[i] -= (range(T) + 1) / 2.0 +// ``` +// here `range(T) = numeric_limits::max() - numeric_limits::min()` +// +// *MIN_COMBINED Mode Example* +// +// Assume the input is type float and has a possible range of [0.0, 6.0] and the +// output type is quint8 ([0, 255]). The min_range and max_range values should be +// specified as 0.0 and 6.0. Quantizing from float to quint8 will multiply each +// value of the input by 255/6 and cast to quint8. +// +// If the output type was qint8 ([-128, 127]), the operation will additionally +// subtract each value by 128 prior to casting, so that the range of values aligns +// with the range of qint8. +// +// If the mode is 'MIN_FIRST', then this approach is used: +// +// ``` +// num_discrete_values = 1 << (# of bits in T) +// range_adjust = num_discrete_values / (num_discrete_values - 1) +// range = (range_max - range_min) * range_adjust +// range_scale = num_discrete_values / range +// quantized = round(input * range_scale) - round(range_min * range_scale) + +// numeric_limits::min() +// quantized = max(quantized, numeric_limits::min()) +// quantized = min(quantized, numeric_limits::max()) +// ``` +// +// The biggest difference between this and MIN_COMBINED is that the minimum range +// is rounded first, before it's subtracted from the rounded value. With +// MIN_COMBINED, a small bias is introduced where repeated iterations of quantizing +// and dequantizing will introduce a larger and larger error. +// +// *SCALED mode Example* +// +// `SCALED` mode matches the quantization approach used in +// `QuantizeAndDequantize{V2|V3}`. +// +// If the mode is `SCALED`, we do not use the full range of the output type, +// choosing to elide the lowest possible value for symmetry (e.g., output range is +// -127 to 127, not -128 to 127 for signed 8 bit quantization), so that 0.0 maps to +// 0. 
+// +// We first find the range of values in our tensor. The +// range we use is always centered on 0, so we find m such that +// ```c++ +// m = max(abs(input_min), abs(input_max)) +// ``` +// +// Our input tensor range is then `[-m, m]`. +// +// Next, we choose our fixed-point quantization buckets, `[min_fixed, max_fixed]`. +// If T is signed, this is +// ``` +// num_bits = sizeof(T) * 8 +// [min_fixed, max_fixed] = +// [-(1 << (num_bits - 1) - 1), (1 << (num_bits - 1)) - 1] +// ``` +// +// Otherwise, if T is unsigned, the fixed-point range is +// ``` +// [min_fixed, max_fixed] = [0, (1 << num_bits) - 1] +// ``` +// +// From this we compute our scaling factor, s: +// ```c++ +// s = (max_fixed - min_fixed) / (2 * m) +// ``` +// +// Now we can quantize the elements of our tensor: +// ```c++ +// result = round(input * s) +// ``` +// +// One thing to watch out for is that the operator may choose to adjust the +// requested minimum and maximum values slightly during the quantization process, +// so you should always use the output ports as the range for further calculations. +// For example, if the requested minimum and maximum values are close to equal, +// they will be separated by a small epsilon value to prevent ill-formed quantized +// buffers from being created. Otherwise, you can end up with buffers where all the +// quantized values map to the same float value, which causes problems for +// operations that have to perform further calculations on them. // // Arguments: -// true_classes: A batch_size * num_true matrix, in which each row contains the -// IDs of the num_true target_classes in the corresponding original label. -// num_true: Number of true labels per context. -// num_sampled: Number of candidates to randomly sample. -// unique: If unique is true, we sample with rejection, so that all sampled -// candidates in a batch are unique. This requires some approximation to -// estimate the post-rejection sampling probabilities. 
-// range_max: The sampler will sample integers from the interval [0, range_max). // -// Returns A vector of length num_sampled, in which each element is -// the ID of a sampled candidate.A batch_size * num_true matrix, representing -// the number of times each candidate is expected to occur in a batch -// of sampled candidates. If unique=true, then this is a probability.A vector of length num_sampled, for each sampled -// candidate representing the number of times the candidate is expected -// to occur in a batch of sampled candidates. If unique=true, then this is a -// probability. -func ThreadUnsafeUnigramCandidateSampler(scope *Scope, true_classes tf.Output, num_true int64, num_sampled int64, unique bool, range_max int64, optional ...ThreadUnsafeUnigramCandidateSamplerAttr) (sampled_candidates tf.Output, true_expected_count tf.Output, sampled_expected_count tf.Output) { +// min_range: The minimum scalar value possibly produced for the input. +// max_range: The maximum scalar value possibly produced for the input. +// +// +// Returns The quantized data produced from the float input.The actual minimum scalar value used for the output.The actual maximum scalar value used for the output. 
+func QuantizeV2(scope *Scope, input tf.Output, min_range tf.Output, max_range tf.Output, T tf.DataType, optional ...QuantizeV2Attr) (output tf.Output, output_min tf.Output, output_max tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"num_true": num_true, "num_sampled": num_sampled, "unique": unique, "range_max": range_max} + attrs := map[string]interface{}{"T": T} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "ThreadUnsafeUnigramCandidateSampler", + Type: "QuantizeV2", Input: []tf.Input{ - true_classes, + input, min_range, max_range, }, Attrs: attrs, } @@ -13768,646 +16334,698 @@ func ThreadUnsafeUnigramCandidateSampler(scope *Scope, true_classes tf.Output, n return op.Output(0), op.Output(1), op.Output(2) } -// MaxPoolV2Attr is an optional argument to MaxPoolV2. -type MaxPoolV2Attr func(optionalAttr) - -// MaxPoolV2DataFormat sets the optional data_format attribute to value. -// -// value: Specify the data format of the input and output data. With the -// default format "NHWC", the data is stored in the order of: -// [batch, in_height, in_width, in_channels]. -// Alternatively, the format could be "NCHW", the data storage order of: -// [batch, in_channels, in_height, in_width]. -// If not specified, defaults to "NHWC" -func MaxPoolV2DataFormat(value string) MaxPoolV2Attr { - return func(m optionalAttr) { - m["data_format"] = value - } -} - -// Performs max pooling on the input. -// -// Arguments: -// input: 4-D input to pool over. -// ksize: The size of the window for each dimension of the input tensor. -// strides: The stride of the sliding window for each dimension of the -// input tensor. -// padding: The type of padding algorithm to use. +// Returns the truth value of (x < y) element-wise. // -// Returns The max pooled output tensor. 
-func MaxPoolV2(scope *Scope, input tf.Output, ksize tf.Output, strides tf.Output, padding string, optional ...MaxPoolV2Attr) (output tf.Output) { +// *NOTE*: `Less` supports broadcasting. More about broadcasting +// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +func Less(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"padding": padding} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "MaxPoolV2", + Type: "Less", Input: []tf.Input{ - input, ksize, strides, + x, y, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// MutableDenseHashTableV2Attr is an optional argument to MutableDenseHashTableV2. -type MutableDenseHashTableV2Attr func(optionalAttr) +// QuantizedReluXAttr is an optional argument to QuantizedReluX. +type QuantizedReluXAttr func(optionalAttr) -// MutableDenseHashTableV2Container sets the optional container attribute to value. -// -// value: If non-empty, this table is placed in the given container. -// Otherwise, a default container is used. -// If not specified, defaults to "" -func MutableDenseHashTableV2Container(value string) MutableDenseHashTableV2Attr { +// QuantizedReluXOutType sets the optional out_type attribute to value. +// If not specified, defaults to DT_QUINT8 +func QuantizedReluXOutType(value tf.DataType) QuantizedReluXAttr { return func(m optionalAttr) { - m["container"] = value + m["out_type"] = value } } -// MutableDenseHashTableV2SharedName sets the optional shared_name attribute to value. +// Computes Quantized Rectified Linear X: `min(max(features, 0), max_value)` // -// value: If non-empty, this table is shared under the given name across -// multiple sessions. 
-// If not specified, defaults to "" -func MutableDenseHashTableV2SharedName(value string) MutableDenseHashTableV2Attr { - return func(m optionalAttr) { - m["shared_name"] = value +// Arguments: +// +// +// min_features: The float value that the lowest quantized value represents. +// max_features: The float value that the highest quantized value represents. +// +// Returns Has the same output shape as "features".The float value that the lowest quantized value represents.The float value that the highest quantized value represents. +func QuantizedReluX(scope *Scope, features tf.Output, max_value tf.Output, min_features tf.Output, max_features tf.Output, optional ...QuantizedReluXAttr) (activations tf.Output, min_activations tf.Output, max_activations tf.Output) { + if scope.Err() != nil { + return } -} - -// MutableDenseHashTableV2UseNodeNameSharing sets the optional use_node_name_sharing attribute to value. -// If not specified, defaults to false -func MutableDenseHashTableV2UseNodeNameSharing(value bool) MutableDenseHashTableV2Attr { - return func(m optionalAttr) { - m["use_node_name_sharing"] = value + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) } -} - -// MutableDenseHashTableV2ValueShape sets the optional value_shape attribute to value. -// -// value: The shape of each value. -// If not specified, defaults to <> -func MutableDenseHashTableV2ValueShape(value tf.Shape) MutableDenseHashTableV2Attr { - return func(m optionalAttr) { - m["value_shape"] = value + opspec := tf.OpSpec{ + Type: "QuantizedReluX", + Input: []tf.Input{ + features, max_value, min_features, max_features, + }, + Attrs: attrs, } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2) } -// MutableDenseHashTableV2InitialNumBuckets sets the optional initial_num_buckets attribute to value. -// -// value: The initial number of hash table buckets. Must be a power -// to 2. 
-// If not specified, defaults to 131072 -func MutableDenseHashTableV2InitialNumBuckets(value int64) MutableDenseHashTableV2Attr { +// QuantizedConv2DAttr is an optional argument to QuantizedConv2D. +type QuantizedConv2DAttr func(optionalAttr) + +// QuantizedConv2DOutType sets the optional out_type attribute to value. +// If not specified, defaults to DT_QINT32 +func QuantizedConv2DOutType(value tf.DataType) QuantizedConv2DAttr { return func(m optionalAttr) { - m["initial_num_buckets"] = value + m["out_type"] = value } } -// MutableDenseHashTableV2MaxLoadFactor sets the optional max_load_factor attribute to value. +// QuantizedConv2DDilations sets the optional dilations attribute to value. // -// value: The maximum ratio between number of entries and number of -// buckets before growing the table. Must be between 0 and 1. -// If not specified, defaults to 0.8 -func MutableDenseHashTableV2MaxLoadFactor(value float32) MutableDenseHashTableV2Attr { +// value: 1-D tensor of length 4. The dilation factor for each dimension of +// `input`. If set to k > 1, there will be k-1 skipped cells between each +// filter element on that dimension. The dimension order is determined by the +// value of `data_format`, see above for details. Dilations in the batch and +// depth dimensions must be 1. +// If not specified, defaults to +func QuantizedConv2DDilations(value []int64) QuantizedConv2DAttr { return func(m optionalAttr) { - m["max_load_factor"] = value + m["dilations"] = value } } -// Creates an empty hash table that uses tensors as the backing store. -// -// It uses "open addressing" with quadratic reprobing to resolve -// collisions. +// Computes a 2D convolution given quantized 4D input and filter tensors. // -// This op creates a mutable hash table, specifying the type of its keys and -// values. Each value must be a scalar. Data can be inserted into the table using -// the insert operations. It does not support the initialization operation. 
+// The inputs are quantized tensors where the lowest value represents the real +// number of the associated minimum, and the highest represents the maximum. +// This means that you can only interpret the quantized output in the same way, by +// taking the returned minimum and maximum values into account. // // Arguments: -// empty_key: The key used to represent empty key buckets internally. Must not -// be used in insert or lookup operations. -// value_dtype: Type of the table values. // -// Returns Handle to a table. -func MutableDenseHashTableV2(scope *Scope, empty_key tf.Output, value_dtype tf.DataType, optional ...MutableDenseHashTableV2Attr) (table_handle tf.Output) { +// filter: filter's input_depth dimension must match input's depth dimensions. +// min_input: The float value that the lowest quantized input value represents. +// max_input: The float value that the highest quantized input value represents. +// min_filter: The float value that the lowest quantized filter value represents. +// max_filter: The float value that the highest quantized filter value represents. +// strides: The stride of the sliding window for each dimension of the input +// tensor. +// padding: The type of padding algorithm to use. +// +// Returns The float value that the lowest quantized output value represents.The float value that the highest quantized output value represents. 
+func QuantizedConv2D(scope *Scope, input tf.Output, filter tf.Output, min_input tf.Output, max_input tf.Output, min_filter tf.Output, max_filter tf.Output, strides []int64, padding string, optional ...QuantizedConv2DAttr) (output tf.Output, min_output tf.Output, max_output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"value_dtype": value_dtype} + attrs := map[string]interface{}{"strides": strides, "padding": padding} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "MutableDenseHashTableV2", + Type: "QuantizedConv2D", Input: []tf.Input{ - empty_key, + input, filter, min_input, max_input, min_filter, max_filter, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1), op.Output(2) } -// StageSizeAttr is an optional argument to StageSize. -type StageSizeAttr func(optionalAttr) +// StatelessMultinomialAttr is an optional argument to StatelessMultinomial. +type StatelessMultinomialAttr func(optionalAttr) -// StageSizeCapacity sets the optional capacity attribute to value. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func StageSizeCapacity(value int64) StageSizeAttr { +// StatelessMultinomialOutputDtype sets the optional output_dtype attribute to value. +// If not specified, defaults to DT_INT64 +func StatelessMultinomialOutputDtype(value tf.DataType) StatelessMultinomialAttr { return func(m optionalAttr) { - m["capacity"] = value + m["output_dtype"] = value } } -// StageSizeMemoryLimit sets the optional memory_limit attribute to value. -// If not specified, defaults to 0 +// Draws samples from a multinomial distribution. // -// REQUIRES: value >= 0 -func StageSizeMemoryLimit(value int64) StageSizeAttr { - return func(m optionalAttr) { - m["memory_limit"] = value +// Arguments: +// logits: 2-D Tensor with shape `[batch_size, num_classes]`. Each slice `[i, :]` +// represents the unnormalized log probabilities for all classes. 
+// num_samples: 0-D. Number of independent samples to draw for each row slice. +// seed: 2 seeds (shape [2]). +// +// Returns 2-D Tensor with shape `[batch_size, num_samples]`. Each slice `[i, :]` +// contains the drawn class labels with range `[0, num_classes)`. +func StatelessMultinomial(scope *Scope, logits tf.Output, num_samples tf.Output, seed tf.Output, optional ...StatelessMultinomialAttr) (output tf.Output) { + if scope.Err() != nil { + return } -} - -// StageSizeContainer sets the optional container attribute to value. -// If not specified, defaults to "" -func StageSizeContainer(value string) StageSizeAttr { - return func(m optionalAttr) { - m["container"] = value + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "StatelessMultinomial", + Input: []tf.Input{ + logits, num_samples, seed, + }, + Attrs: attrs, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// StageSizeSharedName sets the optional shared_name attribute to value. -// If not specified, defaults to "" -func StageSizeSharedName(value string) StageSizeAttr { +// ResourceGatherAttr is an optional argument to ResourceGather. +type ResourceGatherAttr func(optionalAttr) + +// ResourceGatherValidateIndices sets the optional validate_indices attribute to value. +// If not specified, defaults to true +func ResourceGatherValidateIndices(value bool) ResourceGatherAttr { return func(m optionalAttr) { - m["shared_name"] = value + m["validate_indices"] = value } } -// Op returns the number of elements in the underlying container. -func StageSize(scope *Scope, dtypes []tf.DataType, optional ...StageSizeAttr) (size tf.Output) { +// Gather slices from the variable pointed to by `resource` according to `indices`. +// +// `indices` must be an integer tensor of any dimension (usually 0-D or 1-D). 
+// Produces an output tensor with shape `indices.shape + params.shape[1:]` where: +// +// ```python +// # Scalar indices +// output[:, ..., :] = params[indices, :, ... :] +// +// # Vector indices +// output[i, :, ..., :] = params[indices[i], :, ... :] +// +// # Higher rank indices +// output[i, ..., j, :, ... :] = params[indices[i, ..., j], :, ..., :] +// ``` +func ResourceGather(scope *Scope, resource tf.Output, indices tf.Output, dtype tf.DataType, optional ...ResourceGatherAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"dtypes": dtypes} + attrs := map[string]interface{}{"dtype": dtype} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "StageSize", - + Type: "ResourceGather", + Input: []tf.Input{ + resource, indices, + }, Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Produces the max pool of the input tensor for quantized types. +// Delete the TensorArray from its resource container. +// +// This enables the user to close and release the resource in the middle +// of a step/run. // // Arguments: -// input: The 4D (batch x rows x cols x depth) Tensor to MaxReduce over. -// min_input: The float value that the lowest quantized input value represents. -// max_input: The float value that the highest quantized input value represents. -// ksize: The size of the window for each dimension of the input tensor. -// The length must be 4 to match the number of dimensions of the input. -// strides: The stride of the sliding window for each dimension of the input -// tensor. The length must be 4 to match the number of dimensions of the input. -// padding: The type of padding algorithm to use. +// handle: The handle to a TensorArray (output of TensorArray or TensorArrayGrad). // -// Returns The float value that the lowest quantized output value represents.The float value that the highest quantized output value represents. 
-func QuantizedMaxPool(scope *Scope, input tf.Output, min_input tf.Output, max_input tf.Output, ksize []int64, strides []int64, padding string) (output tf.Output, min_output tf.Output, max_output tf.Output) { +// Returns the created operation. +func TensorArrayCloseV3(scope *Scope, handle tf.Output) (o *tf.Operation) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} opspec := tf.OpSpec{ - Type: "QuantizedMaxPool", + Type: "TensorArrayCloseV3", Input: []tf.Input{ - input, min_input, max_input, + handle, }, - Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return scope.AddOperation(opspec) } -// Computes softplus: `log(exp(features) + 1)`. -func Softplus(scope *Scope, features tf.Output) (activations tf.Output) { +// Adds two `SparseTensor` objects to produce another `SparseTensor`. +// +// The input `SparseTensor` objects' indices are assumed ordered in standard +// lexicographic order. If this is not the case, before this step run +// `SparseReorder` to restore index ordering. +// +// By default, if two values sum to zero at some index, the output `SparseTensor` +// would still include that particular location in its index, storing a zero in the +// corresponding value slot. To override this, callers can specify `thresh`, +// indicating that if the sum has a magnitude strictly smaller than `thresh`, its +// corresponding value and index would then not be included. In particular, +// `thresh == 0` (default) means everything is kept and actual thresholding happens +// only for a positive value. +// +// In the following shapes, `nnz` is the count after taking `thresh` into account. +// +// Arguments: +// a_indices: 2-D. The `indices` of the first `SparseTensor`, size `[nnz, ndims]` Matrix. +// a_values: 1-D. The `values` of the first `SparseTensor`, size `[nnz]` Vector. +// a_shape: 1-D. 
The `shape` of the first `SparseTensor`, size `[ndims]` Vector. +// b_indices: 2-D. The `indices` of the second `SparseTensor`, size `[nnz, ndims]` Matrix. +// b_values: 1-D. The `values` of the second `SparseTensor`, size `[nnz]` Vector. +// b_shape: 1-D. The `shape` of the second `SparseTensor`, size `[ndims]` Vector. +// thresh: 0-D. The magnitude threshold that determines if an output value/index +// pair takes space. +func SparseAdd(scope *Scope, a_indices tf.Output, a_values tf.Output, a_shape tf.Output, b_indices tf.Output, b_values tf.Output, b_shape tf.Output, thresh tf.Output) (sum_indices tf.Output, sum_values tf.Output, sum_shape tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Softplus", + Type: "SparseAdd", Input: []tf.Input{ - features, + a_indices, a_values, a_shape, b_indices, b_values, b_shape, thresh, }, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1), op.Output(2) } -// Computes exponential of x - 1 element-wise. +// OrderedMapPeekAttr is an optional argument to OrderedMapPeek. +type OrderedMapPeekAttr func(optionalAttr) + +// OrderedMapPeekCapacity sets the optional capacity attribute to value. +// If not specified, defaults to 0 // -// I.e., \\(y = (\exp x) - 1\\). -func Expm1(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return +// REQUIRES: value >= 0 +func OrderedMapPeekCapacity(value int64) OrderedMapPeekAttr { + return func(m optionalAttr) { + m["capacity"] = value } - opspec := tf.OpSpec{ - Type: "Expm1", - Input: []tf.Input{ - x, - }, +} + +// OrderedMapPeekMemoryLimit sets the optional memory_limit attribute to value. +// If not specified, defaults to 0 +// +// REQUIRES: value >= 0 +func OrderedMapPeekMemoryLimit(value int64) OrderedMapPeekAttr { + return func(m optionalAttr) { + m["memory_limit"] = value + } +} + +// OrderedMapPeekContainer sets the optional container attribute to value. 
+// If not specified, defaults to "" +func OrderedMapPeekContainer(value string) OrderedMapPeekAttr { + return func(m optionalAttr) { + m["container"] = value + } +} + +// OrderedMapPeekSharedName sets the optional shared_name attribute to value. +// If not specified, defaults to "" +func OrderedMapPeekSharedName(value string) OrderedMapPeekAttr { + return func(m optionalAttr) { + m["shared_name"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// Returns the number of records this Reader has produced. -// -// This is the same as the number of ReaderRead executions that have -// succeeded. +// Op peeks at the values at the specified key. If the // -// Arguments: -// reader_handle: Handle to a Reader. -func ReaderNumRecordsProducedV2(scope *Scope, reader_handle tf.Output) (records_produced tf.Output) { +// underlying container does not contain this key +// this op will block until it does. This Op is optimized for +// performance. +func OrderedMapPeek(scope *Scope, key tf.Output, indices tf.Output, dtypes []tf.DataType, optional ...OrderedMapPeekAttr) (values []tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"dtypes": dtypes} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "ReaderNumRecordsProducedV2", + Type: "OrderedMapPeek", Input: []tf.Input{ - reader_handle, + key, indices, }, + Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes the sum along segments of a tensor. -// -// Read @{$math_ops#segmentation$the section on segmentation} for an explanation of -// segments. -// -// Computes a tensor such that -// \\(output_i = \sum_j data_j\\) where sum is over `j` such -// that `segment_ids[j] == i`. -// -// If the sum is empty for a given segment ID `i`, `output[i] = 0`. -// -//
-// -//
-// -// Arguments: -// -// segment_ids: A 1-D tensor whose rank is equal to the rank of `data`'s -// first dimension. Values should be sorted and can be repeated. -// -// Returns Has same shape as data, except for dimension 0 which -// has size `k`, the number of segments. -func SegmentSum(scope *Scope, data tf.Output, segment_ids tf.Output) (output tf.Output) { if scope.Err() != nil { return } - opspec := tf.OpSpec{ - Type: "SegmentSum", - Input: []tf.Input{ - data, segment_ids, - }, + var idx int + var err error + if values, idx, err = makeOutputList(op, idx, "values"); err != nil { + scope.UpdateErr("OrderedMapPeek", err) + return } - op := scope.AddOperation(opspec) - return op.Output(0) + return values } -// Creates a dataset that emits the lines of one or more text files. +// DecodeAndCropJpegAttr is an optional argument to DecodeAndCropJpeg. +type DecodeAndCropJpegAttr func(optionalAttr) + +// DecodeAndCropJpegChannels sets the optional channels attribute to value. // -// Arguments: -// filenames: A scalar or a vector containing the name(s) of the file(s) to be -// read. -// compression_type: A scalar containing either (i) the empty string (no -// compression), (ii) "ZLIB", or (iii) "GZIP". -// buffer_size: A scalar containing the number of bytes to buffer. -func TextLineDataset(scope *Scope, filenames tf.Output, compression_type tf.Output, buffer_size tf.Output) (handle tf.Output) { - if scope.Err() != nil { - return +// value: Number of color channels for the decoded image. +// If not specified, defaults to 0 +func DecodeAndCropJpegChannels(value int64) DecodeAndCropJpegAttr { + return func(m optionalAttr) { + m["channels"] = value } - opspec := tf.OpSpec{ - Type: "TextLineDataset", - Input: []tf.Input{ - filenames, compression_type, buffer_size, - }, +} + +// DecodeAndCropJpegRatio sets the optional ratio attribute to value. +// +// value: Downscaling ratio. 
+// If not specified, defaults to 1 +func DecodeAndCropJpegRatio(value int64) DecodeAndCropJpegAttr { + return func(m optionalAttr) { + m["ratio"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// Computes gradients for SparseSegmentMean. +// DecodeAndCropJpegFancyUpscaling sets the optional fancy_upscaling attribute to value. // -// Returns tensor "output" with same shape as grad, except for dimension 0 whose -// value is output_dim0. +// value: If true use a slower but nicer upscaling of the +// chroma planes (yuv420/422 only). +// If not specified, defaults to true +func DecodeAndCropJpegFancyUpscaling(value bool) DecodeAndCropJpegAttr { + return func(m optionalAttr) { + m["fancy_upscaling"] = value + } +} + +// DecodeAndCropJpegTryRecoverTruncated sets the optional try_recover_truncated attribute to value. // -// Arguments: -// grad: gradient propagated to the SparseSegmentMean op. -// indices: indices passed to the corresponding SparseSegmentMean op. -// segment_ids: segment_ids passed to the corresponding SparseSegmentMean op. -// output_dim0: dimension 0 of "data" passed to SparseSegmentMean op. -func SparseSegmentMeanGrad(scope *Scope, grad tf.Output, indices tf.Output, segment_ids tf.Output, output_dim0 tf.Output) (output tf.Output) { - if scope.Err() != nil { - return +// value: If true try to recover an image from truncated input. +// If not specified, defaults to false +func DecodeAndCropJpegTryRecoverTruncated(value bool) DecodeAndCropJpegAttr { + return func(m optionalAttr) { + m["try_recover_truncated"] = value } - opspec := tf.OpSpec{ - Type: "SparseSegmentMeanGrad", - Input: []tf.Input{ - grad, indices, segment_ids, output_dim0, - }, +} + +// DecodeAndCropJpegAcceptableFraction sets the optional acceptable_fraction attribute to value. +// +// value: The minimum required fraction of lines before a truncated +// input is accepted. 
+// If not specified, defaults to 1 +func DecodeAndCropJpegAcceptableFraction(value float32) DecodeAndCropJpegAttr { + return func(m optionalAttr) { + m["acceptable_fraction"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// Returns the truth value of (x >= y) element-wise. +// DecodeAndCropJpegDctMethod sets the optional dct_method attribute to value. // -// *NOTE*: `GreaterEqual` supports broadcasting. More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func GreaterEqual(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { +// value: string specifying a hint about the algorithm used for +// decompression. Defaults to "" which maps to a system-specific +// default. Currently valid values are ["INTEGER_FAST", +// "INTEGER_ACCURATE"]. The hint may be ignored (e.g., the internal +// jpeg library changes to a version that does not have that specific +// option.) +// If not specified, defaults to "" +func DecodeAndCropJpegDctMethod(value string) DecodeAndCropJpegAttr { + return func(m optionalAttr) { + m["dct_method"] = value + } +} + +// Decode and Crop a JPEG-encoded image to a uint8 tensor. +// +// The attr `channels` indicates the desired number of color channels for the +// decoded image. +// +// Accepted values are: +// +// * 0: Use the number of channels in the JPEG-encoded image. +// * 1: output a grayscale image. +// * 3: output an RGB image. +// +// If needed, the JPEG-encoded image is transformed to match the requested number +// of color channels. +// +// The attr `ratio` allows downscaling the image by an integer factor during +// decoding. Allowed values are: 1, 2, 4, and 8. This is much faster than +// downscaling the image later. +// +// +// It is equivalent to a combination of decode and crop, but much faster by only +// decoding partial jpeg image. +// +// Arguments: +// contents: 0-D. The JPEG-encoded image. +// crop_window: 1-D. 
The crop window: [crop_y, crop_x, crop_height, crop_width]. +// +// Returns 3-D with shape `[height, width, channels]`.. +func DecodeAndCropJpeg(scope *Scope, contents tf.Output, crop_window tf.Output, optional ...DecodeAndCropJpegAttr) (image tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "GreaterEqual", + Type: "DecodeAndCropJpeg", Input: []tf.Input{ - x, y, + contents, crop_window, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Conv3DAttr is an optional argument to Conv3D. -type Conv3DAttr func(optionalAttr) +// AllCandidateSamplerAttr is an optional argument to AllCandidateSampler. +type AllCandidateSamplerAttr func(optionalAttr) -// Conv3DDataFormat sets the optional data_format attribute to value. +// AllCandidateSamplerSeed sets the optional seed attribute to value. // -// value: The data format of the input and output data. With the -// default format "NDHWC", the data is stored in the order of: -// [batch, in_depth, in_height, in_width, in_channels]. -// Alternatively, the format could be "NCDHW", the data storage order is: -// [batch, in_channels, in_depth, in_height, in_width]. -// If not specified, defaults to "NDHWC" -func Conv3DDataFormat(value string) Conv3DAttr { +// value: If either seed or seed2 are set to be non-zero, the random number +// generator is seeded by the given seed. Otherwise, it is seeded by a +// random seed. +// If not specified, defaults to 0 +func AllCandidateSamplerSeed(value int64) AllCandidateSamplerAttr { return func(m optionalAttr) { - m["data_format"] = value + m["seed"] = value } } -// Conv3DDilations sets the optional dilations attribute to value. +// AllCandidateSamplerSeed2 sets the optional seed2 attribute to value. // -// value: 1-D tensor of length 5. The dilation factor for each dimension of -// `input`. 
If set to k > 1, there will be k-1 skipped cells between each -// filter element on that dimension. The dimension order is determined by the -// value of `data_format`, see above for details. Dilations in the batch and -// depth dimensions must be 1. -// If not specified, defaults to -func Conv3DDilations(value []int64) Conv3DAttr { +// value: An second seed to avoid seed collision. +// If not specified, defaults to 0 +func AllCandidateSamplerSeed2(value int64) AllCandidateSamplerAttr { return func(m optionalAttr) { - m["dilations"] = value + m["seed2"] = value } } -// Computes a 3-D convolution given 5-D `input` and `filter` tensors. +// Generates labels for candidate sampling with a learned unigram distribution. // -// In signal processing, cross-correlation is a measure of similarity of -// two waveforms as a function of a time-lag applied to one of them. This -// is also known as a sliding dot product or sliding inner-product. +// See explanations of candidate sampling and the data formats at +// go/candidate-sampling. // -// Our Conv3D implements a form of cross-correlation. +// For each batch, this op picks a single set of sampled candidate labels. +// +// The advantages of sampling candidates per-batch are simplicity and the +// possibility of efficient dense matrix multiplication. The disadvantage is that +// the sampled candidates must be chosen independently of the context and of the +// true labels. // // Arguments: -// input: Shape `[batch, in_depth, in_height, in_width, in_channels]`. -// filter: Shape `[filter_depth, filter_height, filter_width, in_channels, -// out_channels]`. `in_channels` must match between `input` and `filter`. -// strides: 1-D tensor of length 5. The stride of the sliding window for each -// dimension of `input`. Must have `strides[0] = strides[4] = 1`. -// padding: The type of padding algorithm to use. 
-func Conv3D(scope *Scope, input tf.Output, filter tf.Output, strides []int64, padding string, optional ...Conv3DAttr) (output tf.Output) { +// true_classes: A batch_size * num_true matrix, in which each row contains the +// IDs of the num_true target_classes in the corresponding original label. +// num_true: Number of true labels per context. +// num_sampled: Number of candidates to produce. +// unique: If unique is true, we sample with rejection, so that all sampled +// candidates in a batch are unique. This requires some approximation to +// estimate the post-rejection sampling probabilities. +// +// Returns A vector of length num_sampled, in which each element is +// the ID of a sampled candidate.A batch_size * num_true matrix, representing +// the number of times each candidate is expected to occur in a batch +// of sampled candidates. If unique=true, then this is a probability.A vector of length num_sampled, for each sampled +// candidate representing the number of times the candidate is expected +// to occur in a batch of sampled candidates. If unique=true, then this is a +// probability. +func AllCandidateSampler(scope *Scope, true_classes tf.Output, num_true int64, num_sampled int64, unique bool, optional ...AllCandidateSamplerAttr) (sampled_candidates tf.Output, true_expected_count tf.Output, sampled_expected_count tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"strides": strides, "padding": padding} + attrs := map[string]interface{}{"num_true": num_true, "num_sampled": num_sampled, "unique": unique} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "Conv3D", + Type: "AllCandidateSampler", Input: []tf.Input{ - input, filter, + true_classes, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1), op.Output(2) } -// Adds up a SparseTensor and a dense Tensor, using these special rules: +// Saves the input tensors to disk. 
// -// (1) Broadcasts the dense side to have the same shape as the sparse side, if -// eligible; -// (2) Then, only the dense values pointed to by the indices of the SparseTensor -// participate in the cwise addition. +// The size of `tensor_names` must match the number of tensors in `data`. `data[i]` +// is written to `filename` with name `tensor_names[i]`. // -// By these rules, the result is a logical SparseTensor with exactly the same -// indices and shape, but possibly with different non-zero values. The output of -// this Op is the resultant non-zero values. +// See also `SaveSlices`. // // Arguments: -// sp_indices: 2-D. `N x R` matrix with the indices of non-empty values in a -// SparseTensor, possibly not in canonical ordering. -// sp_values: 1-D. `N` non-empty values corresponding to `sp_indices`. -// sp_shape: 1-D. Shape of the input SparseTensor. -// dense: `R`-D. The dense Tensor operand. +// filename: Must have a single element. The name of the file to which we write +// the tensor. +// tensor_names: Shape `[N]`. The names of the tensors to be saved. +// data: `N` tensors to save. // -// Returns 1-D. The `N` values that are operated on. -func SparseDenseCwiseAdd(scope *Scope, sp_indices tf.Output, sp_values tf.Output, sp_shape tf.Output, dense tf.Output) (output tf.Output) { +// Returns the created operation. +func Save(scope *Scope, filename tf.Output, tensor_names tf.Output, data []tf.Output) (o *tf.Operation) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "SparseDenseCwiseAdd", + Type: "Save", Input: []tf.Input{ - sp_indices, sp_values, sp_shape, dense, + filename, tensor_names, tf.OutputList(data), }, } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } -// Read an element from the TensorArray into output `value`. -// -// Arguments: -// handle: The handle to a TensorArray. +// Returns element-wise remainder of division. 
When `x < 0` xor `y < 0` is // -// flow_in: A float scalar that enforces proper chaining of operations. -// dtype: The type of the elem that is returned. +// true, this follows Python semantics in that the result here is consistent +// with a flooring divide. E.g. `floor(x / y) * y + mod(x, y) = x`. // -// Returns The tensor that is read from the TensorArray. -func TensorArrayReadV3(scope *Scope, handle tf.Output, index tf.Output, flow_in tf.Output, dtype tf.DataType) (value tf.Output) { +// *NOTE*: `FloorMod` supports broadcasting. More about broadcasting +// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +func FloorMod(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"dtype": dtype} opspec := tf.OpSpec{ - Type: "TensorArrayReadV3", + Type: "FloorMod", Input: []tf.Input{ - handle, index, flow_in, + x, y, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// QuantizeV2Attr is an optional argument to QuantizeV2. -type QuantizeV2Attr func(optionalAttr) +// SparseTensorDenseMatMulAttr is an optional argument to SparseTensorDenseMatMul. +type SparseTensorDenseMatMulAttr func(optionalAttr) -// QuantizeV2Mode sets the optional mode attribute to value. -// If not specified, defaults to "MIN_COMBINED" -func QuantizeV2Mode(value string) QuantizeV2Attr { +// SparseTensorDenseMatMulAdjointA sets the optional adjoint_a attribute to value. +// +// value: Use the adjoint of A in the matrix multiply. If A is complex, this +// is transpose(conj(A)). Otherwise it's transpose(A). +// If not specified, defaults to false +func SparseTensorDenseMatMulAdjointA(value bool) SparseTensorDenseMatMulAttr { return func(m optionalAttr) { - m["mode"] = value + m["adjoint_a"] = value } } -// QuantizeV2RoundMode sets the optional round_mode attribute to value. 
-// If not specified, defaults to "HALF_AWAY_FROM_ZERO" -func QuantizeV2RoundMode(value string) QuantizeV2Attr { +// SparseTensorDenseMatMulAdjointB sets the optional adjoint_b attribute to value. +// +// value: Use the adjoint of B in the matrix multiply. If B is complex, this +// is transpose(conj(B)). Otherwise it's transpose(B). +// If not specified, defaults to false +func SparseTensorDenseMatMulAdjointB(value bool) SparseTensorDenseMatMulAttr { return func(m optionalAttr) { - m["round_mode"] = value + m["adjoint_b"] = value } } -// Quantize the 'input' tensor of type float to 'output' tensor of type 'T'. -// -// [min_range, max_range] are scalar floats that specify the range for -// the 'input' data. The 'mode' attribute controls exactly which calculations are -// used to convert the float values to their quantized equivalents. The -// 'round_mode' attribute controls which rounding tie-breaking algorithm is used -// when rounding float values to their quantized equivalents. -// -// In 'MIN_COMBINED' mode, each value of the tensor will undergo the following: -// -// ``` -// out[i] = (in[i] - min_range) * range(T) / (max_range - min_range) -// if T == qint8, out[i] -= (range(T) + 1) / 2.0 -// ``` -// here `range(T) = numeric_limits::max() - numeric_limits::min()` -// -// *MIN_COMBINED Mode Example* -// -// Assume the input is type float and has a possible range of [0.0, 6.0] and the -// output type is quint8 ([0, 255]). The min_range and max_range values should be -// specified as 0.0 and 6.0. Quantizing from float to quint8 will multiply each -// value of the input by 255/6 and cast to quint8. -// -// If the output type was qint8 ([-128, 127]), the operation will additionally -// subtract each value by 128 prior to casting, so that the range of values aligns -// with the range of qint8. -// -// If the mode is 'MIN_FIRST', then this approach is used: +// Multiply SparseTensor (of rank 2) "A" by dense matrix "B". 
// -// ``` -// num_discrete_values = 1 << (# of bits in T) -// range_adjust = num_discrete_values / (num_discrete_values - 1) -// range = (range_max - range_min) * range_adjust -// range_scale = num_discrete_values / range -// quantized = round(input * range_scale) - round(range_min * range_scale) + -// numeric_limits::min() -// quantized = max(quantized, numeric_limits::min()) -// quantized = min(quantized, numeric_limits::max()) -// ``` +// No validity checking is performed on the indices of A. However, the following +// input format is recommended for optimal behavior: // -// The biggest difference between this and MIN_COMBINED is that the minimum range -// is rounded first, before it's subtracted from the rounded value. With -// MIN_COMBINED, a small bias is introduced where repeated iterations of quantizing -// and dequantizing will introduce a larger and larger error. +// if adjoint_a == false: +// A should be sorted in lexicographically increasing order. Use SparseReorder +// if you're not sure. +// if adjoint_a == true: +// A should be sorted in order of increasing dimension 1 (i.e., "column major" +// order instead of "row major" order). // -// *SCALED mode Example* +// Arguments: +// a_indices: 2-D. The `indices` of the `SparseTensor`, size `[nnz, 2]` Matrix. +// a_values: 1-D. The `values` of the `SparseTensor`, size `[nnz]` Vector. +// a_shape: 1-D. The `shape` of the `SparseTensor`, size `[2]` Vector. +// b: 2-D. A dense Matrix. 
+func SparseTensorDenseMatMul(scope *Scope, a_indices tf.Output, a_values tf.Output, a_shape tf.Output, b tf.Output, optional ...SparseTensorDenseMatMulAttr) (product tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "SparseTensorDenseMatMul", + Input: []tf.Input{ + a_indices, a_values, a_shape, b, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Deserialize and concatenate `SparseTensors` from a serialized minibatch. // -// `SCALED` mode matches the quantization approach used in -// `QuantizeAndDequantize{V2|V3}`. +// The input `serialized_sparse` must be a string matrix of shape `[N x 3]` where +// `N` is the minibatch size and the rows correspond to packed outputs of +// `SerializeSparse`. The ranks of the original `SparseTensor` objects +// must all match. When the final `SparseTensor` is created, it has rank one +// higher than the ranks of the incoming `SparseTensor` objects +// (they have been concatenated along a new row dimension). // -// If the mode is `SCALED`, we do not use the full range of the output type, -// choosing to elide the lowest possible value for symmetry (e.g., output range is -// -127 to 127, not -128 to 127 for signed 8 bit quantization), so that 0.0 maps to -// 0. +// The output `SparseTensor` object's shape values for all dimensions but the +// first are the max across the input `SparseTensor` objects' shape values +// for the corresponding dimensions. Its first shape value is `N`, the minibatch +// size. // -// We first find the range of values in our tensor. The -// range we use is always centered on 0, so we find m such that -// ```c++ -// m = max(abs(input_min), abs(input_max)) -// ``` +// The input `SparseTensor` objects' indices are assumed ordered in +// standard lexicographic order. If this is not the case, after this +// step run `SparseReorder` to restore index ordering. 
// -// Our input tensor range is then `[-m, m]`. +// For example, if the serialized input is a `[2 x 3]` matrix representing two +// original `SparseTensor` objects: // -// Next, we choose our fixed-point quantization buckets, `[min_fixed, max_fixed]`. -// If T is signed, this is -// ``` -// num_bits = sizeof(T) * 8 -// [min_fixed, max_fixed] = -// [-(1 << (num_bits - 1) - 1), (1 << (num_bits - 1)) - 1] -// ``` +// index = [ 0] +// [10] +// [20] +// values = [1, 2, 3] +// shape = [50] // -// Otherwise, if T is unsigned, the fixed-point range is -// ``` -// [min_fixed, max_fixed] = [0, (1 << num_bits) - 1] -// ``` +// and // -// From this we compute our scaling factor, s: -// ```c++ -// s = (max_fixed - min_fixed) / (2 * m) -// ``` +// index = [ 2] +// [10] +// values = [4, 5] +// shape = [30] // -// Now we can quantize the elements of our tensor: -// ```c++ -// result = round(input * s) -// ``` +// then the final deserialized `SparseTensor` will be: // -// One thing to watch out for is that the operator may choose to adjust the -// requested minimum and maximum values slightly during the quantization process, -// so you should always use the output ports as the range for further calculations. -// For example, if the requested minimum and maximum values are close to equal, -// they will be separated by a small epsilon value to prevent ill-formed quantized -// buffers from being created. Otherwise, you can end up with buffers where all the -// quantized values map to the same float value, which causes problems for -// operations that have to perform further calculations on them. +// index = [0 0] +// [0 10] +// [0 20] +// [1 2] +// [1 10] +// values = [1, 2, 3, 4, 5] +// shape = [2 50] // // Arguments: -// -// min_range: The minimum scalar value possibly produced for the input. -// max_range: The maximum scalar value possibly produced for the input. 
-// -// -// Returns The quantized data produced from the float input.The actual minimum scalar value used for the output.The actual maximum scalar value used for the output. -func QuantizeV2(scope *Scope, input tf.Output, min_range tf.Output, max_range tf.Output, T tf.DataType, optional ...QuantizeV2Attr) (output tf.Output, output_min tf.Output, output_max tf.Output) { +// serialized_sparse: 2-D, The `N` serialized `SparseTensor` objects. +// Must have 3 columns. +// dtype: The `dtype` of the serialized `SparseTensor` objects. +func DeserializeManySparse(scope *Scope, serialized_sparse tf.Output, dtype tf.DataType) (sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"T": T} - for _, a := range optional { - a(attrs) - } + attrs := map[string]interface{}{"dtype": dtype} opspec := tf.OpSpec{ - Type: "QuantizeV2", + Type: "DeserializeManySparse", Input: []tf.Input{ - input, min_range, max_range, + serialized_sparse, }, Attrs: attrs, } @@ -14415,119 +17033,171 @@ func QuantizeV2(scope *Scope, input tf.Output, min_range tf.Output, max_range tf return op.Output(0), op.Output(1), op.Output(2) } -// Returns the truth value of (x < y) element-wise. +// StringJoinAttr is an optional argument to StringJoin. +type StringJoinAttr func(optionalAttr) + +// StringJoinSeparator sets the optional separator attribute to value. // -// *NOTE*: `Less` supports broadcasting. More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func Less(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { +// value: string, an optional join separator. +// If not specified, defaults to "" +func StringJoinSeparator(value string) StringJoinAttr { + return func(m optionalAttr) { + m["separator"] = value + } +} + +// Joins the strings in the given list of string tensors into one tensor; +// +// with the given separator (default is an empty separator). 
+// +// Arguments: +// inputs: A list of string tensors. The tensors must all have the same shape, +// or be scalars. Scalars may be mixed in; these will be broadcast to the shape +// of non-scalar inputs. +func StringJoin(scope *Scope, inputs []tf.Output, optional ...StringJoinAttr) (output tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "Less", + Type: "StringJoin", Input: []tf.Input{ - x, y, + tf.OutputList(inputs), }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// QuantizedReluXAttr is an optional argument to QuantizedReluX. -type QuantizedReluXAttr func(optionalAttr) +// Returns immutable tensor from memory region. +// +// The current implementation memmaps the tensor from a file. +// +// Arguments: +// dtype: Type of the returned tensor. +// shape: Shape of the returned tensor. +// memory_region_name: Name of readonly memory region used by the tensor, see +// NewReadOnlyMemoryRegionFromFile in tensorflow::Env. +func ImmutableConst(scope *Scope, dtype tf.DataType, shape tf.Shape, memory_region_name string) (tensor tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"dtype": dtype, "shape": shape, "memory_region_name": memory_region_name} + opspec := tf.OpSpec{ + Type: "ImmutableConst", -// QuantizedReluXOutType sets the optional out_type attribute to value. -// If not specified, defaults to DT_QUINT8 -func QuantizedReluXOutType(value tf.DataType) QuantizedReluXAttr { - return func(m optionalAttr) { - m["out_type"] = value + Attrs: attrs, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// Computes Quantized Rectified Linear X: `min(max(features, 0), max_value)` +// Inverse real-valued fast Fourier transform. // -// Arguments: +// Computes the inverse 1-dimensional discrete Fourier transform of a real-valued +// signal over the inner-most dimension of `input`. 
+// +// The inner-most dimension of `input` is assumed to be the result of `RFFT`: the +// `fft_length / 2 + 1` unique components of the DFT of a real-valued signal. If +// `fft_length` is not provided, it is computed from the size of the inner-most +// dimension of `input` (`fft_length = 2 * (inner - 1)`). If the FFT length used to +// compute `input` is odd, it should be provided since it cannot be inferred +// properly. // +// Along the axis `IRFFT` is computed on, if `fft_length / 2 + 1` is smaller +// than the corresponding dimension of `input`, the dimension is cropped. If it is +// larger, the dimension is padded with zeros. // -// min_features: The float value that the lowest quantized value represents. -// max_features: The float value that the highest quantized value represents. +// Arguments: +// input: A complex64 tensor. +// fft_length: An int32 tensor of shape [1]. The FFT length. // -// Returns Has the same output shape as "features".The float value that the lowest quantized value represents.The float value that the highest quantized value represents. -func QuantizedReluX(scope *Scope, features tf.Output, max_value tf.Output, min_features tf.Output, max_features tf.Output, optional ...QuantizedReluXAttr) (activations tf.Output, min_activations tf.Output, max_activations tf.Output) { +// Returns A float32 tensor of the same rank as `input`. The inner-most +// dimension of `input` is replaced with the `fft_length` samples of its inverse +// 1D Fourier transform. 
+// +// @compatibility(numpy) +// Equivalent to np.fft.irfft +// @end_compatibility +func IRFFT(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "QuantizedReluX", + Type: "IRFFT", Input: []tf.Input{ - features, max_value, min_features, max_features, + input, fft_length, }, - Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// QuantizedConv2DAttr is an optional argument to QuantizedConv2D. -type QuantizedConv2DAttr func(optionalAttr) - -// QuantizedConv2DOutType sets the optional out_type attribute to value. -// If not specified, defaults to DT_QINT32 -func QuantizedConv2DOutType(value tf.DataType) QuantizedConv2DAttr { - return func(m optionalAttr) { - m["out_type"] = value - } + return op.Output(0) } -// QuantizedConv2DDilations sets the optional dilations attribute to value. +// Concatenates a list of `SparseTensor` along the specified dimension. // -// value: 1-D tensor of length 4. The dilation factor for each dimension of -// `input`. If set to k > 1, there will be k-1 skipped cells between each -// filter element on that dimension. The dimension order is determined by the -// value of `data_format`, see above for details. Dilations in the batch and -// depth dimensions must be 1. -// If not specified, defaults to -func QuantizedConv2DDilations(value []int64) QuantizedConv2DAttr { - return func(m optionalAttr) { - m["dilations"] = value - } -} - -// Computes a 2D convolution given quantized 4D input and filter tensors. +// Concatenation is with respect to the dense versions of these sparse tensors. +// It is assumed that each input is a `SparseTensor` whose elements are ordered +// along increasing dimension number. 
// -// The inputs are quantized tensors where the lowest value represents the real -// number of the associated minimum, and the highest represents the maximum. -// This means that you can only interpret the quantized output in the same way, by -// taking the returned minimum and maximum values into account. +// All inputs' shapes must match, except for the concat dimension. The +// `indices`, `values`, and `shapes` lists must have the same length. // -// Arguments: +// The output shape is identical to the inputs', except along the concat +// dimension, where it is the sum of the inputs' sizes along that dimension. // -// filter: filter's input_depth dimension must match input's depth dimensions. -// min_input: The float value that the lowest quantized input value represents. -// max_input: The float value that the highest quantized input value represents. -// min_filter: The float value that the lowest quantized filter value represents. -// max_filter: The float value that the highest quantized filter value represents. -// strides: The stride of the sliding window for each dimension of the input -// tensor. -// padding: The type of padding algorithm to use. +// The output elements will be resorted to preserve the sort order along +// increasing dimension number. // -// Returns The float value that the lowest quantized output value represents.The float value that the highest quantized output value represents. -func QuantizedConv2D(scope *Scope, input tf.Output, filter tf.Output, min_input tf.Output, max_input tf.Output, min_filter tf.Output, max_filter tf.Output, strides []int64, padding string, optional ...QuantizedConv2DAttr) (output tf.Output, min_output tf.Output, max_output tf.Output) { +// This op runs in `O(M log M)` time, where `M` is the total number of non-empty +// values across all inputs. This is due to the need for an internal sort in +// order to concatenate efficiently across an arbitrary dimension. 
+// +// For example, if `concat_dim = 1` and the inputs are +// +// sp_inputs[0]: shape = [2, 3] +// [0, 2]: "a" +// [1, 0]: "b" +// [1, 1]: "c" +// +// sp_inputs[1]: shape = [2, 4] +// [0, 1]: "d" +// [0, 2]: "e" +// +// then the output will be +// +// shape = [2, 7] +// [0, 2]: "a" +// [0, 4]: "d" +// [0, 5]: "e" +// [1, 0]: "b" +// [1, 1]: "c" +// +// Graphically this is equivalent to doing +// +// [ a] concat [ d e ] = [ a d e ] +// [b c ] [ ] [b c ] +// +// Arguments: +// indices: 2-D. Indices of each input `SparseTensor`. +// values: 1-D. Non-empty values of each `SparseTensor`. +// shapes: 1-D. Shapes of each `SparseTensor`. +// concat_dim: Dimension to concatenate along. Must be in range [-rank, rank), +// where rank is the number of dimensions in each input `SparseTensor`. +// +// Returns 2-D. Indices of the concatenated `SparseTensor`.1-D. Non-empty values of the concatenated `SparseTensor`.1-D. Shape of the concatenated `SparseTensor`. +func SparseConcat(scope *Scope, indices []tf.Output, values []tf.Output, shapes []tf.Output, concat_dim int64) (output_indices tf.Output, output_values tf.Output, output_shape tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"strides": strides, "padding": padding} - for _, a := range optional { - a(attrs) - } + attrs := map[string]interface{}{"concat_dim": concat_dim} opspec := tf.OpSpec{ - Type: "QuantizedConv2D", + Type: "SparseConcat", Input: []tf.Input{ - input, filter, min_input, max_input, min_filter, max_filter, + tf.OutputList(indices), tf.OutputList(values), tf.OutputList(shapes), }, Attrs: attrs, } @@ -14535,567 +17205,482 @@ func QuantizedConv2D(scope *Scope, input tf.Output, filter tf.Output, min_input return op.Output(0), op.Output(1), op.Output(2) } -// ResourceGatherAttr is an optional argument to ResourceGather. -type ResourceGatherAttr func(optionalAttr) - -// ResourceGatherValidateIndices sets the optional validate_indices attribute to value. 
-// If not specified, defaults to true -func ResourceGatherValidateIndices(value bool) ResourceGatherAttr { - return func(m optionalAttr) { - m["validate_indices"] = value - } -} - -// Gather slices from the variable pointed to by `resource` according to `indices`. +// Generates sparse cross from a list of sparse and dense tensors. +// +// The op takes two lists, one of 2D `SparseTensor` and one of 2D `Tensor`, each +// representing features of one feature column. It outputs a 2D `SparseTensor` with +// the batchwise crosses of these features. +// +// For example, if the inputs are +// +// inputs[0]: SparseTensor with shape = [2, 2] +// [0, 0]: "a" +// [1, 0]: "b" +// [1, 1]: "c" +// +// inputs[1]: SparseTensor with shape = [2, 1] +// [0, 0]: "d" +// [1, 0]: "e" +// +// inputs[2]: Tensor [["f"], ["g"]] +// +// then the output will be +// +// shape = [2, 2] +// [0, 0]: "a_X_d_X_f" +// [1, 0]: "b_X_e_X_g" +// [1, 1]: "c_X_e_X_g" +// +// if hashed_output=true then the output will be +// +// shape = [2, 2] +// [0, 0]: FingerprintCat64( +// Fingerprint64("f"), FingerprintCat64( +// Fingerprint64("d"), Fingerprint64("a"))) +// [1, 0]: FingerprintCat64( +// Fingerprint64("g"), FingerprintCat64( +// Fingerprint64("e"), Fingerprint64("b"))) +// [1, 1]: FingerprintCat64( +// Fingerprint64("g"), FingerprintCat64( +// Fingerprint64("e"), Fingerprint64("c"))) // -// `indices` must be an integer tensor of any dimension (usually 0-D or 1-D). -// Produces an output tensor with shape `indices.shape + params.shape[1:]` where: +// Arguments: +// indices: 2-D. Indices of each input `SparseTensor`. +// values: 1-D. values of each `SparseTensor`. +// shapes: 1-D. Shapes of each `SparseTensor`. +// dense_inputs: 2-D. Columns represented by dense `Tensor`. +// hashed_output: If true, returns the hash of the cross instead of the string. +// This will allow us avoiding string manipulations. +// num_buckets: It is used if hashed_output is true. 
+// output = hashed_value%num_buckets if num_buckets > 0 else hashed_value. +// hash_key: Specify the hash_key that will be used by the `FingerprintCat64` +// function to combine the crosses fingerprints. // -// ```python -// # Scalar indices -// output[:, ..., :] = params[indices, :, ... :] // -// # Vector indices -// output[i, :, ..., :] = params[indices[i], :, ... :] // -// # Higher rank indices -// output[i, ..., j, :, ... :] = params[indices[i, ..., j], :, ..., :] -// ``` -func ResourceGather(scope *Scope, resource tf.Output, indices tf.Output, dtype tf.DataType, optional ...ResourceGatherAttr) (output tf.Output) { +// Returns 2-D. Indices of the concatenated `SparseTensor`.1-D. Non-empty values of the concatenated or hashed +// `SparseTensor`.1-D. Shape of the concatenated `SparseTensor`. +func SparseCross(scope *Scope, indices []tf.Output, values []tf.Output, shapes []tf.Output, dense_inputs []tf.Output, hashed_output bool, num_buckets int64, hash_key int64, out_type tf.DataType, internal_type tf.DataType) (output_indices tf.Output, output_values tf.Output, output_shape tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"dtype": dtype} - for _, a := range optional { - a(attrs) - } + attrs := map[string]interface{}{"hashed_output": hashed_output, "num_buckets": num_buckets, "hash_key": hash_key, "out_type": out_type, "internal_type": internal_type} opspec := tf.OpSpec{ - Type: "ResourceGather", + Type: "SparseCross", Input: []tf.Input{ - resource, indices, + tf.OutputList(indices), tf.OutputList(values), tf.OutputList(shapes), tf.OutputList(dense_inputs), }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1), op.Output(2) } -// Delete the TensorArray from its resource container. -// -// This enables the user to close and release the resource in the middle -// of a step/run. +// Concatenates quantized tensors along one dimension. 
// // Arguments: -// handle: The handle to a TensorArray (output of TensorArray or TensorArrayGrad). +// concat_dim: 0-D. The dimension along which to concatenate. Must be in the +// range [0, rank(values)). +// values: The `N` Tensors to concatenate. Their ranks and types must match, +// and their sizes must match in all dimensions except `concat_dim`. +// input_mins: The minimum scalar values for each of the input tensors. +// input_maxes: The maximum scalar values for each of the input tensors. // -// Returns the created operation. -func TensorArrayCloseV3(scope *Scope, handle tf.Output) (o *tf.Operation) { +// Returns A `Tensor` with the concatenation of values stacked along the +// `concat_dim` dimension. This tensor's shape matches that of `values` except +// in `concat_dim` where it has the sum of the sizes.The float value that the minimum quantized output value represents.The float value that the maximum quantized output value represents. +func QuantizedConcat(scope *Scope, concat_dim tf.Output, values []tf.Output, input_mins []tf.Output, input_maxes []tf.Output) (output tf.Output, output_min tf.Output, output_max tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "TensorArrayCloseV3", + Type: "QuantizedConcat", Input: []tf.Input{ - handle, + concat_dim, tf.OutputList(values), tf.OutputList(input_mins), tf.OutputList(input_maxes), }, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2) } -// Adds two `SparseTensor` objects to produce another `SparseTensor`. +// Slice a `SparseTensor` based on the `start` and `size`. // -// The input `SparseTensor` objects' indices are assumed ordered in standard -// lexicographic order. If this is not the case, before this step run -// `SparseReorder` to restore index ordering. 
+// For example, if the input is // -// By default, if two values sum to zero at some index, the output `SparseTensor` -// would still include that particular location in its index, storing a zero in the -// corresponding value slot. To override this, callers can specify `thresh`, -// indicating that if the sum has a magnitude strictly smaller than `thresh`, its -// corresponding value and index would then not be included. In particular, -// `thresh == 0` (default) means everything is kept and actual thresholding happens -// only for a positive value. +// input_tensor = shape = [2, 7] +// [ a d e ] +// [b c ] // -// In the following shapes, `nnz` is the count after taking `thresh` into account. +// Graphically the output tensors are: +// +// sparse_slice([0, 0], [2, 4]) = shape = [2, 4] +// [ a ] +// [b c ] +// +// sparse_slice([0, 4], [2, 3]) = shape = [2, 3] +// [ d e ] +// [ ] // // Arguments: -// a_indices: 2-D. The `indices` of the first `SparseTensor`, size `[nnz, ndims]` Matrix. -// a_values: 1-D. The `values` of the first `SparseTensor`, size `[nnz]` Vector. -// a_shape: 1-D. The `shape` of the first `SparseTensor`, size `[ndims]` Vector. -// b_indices: 2-D. The `indices` of the second `SparseTensor`, size `[nnz, ndims]` Matrix. -// b_values: 1-D. The `values` of the second `SparseTensor`, size `[nnz]` Vector. -// b_shape: 1-D. The `shape` of the second `SparseTensor`, size `[ndims]` Vector. -// thresh: 0-D. The magnitude threshold that determines if an output value/index -// pair takes space. -func SparseAdd(scope *Scope, a_indices tf.Output, a_values tf.Output, a_shape tf.Output, b_indices tf.Output, b_values tf.Output, b_shape tf.Output, thresh tf.Output) (sum_indices tf.Output, sum_values tf.Output, sum_shape tf.Output) { +// indices: 2-D tensor represents the indices of the sparse tensor. +// values: 1-D tensor represents the values of the sparse tensor. +// shape: 1-D. tensor represents the shape of the sparse tensor. +// start: 1-D. 
tensor represents the start of the slice. +// size: 1-D. tensor represents the size of the slice. +// output indices: A list of 1-D tensors represents the indices of the output +// sparse tensors. +// +// Returns A list of 1-D tensors represents the values of the output sparse +// tensors.A list of 1-D tensors represents the shape of the output sparse +// tensors. +func SparseSlice(scope *Scope, indices tf.Output, values tf.Output, shape tf.Output, start tf.Output, size tf.Output) (output_indices tf.Output, output_values tf.Output, output_shape tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "SparseAdd", + Type: "SparseSlice", Input: []tf.Input{ - a_indices, a_values, a_shape, b_indices, b_values, b_shape, thresh, + indices, values, shape, start, size, }, } op := scope.AddOperation(opspec) return op.Output(0), op.Output(1), op.Output(2) } -// OrderedMapPeekAttr is an optional argument to OrderedMapPeek. -type OrderedMapPeekAttr func(optionalAttr) - -// OrderedMapPeekCapacity sets the optional capacity attribute to value. -// If not specified, defaults to 0 +// Returns the element-wise min of two SparseTensors. // -// REQUIRES: value >= 0 -func OrderedMapPeekCapacity(value int64) OrderedMapPeekAttr { - return func(m optionalAttr) { - m["capacity"] = value - } -} - -// OrderedMapPeekMemoryLimit sets the optional memory_limit attribute to value. -// If not specified, defaults to 0 +// Assumes the two SparseTensors have the same shape, i.e., no broadcasting. // -// REQUIRES: value >= 0 -func OrderedMapPeekMemoryLimit(value int64) OrderedMapPeekAttr { - return func(m optionalAttr) { - m["memory_limit"] = value - } -} - -// OrderedMapPeekContainer sets the optional container attribute to value. -// If not specified, defaults to "" -func OrderedMapPeekContainer(value string) OrderedMapPeekAttr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// OrderedMapPeekSharedName sets the optional shared_name attribute to value. 
-// If not specified, defaults to "" -func OrderedMapPeekSharedName(value string) OrderedMapPeekAttr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// Op peeks at the values at the specified key. If the +// Arguments: +// a_indices: 2-D. `N x R` matrix with the indices of non-empty values in a +// SparseTensor, in the canonical lexicographic ordering. +// a_values: 1-D. `N` non-empty values corresponding to `a_indices`. +// a_shape: 1-D. Shape of the input SparseTensor. +// b_indices: counterpart to `a_indices` for the other operand. +// b_values: counterpart to `a_values` for the other operand; must be of the same dtype. +// b_shape: counterpart to `a_shape` for the other operand; the two shapes must be equal. // -// underlying container does not contain this key -// this op will block until it does. This Op is optimized for -// performance. -func OrderedMapPeek(scope *Scope, key tf.Output, indices tf.Output, dtypes []tf.DataType, optional ...OrderedMapPeekAttr) (values []tf.Output) { +// Returns 2-D. The indices of the output SparseTensor.1-D. The values of the output SparseTensor. +func SparseSparseMinimum(scope *Scope, a_indices tf.Output, a_values tf.Output, a_shape tf.Output, b_indices tf.Output, b_values tf.Output, b_shape tf.Output) (output_indices tf.Output, output_values tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"dtypes": dtypes} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "OrderedMapPeek", + Type: "SparseSparseMinimum", Input: []tf.Input{ - key, indices, + a_indices, a_values, a_shape, b_indices, b_values, b_shape, }, - Attrs: attrs, } op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if values, idx, err = makeOutputList(op, idx, "values"); err != nil { - scope.UpdateErr("OrderedMapPeek", err) - return - } - return values -} - -// DecodeAndCropJpegAttr is an optional argument to DecodeAndCropJpeg. 
-type DecodeAndCropJpegAttr func(optionalAttr) - -// DecodeAndCropJpegChannels sets the optional channels attribute to value. -// -// value: Number of color channels for the decoded image. -// If not specified, defaults to 0 -func DecodeAndCropJpegChannels(value int64) DecodeAndCropJpegAttr { - return func(m optionalAttr) { - m["channels"] = value - } -} - -// DecodeAndCropJpegRatio sets the optional ratio attribute to value. -// -// value: Downscaling ratio. -// If not specified, defaults to 1 -func DecodeAndCropJpegRatio(value int64) DecodeAndCropJpegAttr { - return func(m optionalAttr) { - m["ratio"] = value - } -} - -// DecodeAndCropJpegFancyUpscaling sets the optional fancy_upscaling attribute to value. -// -// value: If true use a slower but nicer upscaling of the -// chroma planes (yuv420/422 only). -// If not specified, defaults to true -func DecodeAndCropJpegFancyUpscaling(value bool) DecodeAndCropJpegAttr { - return func(m optionalAttr) { - m["fancy_upscaling"] = value - } + return op.Output(0), op.Output(1) } -// DecodeAndCropJpegTryRecoverTruncated sets the optional try_recover_truncated attribute to value. -// -// value: If true try to recover an image from truncated input. -// If not specified, defaults to false -func DecodeAndCropJpegTryRecoverTruncated(value bool) DecodeAndCropJpegAttr { - return func(m optionalAttr) { - m["try_recover_truncated"] = value - } -} +// TakeManySparseFromTensorsMapAttr is an optional argument to TakeManySparseFromTensorsMap. +type TakeManySparseFromTensorsMapAttr func(optionalAttr) -// DecodeAndCropJpegAcceptableFraction sets the optional acceptable_fraction attribute to value. +// TakeManySparseFromTensorsMapContainer sets the optional container attribute to value. // -// value: The minimum required fraction of lines before a truncated -// input is accepted. 
-// If not specified, defaults to 1 -func DecodeAndCropJpegAcceptableFraction(value float32) DecodeAndCropJpegAttr { +// value: The container name for the `SparseTensorsMap` read by this op. +// If not specified, defaults to "" +func TakeManySparseFromTensorsMapContainer(value string) TakeManySparseFromTensorsMapAttr { return func(m optionalAttr) { - m["acceptable_fraction"] = value + m["container"] = value } } -// DecodeAndCropJpegDctMethod sets the optional dct_method attribute to value. +// TakeManySparseFromTensorsMapSharedName sets the optional shared_name attribute to value. // -// value: string specifying a hint about the algorithm used for -// decompression. Defaults to "" which maps to a system-specific -// default. Currently valid values are ["INTEGER_FAST", -// "INTEGER_ACCURATE"]. The hint may be ignored (e.g., the internal -// jpeg library changes to a version that does not have that specific -// option.) +// value: The shared name for the `SparseTensorsMap` read by this op. +// It should not be blank; rather the `shared_name` or unique Operation name +// of the Op that created the original `SparseTensorsMap` should be used. // If not specified, defaults to "" -func DecodeAndCropJpegDctMethod(value string) DecodeAndCropJpegAttr { +func TakeManySparseFromTensorsMapSharedName(value string) TakeManySparseFromTensorsMapAttr { return func(m optionalAttr) { - m["dct_method"] = value + m["shared_name"] = value } } -// Decode and Crop a JPEG-encoded image to a uint8 tensor. +// Read `SparseTensors` from a `SparseTensorsMap` and concatenate them. // -// The attr `channels` indicates the desired number of color channels for the -// decoded image. +// The input `sparse_handles` must be an `int64` matrix of shape `[N, 1]` where +// `N` is the minibatch size and the rows correspond to the output handles of +// `AddSparseToTensorsMap` or `AddManySparseToTensorsMap`. 
The ranks of the +// original `SparseTensor` objects that went into the given input ops must all +// match. When the final `SparseTensor` is created, it has rank one +// higher than the ranks of the incoming `SparseTensor` objects +// (they have been concatenated along a new row dimension on the left). // -// Accepted values are: +// The output `SparseTensor` object's shape values for all dimensions but the +// first are the max across the input `SparseTensor` objects' shape values +// for the corresponding dimensions. Its first shape value is `N`, the minibatch +// size. // -// * 0: Use the number of channels in the JPEG-encoded image. -// * 1: output a grayscale image. -// * 3: output an RGB image. +// The input `SparseTensor` objects' indices are assumed ordered in +// standard lexicographic order. If this is not the case, after this +// step run `SparseReorder` to restore index ordering. // -// If needed, the JPEG-encoded image is transformed to match the requested number -// of color channels. +// For example, if the handles represent an input, which is a `[2, 3]` matrix +// representing two original `SparseTensor` objects: // -// The attr `ratio` allows downscaling the image by an integer factor during -// decoding. Allowed values are: 1, 2, 4, and 8. This is much faster than -// downscaling the image later. +// ``` +// index = [ 0] +// [10] +// [20] +// values = [1, 2, 3] +// shape = [50] +// ``` // +// and // -// It is equivalent to a combination of decode and crop, but much faster by only -// decoding partial jpeg image. +// ``` +// index = [ 2] +// [10] +// values = [4, 5] +// shape = [30] +// ``` +// +// then the final `SparseTensor` will be: +// +// ``` +// index = [0 0] +// [0 10] +// [0 20] +// [1 2] +// [1 10] +// values = [1, 2, 3, 4, 5] +// shape = [2 50] +// ``` // // Arguments: -// contents: 0-D. The JPEG-encoded image. -// crop_window: 1-D. The crop window: [crop_y, crop_x, crop_height, crop_width]. 
+// sparse_handles: 1-D, The `N` serialized `SparseTensor` objects. +// Shape: `[N]`. +// dtype: The `dtype` of the `SparseTensor` objects stored in the +// `SparseTensorsMap`. // -// Returns 3-D with shape `[height, width, channels]`.. -func DecodeAndCropJpeg(scope *Scope, contents tf.Output, crop_window tf.Output, optional ...DecodeAndCropJpegAttr) (image tf.Output) { +// Returns 2-D. The `indices` of the minibatch `SparseTensor`.1-D. The `values` of the minibatch `SparseTensor`.1-D. The `shape` of the minibatch `SparseTensor`. +func TakeManySparseFromTensorsMap(scope *Scope, sparse_handles tf.Output, dtype tf.DataType, optional ...TakeManySparseFromTensorsMapAttr) (sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"dtype": dtype} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "DecodeAndCropJpeg", + Type: "TakeManySparseFromTensorsMap", Input: []tf.Input{ - contents, crop_window, + sparse_handles, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1), op.Output(2) } -// AllCandidateSamplerAttr is an optional argument to AllCandidateSampler. -type AllCandidateSamplerAttr func(optionalAttr) +// MaxPoolAttr is an optional argument to MaxPool. +type MaxPoolAttr func(optionalAttr) -// AllCandidateSamplerSeed sets the optional seed attribute to value. +// MaxPoolDataFormat sets the optional data_format attribute to value. // -// value: If either seed or seed2 are set to be non-zero, the random number -// generator is seeded by the given seed. Otherwise, it is seeded by a -// random seed. -// If not specified, defaults to 0 -func AllCandidateSamplerSeed(value int64) AllCandidateSamplerAttr { +// value: Specify the data format of the input and output data. 
With the +// default format "NHWC", the data is stored in the order of: +// [batch, in_height, in_width, in_channels]. +// Alternatively, the format could be "NCHW", the data storage order of: +// [batch, in_channels, in_height, in_width]. +// If not specified, defaults to "NHWC" +func MaxPoolDataFormat(value string) MaxPoolAttr { return func(m optionalAttr) { - m["seed"] = value + m["data_format"] = value } } -// AllCandidateSamplerSeed2 sets the optional seed2 attribute to value. +// Performs max pooling on the input. // -// value: An second seed to avoid seed collision. -// If not specified, defaults to 0 -func AllCandidateSamplerSeed2(value int64) AllCandidateSamplerAttr { - return func(m optionalAttr) { - m["seed2"] = value +// Arguments: +// input: 4-D input to pool over. +// ksize: The size of the window for each dimension of the input tensor. +// strides: The stride of the sliding window for each dimension of the +// input tensor. +// padding: The type of padding algorithm to use. +// +// Returns The max pooled output tensor. +func MaxPool(scope *Scope, input tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPoolAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} + for _, a := range optional { + a(attrs) } + opspec := tf.OpSpec{ + Type: "MaxPool", + Input: []tf.Input{ + input, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) } -// Generates labels for candidate sampling with a learned unigram distribution. +// Says whether the targets are in the top `K` predictions. // -// See explanations of candidate sampling and the data formats at -// go/candidate-sampling. +// This outputs a `batch_size` bool array, an entry `out[i]` is `true` if the +// prediction for the target class is among the top `k` predictions among +// all predictions for example `i`. 
Note that the behavior of `InTopK` differs +// from the `TopK` op in its handling of ties; if multiple classes have the +// same prediction value and straddle the top-`k` boundary, all of those +// classes are considered to be in the top `k`. // -// For each batch, this op picks a single set of sampled candidate labels. +// More formally, let // -// The advantages of sampling candidates per-batch are simplicity and the -// possibility of efficient dense matrix multiplication. The disadvantage is that -// the sampled candidates must be chosen independently of the context and of the -// true labels. +// \\(predictions_i\\) be the predictions for all classes for example `i`, +// \\(targets_i\\) be the target class for example `i`, +// \\(out_i\\) be the output for example `i`, +// +// $$out_i = predictions_{i, targets_i} \in TopKIncludingTies(predictions_i)$$ // // Arguments: -// true_classes: A batch_size * num_true matrix, in which each row contains the -// IDs of the num_true target_classes in the corresponding original label. -// num_true: Number of true labels per context. -// num_sampled: Number of candidates to produce. -// unique: If unique is true, we sample with rejection, so that all sampled -// candidates in a batch are unique. This requires some approximation to -// estimate the post-rejection sampling probabilities. +// predictions: A `batch_size` x `classes` tensor. +// targets: A `batch_size` vector of class ids. +// k: Number of top elements to look at for computing precision. // -// Returns A vector of length num_sampled, in which each element is -// the ID of a sampled candidate.A batch_size * num_true matrix, representing -// the number of times each candidate is expected to occur in a batch -// of sampled candidates. If unique=true, then this is a probability.A vector of length num_sampled, for each sampled -// candidate representing the number of times the candidate is expected -// to occur in a batch of sampled candidates. 
If unique=true, then this is a -// probability. -func AllCandidateSampler(scope *Scope, true_classes tf.Output, num_true int64, num_sampled int64, unique bool, optional ...AllCandidateSamplerAttr) (sampled_candidates tf.Output, true_expected_count tf.Output, sampled_expected_count tf.Output) { +// Returns Computed precision at `k` as a `bool Tensor`. +func InTopKV2(scope *Scope, predictions tf.Output, targets tf.Output, k tf.Output) (precision tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"num_true": num_true, "num_sampled": num_sampled, "unique": unique} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "AllCandidateSampler", + Type: "InTopKV2", Input: []tf.Input{ - true_classes, + predictions, targets, k, }, - Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return op.Output(0) } -// Saves the input tensors to disk. -// -// The size of `tensor_names` must match the number of tensors in `data`. `data[i]` -// is written to `filename` with name `tensor_names[i]`. +// Assigns a new value to a variable. // -// See also `SaveSlices`. +// Any ReadVariableOp with a control dependency on this op is guaranteed to return +// this value or a subsequent newer value of the variable. // // Arguments: -// filename: Must have a single element. The name of the file to which we write -// the tensor. -// tensor_names: Shape `[N]`. The names of the tensors to be saved. -// data: `N` tensors to save. +// resource: handle to the resource in which to store the variable. +// value: the value to set the new tensor to use. // // Returns the created operation. 
-func Save(scope *Scope, filename tf.Output, tensor_names tf.Output, data []tf.Output) (o *tf.Operation) { +func AssignVariableOp(scope *Scope, resource tf.Output, value tf.Output) (o *tf.Operation) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Save", + Type: "AssignVariableOp", Input: []tf.Input{ - filename, tensor_names, tf.OutputList(data), + resource, value, }, } return scope.AddOperation(opspec) } -// Returns element-wise remainder of division. When `x < 0` xor `y < 0` is +// Returns a tensor of ones with the same shape and type as x. // -// true, this follows Python semantics in that the result here is consistent -// with a flooring divide. E.g. `floor(x / y) * y + mod(x, y) = x`. +// Arguments: +// x: a tensor of type T. // -// *NOTE*: `FloorMod` supports broadcasting. More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func FloorMod(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { +// Returns a tensor of the same shape and type as x but filled with ones. +func OnesLike(scope *Scope, x tf.Output) (y tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "FloorMod", + Type: "OnesLike", Input: []tf.Input{ - x, y, + x, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// SparseTensorDenseMatMulAttr is an optional argument to SparseTensorDenseMatMul. -type SparseTensorDenseMatMulAttr func(optionalAttr) - -// SparseTensorDenseMatMulAdjointA sets the optional adjoint_a attribute to value. -// -// value: Use the adjoint of A in the matrix multiply. If A is complex, this -// is transpose(conj(A)). Otherwise it's transpose(A). -// If not specified, defaults to false -func SparseTensorDenseMatMulAdjointA(value bool) SparseTensorDenseMatMulAttr { - return func(m optionalAttr) { - m["adjoint_a"] = value - } -} - -// SparseTensorDenseMatMulAdjointB sets the optional adjoint_b attribute to value. -// -// value: Use the adjoint of B in the matrix multiply. 
If B is complex, this -// is transpose(conj(B)). Otherwise it's transpose(B). -// If not specified, defaults to false -func SparseTensorDenseMatMulAdjointB(value bool) SparseTensorDenseMatMulAttr { - return func(m optionalAttr) { - m["adjoint_b"] = value - } -} - -// Multiply SparseTensor (of rank 2) "A" by dense matrix "B". -// -// No validity checking is performed on the indices of A. However, the following -// input format is recommended for optimal behavior: +// The gradient of SparseFillEmptyRows. // -// if adjoint_a == false: -// A should be sorted in lexicographically increasing order. Use SparseReorder -// if you're not sure. -// if adjoint_a == true: -// A should be sorted in order of increasing dimension 1 (i.e., "column major" -// order instead of "row major" order). +// Takes vectors reverse_index_map, shaped `[N]`, and grad_values, +// shaped `[N_full]`, where `N_full >= N` and copies data into either +// `d_values` or `d_default_value`. Here `d_values` is shaped `[N]` and +// `d_default_value` is a scalar. +// +// d_values[j] = grad_values[reverse_index_map[j]] +// d_default_value = sum_{k : 0 .. N_full - 1} ( +// grad_values[k] * 1{k not in reverse_index_map}) // // Arguments: -// a_indices: 2-D. The `indices` of the `SparseTensor`, size `[nnz, 2]` Matrix. -// a_values: 1-D. The `values` of the `SparseTensor`, size `[nnz]` Vector. -// a_shape: 1-D. The `shape` of the `SparseTensor`, size `[2]` Vector. -// b: 2-D. A dense Matrix. -func SparseTensorDenseMatMul(scope *Scope, a_indices tf.Output, a_values tf.Output, a_shape tf.Output, b tf.Output, optional ...SparseTensorDenseMatMulAttr) (product tf.Output) { +// reverse_index_map: 1-D. The reverse index map from SparseFillEmptyRows. +// grad_values: 1-D. The gradients from backprop. +// +// Returns 1-D. The backprop into values.0-D. The backprop into default_value. 
+func SparseFillEmptyRowsGrad(scope *Scope, reverse_index_map tf.Output, grad_values tf.Output) (d_values tf.Output, d_default_value tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "SparseTensorDenseMatMul", + Type: "SparseFillEmptyRowsGrad", Input: []tf.Input{ - a_indices, a_values, a_shape, b, + reverse_index_map, grad_values, }, - Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1) } -// Deserialize and concatenate `SparseTensors` from a serialized minibatch. -// -// The input `serialized_sparse` must be a string matrix of shape `[N x 3]` where -// `N` is the minibatch size and the rows correspond to packed outputs of -// `SerializeSparse`. The ranks of the original `SparseTensor` objects -// must all match. When the final `SparseTensor` is created, it has rank one -// higher than the ranks of the incoming `SparseTensor` objects -// (they have been concatenated along a new row dimension). -// -// The output `SparseTensor` object's shape values for all dimensions but the -// first are the max across the input `SparseTensor` objects' shape values -// for the corresponding dimensions. Its first shape value is `N`, the minibatch -// size. -// -// The input `SparseTensor` objects' indices are assumed ordered in -// standard lexicographic order. If this is not the case, after this -// step run `SparseReorder` to restore index ordering. 
-// -// For example, if the serialized input is a `[2 x 3]` matrix representing two -// original `SparseTensor` objects: -// -// index = [ 0] -// [10] -// [20] -// values = [1, 2, 3] -// shape = [50] -// -// and -// -// index = [ 2] -// [10] -// values = [4, 5] -// shape = [30] -// -// then the final deserialized `SparseTensor` will be: +// Computes scaled exponential linear: `scale * alpha * (exp(features) - 1)` // -// index = [0 0] -// [0 10] -// [0 20] -// [1 2] -// [1 10] -// values = [1, 2, 3, 4, 5] -// shape = [2 50] +// if < 0, `scale * features` otherwise. // -// Arguments: -// serialized_sparse: 2-D, The `N` serialized `SparseTensor` objects. -// Must have 3 columns. -// dtype: The `dtype` of the serialized `SparseTensor` objects. -func DeserializeManySparse(scope *Scope, serialized_sparse tf.Output, dtype tf.DataType) (sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output) { +// See [Self-Normalizing Neural Networks](https://arxiv.org/abs/1706.02515) +func Selu(scope *Scope, features tf.Output) (activations tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"dtype": dtype} opspec := tf.OpSpec{ - Type: "DeserializeManySparse", + Type: "Selu", Input: []tf.Input{ - serialized_sparse, + features, }, - Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return op.Output(0) } -// StringJoinAttr is an optional argument to StringJoin. -type StringJoinAttr func(optionalAttr) +// SetSizeAttr is an optional argument to SetSize. +type SetSizeAttr func(optionalAttr) -// StringJoinSeparator sets the optional separator attribute to value. -// -// value: string, an optional join separator. -// If not specified, defaults to "" -func StringJoinSeparator(value string) StringJoinAttr { +// SetSizeValidateIndices sets the optional validate_indices attribute to value. 
+// If not specified, defaults to true +func SetSizeValidateIndices(value bool) SetSizeAttr { return func(m optionalAttr) { - m["separator"] = value + m["validate_indices"] = value } } -// Joins the strings in the given list of string tensors into one tensor; +// Number of unique elements along last dimension of input `set`. // -// with the given separator (default is an empty separator). +// Input `set` is a `SparseTensor` represented by `set_indices`, `set_values`, +// and `set_shape`. The last dimension contains values in a set, duplicates are +// allowed but ignored. +// +// If `validate_indices` is `True`, this op validates the order and range of `set` +// indices. // // Arguments: -// inputs: A list of string tensors. The tensors must all have the same shape, -// or be scalars. Scalars may be mixed in; these will be broadcast to the shape -// of non-scalar inputs. -func StringJoin(scope *Scope, inputs []tf.Output, optional ...StringJoinAttr) (output tf.Output) { +// set_indices: 2D `Tensor`, indices of a `SparseTensor`. +// set_values: 1D `Tensor`, values of a `SparseTensor`. +// set_shape: 1D `Tensor`, shape of a `SparseTensor`. +// +// Returns For `set` ranked `n`, this is a `Tensor` with rank `n-1`, and the same 1st +// `n-1` dimensions as `set`. Each value is the number of unique elements in +// the corresponding `[0...n-1]` dimension of `set`. 
+func SetSize(scope *Scope, set_indices tf.Output, set_values tf.Output, set_shape tf.Output, optional ...SetSizeAttr) (size tf.Output) { if scope.Err() != nil { return } @@ -15104,9 +17689,9 @@ func StringJoin(scope *Scope, inputs []tf.Output, optional ...StringJoinAttr) (o a(attrs) } opspec := tf.OpSpec{ - Type: "StringJoin", + Type: "SetSize", Input: []tf.Input{ - tf.OutputList(inputs), + set_indices, set_values, set_shape, }, Attrs: attrs, } @@ -15114,385 +17699,299 @@ func StringJoin(scope *Scope, inputs []tf.Output, optional ...StringJoinAttr) (o return op.Output(0) } -// Returns immutable tensor from memory region. -// -// The current implementation memmaps the tensor from a file. -// -// Arguments: -// dtype: Type of the returned tensor. -// shape: Shape of the returned tensor. -// memory_region_name: Name of readonly memory region used by the tensor, see -// NewReadOnlyMemoryRegionFromFile in tensorflow::Env. -func ImmutableConst(scope *Scope, dtype tf.DataType, shape tf.Shape, memory_region_name string) (tensor tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtype": dtype, "shape": shape, "memory_region_name": memory_region_name} - opspec := tf.OpSpec{ - Type: "ImmutableConst", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Inverse real-valued fast Fourier transform. -// -// Computes the inverse 1-dimensional discrete Fourier transform of a real-valued -// signal over the inner-most dimension of `input`. +// Computes the sign and the log of the absolute value of the determinant of // -// The inner-most dimension of `input` is assumed to be the result of `RFFT`: the -// `fft_length / 2 + 1` unique components of the DFT of a real-valued signal. If -// `fft_length` is not provided, it is computed from the size of the inner-most -// dimension of `input` (`fft_length = 2 * (inner - 1)`). 
If the FFT length used to -// compute `input` is odd, it should be provided since it cannot be inferred -// properly. +// one or more square matrices. // -// Along the axis `IRFFT` is computed on, if `fft_length / 2 + 1` is smaller -// than the corresponding dimension of `input`, the dimension is cropped. If it is -// larger, the dimension is padded with zeros. +// The input is a tensor of shape `[N, M, M]` whose inner-most 2 dimensions +// form square matrices. The outputs are two tensors containing the signs and +// absolute values of the log determinants for all N input submatrices +// `[..., :, :]` such that the determinant = sign*exp(log_abs_determinant). +// The log_abs_determinant is computed as det(P)*sum(log(diag(LU))) where LU +// is the LU decomposition of the input and P is the corresponding +// permutation matrix. // // Arguments: -// input: A complex64 tensor. -// fft_length: An int32 tensor of shape [1]. The FFT length. -// -// Returns A float32 tensor of the same rank as `input`. The inner-most -// dimension of `input` is replaced with the `fft_length` samples of its inverse -// 1D Fourier transform. +// input: Shape is `[N, M, M]`. // -// @compatibility(numpy) -// Equivalent to np.fft.irfft -// @end_compatibility -func IRFFT(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Output) { +// Returns The signs of the log determinants of the inputs. Shape is `[N]`.The logs of the absolute values of the determinants +// of the N input matrices. Shape is `[N]`. +func LogMatrixDeterminant(scope *Scope, input tf.Output) (sign tf.Output, log_abs_determinant tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "IRFFT", + Type: "LogMatrixDeterminant", Input: []tf.Input{ - input, fft_length, + input, }, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1) } -// Concatenates a list of `SparseTensor` along the specified dimension. 
-// -// Concatenation is with respect to the dense versions of these sparse tensors. -// It is assumed that each input is a `SparseTensor` whose elements are ordered -// along increasing dimension number. -// -// All inputs' shapes must match, except for the concat dimension. The -// `indices`, `values`, and `shapes` lists must have the same length. -// -// The output shape is identical to the inputs', except along the concat -// dimension, where it is the sum of the inputs' sizes along that dimension. -// -// The output elements will be resorted to preserve the sort order along -// increasing dimension number. -// -// This op runs in `O(M log M)` time, where `M` is the total number of non-empty -// values across all inputs. This is due to the need for an internal sort in -// order to concatenate efficiently across an arbitrary dimension. -// -// For example, if `concat_dim = 1` and the inputs are -// -// sp_inputs[0]: shape = [2, 3] -// [0, 2]: "a" -// [1, 0]: "b" -// [1, 1]: "c" -// -// sp_inputs[1]: shape = [2, 4] -// [0, 1]: "d" -// [0, 2]: "e" -// -// then the output will be -// -// shape = [2, 7] -// [0, 2]: "a" -// [0, 4]: "d" -// [0, 5]: "e" -// [1, 0]: "b" -// [1, 1]: "c" +// SumAttr is an optional argument to Sum. +type SumAttr func(optionalAttr) + +// SumKeepDims sets the optional keep_dims attribute to value. // -// Graphically this is equivalent to doing +// value: If true, retain reduced dimensions with length 1. +// If not specified, defaults to false +func SumKeepDims(value bool) SumAttr { + return func(m optionalAttr) { + m["keep_dims"] = value + } +} + +// Computes the sum of elements across dimensions of a tensor. // -// [ a] concat [ d e ] = [ a d e ] -// [b c ] [ ] [b c ] +// Reduces `input` along the dimensions given in `axis`. Unless +// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in +// `axis`. If `keep_dims` is true, the reduced dimensions are +// retained with length 1. // // Arguments: -// indices: 2-D. 
Indices of each input `SparseTensor`. -// values: 1-D. Non-empty values of each `SparseTensor`. -// shapes: 1-D. Shapes of each `SparseTensor`. -// concat_dim: Dimension to concatenate along. Must be in range [-rank, rank), -// where rank is the number of dimensions in each input `SparseTensor`. +// input: The tensor to reduce. +// axis: The dimensions to reduce. Must be in the range +// `[-rank(input), rank(input))`. // -// Returns 2-D. Indices of the concatenated `SparseTensor`.1-D. Non-empty values of the concatenated `SparseTensor`.1-D. Shape of the concatenated `SparseTensor`. -func SparseConcat(scope *Scope, indices []tf.Output, values []tf.Output, shapes []tf.Output, concat_dim int64) (output_indices tf.Output, output_values tf.Output, output_shape tf.Output) { +// Returns The reduced tensor. +func Sum(scope *Scope, input tf.Output, axis tf.Output, optional ...SumAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"concat_dim": concat_dim} + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "SparseConcat", + Type: "Sum", Input: []tf.Input{ - tf.OutputList(indices), tf.OutputList(values), tf.OutputList(shapes), + input, axis, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return op.Output(0) } -// Generates sparse cross from a list of sparse and dense tensors. -// -// The op takes two lists, one of 2D `SparseTensor` and one of 2D `Tensor`, each -// representing features of one feature column. It outputs a 2D `SparseTensor` with -// the batchwise crosses of these features. 
-// -// For example, if the inputs are -// -// inputs[0]: SparseTensor with shape = [2, 2] -// [0, 0]: "a" -// [1, 0]: "b" -// [1, 1]: "c" -// -// inputs[1]: SparseTensor with shape = [2, 1] -// [0, 0]: "d" -// [1, 0]: "e" -// -// inputs[2]: Tensor [["f"], ["g"]] -// -// then the output will be -// -// shape = [2, 2] -// [0, 0]: "a_X_d_X_f" -// [1, 0]: "b_X_e_X_g" -// [1, 1]: "c_X_e_X_g" -// -// if hashed_output=true then the output will be -// -// shape = [2, 2] -// [0, 0]: FingerprintCat64( -// Fingerprint64("f"), FingerprintCat64( -// Fingerprint64("d"), Fingerprint64("a"))) -// [1, 0]: FingerprintCat64( -// Fingerprint64("g"), FingerprintCat64( -// Fingerprint64("e"), Fingerprint64("b"))) -// [1, 1]: FingerprintCat64( -// Fingerprint64("g"), FingerprintCat64( -// Fingerprint64("e"), Fingerprint64("c"))) +// Delete the tensor specified by its handle in the session. // // Arguments: -// indices: 2-D. Indices of each input `SparseTensor`. -// values: 1-D. values of each `SparseTensor`. -// shapes: 1-D. Shapes of each `SparseTensor`. -// dense_inputs: 2-D. Columns represented by dense `Tensor`. -// hashed_output: If true, returns the hash of the cross instead of the string. -// This will allow us avoiding string manipulations. -// num_buckets: It is used if hashed_output is true. -// output = hashed_value%num_buckets if num_buckets > 0 else hashed_value. -// hash_key: Specify the hash_key that will be used by the `FingerprintCat64` -// function to combine the crosses fingerprints. -// -// +// handle: The handle for a tensor stored in the session state. // -// Returns 2-D. Indices of the concatenated `SparseTensor`.1-D. Non-empty values of the concatenated or hashed -// `SparseTensor`.1-D. Shape of the concatenated `SparseTensor`. 
-func SparseCross(scope *Scope, indices []tf.Output, values []tf.Output, shapes []tf.Output, dense_inputs []tf.Output, hashed_output bool, num_buckets int64, hash_key int64, out_type tf.DataType, internal_type tf.DataType) (output_indices tf.Output, output_values tf.Output, output_shape tf.Output) { +// Returns the created operation. +func DeleteSessionTensor(scope *Scope, handle tf.Output) (o *tf.Operation) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"hashed_output": hashed_output, "num_buckets": num_buckets, "hash_key": hash_key, "out_type": out_type, "internal_type": internal_type} opspec := tf.OpSpec{ - Type: "SparseCross", + Type: "DeleteSessionTensor", Input: []tf.Input{ - tf.OutputList(indices), tf.OutputList(values), tf.OutputList(shapes), tf.OutputList(dense_inputs), + handle, }, - Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return scope.AddOperation(opspec) } -// Concatenates quantized tensors along one dimension. +// L2 Loss. +// +// Computes half the L2 norm of a tensor without the `sqrt`: +// +// output = sum(t ** 2) / 2 // // Arguments: -// concat_dim: 0-D. The dimension along which to concatenate. Must be in the -// range [0, rank(values)). -// values: The `N` Tensors to concatenate. Their ranks and types must match, -// and their sizes must match in all dimensions except `concat_dim`. -// input_mins: The minimum scalar values for each of the input tensors. -// input_maxes: The maximum scalar values for each of the input tensors. +// t: Typically 2-D, but may have any dimensions. // -// Returns A `Tensor` with the concatenation of values stacked along the -// `concat_dim` dimension. This tensor's shape matches that of `values` except -// in `concat_dim` where it has the sum of the sizes.The float value that the minimum quantized output value represents.The float value that the maximum quantized output value represents. 
-func QuantizedConcat(scope *Scope, concat_dim tf.Output, values []tf.Output, input_mins []tf.Output, input_maxes []tf.Output) (output tf.Output, output_min tf.Output, output_max tf.Output) { +// Returns 0-D. +func L2Loss(scope *Scope, t tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "QuantizedConcat", + Type: "L2Loss", Input: []tf.Input{ - concat_dim, tf.OutputList(values), tf.OutputList(input_mins), tf.OutputList(input_maxes), + t, }, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return op.Output(0) } -// Slice a `SparseTensor` based on the `start` and `size`. -// -// For example, if the input is +// DenseToSparseSetOperationAttr is an optional argument to DenseToSparseSetOperation. +type DenseToSparseSetOperationAttr func(optionalAttr) + +// DenseToSparseSetOperationValidateIndices sets the optional validate_indices attribute to value. +// If not specified, defaults to true +func DenseToSparseSetOperationValidateIndices(value bool) DenseToSparseSetOperationAttr { + return func(m optionalAttr) { + m["validate_indices"] = value + } +} + +// Applies set operation along last dimension of `Tensor` and `SparseTensor`. // -// input_tensor = shape = [2, 7] -// [ a d e ] -// [b c ] +// See SetOperationOp::SetOperationFromContext for values of `set_operation`. // -// Graphically the output tensors are: +// Input `set2` is a `SparseTensor` represented by `set2_indices`, `set2_values`, +// and `set2_shape`. For `set2` ranked `n`, 1st `n-1` dimensions must be the same +// as `set1`. Dimension `n` contains values in a set, duplicates are allowed but +// ignored. // -// sparse_slice([0, 0], [2, 4]) = shape = [2, 4] -// [ a ] -// [b c ] +// If `validate_indices` is `True`, this op validates the order and range of `set2` +// indices. 
// -// sparse_slice([0, 4], [2, 3]) = shape = [2, 3] -// [ d e ] -// [ ] +// Output `result` is a `SparseTensor` represented by `result_indices`, +// `result_values`, and `result_shape`. For `set1` and `set2` ranked `n`, this +// has rank `n` and the same 1st `n-1` dimensions as `set1` and `set2`. The `nth` +// dimension contains the result of `set_operation` applied to the corresponding +// `[0...n-1]` dimension of `set`. // // Arguments: -// indices: 2-D tensor represents the indices of the sparse tensor. -// values: 1-D tensor represents the values of the sparse tensor. -// shape: 1-D. tensor represents the shape of the sparse tensor. -// start: 1-D. tensor represents the start of the slice. -// size: 1-D. tensor represents the size of the slice. -// output indices: A list of 1-D tensors represents the indices of the output -// sparse tensors. +// set1: `Tensor` with rank `n`. 1st `n-1` dimensions must be the same as `set2`. +// Dimension `n` contains values in a set, duplicates are allowed but ignored. +// set2_indices: 2D `Tensor`, indices of a `SparseTensor`. Must be in row-major +// order. +// set2_values: 1D `Tensor`, values of a `SparseTensor`. Must be in row-major +// order. +// set2_shape: 1D `Tensor`, shape of a `SparseTensor`. `set2_shape[0...n-1]` must +// be the same as the 1st `n-1` dimensions of `set1`, `result_shape[n]` is the +// max set size across `n-1` dimensions. // -// Returns A list of 1-D tensors represents the values of the output sparse -// tensors.A list of 1-D tensors represents the shape of the output sparse -// tensors. -func SparseSlice(scope *Scope, indices tf.Output, values tf.Output, shape tf.Output, start tf.Output, size tf.Output) (output_indices tf.Output, output_values tf.Output, output_shape tf.Output) { +// +// Returns 2D indices of a `SparseTensor`.1D values of a `SparseTensor`.1D `Tensor` shape of a `SparseTensor`. 
`result_shape[0...n-1]` is +// the same as the 1st `n-1` dimensions of `set1` and `set2`, `result_shape[n]` +// is the max result set size across all `0...n-1` dimensions. +func DenseToSparseSetOperation(scope *Scope, set1 tf.Output, set2_indices tf.Output, set2_values tf.Output, set2_shape tf.Output, set_operation string, optional ...DenseToSparseSetOperationAttr) (result_indices tf.Output, result_values tf.Output, result_shape tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"set_operation": set_operation} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "SparseSlice", + Type: "DenseToSparseSetOperation", Input: []tf.Input{ - indices, values, shape, start, size, + set1, set2_indices, set2_values, set2_shape, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0), op.Output(1), op.Output(2) } -// Adds up a `SparseTensor` and a dense `Tensor`, producing a dense `Tensor`. +// Subtracts a value from the current value of a variable. // -// This Op does not require `a_indices` be sorted in standard lexicographic order. +// Any ReadVariableOp with a control dependency on this op is guaranteed to +// see the decremented value or a subsequent newer one. // // Arguments: -// a_indices: 2-D. The `indices` of the `SparseTensor`, with shape `[nnz, ndims]`. -// a_values: 1-D. The `values` of the `SparseTensor`, with shape `[nnz]`. -// a_shape: 1-D. The `shape` of the `SparseTensor`, with shape `[ndims]`. -// b: `ndims`-D Tensor. With shape `a_shape`. -func SparseTensorDenseAdd(scope *Scope, a_indices tf.Output, a_values tf.Output, a_shape tf.Output, b tf.Output) (output tf.Output) { +// resource: handle to the resource in which to store the variable. +// value: the value by which the variable will be decremented. +// +// Returns the created operation. 
+func AssignSubVariableOp(scope *Scope, resource tf.Output, value tf.Output) (o *tf.Operation) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "SparseTensorDenseAdd", + Type: "AssignSubVariableOp", Input: []tf.Input{ - a_indices, a_values, a_shape, b, + resource, value, }, } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } -// Returns the set of files matching one or more glob patterns. +// RestoreAttr is an optional argument to Restore. +type RestoreAttr func(optionalAttr) + +// RestorePreferredShard sets the optional preferred_shard attribute to value. // -// Note that this routine only supports wildcard characters in the -// basename portion of the pattern, not in the directory portion. +// value: Index of file to open first if multiple files match +// `file_pattern`. +// If not specified, defaults to -1 +func RestorePreferredShard(value int64) RestoreAttr { + return func(m optionalAttr) { + m["preferred_shard"] = value + } +} + +// Restores a tensor from checkpoint files. +// +// Reads a tensor stored in one or several files. If there are several files (for +// instance because a tensor was saved as slices), `file_pattern` may contain +// wildcard symbols (`*` and `?`) in the filename portion only, not in the +// directory portion. +// +// If a `file_pattern` matches several files, `preferred_shard` can be used to hint +// in which file the requested tensor is likely to be found. This op will first +// open the file at index `preferred_shard` in the list of matching files and try +// to restore tensors from that file. Only if some tensors or tensor slices are +// not found in that first file, then the Op opens all the files. Setting +// `preferred_shard` to match the value passed as the `shard` input +// of a matching `Save` Op may speed up Restore. This attribute only affects +// performance, not correctness. The default value -1 means files are processed in +// order. 
+// +// See also `RestoreSlice`. // // Arguments: -// pattern: Shell wildcard pattern(s). Scalar or vector of type string. +// file_pattern: Must have a single element. The pattern of the files from +// which we read the tensor. +// tensor_name: Must have a single element. The name of the tensor to be +// restored. +// dt: The type of the tensor to be restored. // -// Returns A vector of matching filenames. -func MatchingFiles(scope *Scope, pattern tf.Output) (filenames tf.Output) { +// Returns The restored tensor. +func Restore(scope *Scope, file_pattern tf.Output, tensor_name tf.Output, dt tf.DataType, optional ...RestoreAttr) (tensor tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"dt": dt} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "MatchingFiles", + Type: "Restore", Input: []tf.Input{ - pattern, + file_pattern, tensor_name, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// MatrixSolveLsAttr is an optional argument to MatrixSolveLs. -type MatrixSolveLsAttr func(optionalAttr) +// QuantizedResizeBilinearAttr is an optional argument to QuantizedResizeBilinear. +type QuantizedResizeBilinearAttr func(optionalAttr) -// MatrixSolveLsFast sets the optional fast attribute to value. -// If not specified, defaults to true -func MatrixSolveLsFast(value bool) MatrixSolveLsAttr { +// QuantizedResizeBilinearAlignCorners sets the optional align_corners attribute to value. +// +// value: If true, the centers of the 4 corner pixels of the input and output tensors are +// aligned, preserving the values at the corner pixels. Defaults to false. +// If not specified, defaults to false +func QuantizedResizeBilinearAlignCorners(value bool) QuantizedResizeBilinearAttr { return func(m optionalAttr) { - m["fast"] = value + m["align_corners"] = value } } -// Solves one or more linear least-squares problems. 
-// -// `matrix` is a tensor of shape `[..., M, N]` whose inner-most 2 dimensions -// form real or complex matrices of size `[M, N]`. `Rhs` is a tensor of the same -// type as `matrix` and shape `[..., M, K]`. -// The output is a tensor shape `[..., N, K]` where each output matrix solves -// each of the equations -// `matrix[..., :, :]` * `output[..., :, :]` = `rhs[..., :, :]` -// in the least squares sense. -// -// We use the following notation for (complex) matrix and right-hand sides -// in the batch: -// -// `matrix`=\\(A \in \mathbb{C}^{m \times n}\\), -// `rhs`=\\(B \in \mathbb{C}^{m \times k}\\), -// `output`=\\(X \in \mathbb{C}^{n \times k}\\), -// `l2_regularizer`=\\(\lambda \in \mathbb{R}\\). -// -// If `fast` is `True`, then the solution is computed by solving the normal -// equations using Cholesky decomposition. Specifically, if \\(m \ge n\\) then -// \\(X = (A^H A + \lambda I)^{-1} A^H B\\), which solves the least-squares -// problem \\(X = \mathrm{argmin}_{Z \in \Re^{n \times k} } ||A Z - B||_F^2 + -// \lambda ||Z||_F^2\\). If \\(m \lt n\\) then `output` is computed as -// \\(X = A^H (A A^H + \lambda I)^{-1} B\\), which (for \\(\lambda = 0\\)) is the -// minimum-norm solution to the under-determined linear system, i.e. -// \\(X = \mathrm{argmin}_{Z \in \mathbb{C}^{n \times k} } ||Z||_F^2 \\), -// subject to \\(A Z = B\\). Notice that the fast path is only numerically stable -// when \\(A\\) is numerically full rank and has a condition number -// \\(\mathrm{cond}(A) \lt \frac{1}{\sqrt{\epsilon_{mach} } }\\) or\\(\lambda\\) is -// sufficiently large. +// Resize quantized `images` to `size` using quantized bilinear interpolation. // -// If `fast` is `False` an algorithm based on the numerically robust complete -// orthogonal decomposition is used. This computes the minimum-norm -// least-squares solution, even when \\(A\\) is rank deficient. This path is -// typically 6-7 times slower than the fast path. 
If `fast` is `False` then -// `l2_regularizer` is ignored. +// Input images and output images must be quantized types. // // Arguments: -// matrix: Shape is `[..., M, N]`. -// rhs: Shape is `[..., M, K]`. -// l2_regularizer: Scalar tensor. +// images: 4-D with shape `[batch, height, width, channels]`. +// size: A 1-D int32 Tensor of 2 elements: `new_height, new_width`. The +// new size for the images. // -// @compatibility(numpy) -// Equivalent to np.linalg.lstsq -// @end_compatibility // -// Returns Shape is `[..., N, K]`. -func MatrixSolveLs(scope *Scope, matrix tf.Output, rhs tf.Output, l2_regularizer tf.Output, optional ...MatrixSolveLsAttr) (output tf.Output) { +// +// Returns 4-D with shape +// `[batch, new_height, new_width, channels]`. +func QuantizedResizeBilinear(scope *Scope, images tf.Output, size tf.Output, min tf.Output, max tf.Output, optional ...QuantizedResizeBilinearAttr) (resized_images tf.Output, out_min tf.Output, out_max tf.Output) { if scope.Err() != nil { return } @@ -15501,159 +18000,232 @@ func MatrixSolveLs(scope *Scope, matrix tf.Output, rhs tf.Output, l2_regularizer a(attrs) } opspec := tf.OpSpec{ - Type: "MatrixSolveLs", + Type: "QuantizedResizeBilinear", Input: []tf.Input{ - matrix, rhs, l2_regularizer, + images, size, min, max, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1), op.Output(2) } -// Elementwise computes the bitwise OR of `x` and `y`. +// Computes the minimum along segments of a tensor. // -// The result will have those bits set, that are set in `x`, `y` or both. The -// computation is performed on the underlying representations of `x` and `y`. -func BitwiseOr(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { +// Read @{$math_ops#Segmentation$the section on segmentation} for an explanation of +// segments. +// +// Computes a tensor such that +// \\(output_i = \min_j(data_j)\\) where `min` is over `j` such +// that `segment_ids[j] == i`. 
+// +// If the min is empty for a given segment ID `i`, `output[i] = 0`. +// +//
+// +//
+// +// Arguments: +// +// segment_ids: A 1-D tensor whose rank is equal to the rank of `data`'s +// first dimension. Values should be sorted and can be repeated. +// +// Returns Has same shape as data, except for dimension 0 which +// has size `k`, the number of segments. +func SegmentMin(scope *Scope, data tf.Output, segment_ids tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "BitwiseOr", + Type: "SegmentMin", Input: []tf.Input{ - x, y, + data, segment_ids, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// SparseToSparseSetOperationAttr is an optional argument to SparseToSparseSetOperation. -type SparseToSparseSetOperationAttr func(optionalAttr) +// SdcaOptimizerAttr is an optional argument to SdcaOptimizer. +type SdcaOptimizerAttr func(optionalAttr) -// SparseToSparseSetOperationValidateIndices sets the optional validate_indices attribute to value. +// SdcaOptimizerAdaptative sets the optional adaptative attribute to value. +// +// value: Whether to use Adaptive SDCA for the inner loop. // If not specified, defaults to true -func SparseToSparseSetOperationValidateIndices(value bool) SparseToSparseSetOperationAttr { +func SdcaOptimizerAdaptative(value bool) SdcaOptimizerAttr { return func(m optionalAttr) { - m["validate_indices"] = value + m["adaptative"] = value } } -// Applies set operation along last dimension of 2 `SparseTensor` inputs. -// -// See SetOperationOp::SetOperationFromContext for values of `set_operation`. +// Distributed version of Stochastic Dual Coordinate Ascent (SDCA) optimizer for // -// If `validate_indices` is `True`, `SparseToSparseSetOperation` validates the -// order and range of `set1` and `set2` indices. +// linear models with L1 + L2 regularization. As global optimization objective is +// strongly-convex, the optimizer optimizes the dual objective at each step. The +// optimizer applies each update one example at a time. 
Examples are sampled +// uniformly, and the optimizer is learning rate free and enjoys linear convergence +// rate. // -// Input `set1` is a `SparseTensor` represented by `set1_indices`, `set1_values`, -// and `set1_shape`. For `set1` ranked `n`, 1st `n-1` dimensions must be the same -// as `set2`. Dimension `n` contains values in a set, duplicates are allowed but -// ignored. +// [Proximal Stochastic Dual Coordinate Ascent](http://arxiv.org/pdf/1211.2717v1.pdf).
+// Shai Shalev-Shwartz, Tong Zhang. 2012 // -// Input `set2` is a `SparseTensor` represented by `set2_indices`, `set2_values`, -// and `set2_shape`. For `set2` ranked `n`, 1st `n-1` dimensions must be the same -// as `set1`. Dimension `n` contains values in a set, duplicates are allowed but -// ignored. +// $$Loss Objective = \sum f_{i} (wx_{i}) + (l2 / 2) * |w|^2 + l1 * |w|$$ // -// If `validate_indices` is `True`, this op validates the order and range of `set1` -// and `set2` indices. +// [Adding vs. Averaging in Distributed Primal-Dual Optimization](http://arxiv.org/abs/1502.03508).
+// Chenxin Ma, Virginia Smith, Martin Jaggi, Michael I. Jordan, +// Peter Richtarik, Martin Takac. 2015 // -// Output `result` is a `SparseTensor` represented by `result_indices`, -// `result_values`, and `result_shape`. For `set1` and `set2` ranked `n`, this -// has rank `n` and the same 1st `n-1` dimensions as `set1` and `set2`. The `nth` -// dimension contains the result of `set_operation` applied to the corresponding -// `[0...n-1]` dimension of `set`. +// [Stochastic Dual Coordinate Ascent with Adaptive Probabilities](https://arxiv.org/abs/1502.08053).
+// Dominik Csiba, Zheng Qu, Peter Richtarik. 2015 // // Arguments: -// set1_indices: 2D `Tensor`, indices of a `SparseTensor`. Must be in row-major -// order. -// set1_values: 1D `Tensor`, values of a `SparseTensor`. Must be in row-major -// order. -// set1_shape: 1D `Tensor`, shape of a `SparseTensor`. `set1_shape[0...n-1]` must -// be the same as `set2_shape[0...n-1]`, `set1_shape[n]` is the -// max set size across `0...n-1` dimensions. -// set2_indices: 2D `Tensor`, indices of a `SparseTensor`. Must be in row-major -// order. -// set2_values: 1D `Tensor`, values of a `SparseTensor`. Must be in row-major -// order. -// set2_shape: 1D `Tensor`, shape of a `SparseTensor`. `set2_shape[0...n-1]` must -// be the same as `set1_shape[0...n-1]`, `set2_shape[n]` is the -// max set size across `0...n-1` dimensions. -// +// sparse_example_indices: a list of vectors which contain example indices. +// sparse_feature_indices: a list of vectors which contain feature indices. +// sparse_feature_values: a list of vectors which contains feature value +// associated with each feature group. +// dense_features: a list of matrices which contains the dense feature values. +// example_weights: a vector which contains the weight associated with each +// example. +// example_labels: a vector which contains the label/target associated with each +// example. +// sparse_indices: a list of vectors where each value is the indices which has +// corresponding weights in sparse_weights. This field may be omitted for the +// dense approach. +// sparse_weights: a list of vectors where each value is the weight associated with +// a sparse feature group. +// dense_weights: a list of vectors where the values are the weights associated +// with a dense feature group. +// example_state_data: a list of vectors containing the example state data. +// loss_type: Type of the primal loss. Currently SdcaSolver supports logistic, +// squared and hinge losses. +// l1: Symmetric l1 regularization strength. 
+// l2: Symmetric l2 regularization strength. +// num_loss_partitions: Number of partitions of the global loss function. +// num_inner_iterations: Number of iterations per mini-batch. // -// Returns 2D indices of a `SparseTensor`.1D values of a `SparseTensor`.1D `Tensor` shape of a `SparseTensor`. `result_shape[0...n-1]` is -// the same as the 1st `n-1` dimensions of `set1` and `set2`, `result_shape[n]` -// is the max result set size across all `0...n-1` dimensions. -func SparseToSparseSetOperation(scope *Scope, set1_indices tf.Output, set1_values tf.Output, set1_shape tf.Output, set2_indices tf.Output, set2_values tf.Output, set2_shape tf.Output, set_operation string, optional ...SparseToSparseSetOperationAttr) (result_indices tf.Output, result_values tf.Output, result_shape tf.Output) { +// Returns a list of vectors containing the updated example state +// data.a list of vectors where each value is the delta +// weights associated with a sparse feature group.a list of vectors where the values are the delta +// weights associated with a dense feature group. 
+func SdcaOptimizer(scope *Scope, sparse_example_indices []tf.Output, sparse_feature_indices []tf.Output, sparse_feature_values []tf.Output, dense_features []tf.Output, example_weights tf.Output, example_labels tf.Output, sparse_indices []tf.Output, sparse_weights []tf.Output, dense_weights []tf.Output, example_state_data tf.Output, loss_type string, l1 float32, l2 float32, num_loss_partitions int64, num_inner_iterations int64, optional ...SdcaOptimizerAttr) (out_example_state_data tf.Output, out_delta_sparse_weights []tf.Output, out_delta_dense_weights []tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"set_operation": set_operation} + attrs := map[string]interface{}{"loss_type": loss_type, "l1": l1, "l2": l2, "num_loss_partitions": num_loss_partitions, "num_inner_iterations": num_inner_iterations} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "SparseToSparseSetOperation", + Type: "SdcaOptimizer", Input: []tf.Input{ - set1_indices, set1_values, set1_shape, set2_indices, set2_values, set2_shape, + tf.OutputList(sparse_example_indices), tf.OutputList(sparse_feature_indices), tf.OutputList(sparse_feature_values), tf.OutputList(dense_features), example_weights, example_labels, tf.OutputList(sparse_indices), tf.OutputList(sparse_weights), tf.OutputList(dense_weights), example_state_data, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + if scope.Err() != nil { + return + } + var idx int + var err error + out_example_state_data = op.Output(idx) + if out_delta_sparse_weights, idx, err = makeOutputList(op, idx, "out_delta_sparse_weights"); err != nil { + scope.UpdateErr("SdcaOptimizer", err) + return + } + if out_delta_dense_weights, idx, err = makeOutputList(op, idx, "out_delta_dense_weights"); err != nil { + scope.UpdateErr("SdcaOptimizer", err) + return + } + return out_example_state_data, out_delta_sparse_weights, out_delta_dense_weights } -// Computes numerical 
negative value element-wise. +// SparseMatMulAttr is an optional argument to SparseMatMul. +type SparseMatMulAttr func(optionalAttr) + +// SparseMatMulTransposeA sets the optional transpose_a attribute to value. +// If not specified, defaults to false +func SparseMatMulTransposeA(value bool) SparseMatMulAttr { + return func(m optionalAttr) { + m["transpose_a"] = value + } +} + +// SparseMatMulTransposeB sets the optional transpose_b attribute to value. +// If not specified, defaults to false +func SparseMatMulTransposeB(value bool) SparseMatMulAttr { + return func(m optionalAttr) { + m["transpose_b"] = value + } +} + +// SparseMatMulAIsSparse sets the optional a_is_sparse attribute to value. +// If not specified, defaults to false +func SparseMatMulAIsSparse(value bool) SparseMatMulAttr { + return func(m optionalAttr) { + m["a_is_sparse"] = value + } +} + +// SparseMatMulBIsSparse sets the optional b_is_sparse attribute to value. +// If not specified, defaults to false +func SparseMatMulBIsSparse(value bool) SparseMatMulAttr { + return func(m optionalAttr) { + m["b_is_sparse"] = value + } +} + +// Multiply matrix "a" by matrix "b". // -// I.e., \\(y = -x\\). -func Neg(scope *Scope, x tf.Output) (y tf.Output) { +// The inputs must be two-dimensional matrices and the inner dimension of "a" must +// match the outer dimension of "b". This op is optimized for the case where at +// least one of "a" or "b" is sparse. The breakeven for using this versus a dense +// matrix multiply on one platform was 30% zero values in the sparse matrix. +// +// The gradient computation of this operation will only take advantage of sparsity +// in the input gradient when that gradient comes from a Relu. 
+func SparseMatMul(scope *Scope, a tf.Output, b tf.Output, optional ...SparseMatMulAttr) (product tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "Neg", + Type: "SparseMatMul", Input: []tf.Input{ - x, + a, b, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// FakeQuantWithMinMaxVarsAttr is an optional argument to FakeQuantWithMinMaxVars. -type FakeQuantWithMinMaxVarsAttr func(optionalAttr) - -// FakeQuantWithMinMaxVarsNumBits sets the optional num_bits attribute to value. -// If not specified, defaults to 8 -func FakeQuantWithMinMaxVarsNumBits(value int64) FakeQuantWithMinMaxVarsAttr { - return func(m optionalAttr) { - m["num_bits"] = value - } -} +// ShapeAttr is an optional argument to Shape. +type ShapeAttr func(optionalAttr) -// FakeQuantWithMinMaxVarsNarrowRange sets the optional narrow_range attribute to value. -// If not specified, defaults to false -func FakeQuantWithMinMaxVarsNarrowRange(value bool) FakeQuantWithMinMaxVarsAttr { +// ShapeOutType sets the optional out_type attribute to value. +// If not specified, defaults to DT_INT32 +func ShapeOutType(value tf.DataType) ShapeAttr { return func(m optionalAttr) { - m["narrow_range"] = value + m["out_type"] = value } } -// Fake-quantize the 'inputs' tensor of type float via global float scalars `min` +// Returns the shape of a tensor. // -// and `max` to 'outputs' tensor of same shape as `inputs`. +// This operation returns a 1-D integer tensor representing the shape of `input`. // -// `[min; max]` define the clamping range for the `inputs` data. -// `inputs` values are quantized into the quantization range (`[0; 2^num_bits - 1]` -// when `narrow_range` is false and `[1; 2^num_bits - 1]` when it is true) and -// then de-quantized and output as floats in `[min; max]` interval. -// `num_bits` is the bitwidth of the quantization; between 2 and 8, inclusive. 
+// For example: // -// This operation has a gradient and thus allows for training `min` and `max` -// values. -func FakeQuantWithMinMaxVars(scope *Scope, inputs tf.Output, min tf.Output, max tf.Output, optional ...FakeQuantWithMinMaxVarsAttr) (outputs tf.Output) { +// ``` +// # 't' is [[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]]] +// shape(t) ==> [2, 2, 3] +// ``` +func Shape(scope *Scope, input tf.Output, optional ...ShapeAttr) (output tf.Output) { if scope.Err() != nil { return } @@ -15662,9 +18234,9 @@ func FakeQuantWithMinMaxVars(scope *Scope, inputs tf.Output, min tf.Output, max a(attrs) } opspec := tf.OpSpec{ - Type: "FakeQuantWithMinMaxVars", + Type: "Shape", Input: []tf.Input{ - inputs, min, max, + input, }, Attrs: attrs, } @@ -15672,175 +18244,188 @@ func FakeQuantWithMinMaxVars(scope *Scope, inputs tf.Output, min tf.Output, max return op.Output(0) } -// Returns the element-wise min of two SparseTensors. +// Computes the power of one value to another. // -// Assumes the two SparseTensors have the same shape, i.e., no broadcasting. +// Given a tensor `x` and a tensor `y`, this operation computes \\(x^y\\) for +// corresponding elements in `x` and `y`. For example: +// +// ``` +// # tensor 'x' is [[2, 2]], [3, 3]] +// # tensor 'y' is [[8, 16], [2, 3]] +// tf.pow(x, y) ==> [[256, 65536], [9, 27]] +// ``` +func Pow(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Pow", + Input: []tf.Input{ + x, y, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Computes fingerprints of the input strings. // // Arguments: -// a_indices: 2-D. `N x R` matrix with the indices of non-empty values in a -// SparseTensor, in the canonical lexicographic ordering. -// a_values: 1-D. `N` non-empty values corresponding to `a_indices`. -// a_shape: 1-D. Shape of the input SparseTensor. -// b_indices: counterpart to `a_indices` for the other operand. 
-// b_values: counterpart to `a_values` for the other operand; must be of the same dtype. -// b_shape: counterpart to `a_shape` for the other operand; the two shapes must be equal. +// input: vector of strings to compute fingerprints on. // -// Returns 2-D. The indices of the output SparseTensor.1-D. The values of the output SparseTensor. -func SparseSparseMinimum(scope *Scope, a_indices tf.Output, a_values tf.Output, a_shape tf.Output, b_indices tf.Output, b_values tf.Output, b_shape tf.Output) (output_indices tf.Output, output_values tf.Output) { +// Returns a (N,2) shaped matrix where N is the number of elements in the input +// vector. Each row contains the low and high parts of the fingerprint. +func SdcaFprint(scope *Scope, input tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "SparseSparseMinimum", + Type: "SdcaFprint", Input: []tf.Input{ - a_indices, a_values, a_shape, b_indices, b_values, b_shape, + input, }, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) + return op.Output(0) } -// TakeManySparseFromTensorsMapAttr is an optional argument to TakeManySparseFromTensorsMap. -type TakeManySparseFromTensorsMapAttr func(optionalAttr) +// RandomPoissonV2Attr is an optional argument to RandomPoissonV2. +type RandomPoissonV2Attr func(optionalAttr) -// TakeManySparseFromTensorsMapContainer sets the optional container attribute to value. +// RandomPoissonV2Seed sets the optional seed attribute to value. // -// value: The container name for the `SparseTensorsMap` read by this op. -// If not specified, defaults to "" -func TakeManySparseFromTensorsMapContainer(value string) TakeManySparseFromTensorsMapAttr { +// value: If either `seed` or `seed2` are set to be non-zero, the random number +// generator is seeded by the given seed. Otherwise, it is seeded by a +// random seed. 
+// If not specified, defaults to 0 +func RandomPoissonV2Seed(value int64) RandomPoissonV2Attr { return func(m optionalAttr) { - m["container"] = value + m["seed"] = value } } -// TakeManySparseFromTensorsMapSharedName sets the optional shared_name attribute to value. +// RandomPoissonV2Seed2 sets the optional seed2 attribute to value. // -// value: The shared name for the `SparseTensorsMap` read by this op. -// It should not be blank; rather the `shared_name` or unique Operation name -// of the Op that created the original `SparseTensorsMap` should be used. -// If not specified, defaults to "" -func TakeManySparseFromTensorsMapSharedName(value string) TakeManySparseFromTensorsMapAttr { +// value: A second seed to avoid seed collision. +// If not specified, defaults to 0 +func RandomPoissonV2Seed2(value int64) RandomPoissonV2Attr { return func(m optionalAttr) { - m["shared_name"] = value + m["seed2"] = value } } -// Read `SparseTensors` from a `SparseTensorsMap` and concatenate them. -// -// The input `sparse_handles` must be an `int64` matrix of shape `[N, 1]` where -// `N` is the minibatch size and the rows correspond to the output handles of -// `AddSparseToTensorsMap` or `AddManySparseToTensorsMap`. The ranks of the -// original `SparseTensor` objects that went into the given input ops must all -// match. When the final `SparseTensor` is created, it has rank one -// higher than the ranks of the incoming `SparseTensor` objects -// (they have been concatenated along a new row dimension on the left). -// -// The output `SparseTensor` object's shape values for all dimensions but the -// first are the max across the input `SparseTensor` objects' shape values -// for the corresponding dimensions. Its first shape value is `N`, the minibatch -// size. -// -// The input `SparseTensor` objects' indices are assumed ordered in -// standard lexicographic order. If this is not the case, after this -// step run `SparseReorder` to restore index ordering. 
-// -// For example, if the handles represent an input, which is a `[2, 3]` matrix -// representing two original `SparseTensor` objects: -// -// ``` -// index = [ 0] -// [10] -// [20] -// values = [1, 2, 3] -// shape = [50] -// ``` -// -// and -// -// ``` -// index = [ 2] -// [10] -// values = [4, 5] -// shape = [30] -// ``` +// RandomPoissonV2Dtype sets the optional dtype attribute to value. +// If not specified, defaults to DT_INT64 +func RandomPoissonV2Dtype(value tf.DataType) RandomPoissonV2Attr { + return func(m optionalAttr) { + m["dtype"] = value + } +} + +// Outputs random values from the Poisson distribution(s) described by rate. // -// then the final `SparseTensor` will be: +// This op uses two algorithms, depending on rate. If rate >= 10, then +// the algorithm by Hormann is used to acquire samples via +// transformation-rejection. +// See http://www.sciencedirect.com/science/article/pii/0167668793909974. // -// ``` -// index = [0 0] -// [0 10] -// [0 20] -// [1 2] -// [1 10] -// values = [1, 2, 3, 4, 5] -// shape = [2 50] -// ``` +// Otherwise, Knuth's algorithm is used to acquire samples via multiplying uniform +// random variables. +// See Donald E. Knuth (1969). Seminumerical Algorithms. The Art of Computer +// Programming, Volume 2. Addison Wesley // // Arguments: -// sparse_handles: 1-D, The `N` serialized `SparseTensor` objects. -// Shape: `[N]`. -// dtype: The `dtype` of the `SparseTensor` objects stored in the -// `SparseTensorsMap`. +// shape: 1-D integer tensor. Shape of independent samples to draw from each +// distribution described by the shape parameters given in rate. +// rate: A tensor in which each scalar is a "rate" parameter describing the +// associated poisson distribution. // -// Returns 2-D. The `indices` of the minibatch `SparseTensor`.1-D. The `values` of the minibatch `SparseTensor`.1-D. The `shape` of the minibatch `SparseTensor`. 
-func TakeManySparseFromTensorsMap(scope *Scope, sparse_handles tf.Output, dtype tf.DataType, optional ...TakeManySparseFromTensorsMapAttr) (sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output) { +// Returns A tensor with shape `shape + shape(rate)`. Each slice +// `[:, ..., :, i0, i1, ...iN]` contains the samples drawn for +// `rate[i0, i1, ...iN]`. +func RandomPoissonV2(scope *Scope, shape tf.Output, rate tf.Output, optional ...RandomPoissonV2Attr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"dtype": dtype} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "TakeManySparseFromTensorsMap", + Type: "RandomPoissonV2", Input: []tf.Input{ - sparse_handles, + shape, rate, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return op.Output(0) } -// MaxPoolAttr is an optional argument to MaxPool. -type MaxPoolAttr func(optionalAttr) +// MatrixTriangularSolveAttr is an optional argument to MatrixTriangularSolve. +type MatrixTriangularSolveAttr func(optionalAttr) -// MaxPoolDataFormat sets the optional data_format attribute to value. +// MatrixTriangularSolveLower sets the optional lower attribute to value. // -// value: Specify the data format of the input and output data. With the -// default format "NHWC", the data is stored in the order of: -// [batch, in_height, in_width, in_channels]. -// Alternatively, the format could be "NCHW", the data storage order of: -// [batch, in_channels, in_height, in_width]. -// If not specified, defaults to "NHWC" -func MaxPoolDataFormat(value string) MaxPoolAttr { +// value: Boolean indicating whether the innermost matrices in `matrix` are +// lower or upper triangular. 
+// If not specified, defaults to true +func MatrixTriangularSolveLower(value bool) MatrixTriangularSolveAttr { return func(m optionalAttr) { - m["data_format"] = value + m["lower"] = value } } -// Performs max pooling on the input. +// MatrixTriangularSolveAdjoint sets the optional adjoint attribute to value. +// +// value: Boolean indicating whether to solve with `matrix` or its (block-wise) +// adjoint. +// +// @compatibility(numpy) +// Equivalent to np.linalg.triangular_solve +// @end_compatibility +// If not specified, defaults to false +func MatrixTriangularSolveAdjoint(value bool) MatrixTriangularSolveAttr { + return func(m optionalAttr) { + m["adjoint"] = value + } +} + +// Solves systems of linear equations with upper or lower triangular matrices by +// +// backsubstitution. +// +// `matrix` is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions form +// square matrices. If `lower` is `True` then the strictly upper triangular part +// of each inner-most matrix is assumed to be zero and not accessed. +// If `lower` is False then the strictly lower triangular part of each inner-most +// matrix is assumed to be zero and not accessed. +// `rhs` is a tensor of shape `[..., M, K]`. +// +// The output is a tensor of shape `[..., M, K]`. If `adjoint` is +// `True` then the innermost matrices in `output` satisfy matrix equations +// `matrix[..., :, :] * output[..., :, :] = rhs[..., :, :]`. +// If `adjoint` is `False` then the strictly then the innermost matrices in +// `output` satisfy matrix equations +// `adjoint(matrix[..., i, k]) * output[..., k, j] = rhs[..., i, j]`. // // Arguments: -// input: 4-D input to pool over. -// ksize: The size of the window for each dimension of the input tensor. -// strides: The stride of the sliding window for each dimension of the -// input tensor. -// padding: The type of padding algorithm to use. +// matrix: Shape is `[..., M, M]`. +// rhs: Shape is `[..., M, K]`. // -// Returns The max pooled output tensor. 
-func MaxPool(scope *Scope, input tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPoolAttr) (output tf.Output) { +// Returns Shape is `[..., M, K]`. +func MatrixTriangularSolve(scope *Scope, matrix tf.Output, rhs tf.Output, optional ...MatrixTriangularSolveAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "MaxPool", + Type: "MatrixTriangularSolve", Input: []tf.Input{ - input, + matrix, rhs, }, Attrs: attrs, } @@ -15848,251 +18433,252 @@ func MaxPool(scope *Scope, input tf.Output, ksize []int64, strides []int64, padd return op.Output(0) } -// Says whether the targets are in the top `K` predictions. -// -// This outputs a `batch_size` bool array, an entry `out[i]` is `true` if the -// prediction for the target class is among the top `k` predictions among -// all predictions for example `i`. Note that the behavior of `InTopK` differs -// from the `TopK` op in its handling of ties; if multiple classes have the -// same prediction value and straddle the top-`k` boundary, all of those -// classes are considered to be in the top `k`. -// -// More formally, let -// -// \\(predictions_i\\) be the predictions for all classes for example `i`, -// \\(targets_i\\) be the target class for example `i`, -// \\(out_i\\) be the output for example `i`, -// -// $$out_i = predictions_{i, targets_i} \in TopKIncludingTies(predictions_i)$$ -// -// Arguments: -// predictions: A `batch_size` x `classes` tensor. -// targets: A `batch_size` vector of class ids. -// k: Number of top elements to look at for computing precision. -// -// Returns Computed precision at `k` as a `bool Tensor`. -func InTopKV2(scope *Scope, predictions tf.Output, targets tf.Output, k tf.Output) (precision tf.Output) { +// Computes inverse hyperbolic sine of x element-wise. 
+func Asinh(scope *Scope, x tf.Output) (y tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "InTopKV2", + Type: "Asinh", Input: []tf.Input{ - predictions, targets, k, + x, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Assigns a new value to a variable. -// -// Any ReadVariableOp with a control dependency on this op is guaranteed to return -// this value or a subsequent newer value of the variable. +// Creates a dataset with a range of values. Corresponds to python's xrange. // // Arguments: -// resource: handle to the resource in which to store the variable. -// value: the value to set the new tensor to use. -// -// Returns the created operation. -func AssignVariableOp(scope *Scope, resource tf.Output, value tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "AssignVariableOp", - Input: []tf.Input{ - resource, value, - }, - } - return scope.AddOperation(opspec) -} - -// Returns a tensor of ones with the same shape and type as x. +// start: corresponds to start in python's xrange(). +// stop: corresponds to stop in python's xrange(). +// step: corresponds to step in python's xrange(). // -// Arguments: -// x: a tensor of type T. // -// Returns a tensor of the same shape and type as x but filled with ones. -func OnesLike(scope *Scope, x tf.Output) (y tf.Output) { +func RangeDataset(scope *Scope, start tf.Output, stop tf.Output, step tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} opspec := tf.OpSpec{ - Type: "OnesLike", + Type: "RangeDataset", Input: []tf.Input{ - x, + start, stop, step, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// The gradient of SparseFillEmptyRows. +// DepthwiseConv2dNativeBackpropInputAttr is an optional argument to DepthwiseConv2dNativeBackpropInput. 
+type DepthwiseConv2dNativeBackpropInputAttr func(optionalAttr) + +// DepthwiseConv2dNativeBackpropInputDataFormat sets the optional data_format attribute to value. // -// Takes vectors reverse_index_map, shaped `[N]`, and grad_values, -// shaped `[N_full]`, where `N_full >= N` and copies data into either -// `d_values` or `d_default_value`. Here `d_values` is shaped `[N]` and -// `d_default_value` is a scalar. +// value: Specify the data format of the input and output data. With the +// default format "NHWC", the data is stored in the order of: +// [batch, height, width, channels]. +// Alternatively, the format could be "NCHW", the data storage order of: +// [batch, channels, height, width]. +// If not specified, defaults to "NHWC" +func DepthwiseConv2dNativeBackpropInputDataFormat(value string) DepthwiseConv2dNativeBackpropInputAttr { + return func(m optionalAttr) { + m["data_format"] = value + } +} + +// DepthwiseConv2dNativeBackpropInputDilations sets the optional dilations attribute to value. // -// d_values[j] = grad_values[reverse_index_map[j]] -// d_default_value = sum_{k : 0 .. N_full - 1} ( -// grad_values[k] * 1{k not in reverse_index_map}) +// value: 1-D tensor of length 4. The dilation factor for each dimension of +// `input`. If set to k > 1, there will be k-1 skipped cells between each filter +// element on that dimension. The dimension order is determined by the value of +// `data_format`, see above for details. Dilations in the batch and depth +// dimensions must be 1. +// If not specified, defaults to +func DepthwiseConv2dNativeBackpropInputDilations(value []int64) DepthwiseConv2dNativeBackpropInputAttr { + return func(m optionalAttr) { + m["dilations"] = value + } +} + +// Computes the gradients of depthwise convolution with respect to the input. // // Arguments: -// reverse_index_map: 1-D. The reverse index map from SparseFillEmptyRows. -// grad_values: 1-D. The gradients from backprop. 
+// input_sizes: An integer vector representing the shape of `input`, based +// on `data_format`. For example, if `data_format` is 'NHWC' then +// `input` is a 4-D `[batch, height, width, channels]` tensor. +// filter: 4-D with shape +// `[filter_height, filter_width, in_channels, depthwise_multiplier]`. +// out_backprop: 4-D with shape based on `data_format`. +// For example, if `data_format` is 'NHWC' then +// out_backprop shape is `[batch, out_height, out_width, out_channels]`. +// Gradients w.r.t. the output of the convolution. +// strides: The stride of the sliding window for each dimension of the input +// of the convolution. +// padding: The type of padding algorithm to use. // -// Returns 1-D. The backprop into values.0-D. The backprop into default_value. -func SparseFillEmptyRowsGrad(scope *Scope, reverse_index_map tf.Output, grad_values tf.Output) (d_values tf.Output, d_default_value tf.Output) { +// Returns 4-D with shape according to `data_format`. For example, if +// `data_format` is 'NHWC', output shape is `[batch, in_height, +// in_width, in_channels]`. Gradient w.r.t. the input of the +// convolution. +func DepthwiseConv2dNativeBackpropInput(scope *Scope, input_sizes tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...DepthwiseConv2dNativeBackpropInputAttr) (output tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"strides": strides, "padding": padding} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "SparseFillEmptyRowsGrad", + Type: "DepthwiseConv2dNativeBackpropInput", Input: []tf.Input{ - reverse_index_map, grad_values, + input_sizes, filter, out_backprop, }, + Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) + return op.Output(0) } -// Computes scaled exponential linear: `scale * alpha * (exp(features) - 1)` +// Stops gradient computation. // -// if < 0, `scale * features` otherwise. 
+// When executed in a graph, this op outputs its input tensor as-is. // -// See [Self-Normalizing Neural Networks](https://arxiv.org/abs/1706.02515) -func Selu(scope *Scope, features tf.Output) (activations tf.Output) { +// When building ops to compute gradients, this op prevents the contribution of +// its inputs to be taken into account. Normally, the gradient generator adds ops +// to a graph to compute the derivatives of a specified 'loss' by recursively +// finding out inputs that contributed to its computation. If you insert this op +// in the graph it inputs are masked from the gradient generator. They are not +// taken into account for computing gradients. +// +// This is useful any time you want to compute a value with TensorFlow but need +// to pretend that the value was a constant. Some examples include: +// +// * The *EM* algorithm where the *M-step* should not involve backpropagation +// through the output of the *E-step*. +// * Contrastive divergence training of Boltzmann machines where, when +// differentiating the energy function, the training must not backpropagate +// through the graph that generated the samples from the model. +// * Adversarial training, where no backprop should happen through the adversarial +// example generation process. +func StopGradient(scope *Scope, input tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Selu", + Type: "StopGradient", Input: []tf.Input{ - features, + input, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// SetSizeAttr is an optional argument to SetSize. -type SetSizeAttr func(optionalAttr) - -// SetSizeValidateIndices sets the optional validate_indices attribute to value. -// If not specified, defaults to true -func SetSizeValidateIndices(value bool) SetSizeAttr { - return func(m optionalAttr) { - m["validate_indices"] = value - } -} - -// Number of unique elements along last dimension of input `set`. 
-// -// Input `set` is a `SparseTensor` represented by `set_indices`, `set_values`, -// and `set_shape`. The last dimension contains values in a set, duplicates are -// allowed but ignored. -// -// If `validate_indices` is `True`, this op validates the order and range of `set` -// indices. -// -// Arguments: -// set_indices: 2D `Tensor`, indices of a `SparseTensor`. -// set_values: 1D `Tensor`, values of a `SparseTensor`. -// set_shape: 1D `Tensor`, shape of a `SparseTensor`. +// Eagerly executes a python function to compute func(input)->output. The // -// Returns For `set` ranked `n`, this is a `Tensor` with rank `n-1`, and the same 1st -// `n-1` dimensions as `set`. Each value is the number of unique elements in -// the corresponding `[0...n-1]` dimension of `set`. -func SetSize(scope *Scope, set_indices tf.Output, set_values tf.Output, set_shape tf.Output, optional ...SetSizeAttr) (size tf.Output) { +// semantics of the input, output, and attributes are the same as those for +// PyFunc. +func EagerPyFunc(scope *Scope, input []tf.Output, token string, Tout []tf.DataType) (output []tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } + attrs := map[string]interface{}{"token": token, "Tout": Tout} opspec := tf.OpSpec{ - Type: "SetSize", + Type: "EagerPyFunc", Input: []tf.Input{ - set_indices, set_values, set_shape, + tf.OutputList(input), }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + if scope.Err() != nil { + return + } + var idx int + var err error + if output, idx, err = makeOutputList(op, idx, "output"); err != nil { + scope.UpdateErr("EagerPyFunc", err) + return + } + return output } -// Computes the sign and the log of the absolute value of the determinant of +// Adds sparse updates to the variable referenced by `resource`. // -// one or more square matrices. 
+// This operation computes // -// The input is a tensor of shape `[N, M, M]` whose inner-most 2 dimensions -// form square matrices. The outputs are two tensors containing the signs and -// absolute values of the log determinants for all N input submatrices -// `[..., :, :]` such that the determinant = sign*exp(log_abs_determinant). -// The log_abs_determinant is computed as det(P)*sum(log(diag(LU))) where LU -// is the LU decomposition of the input and P is the corresponding -// permutation matrix. +// # Scalar indices +// ref[indices, ...] += updates[...] +// +// # Vector indices (for each i) +// ref[indices[i], ...] += updates[i, ...] +// +// # High rank indices (for each i, ..., j) +// ref[indices[i, ..., j], ...] += updates[i, ..., j, ...] +// +// Duplicate entries are handled correctly: if multiple `indices` reference +// the same location, their contributions add. +// +// Requires `updates.shape = indices.shape + ref.shape[1:]` or `updates.shape = []`. +// +//
+// +//
// // Arguments: -// input: Shape is `[N, M, M]`. +// resource: Should be from a `Variable` node. +// indices: A tensor of indices into the first dimension of `ref`. +// updates: A tensor of updated values to add to `ref`. // -// Returns The signs of the log determinants of the inputs. Shape is `[N]`.The logs of the absolute values of the determinants -// of the N input matrices. Shape is `[N]`. -func LogMatrixDeterminant(scope *Scope, input tf.Output) (sign tf.Output, log_abs_determinant tf.Output) { +// Returns the created operation. +func ResourceScatterAdd(scope *Scope, resource tf.Output, indices tf.Output, updates tf.Output) (o *tf.Operation) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "LogMatrixDeterminant", + Type: "ResourceScatterAdd", Input: []tf.Input{ - input, + resource, indices, updates, }, } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) + return scope.AddOperation(opspec) } -// SumAttr is an optional argument to Sum. -type SumAttr func(optionalAttr) - -// SumKeepDims sets the optional keep_dims attribute to value. +// Says whether the targets are in the top `K` predictions. // -// value: If true, retain reduced dimensions with length 1. -// If not specified, defaults to false -func SumKeepDims(value bool) SumAttr { - return func(m optionalAttr) { - m["keep_dims"] = value - } -} - -// Computes the sum of elements across dimensions of a tensor. +// This outputs a `batch_size` bool array, an entry `out[i]` is `true` if the +// prediction for the target class is among the top `k` predictions among +// all predictions for example `i`. Note that the behavior of `InTopK` differs +// from the `TopK` op in its handling of ties; if multiple classes have the +// same prediction value and straddle the top-`k` boundary, all of those +// classes are considered to be in the top `k`. // -// Reduces `input` along the dimensions given in `axis`. 
Unless -// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in -// `axis`. If `keep_dims` is true, the reduced dimensions are -// retained with length 1. +// More formally, let +// +// \\(predictions_i\\) be the predictions for all classes for example `i`, +// \\(targets_i\\) be the target class for example `i`, +// \\(out_i\\) be the output for example `i`, +// +// $$out_i = predictions_{i, targets_i} \in TopKIncludingTies(predictions_i)$$ // // Arguments: -// input: The tensor to reduce. -// axis: The dimensions to reduce. Must be in the range -// `[-rank(input), rank(input))`. +// predictions: A `batch_size` x `classes` tensor. +// targets: A `batch_size` vector of class ids. +// k: Number of top elements to look at for computing precision. // -// Returns The reduced tensor. -func Sum(scope *Scope, input tf.Output, axis tf.Output, optional ...SumAttr) (output tf.Output) { +// Returns Computed Precision at `k` as a `bool Tensor`. +func InTopK(scope *Scope, predictions tf.Output, targets tf.Output, k int64) (precision tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } + attrs := map[string]interface{}{"k": k} opspec := tf.OpSpec{ - Type: "Sum", + Type: "InTopK", Input: []tf.Input{ - input, axis, + predictions, targets, }, Attrs: attrs, } @@ -16100,227 +18686,202 @@ func Sum(scope *Scope, input tf.Output, axis tf.Output, optional ...SumAttr) (ou return op.Output(0) } -// Delete the tensor specified by its handle in the session. -// -// Arguments: -// handle: The handle for a tensor stored in the session state. +// Returns (x - y)(x - y) element-wise. // -// Returns the created operation. -func DeleteSessionTensor(scope *Scope, handle tf.Output) (o *tf.Operation) { +// *NOTE*: `SquaredDifference` supports broadcasting. 
More about broadcasting +// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +func SquaredDifference(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "DeleteSessionTensor", + Type: "SquaredDifference", Input: []tf.Input{ - handle, + x, y, }, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0) } -// L2 Loss. -// -// Computes half the L2 norm of a tensor without the `sqrt`: +// Forwards the input to the output. // -// output = sum(t ** 2) / 2 +// This operator represents the loop termination condition used by the +// "pivot" switches of a loop. // // Arguments: -// t: Typically 2-D, but may have any dimensions. +// input: A boolean scalar, representing the branch predicate of the Switch op. // -// Returns 0-D. -func L2Loss(scope *Scope, t tf.Output) (output tf.Output) { +// Returns The same tensor as `input`. +func LoopCond(scope *Scope, input tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "L2Loss", + Type: "LoopCond", Input: []tf.Input{ - t, + input, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// DenseToSparseSetOperationAttr is an optional argument to DenseToSparseSetOperation. -type DenseToSparseSetOperationAttr func(optionalAttr) - -// DenseToSparseSetOperationValidateIndices sets the optional validate_indices attribute to value. -// If not specified, defaults to true -func DenseToSparseSetOperationValidateIndices(value bool) DenseToSparseSetOperationAttr { - return func(m optionalAttr) { - m["validate_indices"] = value +// Computes the gradient for the inverse of `x` wrt its input. +// +// Specifically, `grad = -dy * y*y`, where `y = 1/x`, and `dy` +// is the corresponding input gradient. 
+func ReciprocalGrad(scope *Scope, y tf.Output, dy tf.Output) (z tf.Output) { + if scope.Err() != nil { + return } + opspec := tf.OpSpec{ + Type: "ReciprocalGrad", + Input: []tf.Input{ + y, dy, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) } -// Applies set operation along last dimension of `Tensor` and `SparseTensor`. -// -// See SetOperationOp::SetOperationFromContext for values of `set_operation`. -// -// Input `set2` is a `SparseTensor` represented by `set2_indices`, `set2_values`, -// and `set2_shape`. For `set2` ranked `n`, 1st `n-1` dimensions must be the same -// as `set1`. Dimension `n` contains values in a set, duplicates are allowed but -// ignored. -// -// If `validate_indices` is `True`, this op validates the order and range of `set2` -// indices. -// -// Output `result` is a `SparseTensor` represented by `result_indices`, -// `result_values`, and `result_shape`. For `set1` and `set2` ranked `n`, this -// has rank `n` and the same 1st `n-1` dimensions as `set1` and `set2`. The `nth` -// dimension contains the result of `set_operation` applied to the corresponding -// `[0...n-1]` dimension of `set`. -// -// Arguments: -// set1: `Tensor` with rank `n`. 1st `n-1` dimensions must be the same as `set2`. -// Dimension `n` contains values in a set, duplicates are allowed but ignored. -// set2_indices: 2D `Tensor`, indices of a `SparseTensor`. Must be in row-major -// order. -// set2_values: 1D `Tensor`, values of a `SparseTensor`. Must be in row-major -// order. -// set2_shape: 1D `Tensor`, shape of a `SparseTensor`. `set2_shape[0...n-1]` must -// be the same as the 1st `n-1` dimensions of `set1`, `result_shape[n]` is the -// max set size across `n-1` dimensions. -// +// Returns the min of x and y (i.e. x < y ? x : y) element-wise. // -// Returns 2D indices of a `SparseTensor`.1D values of a `SparseTensor`.1D `Tensor` shape of a `SparseTensor`. 
`result_shape[0...n-1]` is -// the same as the 1st `n-1` dimensions of `set1` and `set2`, `result_shape[n]` -// is the max result set size across all `0...n-1` dimensions. -func DenseToSparseSetOperation(scope *Scope, set1 tf.Output, set2_indices tf.Output, set2_values tf.Output, set2_shape tf.Output, set_operation string, optional ...DenseToSparseSetOperationAttr) (result_indices tf.Output, result_values tf.Output, result_shape tf.Output) { +// *NOTE*: `Minimum` supports broadcasting. More about broadcasting +// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +func Minimum(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"set_operation": set_operation} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "DenseToSparseSetOperation", + Type: "Minimum", Input: []tf.Input{ - set1, set2_indices, set2_values, set2_shape, + x, y, }, - Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return op.Output(0) } -// Subtracts a value from the current value of a variable. +// Returns the element-wise sum of a list of tensors. // -// Any ReadVariableOp which depends directly or indirectly on this assign is -// guaranteed to see the incremented value or a subsequent newer one. +// `tf.accumulate_n_v2` performs the same operation as `tf.add_n`, but does not +// wait for all of its inputs to be ready before beginning to sum. This can +// save memory if inputs are ready at different times, since minimum temporary +// storage is proportional to the output size rather than the inputs size. // -// Outputs the incremented value, which can be used to totally order the -// increments to this variable. +// Unlike the original `accumulate_n`, `accumulate_n_v2` is differentiable. // -// Arguments: -// resource: handle to the resource in which to store the variable. 
-// value: the value by which the variable will be incremented. +// Returns a `Tensor` of same shape and type as the elements of `inputs`. // -// Returns the created operation. -func AssignSubVariableOp(scope *Scope, resource tf.Output, value tf.Output) (o *tf.Operation) { +// Arguments: +// inputs: A list of `Tensor` objects, each with same shape and type. +// shape: Shape of elements of `inputs`. +func AccumulateNV2(scope *Scope, inputs []tf.Output, shape tf.Shape) (sum tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"shape": shape} opspec := tf.OpSpec{ - Type: "AssignSubVariableOp", + Type: "AccumulateNV2", Input: []tf.Input{ - resource, value, + tf.OutputList(inputs), }, + Attrs: attrs, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0) } -// RestoreAttr is an optional argument to Restore. -type RestoreAttr func(optionalAttr) - -// RestorePreferredShard sets the optional preferred_shard attribute to value. +// Convert the quantized 'input' tensor into a lower-precision 'output', using the // -// value: Index of file to open first if multiple files match -// `file_pattern`. -// If not specified, defaults to -1 -func RestorePreferredShard(value int64) RestoreAttr { - return func(m optionalAttr) { - m["preferred_shard"] = value - } -} - -// Restores a tensor from checkpoint files. +// actual distribution of the values to maximize the usage of the lower bit depth +// and adjusting the output min and max ranges accordingly. // -// Reads a tensor stored in one or several files. If there are several files (for -// instance because a tensor was saved as slices), `file_pattern` may contain -// wildcard symbols (`*` and `?`) in the filename portion only, not in the -// directory portion. +// [input_min, input_max] are scalar floats that specify the range for the float +// interpretation of the 'input' data. 
For example, if input_min is -1.0f and +// input_max is 1.0f, and we are dealing with quint16 quantized data, then a 0 +// value in the 16-bit data should be interpreted as -1.0f, and a 65535 means 1.0f. // -// If a `file_pattern` matches several files, `preferred_shard` can be used to hint -// in which file the requested tensor is likely to be found. This op will first -// open the file at index `preferred_shard` in the list of matching files and try -// to restore tensors from that file. Only if some tensors or tensor slices are -// not found in that first file, then the Op opens all the files. Setting -// `preferred_shard` to match the value passed as the `shard` input -// of a matching `Save` Op may speed up Restore. This attribute only affects -// performance, not correctness. The default value -1 means files are processed in -// order. +// This operator tries to squeeze as much precision as possible into an output with +// a lower bit depth by calculating the actual min and max values found in the +// data. For example, maybe that quint16 input has no values lower than 16,384 and +// none higher than 49,152. That means only half the range is actually needed, all +// the float interpretations are between -0.5f and 0.5f, so if we want to compress +// the data into a quint8 output, we can use that range rather than the theoretical +// -1.0f to 1.0f that is suggested by the input min and max. // -// See also `RestoreSlice`. +// In practice, this is most useful for taking output from operations like +// QuantizedMatMul that can produce higher bit-depth outputs than their inputs and +// may have large potential output ranges, but in practice have a distribution of +// input values that only uses a small fraction of the possible range. By feeding +// that output into this operator, we can reduce it from 32 bits down to 8 with +// minimal loss of accuracy. // -// Arguments: -// file_pattern: Must have a single element. 
The pattern of the files from -// which we read the tensor. -// tensor_name: Must have a single element. The name of the tensor to be -// restored. -// dt: The type of the tensor to be restored. +// Arguments: // -// Returns The restored tensor. -func Restore(scope *Scope, file_pattern tf.Output, tensor_name tf.Output, dt tf.DataType, optional ...RestoreAttr) (tensor tf.Output) { +// input_min: The float value that the minimum quantized input value represents. +// input_max: The float value that the maximum quantized input value represents. +// out_type: The type of the output. Should be a lower bit depth than Tinput. +// +// Returns The float value that the minimum quantized output value represents.The float value that the maximum quantized output value represents. +func QuantizeDownAndShrinkRange(scope *Scope, input tf.Output, input_min tf.Output, input_max tf.Output, out_type tf.DataType) (output tf.Output, output_min tf.Output, output_max tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"dt": dt} - for _, a := range optional { - a(attrs) - } + attrs := map[string]interface{}{"out_type": out_type} opspec := tf.OpSpec{ - Type: "Restore", + Type: "QuantizeDownAndShrinkRange", Input: []tf.Input{ - file_pattern, tensor_name, + input, input_min, input_max, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1), op.Output(2) } -// QuantizedResizeBilinearAttr is an optional argument to QuantizedResizeBilinear. -type QuantizedResizeBilinearAttr func(optionalAttr) +// RandomGammaAttr is an optional argument to RandomGamma. +type RandomGammaAttr func(optionalAttr) -// QuantizedResizeBilinearAlignCorners sets the optional align_corners attribute to value. +// RandomGammaSeed sets the optional seed attribute to value. // -// value: If true, rescale input by (new_height - 1) / (height - 1), which -// exactly aligns the 4 corners of images and resized images. 
If false, rescale -// by new_height / height. Treat similarly the width dimension. -// If not specified, defaults to false -func QuantizedResizeBilinearAlignCorners(value bool) QuantizedResizeBilinearAttr { +// value: If either `seed` or `seed2` are set to be non-zero, the random number +// generator is seeded by the given seed. Otherwise, it is seeded by a +// random seed. +// If not specified, defaults to 0 +func RandomGammaSeed(value int64) RandomGammaAttr { return func(m optionalAttr) { - m["align_corners"] = value + m["seed"] = value } } -// Resize quantized `images` to `size` using quantized bilinear interpolation. -// -// Input images and output images must be quantized types. +// RandomGammaSeed2 sets the optional seed2 attribute to value. // -// Arguments: -// images: 4-D with shape `[batch, height, width, channels]`. -// size: = A 1-D int32 Tensor of 2 elements: `new_height, new_width`. The -// new size for the images. +// value: A second seed to avoid seed collision. +// If not specified, defaults to 0 +func RandomGammaSeed2(value int64) RandomGammaAttr { + return func(m optionalAttr) { + m["seed2"] = value + } +} + +// Outputs random values from the Gamma distribution(s) described by alpha. // +// This op uses the algorithm by Marsaglia et al. to acquire samples via +// transformation-rejection from pairs of uniform and normal random variables. +// See http://dl.acm.org/citation.cfm?id=358414 // +// Arguments: +// shape: 1-D integer tensor. Shape of independent samples to draw from each +// distribution described by the shape parameters given in alpha. +// alpha: A tensor in which each scalar is a "shape" parameter describing the +// associated gamma distribution. // -// Returns 4-D with shape -// `[batch, new_height, new_width, channels]`. 
-func QuantizedResizeBilinear(scope *Scope, images tf.Output, size tf.Output, min tf.Output, max tf.Output, optional ...QuantizedResizeBilinearAttr) (resized_images tf.Output, out_min tf.Output, out_max tf.Output) { +// Returns A tensor with shape `shape + shape(alpha)`. Each slice +// `[:, ..., :, i0, i1, ...iN]` contains the samples drawn for +// `alpha[i0, i1, ...iN]`. The dtype of the output matches the dtype of alpha. +func RandomGamma(scope *Scope, shape tf.Output, alpha tf.Output, optional ...RandomGammaAttr) (output tf.Output) { if scope.Err() != nil { return } @@ -16329,256 +18890,250 @@ func QuantizedResizeBilinear(scope *Scope, images tf.Output, size tf.Output, min a(attrs) } opspec := tf.OpSpec{ - Type: "QuantizedResizeBilinear", + Type: "RandomGamma", Input: []tf.Input{ - images, size, min, max, + shape, alpha, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return op.Output(0) } -// Computes the minimum along segments of a tensor. +// Computes the product along segments of a tensor. // // Read @{$math_ops#segmentation$the section on segmentation} for an explanation of // segments. // -// Computes a tensor such that -// \\(output_i = \min_j(data_j)\\) where `min` is over `j` such -// that `segment_ids[j] == i`. +// This operator is similar to the unsorted segment sum operator found +// [(here)](../../../api_docs/python/math_ops.md#UnsortedSegmentSum). +// Instead of computing the sum over segments, it computes the product of all +// entries belonging to a segment such that: // -// If the min is empty for a given segment ID `i`, `output[i] = 0`. +// \\(output_i = \prod_j data_j\\) where the product is over `j` such +// that `segment_ids[j] == i`. // -//
-// -//
+// If there is no entry for a given segment ID `i`, it outputs 1. // // Arguments: // // segment_ids: A 1-D tensor whose rank is equal to the rank of `data`'s -// first dimension. Values should be sorted and can be repeated. +// first dimension. +// // // Returns Has same shape as data, except for dimension 0 which -// has size `k`, the number of segments. -func SegmentMin(scope *Scope, data tf.Output, segment_ids tf.Output) (output tf.Output) { +// has size `num_segments`. +func UnsortedSegmentProd(scope *Scope, data tf.Output, segment_ids tf.Output, num_segments tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "SegmentMin", + Type: "UnsortedSegmentProd", Input: []tf.Input{ - data, segment_ids, + data, segment_ids, num_segments, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// SdcaOptimizerAttr is an optional argument to SdcaOptimizer. -type SdcaOptimizerAttr func(optionalAttr) +// RandomUniformIntAttr is an optional argument to RandomUniformInt. +type RandomUniformIntAttr func(optionalAttr) -// SdcaOptimizerAdaptative sets the optional adaptative attribute to value. +// RandomUniformIntSeed sets the optional seed attribute to value. // -// value: Whether to use Adapative SDCA for the inner loop. -// If not specified, defaults to false -func SdcaOptimizerAdaptative(value bool) SdcaOptimizerAttr { +// value: If either `seed` or `seed2` are set to be non-zero, the random number +// generator is seeded by the given seed. Otherwise, it is seeded by a +// random seed. +// If not specified, defaults to 0 +func RandomUniformIntSeed(value int64) RandomUniformIntAttr { return func(m optionalAttr) { - m["adaptative"] = value + m["seed"] = value } } -// Distributed version of Stochastic Dual Coordinate Ascent (SDCA) optimizer for -// -// linear models with L1 + L2 regularization. As global optimization objective is -// strongly-convex, the optimizer optimizes the dual objective at each step. 
The -// optimizer applies each update one example at a time. Examples are sampled -// uniformly, and the optimizer is learning rate free and enjoys linear convergence -// rate. -// -// [Proximal Stochastic Dual Coordinate Ascent](http://arxiv.org/pdf/1211.2717v1.pdf).
-// Shai Shalev-Shwartz, Tong Zhang. 2012 +// RandomUniformIntSeed2 sets the optional seed2 attribute to value. // -// $$Loss Objective = \sum f_{i} (wx_{i}) + (l2 / 2) * |w|^2 + l1 * |w|$$ +// value: A second seed to avoid seed collision. +// If not specified, defaults to 0 +func RandomUniformIntSeed2(value int64) RandomUniformIntAttr { + return func(m optionalAttr) { + m["seed2"] = value + } +} + +// Outputs random integers from a uniform distribution. // -// [Adding vs. Averaging in Distributed Primal-Dual Optimization](http://arxiv.org/abs/1502.03508).
-// Chenxin Ma, Virginia Smith, Martin Jaggi, Michael I. Jordan, -// Peter Richtarik, Martin Takac. 2015 +// The generated values are uniform integers in the range `[minval, maxval)`. +// The lower bound `minval` is included in the range, while the upper bound +// `maxval` is excluded. // -// [Stochastic Dual Coordinate Ascent with Adaptive Probabilities](https://arxiv.org/abs/1502.08053).
-// Dominik Csiba, Zheng Qu, Peter Richtarik. 2015 +// The random integers are slightly biased unless `maxval - minval` is an exact +// power of two. The bias is small for values of `maxval - minval` significantly +// smaller than the range of the output (either `2^32` or `2^64`). // // Arguments: -// sparse_example_indices: a list of vectors which contain example indices. -// sparse_feature_indices: a list of vectors which contain feature indices. -// sparse_feature_values: a list of vectors which contains feature value -// associated with each feature group. -// dense_features: a list of matrices which contains the dense feature values. -// example_weights: a vector which contains the weight associated with each -// example. -// example_labels: a vector which contains the label/target associated with each -// example. -// sparse_indices: a list of vectors where each value is the indices which has -// corresponding weights in sparse_weights. This field maybe omitted for the -// dense approach. -// sparse_weights: a list of vectors where each value is the weight associated with -// a sparse feature group. -// dense_weights: a list of vectors where the values are the weights associated -// with a dense feature group. -// example_state_data: a list of vectors containing the example state data. -// loss_type: Type of the primal loss. Currently SdcaSolver supports logistic, -// squared and hinge losses. -// l1: Symmetric l1 regularization strength. -// l2: Symmetric l2 regularization strength. -// num_loss_partitions: Number of partitions of the global loss function. -// num_inner_iterations: Number of iterations per mini-batch. +// shape: The shape of the output tensor. +// minval: 0-D. Inclusive lower bound on the generated integers. +// maxval: 0-D. Exclusive upper bound on the generated integers. 
// -// Returns a list of vectors containing the updated example state -// data.a list of vectors where each value is the delta -// weights associated with a sparse feature group.a list of vectors where the values are the delta -// weights associated with a dense feature group. -func SdcaOptimizer(scope *Scope, sparse_example_indices []tf.Output, sparse_feature_indices []tf.Output, sparse_feature_values []tf.Output, dense_features []tf.Output, example_weights tf.Output, example_labels tf.Output, sparse_indices []tf.Output, sparse_weights []tf.Output, dense_weights []tf.Output, example_state_data tf.Output, loss_type string, l1 float32, l2 float32, num_loss_partitions int64, num_inner_iterations int64, optional ...SdcaOptimizerAttr) (out_example_state_data tf.Output, out_delta_sparse_weights []tf.Output, out_delta_dense_weights []tf.Output) { +// Returns A tensor of the specified shape filled with uniform random integers. +func RandomUniformInt(scope *Scope, shape tf.Output, minval tf.Output, maxval tf.Output, optional ...RandomUniformIntAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"loss_type": loss_type, "l1": l1, "l2": l2, "num_loss_partitions": num_loss_partitions, "num_inner_iterations": num_inner_iterations} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "SdcaOptimizer", + Type: "RandomUniformInt", Input: []tf.Input{ - tf.OutputList(sparse_example_indices), tf.OutputList(sparse_feature_indices), tf.OutputList(sparse_feature_values), tf.OutputList(dense_features), example_weights, example_labels, tf.OutputList(sparse_indices), tf.OutputList(sparse_weights), tf.OutputList(dense_weights), example_state_data, + shape, minval, maxval, }, Attrs: attrs, } op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - out_example_state_data = op.Output(idx) - if out_delta_sparse_weights, idx, err = makeOutputList(op, idx, 
"out_delta_sparse_weights"); err != nil { - scope.UpdateErr("SdcaOptimizer", err) - return - } - if out_delta_dense_weights, idx, err = makeOutputList(op, idx, "out_delta_dense_weights"); err != nil { - scope.UpdateErr("SdcaOptimizer", err) - return - } - return out_example_state_data, out_delta_sparse_weights, out_delta_dense_weights + return op.Output(0) } -// SparseMatMulAttr is an optional argument to SparseMatMul. -type SparseMatMulAttr func(optionalAttr) - -// SparseMatMulTransposeA sets the optional transpose_a attribute to value. -// If not specified, defaults to false -func SparseMatMulTransposeA(value bool) SparseMatMulAttr { - return func(m optionalAttr) { - m["transpose_a"] = value - } -} +// SkipgramAttr is an optional argument to Skipgram. +type SkipgramAttr func(optionalAttr) -// SparseMatMulTransposeB sets the optional transpose_b attribute to value. -// If not specified, defaults to false -func SparseMatMulTransposeB(value bool) SparseMatMulAttr { +// SkipgramWindowSize sets the optional window_size attribute to value. +// +// value: The number of words to predict to the left and right of the target. +// If not specified, defaults to 5 +func SkipgramWindowSize(value int64) SkipgramAttr { return func(m optionalAttr) { - m["transpose_b"] = value + m["window_size"] = value } } -// SparseMatMulAIsSparse sets the optional a_is_sparse attribute to value. -// If not specified, defaults to false -func SparseMatMulAIsSparse(value bool) SparseMatMulAttr { +// SkipgramMinCount sets the optional min_count attribute to value. +// +// value: The minimum number of word occurrences for it to be included in the +// vocabulary. +// If not specified, defaults to 5 +func SkipgramMinCount(value int64) SkipgramAttr { return func(m optionalAttr) { - m["a_is_sparse"] = value + m["min_count"] = value } } -// SparseMatMulBIsSparse sets the optional b_is_sparse attribute to value. 
-// If not specified, defaults to false -func SparseMatMulBIsSparse(value bool) SparseMatMulAttr { +// SkipgramSubsample sets the optional subsample attribute to value. +// +// value: Threshold for word occurrence. Words that appear with higher +// frequency will be randomly down-sampled. Set to 0 to disable. +// If not specified, defaults to 0.001 +func SkipgramSubsample(value float32) SkipgramAttr { return func(m optionalAttr) { - m["b_is_sparse"] = value + m["subsample"] = value } } -// Multiply matrix "a" by matrix "b". +// Parses a text file and creates a batch of examples. // -// The inputs must be two-dimensional matrices and the inner dimension of "a" must -// match the outer dimension of "b". This op is optimized for the case where at -// least one of "a" or "b" is sparse. The breakeven for using this versus a dense -// matrix multiply on one platform was 30% zero values in the sparse matrix. +// DEPRECATED at GraphDef version 19: Moving word2vec into tensorflow_models/tutorials and deprecating its ops here as a result // -// The gradient computation of this operation will only take advantage of sparsity -// in the input gradient when that gradient comes from a Relu. -func SparseMatMul(scope *Scope, a tf.Output, b tf.Output, optional ...SparseMatMulAttr) (product tf.Output) { +// Arguments: +// filename: The corpus's text file name. +// batch_size: The size of produced batch. +// +// Returns A vector of words in the corpus.Frequencies of words. Sorted in the non-ascending order.Number of words per epoch in the data file.The current epoch number.The total number of words processed so far.A vector of word ids.A vector of word ids. 
+func Skipgram(scope *Scope, filename string, batch_size int64, optional ...SkipgramAttr) (vocab_word tf.Output, vocab_freq tf.Output, words_per_epoch tf.Output, current_epoch tf.Output, total_words_processed tf.Output, examples tf.Output, labels tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"filename": filename, "batch_size": batch_size} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "SparseMatMul", - Input: []tf.Input{ - a, b, - }, + Type: "Skipgram", + Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4), op.Output(5), op.Output(6) } -// Computes the power of one value to another. +// StringToNumberAttr is an optional argument to StringToNumber. +type StringToNumberAttr func(optionalAttr) + +// StringToNumberOutType sets the optional out_type attribute to value. // -// Given a tensor `x` and a tensor `y`, this operation computes \\(x^y\\) for -// corresponding elements in `x` and `y`. For example: +// value: The numeric type to interpret each string in `string_tensor` as. +// If not specified, defaults to DT_FLOAT +func StringToNumberOutType(value tf.DataType) StringToNumberAttr { + return func(m optionalAttr) { + m["out_type"] = value + } +} + +// Converts each string in the input Tensor to the specified numeric type. // -// ``` -// # tensor 'x' is [[2, 2]], [3, 3]] -// # tensor 'y' is [[8, 16], [2, 3]] -// tf.pow(x, y) ==> [[256, 65536], [9, 27]] -// ``` -func Pow(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { +// (Note that int32 overflow results in an error while float overflow +// results in a rounded value.) +// +// Returns A Tensor of the same shape as the input `string_tensor`. 
+func StringToNumber(scope *Scope, string_tensor tf.Output, optional ...StringToNumberAttr) (output tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "Pow", + Type: "StringToNumber", Input: []tf.Input{ - x, y, + string_tensor, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// ShapeAttr is an optional argument to Shape. -type ShapeAttr func(optionalAttr) +// ResourceApplyFtrlV2Attr is an optional argument to ResourceApplyFtrlV2. +type ResourceApplyFtrlV2Attr func(optionalAttr) -// ShapeOutType sets the optional out_type attribute to value. -// If not specified, defaults to DT_INT32 -func ShapeOutType(value tf.DataType) ShapeAttr { +// ResourceApplyFtrlV2UseLocking sets the optional use_locking attribute to value. +// +// value: If `True`, updating of the var and accum tensors will be protected +// by a lock; otherwise the behavior is undefined, but may exhibit less +// contention. +// If not specified, defaults to false +func ResourceApplyFtrlV2UseLocking(value bool) ResourceApplyFtrlV2Attr { return func(m optionalAttr) { - m["out_type"] = value + m["use_locking"] = value } } -// Returns the shape of a tensor. +// Update '*var' according to the Ftrl-proximal scheme. // -// This operation returns a 1-D integer tensor representing the shape of `input`. +// grad_with_shrinkage = grad + 2 * l2_shrinkage * var +// accum_new = accum + grad_with_shrinkage * grad_with_shrinkage +// linear += grad_with_shrinkage + +// (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var +// quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2 +// var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0 +// accum = accum_new // -// For example: +// Arguments: +// var_: Should be from a Variable(). +// accum: Should be from a Variable(). +// linear: Should be from a Variable(). +// grad: The gradient. +// lr: Scaling factor. 
Must be a scalar. +// l1: L1 regulariation. Must be a scalar. +// l2: L2 shrinkage regulariation. Must be a scalar. // -// ``` -// # 't' is [[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]]] -// shape(t) ==> [2, 2, 3] -// ``` -func Shape(scope *Scope, input tf.Output, optional ...ShapeAttr) (output tf.Output) { +// lr_power: Scaling factor. Must be a scalar. +// +// Returns the created operation. +func ResourceApplyFtrlV2(scope *Scope, var_ tf.Output, accum tf.Output, linear tf.Output, grad tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, l2_shrinkage tf.Output, lr_power tf.Output, optional ...ResourceApplyFtrlV2Attr) (o *tf.Operation) { if scope.Err() != nil { return } @@ -16587,92 +19142,114 @@ func Shape(scope *Scope, input tf.Output, optional ...ShapeAttr) (output tf.Outp a(attrs) } opspec := tf.OpSpec{ - Type: "Shape", + Type: "ResourceApplyFtrlV2", Input: []tf.Input{ - input, + var_, accum, linear, grad, lr, l1, l2, l2_shrinkage, lr_power, }, Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } -// Computes fingerprints of the input strings. +// TruncatedNormalAttr is an optional argument to TruncatedNormal. +type TruncatedNormalAttr func(optionalAttr) + +// TruncatedNormalSeed sets the optional seed attribute to value. +// +// value: If either `seed` or `seed2` are set to be non-zero, the random number +// generator is seeded by the given seed. Otherwise, it is seeded by a +// random seed. +// If not specified, defaults to 0 +func TruncatedNormalSeed(value int64) TruncatedNormalAttr { + return func(m optionalAttr) { + m["seed"] = value + } +} + +// TruncatedNormalSeed2 sets the optional seed2 attribute to value. +// +// value: A second seed to avoid seed collision. +// If not specified, defaults to 0 +func TruncatedNormalSeed2(value int64) TruncatedNormalAttr { + return func(m optionalAttr) { + m["seed2"] = value + } +} + +// Outputs random values from a truncated normal distribution. 
+// +// The generated values follow a normal distribution with mean 0 and standard +// deviation 1, except that values whose magnitude is more than 2 standard +// deviations from the mean are dropped and re-picked. // // Arguments: -// input: vector of strings to compute fingerprints on. +// shape: The shape of the output tensor. +// dtype: The type of the output. // -// Returns a (N,2) shaped matrix where N is the number of elements in the input -// vector. Each row contains the low and high parts of the fingerprint. -func SdcaFprint(scope *Scope, input tf.Output) (output tf.Output) { +// Returns A tensor of the specified shape filled with random truncated normal +// values. +func TruncatedNormal(scope *Scope, shape tf.Output, dtype tf.DataType, optional ...TruncatedNormalAttr) (output tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"dtype": dtype} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "SdcaFprint", + Type: "TruncatedNormal", Input: []tf.Input{ - input, + shape, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// RandomPoissonV2Attr is an optional argument to RandomPoissonV2. -type RandomPoissonV2Attr func(optionalAttr) +// RandomShuffleAttr is an optional argument to RandomShuffle. +type RandomShuffleAttr func(optionalAttr) -// RandomPoissonV2Seed sets the optional seed attribute to value. +// RandomShuffleSeed sets the optional seed attribute to value. // // value: If either `seed` or `seed2` are set to be non-zero, the random number // generator is seeded by the given seed. Otherwise, it is seeded by a // random seed. // If not specified, defaults to 0 -func RandomPoissonV2Seed(value int64) RandomPoissonV2Attr { +func RandomShuffleSeed(value int64) RandomShuffleAttr { return func(m optionalAttr) { m["seed"] = value } } -// RandomPoissonV2Seed2 sets the optional seed2 attribute to value. +// RandomShuffleSeed2 sets the optional seed2 attribute to value. 
// // value: A second seed to avoid seed collision. // If not specified, defaults to 0 -func RandomPoissonV2Seed2(value int64) RandomPoissonV2Attr { +func RandomShuffleSeed2(value int64) RandomShuffleAttr { return func(m optionalAttr) { m["seed2"] = value } } -// RandomPoissonV2Dtype sets the optional dtype attribute to value. -// If not specified, defaults to DT_INT64 -func RandomPoissonV2Dtype(value tf.DataType) RandomPoissonV2Attr { - return func(m optionalAttr) { - m["dtype"] = value - } -} - -// Outputs random values from the Poisson distribution(s) described by rate. +// Randomly shuffles a tensor along its first dimension. // -// This op uses two algorithms, depending on rate. If rate >= 10, then -// the algorithm by Hormann is used to acquire samples via -// transformation-rejection. -// See http://www.sciencedirect.com/science/article/pii/0167668793909974. +// The tensor is shuffled along dimension 0, such that each `value[j]` is mapped +// to one and only one `output[i]`. For example, a mapping that might occur for a +// 3x2 tensor is: // -// Otherwise, Knuth's algorithm is used to acquire samples via multiplying uniform -// random variables. -// See Donald E. Knuth (1969). Seminumerical Algorithms. The Art of Computer -// Programming, Volume 2. Addison Wesley +// ``` +// [[1, 2], [[5, 6], +// [3, 4], ==> [1, 2], +// [5, 6]] [3, 4]] +// ``` // // Arguments: -// shape: 1-D integer tensor. Shape of independent samples to draw from each -// distribution described by the shape parameters given in rate. -// rate: A tensor in which each scalar is a "rate" parameter describing the -// associated poisson distribution. +// value: The tensor to be shuffled. // -// Returns A tensor with shape `shape + shape(rate)`. Each slice -// `[:, ..., :, i0, i1, ...iN]` contains the samples drawn for -// `rate[i0, i1, ...iN]`. 
-func RandomPoissonV2(scope *Scope, shape tf.Output, rate tf.Output, optional ...RandomPoissonV2Attr) (output tf.Output) { +// Returns A tensor of same shape and type as `value`, shuffled along its first +// dimension. +func RandomShuffle(scope *Scope, value tf.Output, optional ...RandomShuffleAttr) (output tf.Output) { if scope.Err() != nil { return } @@ -16681,9 +19258,9 @@ func RandomPoissonV2(scope *Scope, shape tf.Output, rate tf.Output, optional ... a(attrs) } opspec := tf.OpSpec{ - Type: "RandomPoissonV2", + Type: "RandomShuffle", Input: []tf.Input{ - shape, rate, + value, }, Attrs: attrs, } @@ -16691,70 +19268,99 @@ func RandomPoissonV2(scope *Scope, shape tf.Output, rate tf.Output, optional ... return op.Output(0) } -// MatrixTriangularSolveAttr is an optional argument to MatrixTriangularSolve. -type MatrixTriangularSolveAttr func(optionalAttr) +// OrderedMapIncompleteSizeAttr is an optional argument to OrderedMapIncompleteSize. +type OrderedMapIncompleteSizeAttr func(optionalAttr) + +// OrderedMapIncompleteSizeCapacity sets the optional capacity attribute to value. +// If not specified, defaults to 0 +// +// REQUIRES: value >= 0 +func OrderedMapIncompleteSizeCapacity(value int64) OrderedMapIncompleteSizeAttr { + return func(m optionalAttr) { + m["capacity"] = value + } +} + +// OrderedMapIncompleteSizeMemoryLimit sets the optional memory_limit attribute to value. +// If not specified, defaults to 0 +// +// REQUIRES: value >= 0 +func OrderedMapIncompleteSizeMemoryLimit(value int64) OrderedMapIncompleteSizeAttr { + return func(m optionalAttr) { + m["memory_limit"] = value + } +} + +// OrderedMapIncompleteSizeContainer sets the optional container attribute to value. +// If not specified, defaults to "" +func OrderedMapIncompleteSizeContainer(value string) OrderedMapIncompleteSizeAttr { + return func(m optionalAttr) { + m["container"] = value + } +} + +// OrderedMapIncompleteSizeSharedName sets the optional shared_name attribute to value. 
+// If not specified, defaults to "" +func OrderedMapIncompleteSizeSharedName(value string) OrderedMapIncompleteSizeAttr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// Op returns the number of incomplete elements in the underlying container. +func OrderedMapIncompleteSize(scope *Scope, dtypes []tf.DataType, optional ...OrderedMapIncompleteSizeAttr) (size tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"dtypes": dtypes} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "OrderedMapIncompleteSize", + + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// DecodeRawAttr is an optional argument to DecodeRaw. +type DecodeRawAttr func(optionalAttr) -// MatrixTriangularSolveLower sets the optional lower attribute to value. +// DecodeRawLittleEndian sets the optional little_endian attribute to value. // -// value: Boolean indicating whether the innermost matrices in `matrix` are -// lower or upper triangular. +// value: Whether the input `bytes` are in little-endian order. +// Ignored for `out_type` values that are stored in a single byte like +// `uint8`. // If not specified, defaults to true -func MatrixTriangularSolveLower(value bool) MatrixTriangularSolveAttr { - return func(m optionalAttr) { - m["lower"] = value - } -} - -// MatrixTriangularSolveAdjoint sets the optional adjoint attribute to value. -// -// value: Boolean indicating whether to solve with `matrix` or its (block-wise) -// adjoint. -// -// @compatibility(numpy) -// Equivalent to np.linalg.triangular_solve -// @end_compatibility -// If not specified, defaults to false -func MatrixTriangularSolveAdjoint(value bool) MatrixTriangularSolveAttr { +func DecodeRawLittleEndian(value bool) DecodeRawAttr { return func(m optionalAttr) { - m["adjoint"] = value + m["little_endian"] = value } } -// Solves systems of linear equations with upper or lower triangular matrices by -// -// backsubstitution. 
-// -// `matrix` is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions form -// square matrices. If `lower` is `True` then the strictly upper triangular part -// of each inner-most matrix is assumed to be zero and not accessed. -// If `lower` is False then the strictly lower triangular part of each inner-most -// matrix is assumed to be zero and not accessed. -// `rhs` is a tensor of shape `[..., M, K]`. -// -// The output is a tensor of shape `[..., M, K]`. If `adjoint` is -// `True` then the innermost matrices in `output` satisfy matrix equations -// `matrix[..., :, :] * output[..., :, :] = rhs[..., :, :]`. -// If `adjoint` is `False` then the strictly then the innermost matrices in -// `output` satisfy matrix equations -// `adjoint(matrix[..., i, k]) * output[..., k, j] = rhs[..., i, j]`. +// Reinterpret the bytes of a string as a vector of numbers. // // Arguments: -// matrix: Shape is `[..., M, M]`. -// rhs: Shape is `[..., M, K]`. +// bytes: All the elements must have the same length. // -// Returns Shape is `[..., M, K]`. -func MatrixTriangularSolve(scope *Scope, matrix tf.Output, rhs tf.Output, optional ...MatrixTriangularSolveAttr) (output tf.Output) { +// +// Returns A Tensor with one more dimension than the input `bytes`. The +// added dimension will have size equal to the length of the elements +// of `bytes` divided by the number of bytes to represent `out_type`. 
+func DecodeRaw(scope *Scope, bytes tf.Output, out_type tf.DataType, optional ...DecodeRawAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"out_type": out_type} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "MatrixTriangularSolve", + Type: "DecodeRaw", Input: []tf.Input{ - matrix, rhs, + bytes, }, Attrs: attrs, } @@ -16762,350 +19368,436 @@ func MatrixTriangularSolve(scope *Scope, matrix tf.Output, rhs tf.Output, option return op.Output(0) } -// Computes inverse hyperbolic sine of x element-wise. -func Asinh(scope *Scope, x tf.Output) (y tf.Output) { +// Copy a tensor setting everything outside a central band in each innermost matrix +// +// to zero. +// +// The `band` part is computed as follows: +// Assume `input` has `k` dimensions `[I, J, K, ..., M, N]`, then the output is a +// tensor with the same shape where +// +// `band[i, j, k, ..., m, n] = in_band(m, n) * input[i, j, k, ..., m, n]`. +// +// The indicator function +// +// `in_band(m, n) = (num_lower < 0 || (m-n) <= num_lower)) && +// (num_upper < 0 || (n-m) <= num_upper)`. +// +// For example: +// +// ``` +// # if 'input' is [[ 0, 1, 2, 3] +// [-1, 0, 1, 2] +// [-2, -1, 0, 1] +// [-3, -2, -1, 0]], +// +// tf.matrix_band_part(input, 1, -1) ==> [[ 0, 1, 2, 3] +// [-1, 0, 1, 2] +// [ 0, -1, 0, 1] +// [ 0, 0, -1, 0]], +// +// tf.matrix_band_part(input, 2, 1) ==> [[ 0, 1, 0, 0] +// [-1, 0, 1, 0] +// [-2, -1, 0, 1] +// [ 0, -2, -1, 0]] +// ``` +// +// Useful special cases: +// +// ``` +// tf.matrix_band_part(input, 0, -1) ==> Upper triangular part. +// tf.matrix_band_part(input, -1, 0) ==> Lower triangular part. +// tf.matrix_band_part(input, 0, 0) ==> Diagonal. +// ``` +// +// Arguments: +// input: Rank `k` tensor. +// num_lower: 0-D tensor. Number of subdiagonals to keep. If negative, keep entire +// lower triangle. +// num_upper: 0-D tensor. Number of superdiagonals to keep. 
If negative, keep +// entire upper triangle. +// +// Returns Rank `k` tensor of the same shape as input. The extracted banded tensor. +func MatrixBandPart(scope *Scope, input tf.Output, num_lower tf.Output, num_upper tf.Output) (band tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Asinh", + Type: "MatrixBandPart", Input: []tf.Input{ - x, + input, num_lower, num_upper, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Creates a dataset with a range of values. Corresponds to python's xrange. +// Counts the number of occurrences of each value in an integer array. // -// Arguments: -// start: corresponds to start in python's xrange(). -// stop: corresponds to stop in python's xrange(). -// step: corresponds to step in python's xrange(). +// Outputs a vector with length `size` and the same dtype as `weights`. If +// `weights` are empty, then index `i` stores the number of times the value `i` is +// counted in `arr`. If `weights` are non-empty, then index `i` stores the sum of +// the value in `weights` at each index where the corresponding value in `arr` is +// `i`. // +// Values in `arr` outside of the range [0, size) are ignored. // -func RangeDataset(scope *Scope, start tf.Output, stop tf.Output, step tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { +// Arguments: +// arr: int32 `Tensor`. +// size: non-negative int32 scalar `Tensor`. +// weights: is an int32, int64, float32, or float64 `Tensor` with the same +// shape as `arr`, or a length-0 `Tensor`, in which case it acts as all weights +// equal to 1. +// +// Returns 1D `Tensor` with length equal to `size`. The counts or summed weights for +// each value in the range [0, size). 
+func Bincount(scope *Scope, arr tf.Output, size tf.Output, weights tf.Output) (bins tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} opspec := tf.OpSpec{ - Type: "RangeDataset", + Type: "Bincount", Input: []tf.Input{ - start, stop, step, + arr, size, weights, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// DepthwiseConv2dNativeBackpropInputAttr is an optional argument to DepthwiseConv2dNativeBackpropInput. -type DepthwiseConv2dNativeBackpropInputAttr func(optionalAttr) +// CumsumAttr is an optional argument to Cumsum. +type CumsumAttr func(optionalAttr) -// DepthwiseConv2dNativeBackpropInputDataFormat sets the optional data_format attribute to value. +// CumsumExclusive sets the optional exclusive attribute to value. // -// value: Specify the data format of the input and output data. With the -// default format "NHWC", the data is stored in the order of: -// [batch, height, width, channels]. -// Alternatively, the format could be "NCHW", the data storage order of: -// [batch, channels, height, width]. -// If not specified, defaults to "NHWC" -func DepthwiseConv2dNativeBackpropInputDataFormat(value string) DepthwiseConv2dNativeBackpropInputAttr { +// value: If `True`, perform exclusive cumsum. +// If not specified, defaults to false +func CumsumExclusive(value bool) CumsumAttr { return func(m optionalAttr) { - m["data_format"] = value + m["exclusive"] = value } } -// DepthwiseConv2dNativeBackpropInputDilations sets the optional dilations attribute to value. +// CumsumReverse sets the optional reverse attribute to value. // -// value: 1-D tensor of length 4. The dilation factor for each dimension of -// `input`. If set to k > 1, there will be k-1 skipped cells between each filter -// element on that dimension. The dimension order is determined by the value of -// `data_format`, see above for details. 
Dilations in the batch and depth -// dimensions must be 1. -// If not specified, defaults to -func DepthwiseConv2dNativeBackpropInputDilations(value []int64) DepthwiseConv2dNativeBackpropInputAttr { +// value: A `bool` (default: False). +// If not specified, defaults to false +func CumsumReverse(value bool) CumsumAttr { return func(m optionalAttr) { - m["dilations"] = value + m["reverse"] = value } } -// Computes the gradients of depthwise convolution with respect to the input. +// Compute the cumulative sum of the tensor `x` along `axis`. // -// Arguments: -// input_sizes: An integer vector representing the shape of `input`, based -// on `data_format`. For example, if `data_format` is 'NHWC' then -// `input` is a 4-D `[batch, height, width, channels]` tensor. -// filter: 4-D with shape -// `[filter_height, filter_width, in_channels, depthwise_multiplier]`. -// out_backprop: 4-D with shape based on `data_format`. -// For example, if `data_format` is 'NHWC' then -// out_backprop shape is `[batch, out_height, out_width, out_channels]`. -// Gradients w.r.t. the output of the convolution. -// strides: The stride of the sliding window for each dimension of the input -// of the convolution. -// padding: The type of padding algorithm to use. +// By default, this op performs an inclusive cumsum, which means that the first +// element of the input is identical to the first element of the output: // -// Returns 4-D with shape according to `data_format`. For example, if -// `data_format` is 'NHWC', output shape is `[batch, in_height, -// in_width, in_channels]`. Gradient w.r.t. the input of the -// convolution. 
-func DepthwiseConv2dNativeBackpropInput(scope *Scope, input_sizes tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...DepthwiseConv2dNativeBackpropInputAttr) (output tf.Output) { +// ```python +// tf.cumsum([a, b, c]) # => [a, a + b, a + b + c] +// ``` +// +// By setting the `exclusive` kwarg to `True`, an exclusive cumsum is +// performed instead: +// +// ```python +// tf.cumsum([a, b, c], exclusive=True) # => [0, a, a + b] +// ``` +// +// By setting the `reverse` kwarg to `True`, the cumsum is performed in the +// opposite direction: +// +// ```python +// tf.cumsum([a, b, c], reverse=True) # => [a + b + c, b + c, c] +// ``` +// +// This is more efficient than using separate `tf.reverse` ops. +// +// The `reverse` and `exclusive` kwargs can also be combined: +// +// ```python +// tf.cumsum([a, b, c], exclusive=True, reverse=True) # => [b + c, c, 0] +// ``` +// +// Arguments: +// x: A `Tensor`. Must be one of the following types: `float32`, `float64`, +// `int64`, `int32`, `uint8`, `uint16`, `int16`, `int8`, `complex64`, +// `complex128`, `qint8`, `quint8`, `qint32`, `half`. +// axis: A `Tensor` of type `int32` (default: 0). Must be in the range +// `[-rank(x), rank(x))`. +func Cumsum(scope *Scope, x tf.Output, axis tf.Output, optional ...CumsumAttr) (out tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"strides": strides, "padding": padding} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "DepthwiseConv2dNativeBackpropInput", + Type: "Cumsum", Input: []tf.Input{ - input_sizes, filter, out_backprop, + x, axis, }, Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0) + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// CumprodAttr is an optional argument to Cumprod. +type CumprodAttr func(optionalAttr) + +// CumprodExclusive sets the optional exclusive attribute to value. 
+// +// value: If `True`, perform exclusive cumprod. +// If not specified, defaults to false +func CumprodExclusive(value bool) CumprodAttr { + return func(m optionalAttr) { + m["exclusive"] = value + } } -// Adds sparse updates to the variable referenced by `resource`. +// CumprodReverse sets the optional reverse attribute to value. // -// This operation computes +// value: A `bool` (default: False). +// If not specified, defaults to false +func CumprodReverse(value bool) CumprodAttr { + return func(m optionalAttr) { + m["reverse"] = value + } +} + +// Compute the cumulative product of the tensor `x` along `axis`. // -// # Scalar indices -// ref[indices, ...] += updates[...] +// By default, this op performs an inclusive cumprod, which means that the first +// element of the input is identical to the first element of the output: // -// # Vector indices (for each i) -// ref[indices[i], ...] += updates[i, ...] +// ```python +// tf.cumprod([a, b, c]) # => [a, a * b, a * b * c] +// ``` // -// # High rank indices (for each i, ..., j) -// ref[indices[i, ..., j], ...] += updates[i, ..., j, ...] +// By setting the `exclusive` kwarg to `True`, an exclusive cumprod is +// performed instead: // -// Duplicate entries are handled correctly: if multiple `indices` reference -// the same location, their contributions add. +// ```python +// tf.cumprod([a, b, c], exclusive=True) # => [1, a, a * b] +// ``` // -// Requires `updates.shape = indices.shape + ref.shape[1:]`. +// By setting the `reverse` kwarg to `True`, the cumprod is performed in the +// opposite direction: // -//
-// -//
+// ```python +// tf.cumprod([a, b, c], reverse=True) # => [a * b * c, b * c, c] +// ``` // -// Arguments: -// resource: Should be from a `Variable` node. -// indices: A tensor of indices into the first dimension of `ref`. -// updates: A tensor of updated values to add to `ref`. +// This is more efficient than using separate `tf.reverse` ops. // -// Returns the created operation. -func ResourceScatterAdd(scope *Scope, resource tf.Output, indices tf.Output, updates tf.Output) (o *tf.Operation) { +// The `reverse` and `exclusive` kwargs can also be combined: +// +// ```python +// tf.cumprod([a, b, c], exclusive=True, reverse=True) # => [b * c, c, 1] +// ``` +// +// Arguments: +// x: A `Tensor`. Must be one of the following types: `float32`, `float64`, +// `int64`, `int32`, `uint8`, `uint16`, `int16`, `int8`, `complex64`, +// `complex128`, `qint8`, `quint8`, `qint32`, `half`. +// axis: A `Tensor` of type `int32` (default: 0). Must be in the range +// `[-rank(x), rank(x))`. +func Cumprod(scope *Scope, x tf.Output, axis tf.Output, optional ...CumprodAttr) (out tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "ResourceScatterAdd", + Type: "Cumprod", Input: []tf.Input{ - resource, indices, updates, + x, axis, }, + Attrs: attrs, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0) } -// Says whether the targets are in the top `K` predictions. +// QuantizedMatMulAttr is an optional argument to QuantizedMatMul. +type QuantizedMatMulAttr func(optionalAttr) + +// QuantizedMatMulToutput sets the optional Toutput attribute to value. +// If not specified, defaults to DT_QINT32 +func QuantizedMatMulToutput(value tf.DataType) QuantizedMatMulAttr { + return func(m optionalAttr) { + m["Toutput"] = value + } +} + +// QuantizedMatMulTransposeA sets the optional transpose_a attribute to value. 
// -// This outputs a `batch_size` bool array, an entry `out[i]` is `true` if the -// prediction for the target class is among the top `k` predictions among -// all predictions for example `i`. Note that the behavior of `InTopK` differs -// from the `TopK` op in its handling of ties; if multiple classes have the -// same prediction value and straddle the top-`k` boundary, all of those -// classes are considered to be in the top `k`. +// value: If true, `a` is transposed before multiplication. +// If not specified, defaults to false +func QuantizedMatMulTransposeA(value bool) QuantizedMatMulAttr { + return func(m optionalAttr) { + m["transpose_a"] = value + } +} + +// QuantizedMatMulTransposeB sets the optional transpose_b attribute to value. // -// More formally, let +// value: If true, `b` is transposed before multiplication. +// If not specified, defaults to false +func QuantizedMatMulTransposeB(value bool) QuantizedMatMulAttr { + return func(m optionalAttr) { + m["transpose_b"] = value + } +} + +// QuantizedMatMulTactivation sets the optional Tactivation attribute to value. // -// \\(predictions_i\\) be the predictions for all classes for example `i`, -// \\(targets_i\\) be the target class for example `i`, -// \\(out_i\\) be the output for example `i`, +// value: The type of output produced by activation function +// following this operation. +// If not specified, defaults to DT_QUINT8 +func QuantizedMatMulTactivation(value tf.DataType) QuantizedMatMulAttr { + return func(m optionalAttr) { + m["Tactivation"] = value + } +} + +// Perform a quantized matrix multiplication of `a` by the matrix `b`. // -// $$out_i = predictions_{i, targets_i} \in TopKIncludingTies(predictions_i)$$ +// The inputs must be two-dimensional matrices and the inner dimension of +// `a` (after being transposed if `transpose_a` is non-zero) must match the +// outer dimension of `b` (after being transposed if `transposed_b` is +// non-zero). 
// // Arguments: -// predictions: A `batch_size` x `classes` tensor. -// targets: A `batch_size` vector of class ids. -// k: Number of top elements to look at for computing precision. +// a: Must be a two-dimensional tensor. +// b: Must be a two-dimensional tensor. +// min_a: The float value that the lowest quantized `a` value represents. +// max_a: The float value that the highest quantized `a` value represents. +// min_b: The float value that the lowest quantized `b` value represents. +// max_b: The float value that the highest quantized `b` value represents. // -// Returns Computed Precision at `k` as a `bool Tensor`. -func InTopK(scope *Scope, predictions tf.Output, targets tf.Output, k int64) (precision tf.Output) { +// Returns The float value that the lowest quantized output value represents.The float value that the highest quantized output value represents. +func QuantizedMatMul(scope *Scope, a tf.Output, b tf.Output, min_a tf.Output, max_a tf.Output, min_b tf.Output, max_b tf.Output, optional ...QuantizedMatMulAttr) (out tf.Output, min_out tf.Output, max_out tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"k": k} + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "InTopK", + Type: "QuantizedMatMul", Input: []tf.Input{ - predictions, targets, + a, b, min_a, max_a, min_b, max_b, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1), op.Output(2) } -// Computes the gradient for the inverse of `x` wrt its input. +// Does nothing. Serves as a control trigger for scheduling. // -// Specifically, `grad = -dy * y*y`, where `y = 1/x`, and `dy` -// is the corresponding input gradient. 
-func ReciprocalGrad(scope *Scope, y tf.Output, dy tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ReciprocalGrad", - Input: []tf.Input{ - y, dy, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns the min of x and y (i.e. x < y ? x : y) element-wise. +// Only useful as a placeholder for control edges. // -// *NOTE*: `Minimum` supports broadcasting. More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func Minimum(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { +// Returns the created operation. +func ControlTrigger(scope *Scope) (o *tf.Operation) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Minimum", - Input: []tf.Input{ - x, y, - }, + Type: "ControlTrigger", } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } -// Returns the element-wise sum of a list of tensors. -// -// `tf.accumulate_n_v2` performs the same operation as `tf.add_n`, but does not -// wait for all of its inputs to be ready before beginning to sum. This can -// save memory if inputs are ready at different times, since minimum temporary -// storage is proportional to the output size rather than the inputs size. +// Batch normalization. // -// Unlike the original `accumulate_n`, `accumulate_n_v2` is differentiable. +// DEPRECATED at GraphDef version 9: Use tf.nn.batch_normalization() // -// Returns a `Tensor` of same shape and type as the elements of `inputs`. +// This op is deprecated. Prefer `tf.nn.batch_normalization`. // // Arguments: -// inputs: A list of `Tensor` objects, each with same shape and type. -// shape: Shape of elements of `inputs`. -func AccumulateNV2(scope *Scope, inputs []tf.Output, shape tf.Shape) (sum tf.Output) { +// t: A 4D input Tensor. +// m: A 1D mean Tensor with size matching the last dimension of t. 
+// This is the first output from tf.nn.moments, +// or a saved moving average thereof. +// v: A 1D variance Tensor with size matching the last dimension of t. +// This is the second output from tf.nn.moments, +// or a saved moving average thereof. +// beta: A 1D beta Tensor with size matching the last dimension of t. +// An offset to be added to the normalized tensor. +// gamma: A 1D gamma Tensor with size matching the last dimension of t. +// If "scale_after_normalization" is true, this tensor will be multiplied +// with the normalized tensor. +// variance_epsilon: A small float number to avoid dividing by 0. +// scale_after_normalization: A bool indicating whether the resulted tensor +// needs to be multiplied with gamma. +func BatchNormWithGlobalNormalization(scope *Scope, t tf.Output, m tf.Output, v tf.Output, beta tf.Output, gamma tf.Output, variance_epsilon float32, scale_after_normalization bool) (result tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"shape": shape} + attrs := map[string]interface{}{"variance_epsilon": variance_epsilon, "scale_after_normalization": scale_after_normalization} opspec := tf.OpSpec{ - Type: "AccumulateNV2", + Type: "BatchNormWithGlobalNormalization", Input: []tf.Input{ - tf.OutputList(inputs), + t, m, v, beta, gamma, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Convert the quantized 'input' tensor into a lower-precision 'output', using the -// -// actual distribution of the values to maximize the usage of the lower bit depth -// and adjusting the output min and max ranges accordingly. -// -// [input_min, input_max] are scalar floats that specify the range for the float -// interpretation of the 'input' data. For example, if input_min is -1.0f and -// input_max is 1.0f, and we are dealing with quint16 quantized data, then a 0 -// value in the 16-bit data should be interpreted as -1.0f, and a 65535 means 1.0f. 
-// -// This operator tries to squeeze as much precision as possible into an output with -// a lower bit depth by calculating the actual min and max values found in the -// data. For example, maybe that quint16 input has no values lower than 16,384 and -// none higher than 49,152. That means only half the range is actually needed, all -// the float interpretations are between -0.5f and 0.5f, so if we want to compress -// the data into a quint8 output, we can use that range rather than the theoretical -// -1.0f to 1.0f that is suggested by the input min and max. -// -// In practice, this is most useful for taking output from operations like -// QuantizedMatMul that can produce higher bit-depth outputs than their inputs and -// may have large potential output ranges, but in practice have a distribution of -// input values that only uses a small fraction of the possible range. By feeding -// that output into this operator, we can reduce it from 32 bits down to 8 with -// minimal loss of accuracy. -// -// Arguments: -// -// input_min: The float value that the minimum quantized input value represents. -// input_max: The float value that the maximum quantized input value represents. -// out_type: The type of the output. Should be a lower bit depth than Tinput. + return op.Output(0) +} + +// Deprecated. Use TensorArrayReadV3 // -// Returns The float value that the minimum quantized output value represents.The float value that the maximum quantized output value represents. 
-func QuantizeDownAndShrinkRange(scope *Scope, input tf.Output, input_min tf.Output, input_max tf.Output, out_type tf.DataType) (output tf.Output, output_min tf.Output, output_max tf.Output) { +// DEPRECATED at GraphDef version 26: Use TensorArrayReadV3 +func TensorArrayReadV2(scope *Scope, handle tf.Output, index tf.Output, flow_in tf.Output, dtype tf.DataType) (value tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"out_type": out_type} + attrs := map[string]interface{}{"dtype": dtype} opspec := tf.OpSpec{ - Type: "QuantizeDownAndShrinkRange", + Type: "TensorArrayReadV2", Input: []tf.Input{ - input, input_min, input_max, + handle, index, flow_in, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return op.Output(0) } -// RandomGammaAttr is an optional argument to RandomGamma. -type RandomGammaAttr func(optionalAttr) +// QuantizedMulAttr is an optional argument to QuantizedMul. +type QuantizedMulAttr func(optionalAttr) -// RandomGammaSeed sets the optional seed attribute to value. -// -// value: If either `seed` or `seed2` are set to be non-zero, the random number -// generator is seeded by the given seed. Otherwise, it is seeded by a -// random seed. -// If not specified, defaults to 0 -func RandomGammaSeed(value int64) RandomGammaAttr { +// QuantizedMulToutput sets the optional Toutput attribute to value. +// If not specified, defaults to DT_QINT32 +func QuantizedMulToutput(value tf.DataType) QuantizedMulAttr { return func(m optionalAttr) { - m["seed"] = value + m["Toutput"] = value } } -// RandomGammaSeed2 sets the optional seed2 attribute to value. +// Returns x * y element-wise, working on quantized buffers. // -// value: A second seed to avoid seed collision. 
-// If not specified, defaults to 0 -func RandomGammaSeed2(value int64) RandomGammaAttr { - return func(m optionalAttr) { - m["seed2"] = value - } -} - -// Outputs random values from the Gamma distribution(s) described by alpha. +// Arguments: // -// This op uses the algorithm by Marsaglia et al. to acquire samples via -// transformation-rejection from pairs of uniform and normal random variables. -// See http://dl.acm.org/citation.cfm?id=358414 // -// Arguments: -// shape: 1-D integer tensor. Shape of independent samples to draw from each -// distribution described by the shape parameters given in alpha. -// alpha: A tensor in which each scalar is a "shape" parameter describing the -// associated gamma distribution. +// min_x: The float value that the lowest quantized `x` value represents. +// max_x: The float value that the highest quantized `x` value represents. +// min_y: The float value that the lowest quantized `y` value represents. +// max_y: The float value that the highest quantized `y` value represents. // -// Returns A tensor with shape `shape + shape(alpha)`. Each slice -// `[:, ..., :, i0, i1, ...iN]` contains the samples drawn for -// `alpha[i0, i1, ...iN]`. The dtype of the output matches the dtype of alpha. -func RandomGamma(scope *Scope, shape tf.Output, alpha tf.Output, optional ...RandomGammaAttr) (output tf.Output) { +// Returns The float value that the lowest quantized output value represents.The float value that the highest quantized output value represents. +// +// *NOTE*: `QuantizedMul` supports limited forms of broadcasting. 
More about +// broadcasting [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +func QuantizedMul(scope *Scope, x tf.Output, y tf.Output, min_x tf.Output, max_x tf.Output, min_y tf.Output, max_y tf.Output, optional ...QuantizedMulAttr) (z tf.Output, min_z tf.Output, max_z tf.Output) { if scope.Err() != nil { return } @@ -17114,63 +19806,42 @@ func RandomGamma(scope *Scope, shape tf.Output, alpha tf.Output, optional ...Ran a(attrs) } opspec := tf.OpSpec{ - Type: "RandomGamma", + Type: "QuantizedMul", Input: []tf.Input{ - shape, alpha, + x, y, min_x, max_x, min_y, max_y, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) -} - -// QuantizeAndDequantizeAttr is an optional argument to QuantizeAndDequantize. -type QuantizeAndDequantizeAttr func(optionalAttr) - -// QuantizeAndDequantizeSignedInput sets the optional signed_input attribute to value. -// If not specified, defaults to true -func QuantizeAndDequantizeSignedInput(value bool) QuantizeAndDequantizeAttr { - return func(m optionalAttr) { - m["signed_input"] = value - } -} - -// QuantizeAndDequantizeNumBits sets the optional num_bits attribute to value. -// If not specified, defaults to 8 -func QuantizeAndDequantizeNumBits(value int64) QuantizeAndDequantizeAttr { - return func(m optionalAttr) { - m["num_bits"] = value - } -} - -// QuantizeAndDequantizeRangeGiven sets the optional range_given attribute to value. -// If not specified, defaults to false -func QuantizeAndDequantizeRangeGiven(value bool) QuantizeAndDequantizeAttr { - return func(m optionalAttr) { - m["range_given"] = value - } + return op.Output(0), op.Output(1), op.Output(2) } -// QuantizeAndDequantizeInputMin sets the optional input_min attribute to value. -// If not specified, defaults to 0 -func QuantizeAndDequantizeInputMin(value float32) QuantizeAndDequantizeAttr { - return func(m optionalAttr) { - m["input_min"] = value - } -} +// QuantizedAddAttr is an optional argument to QuantizedAdd. 
+type QuantizedAddAttr func(optionalAttr) -// QuantizeAndDequantizeInputMax sets the optional input_max attribute to value. -// If not specified, defaults to 0 -func QuantizeAndDequantizeInputMax(value float32) QuantizeAndDequantizeAttr { +// QuantizedAddToutput sets the optional Toutput attribute to value. +// If not specified, defaults to DT_QINT32 +func QuantizedAddToutput(value tf.DataType) QuantizedAddAttr { return func(m optionalAttr) { - m["input_max"] = value + m["Toutput"] = value } } -// Use QuantizeAndDequantizeV2 instead. +// Returns x + y element-wise, working on quantized buffers. // -// DEPRECATED at GraphDef version 22: Replaced by QuantizeAndDequantizeV2 -func QuantizeAndDequantize(scope *Scope, input tf.Output, optional ...QuantizeAndDequantizeAttr) (output tf.Output) { +// Arguments: +// +// +// min_x: The float value that the lowest quantized `x` value represents. +// max_x: The float value that the highest quantized `x` value represents. +// min_y: The float value that the lowest quantized `y` value represents. +// max_y: The float value that the highest quantized `y` value represents. +// +// Returns The float value that the lowest quantized output value represents.The float value that the highest quantized output value represents. +// +// *NOTE*: `QuantizedAdd` supports limited forms of broadcasting. 
More about +// broadcasting [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +func QuantizedAdd(scope *Scope, x tf.Output, y tf.Output, min_x tf.Output, max_x tf.Output, min_y tf.Output, max_y tf.Output, optional ...QuantizedAddAttr) (z tf.Output, min_z tf.Output, max_z tf.Output) { if scope.Err() != nil { return } @@ -17179,461 +19850,402 @@ func QuantizeAndDequantize(scope *Scope, input tf.Output, optional ...QuantizeAn a(attrs) } opspec := tf.OpSpec{ - Type: "QuantizeAndDequantize", + Type: "QuantizedAdd", Input: []tf.Input{ - input, + x, y, min_x, max_x, min_y, max_y, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1), op.Output(2) } -// Returns locations of nonzero / true values in a tensor. -// -// This operation returns the coordinates of true elements in `condition`. The -// coordinates are returned in a 2-D tensor where the first dimension (rows) -// represents the number of true elements, and the second dimension (columns) -// represents the coordinates of the true elements. Keep in mind, the shape of -// the output tensor can vary depending on how many true values there are in -// `condition`. Indices are output in row-major order. -// -// For example: -// -// ``` -// # 'input' tensor is [[True, False] -// # [True, False]] -// # 'input' has two true values, so output has two coordinates. -// # 'input' has rank of 2, so coordinates have two indices. -// where(input) ==> [[0, 0], -// [1, 0]] -// -// # `condition` tensor is [[[True, False] -// # [True, False]] -// # [[False, True] -// # [False, True]] -// # [[False, False] -// # [False, True]]] -// # 'input' has 5 true values, so output has 5 coordinates. -// # 'input' has rank of 3, so coordinates have three indices. 
-// where(input) ==> [[0, 0, 0], -// [0, 1, 0], -// [1, 0, 1], -// [1, 1, 1], -// [2, 1, 1]] -// -// # `condition` tensor is [[[1.5, 0.0] -// # [-0.5, 0.0]] -// # [[0.0, 0.25] -// # [0.0, 0.75]] -// # [[0.0, 0.0] -// # [0.0, 0.01]]] -// # 'input' has 5 nonzero values, so output has 5 coordinates. -// # 'input' has rank of 3, so coordinates have three indices. -// where(input) ==> [[0, 0, 0], -// [0, 1, 0], -// [1, 0, 1], -// [1, 1, 1], -// [2, 1, 1]] +// MfccAttr is an optional argument to Mfcc. +type MfccAttr func(optionalAttr) + +// MfccUpperFrequencyLimit sets the optional upper_frequency_limit attribute to value. // -// # `condition` tensor is [[[1.5 + 0.0j, 0.0 + 0.0j] -// # [0.0 + 0.5j, 0.0 + 0.0j]] -// # [[0.0 + 0.0j, 0.25 + 1.5j] -// # [0.0 + 0.0j, 0.75 + 0.0j]] -// # [[0.0 + 0.0j, 0.0 + 0.0j] -// # [0.0 + 0.0j, 0.01 + 0.0j]]] -// # 'input' has 5 nonzero magnitude values, so output has 5 coordinates. -// # 'input' has rank of 3, so coordinates have three indices. -// where(input) ==> [[0, 0, 0], -// [0, 1, 0], -// [1, 0, 1], -// [1, 1, 1], -// [2, 1, 1]] -// ``` -func Where(scope *Scope, condition tf.Output) (index tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Where", - Input: []tf.Input{ - condition, - }, +// value: The highest frequency to use when calculating the +// ceptstrum. +// If not specified, defaults to 4000 +func MfccUpperFrequencyLimit(value float32) MfccAttr { + return func(m optionalAttr) { + m["upper_frequency_limit"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// QueueDequeueV2Attr is an optional argument to QueueDequeueV2. -type QueueDequeueV2Attr func(optionalAttr) +// MfccLowerFrequencyLimit sets the optional lower_frequency_limit attribute to value. +// +// value: The lowest frequency to use when calculating the +// ceptstrum. 
+// If not specified, defaults to 20 +func MfccLowerFrequencyLimit(value float32) MfccAttr { + return func(m optionalAttr) { + m["lower_frequency_limit"] = value + } +} -// QueueDequeueV2TimeoutMs sets the optional timeout_ms attribute to value. +// MfccFilterbankChannelCount sets the optional filterbank_channel_count attribute to value. // -// value: If the queue is empty, this operation will block for up to -// timeout_ms milliseconds. -// Note: This option is not supported yet. -// If not specified, defaults to -1 -func QueueDequeueV2TimeoutMs(value int64) QueueDequeueV2Attr { +// value: Resolution of the Mel bank used internally. +// If not specified, defaults to 40 +func MfccFilterbankChannelCount(value int64) MfccAttr { return func(m optionalAttr) { - m["timeout_ms"] = value + m["filterbank_channel_count"] = value } } -// Dequeues a tuple of one or more tensors from the given queue. +// MfccDctCoefficientCount sets the optional dct_coefficient_count attribute to value. // -// This operation has k outputs, where k is the number of components -// in the tuples stored in the given queue, and output i is the ith -// component of the dequeued tuple. +// value: How many output channels to produce per time slice. +// If not specified, defaults to 13 +func MfccDctCoefficientCount(value int64) MfccAttr { + return func(m optionalAttr) { + m["dct_coefficient_count"] = value + } +} + +// Transforms a spectrogram into a form that's useful for speech recognition. // -// N.B. If the queue is empty, this operation will block until an element -// has been dequeued (or 'timeout_ms' elapses, if specified). +// Mel Frequency Cepstral Coefficients are a way of representing audio data that's +// been effective as an input feature for machine learning. They are created by +// taking the spectrum of a spectrogram (a 'cepstrum'), and discarding some of the +// higher frequencies that are less significant to the human ear. 
They have a long +// history in the speech recognition world, and https://en.wikipedia.org/wiki/Mel-frequency_cepstrum +// is a good resource to learn more. // // Arguments: -// handle: The handle to a queue. -// component_types: The type of each component in a tuple. -// -// Returns One or more tensors that were dequeued as a tuple. -func QueueDequeueV2(scope *Scope, handle tf.Output, component_types []tf.DataType, optional ...QueueDequeueV2Attr) (components []tf.Output) { +// spectrogram: Typically produced by the Spectrogram op, with magnitude_squared +// set to true. +// sample_rate: How many samples per second the source audio used. +func Mfcc(scope *Scope, spectrogram tf.Output, sample_rate tf.Output, optional ...MfccAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"component_types": component_types} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "QueueDequeueV2", + Type: "Mfcc", Input: []tf.Input{ - handle, + spectrogram, sample_rate, }, Attrs: attrs, } op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Given a quantized tensor described by (input, input_min, input_max), outputs a +// +// range that covers the actual values present in that tensor. This op is +// typically used to produce the requested_output_min and requested_output_max for +// Requantize. +// +// Arguments: +// +// input_min: The float value that the minimum quantized input value represents. +// input_max: The float value that the maximum quantized input value represents. +// +// Returns The computed min output.the computed max output. 
+func RequantizationRange(scope *Scope, input tf.Output, input_min tf.Output, input_max tf.Output) (output_min tf.Output, output_max tf.Output) { if scope.Err() != nil { return } - var idx int - var err error - if components, idx, err = makeOutputList(op, idx, "components"); err != nil { - scope.UpdateErr("QueueDequeueV2", err) - return + opspec := tf.OpSpec{ + Type: "RequantizationRange", + Input: []tf.Input{ + input, input_min, input_max, + }, } - return components + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1) } -// RandomUniformIntAttr is an optional argument to RandomUniformInt. -type RandomUniformIntAttr func(optionalAttr) - -// RandomUniformIntSeed sets the optional seed attribute to value. +// Rolls the elements of a tensor along an axis. // -// value: If either `seed` or `seed2` are set to be non-zero, the random number -// generator is seeded by the given seed. Otherwise, it is seeded by a -// random seed. -// If not specified, defaults to 0 -func RandomUniformIntSeed(value int64) RandomUniformIntAttr { - return func(m optionalAttr) { - m["seed"] = value - } -} - -// RandomUniformIntSeed2 sets the optional seed2 attribute to value. +// The elements are shifted positively (towards larger indices) by the offset of +// `shift` along the dimension of `axis`. Negative `shift` values will shift +// elements in the opposite direction. Elements that roll passed the last position +// will wrap around to the first and vice versa. Multiple shifts along multiple +// axes may be specified. // -// value: A second seed to avoid seed collision. -// If not specified, defaults to 0 -func RandomUniformIntSeed2(value int64) RandomUniformIntAttr { - return func(m optionalAttr) { - m["seed2"] = value - } -} - -// Outputs random integers from a uniform distribution. +// For example: // -// The generated values are uniform integers in the range `[minval, maxval)`. 
-// The lower bound `minval` is included in the range, while the upper bound -// `maxval` is excluded. +// ``` +// # 't' is [0, 1, 2, 3, 4] +// roll(t, shift=2, axis=0) ==> [3, 4, 0, 1, 2] // -// The random integers are slightly biased unless `maxval - minval` is an exact -// power of two. The bias is small for values of `maxval - minval` significantly -// smaller than the range of the output (either `2^32` or `2^64`). +// # shifting along multiple dimensions +// # 't' is [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]] +// roll(t, shift=[1, -2], axis=[0, 1]) ==> [[7, 8, 9, 5, 6], [2, 3, 4, 0, 1]] +// +// # shifting along the same axis multiple times +// # 't' is [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]] +// roll(t, shift=[2, -3], axis=[1, 1]) ==> [[1, 2, 3, 4, 0], [6, 7, 8, 9, 5]] +// ``` // // Arguments: -// shape: The shape of the output tensor. -// minval: 0-D. Inclusive lower bound on the generated integers. -// maxval: 0-D. Exclusive upper bound on the generated integers. // -// Returns A tensor of the specified shape filled with uniform random integers. -func RandomUniformInt(scope *Scope, shape tf.Output, minval tf.Output, maxval tf.Output, optional ...RandomUniformIntAttr) (output tf.Output) { +// shift: Dimension must be 0-D or 1-D. `shift[i]` specifies the number of places by which +// elements are shifted positively (towards larger indices) along the dimension +// specified by `axis[i]`. Negative shifts will roll the elements in the opposite +// direction. +// axis: Dimension must be 0-D or 1-D. `axis[i]` specifies the dimension that the shift +// `shift[i]` should occur. If the same axis is referenced more than once, the +// total shift for that axis will be the sum of all the shifts that belong to that +// axis. +// +// Returns Has the same shape and size as the input. The elements are shifted +// positively (towards larger indices) by the offsets of `shift` along the +// dimensions of `axis`. 
+func Roll(scope *Scope, input tf.Output, shift tf.Output, axis tf.Output) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "RandomUniformInt", + Type: "Roll", Input: []tf.Input{ - shape, minval, maxval, + input, shift, axis, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// SkipgramAttr is an optional argument to Skipgram. -type SkipgramAttr func(optionalAttr) +// MapPeekAttr is an optional argument to MapPeek. +type MapPeekAttr func(optionalAttr) -// SkipgramWindowSize sets the optional window_size attribute to value. +// MapPeekCapacity sets the optional capacity attribute to value. +// If not specified, defaults to 0 // -// value: The number of words to predict to the left and right of the target. -// If not specified, defaults to 5 -func SkipgramWindowSize(value int64) SkipgramAttr { +// REQUIRES: value >= 0 +func MapPeekCapacity(value int64) MapPeekAttr { return func(m optionalAttr) { - m["window_size"] = value + m["capacity"] = value } } -// SkipgramMinCount sets the optional min_count attribute to value. +// MapPeekMemoryLimit sets the optional memory_limit attribute to value. +// If not specified, defaults to 0 // -// value: The minimum number of word occurrences for it to be included in the -// vocabulary. -// If not specified, defaults to 5 -func SkipgramMinCount(value int64) SkipgramAttr { +// REQUIRES: value >= 0 +func MapPeekMemoryLimit(value int64) MapPeekAttr { return func(m optionalAttr) { - m["min_count"] = value + m["memory_limit"] = value } } -// SkipgramSubsample sets the optional subsample attribute to value. -// -// value: Threshold for word occurrence. Words that appear with higher -// frequency will be randomly down-sampled. Set to 0 to disable. 
-// If not specified, defaults to 0.001 -func SkipgramSubsample(value float32) SkipgramAttr { +// MapPeekContainer sets the optional container attribute to value. +// If not specified, defaults to "" +func MapPeekContainer(value string) MapPeekAttr { return func(m optionalAttr) { - m["subsample"] = value + m["container"] = value } } -// Parses a text file and creates a batch of examples. -// -// DEPRECATED at GraphDef version 19: Moving word2vec into tensorflow_models/tutorials and deprecating its ops here as a result -// -// Arguments: -// filename: The corpus's text file name. -// batch_size: The size of produced batch. +// MapPeekSharedName sets the optional shared_name attribute to value. +// If not specified, defaults to "" +func MapPeekSharedName(value string) MapPeekAttr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// Op peeks at the values at the specified key. If the // -// Returns A vector of words in the corpus.Frequencies of words. Sorted in the non-ascending order.Number of words per epoch in the data file.The current epoch number.The total number of words processed so far.A vector of word ids.A vector of word ids. -func Skipgram(scope *Scope, filename string, batch_size int64, optional ...SkipgramAttr) (vocab_word tf.Output, vocab_freq tf.Output, words_per_epoch tf.Output, current_epoch tf.Output, total_words_processed tf.Output, examples tf.Output, labels tf.Output) { +// underlying container does not contain this key +// this op will block until it does. 
+func MapPeek(scope *Scope, key tf.Output, indices tf.Output, dtypes []tf.DataType, optional ...MapPeekAttr) (values []tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"filename": filename, "batch_size": batch_size} + attrs := map[string]interface{}{"dtypes": dtypes} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "Skipgram", - + Type: "MapPeek", + Input: []tf.Input{ + key, indices, + }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4), op.Output(5), op.Output(6) -} - -// StringToNumberAttr is an optional argument to StringToNumber. -type StringToNumberAttr func(optionalAttr) - -// StringToNumberOutType sets the optional out_type attribute to value. -// -// value: The numeric type to interpret each string in `string_tensor` as. -// If not specified, defaults to DT_FLOAT -func StringToNumberOutType(value tf.DataType) StringToNumberAttr { - return func(m optionalAttr) { - m["out_type"] = value + if scope.Err() != nil { + return + } + var idx int + var err error + if values, idx, err = makeOutputList(op, idx, "values"); err != nil { + scope.UpdateErr("MapPeek", err) + return } + return values } -// Converts each string in the input Tensor to the specified numeric type. +// Looks up keys in a table, outputs the corresponding values. // -// (Note that int32 overflow results in an error while float overflow -// results in a rounded value.) +// The tensor `keys` must of the same type as the keys of the table. +// The output `values` is of the type of the table values. // -// Returns A Tensor of the same shape as the input `string_tensor`. -func StringToNumber(scope *Scope, string_tensor tf.Output, optional ...StringToNumberAttr) (output tf.Output) { +// The scalar `default_value` is the value output for keys not present in the +// table. It must also be of the same type as the table values. 
+// +// Arguments: +// table_handle: Handle to the table. +// keys: Any shape. Keys to look up. +// +// +// Returns Same shape as `keys`. Values found in the table, or `default_values` +// for missing keys. +func LookupTableFindV2(scope *Scope, table_handle tf.Output, keys tf.Output, default_value tf.Output) (values tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "StringToNumber", + Type: "LookupTableFindV2", Input: []tf.Input{ - string_tensor, + table_handle, keys, default_value, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// ResourceApplyFtrlV2Attr is an optional argument to ResourceApplyFtrlV2. -type ResourceApplyFtrlV2Attr func(optionalAttr) - -// ResourceApplyFtrlV2UseLocking sets the optional use_locking attribute to value. -// -// value: If `True`, updating of the var and accum tensors will be protected -// by a lock; otherwise the behavior is undefined, but may exhibit less -// contention. -// If not specified, defaults to false -func ResourceApplyFtrlV2UseLocking(value bool) ResourceApplyFtrlV2Attr { - return func(m optionalAttr) { - m["use_locking"] = value - } -} - -// Update '*var' according to the Ftrl-proximal scheme. +// Bucketizes 'input' based on 'boundaries'. // -// grad_with_shrinkage = grad + 2 * l2_shrinkage * var -// accum_new = accum + grad_with_shrinkage * grad_with_shrinkage -// linear += grad_with_shrinkage + -// (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var -// quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2 -// var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0 -// accum = accum_new +// For example, if the inputs are +// boundaries = [0, 10, 100] +// input = [[-5, 10000] +// [150, 10] +// [5, 100]] // -// Arguments: -// var_: Should be from a Variable(). -// accum: Should be from a Variable(). -// linear: Should be from a Variable(). -// grad: The gradient. 
-// lr: Scaling factor. Must be a scalar. -// l1: L1 regulariation. Must be a scalar. -// l2: L2 shrinkage regulariation. Must be a scalar. +// then the output will be +// output = [[0, 3] +// [3, 2] +// [1, 3]] // -// lr_power: Scaling factor. Must be a scalar. +// Arguments: +// input: Any shape of Tensor contains with int or float type. +// boundaries: A sorted list of floats gives the boundary of the buckets. // -// Returns the created operation. -func ResourceApplyFtrlV2(scope *Scope, var_ tf.Output, accum tf.Output, linear tf.Output, grad tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, l2_shrinkage tf.Output, lr_power tf.Output, optional ...ResourceApplyFtrlV2Attr) (o *tf.Operation) { +// Returns Same shape with 'input', each value of input replaced with bucket index. +// +// @compatibility(numpy) +// Equivalent to np.digitize. +// @end_compatibility +func Bucketize(scope *Scope, input tf.Output, boundaries []float32) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } + attrs := map[string]interface{}{"boundaries": boundaries} opspec := tf.OpSpec{ - Type: "ResourceApplyFtrlV2", + Type: "Bucketize", Input: []tf.Input{ - var_, accum, linear, grad, lr, l1, l2, l2_shrinkage, lr_power, + input, }, Attrs: attrs, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0) } -// TruncatedNormalAttr is an optional argument to TruncatedNormal. -type TruncatedNormalAttr func(optionalAttr) - -// TruncatedNormalSeed sets the optional seed attribute to value. +// Calculates gains for each feature and returns the best possible split information for the feature. // -// value: If either `seed` or `seed2` are set to be non-zero, the random number -// generator is seeded by the given seed. Otherwise, it is seeded by a -// random seed. 
-// If not specified, defaults to 0 -func TruncatedNormalSeed(value int64) TruncatedNormalAttr { - return func(m optionalAttr) { - m["seed"] = value - } -} - -// TruncatedNormalSeed2 sets the optional seed2 attribute to value. +// The split information is the best threshold (bucket id), gains and left/right node contributions per node for each feature. // -// value: A second seed to avoid seed collision. -// If not specified, defaults to 0 -func TruncatedNormalSeed2(value int64) TruncatedNormalAttr { - return func(m optionalAttr) { - m["seed2"] = value - } -} - -// Outputs random values from a truncated normal distribution. +// It is possible that not all nodes can be split on each feature. Hence, the list of possible nodes can differ between the features. Therefore, we return `node_ids_list` for each feature, containing the list of nodes that this feature can be used to split. // -// The generated values follow a normal distribution with mean 0 and standard -// deviation 1, except that values whose magnitude is more than 2 standard -// deviations from the mean are dropped and re-picked. +// In this manner, the output is the best split per features and per node, so that it needs to be combined later to produce the best split for each node (among all possible features). +// +// The length of output lists are all of the same length, `num_features`. +// The output shapes are compatible in a way that the first dimension of all tensors of all lists are the same and equal to the number of possible split nodes for each feature. // // Arguments: -// shape: The shape of the output tensor. -// dtype: The type of the output. +// node_id_range: A Rank 1 tensor (shape=[2]) to specify the range [first, last) of node ids to process within `stats_summary_list`. The nodes are iterated between the two nodes specified by the tensor, as like `for node_id in range(node_id_range[0], node_id_range[1])` (Note that the last index node_id_range[1] is exclusive). 
+// stats_summary_list: A list of Rank 3 tensor (#shape=[max_splits, bucket, 2]) for accumulated stats summary (gradient/hessian) per node per buckets for each feature. The first dimension of the tensor is the maximum number of splits, and thus not all elements of it will be used, but only the indexes specified by node_ids will be used. +// l1: l1 regularization factor on leaf weights, per instance based. +// l2: l2 regularization factor on leaf weights, per instance based. +// tree_complexity: adjustment to the gain, per leaf based. +// min_node_weight: mininum avg of hessians in a node before required for the node to be considered for splitting. +// max_splits: the number of nodes that can be split in the whole tree. Used as a dimension of output tensors. // -// Returns A tensor of the specified shape filled with random truncated normal -// values. -func TruncatedNormal(scope *Scope, shape tf.Output, dtype tf.DataType, optional ...TruncatedNormalAttr) (output tf.Output) { +// Returns An output list of Rank 1 tensors indicating possible split node ids for each feature. The length of the list is num_features, but each tensor has different size as each feature provides different possible nodes. See above for details like shapes and sizes.An output list of Rank 1 tensors indicating the best gains for each feature to split for certain nodes. See above for details like shapes and sizes.An output list of Rank 1 tensors indicating the bucket id to compare with (as a threshold) for split in each node. See above for details like shapes and sizes.A list of Rank 2 tensors indicating the contribution of the left nodes when branching from parent nodes (given by the tensor element in the output node_ids_list) to the left direction by the given threshold for each feature. This value will be used to make the left node value by adding to the parent node value. Second dimension size is 1 for 1-dimensional logits, but would be larger for multi-class problems. 
See above for details like shapes and sizes.A list of Rank 2 tensors, with the same shape/conditions as left_node_contribs_list, but just that the value is for the right node. +func BoostedTreesCalculateBestGainsPerFeature(scope *Scope, node_id_range tf.Output, stats_summary_list []tf.Output, l1 tf.Output, l2 tf.Output, tree_complexity tf.Output, min_node_weight tf.Output, max_splits int64) (node_ids_list []tf.Output, gains_list []tf.Output, thresholds_list []tf.Output, left_node_contribs_list []tf.Output, right_node_contribs_list []tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"dtype": dtype} - for _, a := range optional { - a(attrs) - } + attrs := map[string]interface{}{"max_splits": max_splits} opspec := tf.OpSpec{ - Type: "TruncatedNormal", + Type: "BoostedTreesCalculateBestGainsPerFeature", Input: []tf.Input{ - shape, + node_id_range, tf.OutputList(stats_summary_list), l1, l2, tree_complexity, min_node_weight, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + if scope.Err() != nil { + return + } + var idx int + var err error + if node_ids_list, idx, err = makeOutputList(op, idx, "node_ids_list"); err != nil { + scope.UpdateErr("BoostedTreesCalculateBestGainsPerFeature", err) + return + } + if gains_list, idx, err = makeOutputList(op, idx, "gains_list"); err != nil { + scope.UpdateErr("BoostedTreesCalculateBestGainsPerFeature", err) + return + } + if thresholds_list, idx, err = makeOutputList(op, idx, "thresholds_list"); err != nil { + scope.UpdateErr("BoostedTreesCalculateBestGainsPerFeature", err) + return + } + if left_node_contribs_list, idx, err = makeOutputList(op, idx, "left_node_contribs_list"); err != nil { + scope.UpdateErr("BoostedTreesCalculateBestGainsPerFeature", err) + return + } + if right_node_contribs_list, idx, err = makeOutputList(op, idx, "right_node_contribs_list"); err != nil { + scope.UpdateErr("BoostedTreesCalculateBestGainsPerFeature", err) + return + } + return node_ids_list, 
gains_list, thresholds_list, left_node_contribs_list, right_node_contribs_list } -// RandomShuffleAttr is an optional argument to RandomShuffle. -type RandomShuffleAttr func(optionalAttr) +// EncodePngAttr is an optional argument to EncodePng. +type EncodePngAttr func(optionalAttr) -// RandomShuffleSeed sets the optional seed attribute to value. +// EncodePngCompression sets the optional compression attribute to value. // -// value: If either `seed` or `seed2` are set to be non-zero, the random number -// generator is seeded by the given seed. Otherwise, it is seeded by a -// random seed. -// If not specified, defaults to 0 -func RandomShuffleSeed(value int64) RandomShuffleAttr { +// value: Compression level. +// If not specified, defaults to -1 +func EncodePngCompression(value int64) EncodePngAttr { return func(m optionalAttr) { - m["seed"] = value + m["compression"] = value } } -// RandomShuffleSeed2 sets the optional seed2 attribute to value. +// PNG-encode an image. // -// value: A second seed to avoid seed collision. -// If not specified, defaults to 0 -func RandomShuffleSeed2(value int64) RandomShuffleAttr { - return func(m optionalAttr) { - m["seed2"] = value - } -} - -// Randomly shuffles a tensor along its first dimension. +// `image` is a 3-D uint8 or uint16 Tensor of shape `[height, width, channels]` +// where `channels` is: // -// The tensor is shuffled along dimension 0, such that each `value[j]` is mapped -// to one and only one `output[i]`. For example, a mapping that might occur for a -// 3x2 tensor is: +// * 1: for grayscale. +// * 2: for grayscale + alpha. +// * 3: for RGB. +// * 4: for RGBA. // -// ``` -// [[1, 2], [[5, 6], -// [3, 4], ==> [1, 2], -// [5, 6]] [3, 4]] -// ``` +// The ZLIB compression level, `compression`, can be -1 for the PNG-encoder +// default or a value from 0 to 9. 9 is the highest compression level, generating +// the smallest output, but is slower. // // Arguments: -// value: The tensor to be shuffled. 
+// image: 3-D with shape `[height, width, channels]`. // -// Returns A tensor of same shape and type as `value`, shuffled along its first -// dimension. -func RandomShuffle(scope *Scope, value tf.Output, optional ...RandomShuffleAttr) (output tf.Output) { +// Returns 0-D. PNG-encoded image. +func EncodePng(scope *Scope, image tf.Output, optional ...EncodePngAttr) (contents tf.Output) { if scope.Err() != nil { return } @@ -17642,9 +20254,9 @@ func RandomShuffle(scope *Scope, value tf.Output, optional ...RandomShuffleAttr) a(attrs) } opspec := tf.OpSpec{ - Type: "RandomShuffle", + Type: "EncodePng", Input: []tf.Input{ - value, + image, }, Attrs: attrs, } @@ -17652,389 +20264,448 @@ func RandomShuffle(scope *Scope, value tf.Output, optional ...RandomShuffleAttr) return op.Output(0) } -// OrderedMapIncompleteSizeAttr is an optional argument to OrderedMapIncompleteSize. -type OrderedMapIncompleteSizeAttr func(optionalAttr) - -// OrderedMapIncompleteSizeCapacity sets the optional capacity attribute to value. -// If not specified, defaults to 0 +// Updates the table to associates keys with values. // -// REQUIRES: value >= 0 -func OrderedMapIncompleteSizeCapacity(value int64) OrderedMapIncompleteSizeAttr { - return func(m optionalAttr) { - m["capacity"] = value +// The tensor `keys` must be of the same type as the keys of the table. +// The tensor `values` must be of the type of the table values. +// +// Arguments: +// table_handle: Handle to the table. +// keys: Any shape. Keys to look up. +// values: Values to associate with keys. +// +// Returns the created operation. +func LookupTableInsertV2(scope *Scope, table_handle tf.Output, keys tf.Output, values tf.Output) (o *tf.Operation) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "LookupTableInsertV2", + Input: []tf.Input{ + table_handle, keys, values, + }, } + return scope.AddOperation(opspec) } -// OrderedMapIncompleteSizeMemoryLimit sets the optional memory_limit attribute to value. 
-// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func OrderedMapIncompleteSizeMemoryLimit(value int64) OrderedMapIncompleteSizeAttr { - return func(m optionalAttr) { - m["memory_limit"] = value +// Returns element-wise smallest integer in not less than x. +func Ceil(scope *Scope, x tf.Output) (y tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Ceil", + Input: []tf.Input{ + x, + }, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// OrderedMapIncompleteSizeContainer sets the optional container attribute to value. -// If not specified, defaults to "" -func OrderedMapIncompleteSizeContainer(value string) OrderedMapIncompleteSizeAttr { - return func(m optionalAttr) { - m["container"] = value +// Computes the number of elements in the given table. +// +// Arguments: +// table_handle: Handle to the table. +// +// Returns Scalar that contains number of elements in the table. +func LookupTableSizeV2(scope *Scope, table_handle tf.Output) (size tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "LookupTableSizeV2", + Input: []tf.Input{ + table_handle, + }, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// OrderedMapIncompleteSizeSharedName sets the optional shared_name attribute to value. -// If not specified, defaults to "" -func OrderedMapIncompleteSizeSharedName(value string) OrderedMapIncompleteSizeAttr { +// ResizeBilinearGradAttr is an optional argument to ResizeBilinearGrad. +type ResizeBilinearGradAttr func(optionalAttr) + +// ResizeBilinearGradAlignCorners sets the optional align_corners attribute to value. +// +// value: If true, the centers of the 4 corner pixels of the input and grad tensors are +// aligned. Defaults to false. 
+// If not specified, defaults to false +func ResizeBilinearGradAlignCorners(value bool) ResizeBilinearGradAttr { return func(m optionalAttr) { - m["shared_name"] = value + m["align_corners"] = value } } -// Op returns the number of incomplete elements in the underlying container. -func OrderedMapIncompleteSize(scope *Scope, dtypes []tf.DataType, optional ...OrderedMapIncompleteSizeAttr) (size tf.Output) { +// Computes the gradient of bilinear interpolation. +// +// Arguments: +// grads: 4-D with shape `[batch, height, width, channels]`. +// original_image: 4-D with shape `[batch, orig_height, orig_width, channels]`, +// The image tensor that was resized. +// +// Returns 4-D with shape `[batch, orig_height, orig_width, channels]`. +// Gradients with respect to the input image. Input image must have been +// float or double. +func ResizeBilinearGrad(scope *Scope, grads tf.Output, original_image tf.Output, optional ...ResizeBilinearGradAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"dtypes": dtypes} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "OrderedMapIncompleteSize", - + Type: "ResizeBilinearGrad", + Input: []tf.Input{ + grads, original_image, + }, Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// DecodeRawAttr is an optional argument to DecodeRaw. -type DecodeRawAttr func(optionalAttr) - -// DecodeRawLittleEndian sets the optional little_endian attribute to value. -// -// value: Whether the input `bytes` are in little-endian order. -// Ignored for `out_type` values that are stored in a single byte like -// `uint8`. -// If not specified, defaults to true -func DecodeRawLittleEndian(value bool) DecodeRawAttr { - return func(m optionalAttr) { - m["little_endian"] = value - } -} - -// Reinterpret the bytes of a string as a vector of numbers. +// Outputs all keys and values in the table. 
// // Arguments: -// bytes: All the elements must have the same length. +// table_handle: Handle to the table. // // -// Returns A Tensor with one more dimension than the input `bytes`. The -// added dimension will have size equal to the length of the elements -// of `bytes` divided by the number of bytes to represent `out_type`. -func DecodeRaw(scope *Scope, bytes tf.Output, out_type tf.DataType, optional ...DecodeRawAttr) (output tf.Output) { +// +// Returns Vector of all keys present in the table.Tensor of all values in the table. Indexed in parallel with `keys`. +func LookupTableExportV2(scope *Scope, table_handle tf.Output, Tkeys tf.DataType, Tvalues tf.DataType) (keys tf.Output, values tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"out_type": out_type} - for _, a := range optional { - a(attrs) - } + attrs := map[string]interface{}{"Tkeys": Tkeys, "Tvalues": Tvalues} opspec := tf.OpSpec{ - Type: "DecodeRaw", + Type: "LookupTableExportV2", Input: []tf.Input{ - bytes, + table_handle, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1) } -// Copy a tensor setting everything outside a central band in each innermost matrix -// -// to zero. -// -// The `band` part is computed as follows: -// Assume `input` has `k` dimensions `[I, J, K, ..., M, N]`, then the output is a -// tensor with the same shape where -// -// `band[i, j, k, ..., m, n] = in_band(m, n) * input[i, j, k, ..., m, n]`. -// -// The indicator function -// -// `in_band(m, n) = (num_lower < 0 || (m-n) <= num_lower)) && -// (num_upper < 0 || (n-m) <= num_upper)`. 
-// -// For example: -// -// ``` -// # if 'input' is [[ 0, 1, 2, 3] -// [-1, 0, 1, 2] -// [-2, -1, 0, 1] -// [-3, -2, -1, 0]], -// -// tf.matrix_band_part(input, 1, -1) ==> [[ 0, 1, 2, 3] -// [-1, 0, 1, 2] -// [ 0, -1, 0, 1] -// [ 0, 0, -1, 0]], -// -// tf.matrix_band_part(input, 2, 1) ==> [[ 0, 1, 0, 0] -// [-1, 0, 1, 0] -// [-2, -1, 0, 1] -// [ 0, -2, -1, 0]] -// ``` -// -// Useful special cases: +// Replaces the contents of the table with the specified keys and values. // -// ``` -// tf.matrix_band_part(input, 0, -1) ==> Upper triangular part. -// tf.matrix_band_part(input, -1, 0) ==> Lower triangular part. -// tf.matrix_band_part(input, 0, 0) ==> Diagonal. -// ``` +// The tensor `keys` must be of the same type as the keys of the table. +// The tensor `values` must be of the type of the table values. // // Arguments: -// input: Rank `k` tensor. -// num_lower: 0-D tensor. Number of subdiagonals to keep. If negative, keep entire -// lower triangle. -// num_upper: 0-D tensor. Number of superdiagonals to keep. If negative, keep -// entire upper triangle. +// table_handle: Handle to the table. +// keys: Any shape. Keys to look up. +// values: Values to associate with keys. // -// Returns Rank `k` tensor of the same shape as input. The extracted banded tensor. -func MatrixBandPart(scope *Scope, input tf.Output, num_lower tf.Output, num_upper tf.Output) (band tf.Output) { +// Returns the created operation. +func LookupTableImportV2(scope *Scope, table_handle tf.Output, keys tf.Output, values tf.Output) (o *tf.Operation) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "MatrixBandPart", + Type: "LookupTableImportV2", Input: []tf.Input{ - input, num_lower, num_upper, + table_handle, keys, values, }, } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } -// QuantizedMatMulAttr is an optional argument to QuantizedMatMul. 
-type QuantizedMatMulAttr func(optionalAttr) +// MapUnstageNoKeyAttr is an optional argument to MapUnstageNoKey. +type MapUnstageNoKeyAttr func(optionalAttr) -// QuantizedMatMulToutput sets the optional Toutput attribute to value. -// If not specified, defaults to DT_QINT32 -func QuantizedMatMulToutput(value tf.DataType) QuantizedMatMulAttr { +// MapUnstageNoKeyCapacity sets the optional capacity attribute to value. +// If not specified, defaults to 0 +// +// REQUIRES: value >= 0 +func MapUnstageNoKeyCapacity(value int64) MapUnstageNoKeyAttr { return func(m optionalAttr) { - m["Toutput"] = value + m["capacity"] = value } } -// QuantizedMatMulTransposeA sets the optional transpose_a attribute to value. +// MapUnstageNoKeyMemoryLimit sets the optional memory_limit attribute to value. +// If not specified, defaults to 0 // -// value: If true, `a` is transposed before multiplication. -// If not specified, defaults to false -func QuantizedMatMulTransposeA(value bool) QuantizedMatMulAttr { +// REQUIRES: value >= 0 +func MapUnstageNoKeyMemoryLimit(value int64) MapUnstageNoKeyAttr { return func(m optionalAttr) { - m["transpose_a"] = value + m["memory_limit"] = value } } -// QuantizedMatMulTransposeB sets the optional transpose_b attribute to value. -// -// value: If true, `b` is transposed before multiplication. -// If not specified, defaults to false -func QuantizedMatMulTransposeB(value bool) QuantizedMatMulAttr { +// MapUnstageNoKeyContainer sets the optional container attribute to value. +// If not specified, defaults to "" +func MapUnstageNoKeyContainer(value string) MapUnstageNoKeyAttr { return func(m optionalAttr) { - m["transpose_b"] = value + m["container"] = value } } -// QuantizedMatMulTactivation sets the optional Tactivation attribute to value. -// -// value: The type of output produced by activation function -// following this operation. 
-// If not specified, defaults to DT_QUINT8 -func QuantizedMatMulTactivation(value tf.DataType) QuantizedMatMulAttr { +// MapUnstageNoKeySharedName sets the optional shared_name attribute to value. +// If not specified, defaults to "" +func MapUnstageNoKeySharedName(value string) MapUnstageNoKeyAttr { return func(m optionalAttr) { - m["Tactivation"] = value + m["shared_name"] = value } } -// Perform a quantized matrix multiplication of `a` by the matrix `b`. -// -// The inputs must be two-dimensional matrices and the inner dimension of -// `a` (after being transposed if `transpose_a` is non-zero) must match the -// outer dimension of `b` (after being transposed if `transposed_b` is -// non-zero). -// -// Arguments: -// a: Must be a two-dimensional tensor. -// b: Must be a two-dimensional tensor. -// min_a: The float value that the lowest quantized `a` value represents. -// max_a: The float value that the highest quantized `a` value represents. -// min_b: The float value that the lowest quantized `b` value represents. -// max_b: The float value that the highest quantized `b` value represents. +// Op removes and returns a random (key, value) // -// Returns The float value that the lowest quantized output value represents.The float value that the highest quantized output value represents. -func QuantizedMatMul(scope *Scope, a tf.Output, b tf.Output, min_a tf.Output, max_a tf.Output, min_b tf.Output, max_b tf.Output, optional ...QuantizedMatMulAttr) (out tf.Output, min_out tf.Output, max_out tf.Output) { +// from the underlying container. If the underlying container +// does not contain elements, the op will block until it does. 
+func MapUnstageNoKey(scope *Scope, indices tf.Output, dtypes []tf.DataType, optional ...MapUnstageNoKeyAttr) (key tf.Output, values []tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"dtypes": dtypes} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "QuantizedMatMul", + Type: "MapUnstageNoKey", Input: []tf.Input{ - a, b, min_a, max_a, min_b, max_b, + indices, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// Does nothing. Serves as a control trigger for scheduling. -// -// Only useful as a placeholder for control edges. -// -// Returns the created operation. -func ControlTrigger(scope *Scope) (o *tf.Operation) { if scope.Err() != nil { return } - opspec := tf.OpSpec{ - Type: "ControlTrigger", + var idx int + var err error + key = op.Output(idx) + if values, idx, err = makeOutputList(op, idx, "values"); err != nil { + scope.UpdateErr("MapUnstageNoKey", err) + return } - return scope.AddOperation(opspec) + return key, values } -// Batch normalization. -// -// DEPRECATED at GraphDef version 9: Use tf.nn.batch_normalization() +// HashTableV2Attr is an optional argument to HashTableV2. +type HashTableV2Attr func(optionalAttr) + +// HashTableV2Container sets the optional container attribute to value. // -// This op is deprecated. Prefer `tf.nn.batch_normalization`. +// value: If non-empty, this table is placed in the given container. +// Otherwise, a default container is used. +// If not specified, defaults to "" +func HashTableV2Container(value string) HashTableV2Attr { + return func(m optionalAttr) { + m["container"] = value + } +} + +// HashTableV2SharedName sets the optional shared_name attribute to value. // -// Arguments: -// t: A 4D input Tensor. -// m: A 1D mean Tensor with size matching the last dimension of t. -// This is the first output from tf.nn.moments, -// or a saved moving average thereof. 
-// v: A 1D variance Tensor with size matching the last dimension of t. -// This is the second output from tf.nn.moments, -// or a saved moving average thereof. -// beta: A 1D beta Tensor with size matching the last dimension of t. -// An offset to be added to the normalized tensor. -// gamma: A 1D gamma Tensor with size matching the last dimension of t. -// If "scale_after_normalization" is true, this tensor will be multiplied -// with the normalized tensor. -// variance_epsilon: A small float number to avoid dividing by 0. -// scale_after_normalization: A bool indicating whether the resulted tensor -// needs to be multiplied with gamma. -func BatchNormWithGlobalNormalization(scope *Scope, t tf.Output, m tf.Output, v tf.Output, beta tf.Output, gamma tf.Output, variance_epsilon float32, scale_after_normalization bool) (result tf.Output) { - if scope.Err() != nil { - return +// value: If non-empty, this table is shared under the given name across +// multiple sessions. +// If not specified, defaults to "" +func HashTableV2SharedName(value string) HashTableV2Attr { + return func(m optionalAttr) { + m["shared_name"] = value } - attrs := map[string]interface{}{"variance_epsilon": variance_epsilon, "scale_after_normalization": scale_after_normalization} - opspec := tf.OpSpec{ - Type: "BatchNormWithGlobalNormalization", - Input: []tf.Input{ - t, m, v, beta, gamma, - }, - Attrs: attrs, +} + +// HashTableV2UseNodeNameSharing sets the optional use_node_name_sharing attribute to value. +// +// value: If true and shared_name is empty, the table is shared +// using the node name. +// If not specified, defaults to false +func HashTableV2UseNodeNameSharing(value bool) HashTableV2Attr { + return func(m optionalAttr) { + m["use_node_name_sharing"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// Deprecated. Use TensorArrayReadV3 +// Creates a non-initialized hash table. 
// -// DEPRECATED at GraphDef version 26: Use TensorArrayReadV3 -func TensorArrayReadV2(scope *Scope, handle tf.Output, index tf.Output, flow_in tf.Output, dtype tf.DataType) (value tf.Output) { +// This op creates a hash table, specifying the type of its keys and values. +// Before using the table you will have to initialize it. After initialization the +// table will be immutable. +// +// Arguments: +// key_dtype: Type of the table keys. +// value_dtype: Type of the table values. +// +// Returns Handle to a table. +func HashTableV2(scope *Scope, key_dtype tf.DataType, value_dtype tf.DataType, optional ...HashTableV2Attr) (table_handle tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"dtype": dtype} - opspec := tf.OpSpec{ - Type: "TensorArrayReadV2", - Input: []tf.Input{ - handle, index, flow_in, - }, + attrs := map[string]interface{}{"key_dtype": key_dtype, "value_dtype": value_dtype} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "HashTableV2", + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// QuantizedMulAttr is an optional argument to QuantizedMul. -type QuantizedMulAttr func(optionalAttr) +// MutableHashTableV2Attr is an optional argument to MutableHashTableV2. +type MutableHashTableV2Attr func(optionalAttr) -// QuantizedMulToutput sets the optional Toutput attribute to value. -// If not specified, defaults to DT_QINT32 -func QuantizedMulToutput(value tf.DataType) QuantizedMulAttr { +// MutableHashTableV2Container sets the optional container attribute to value. +// +// value: If non-empty, this table is placed in the given container. +// Otherwise, a default container is used. +// If not specified, defaults to "" +func MutableHashTableV2Container(value string) MutableHashTableV2Attr { return func(m optionalAttr) { - m["Toutput"] = value + m["container"] = value } } -// Returns x * y element-wise, working on quantized buffers. 
+// MutableHashTableV2SharedName sets the optional shared_name attribute to value. // -// Arguments: +// value: If non-empty, this table is shared under the given name across +// multiple sessions. +// If not specified, defaults to "" +func MutableHashTableV2SharedName(value string) MutableHashTableV2Attr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// MutableHashTableV2UseNodeNameSharing sets the optional use_node_name_sharing attribute to value. // +// value: If true and shared_name is empty, the table is shared +// using the node name. +// If not specified, defaults to false +func MutableHashTableV2UseNodeNameSharing(value bool) MutableHashTableV2Attr { + return func(m optionalAttr) { + m["use_node_name_sharing"] = value + } +} + +// Creates an empty hash table. // -// min_x: The float value that the lowest quantized `x` value represents. -// max_x: The float value that the highest quantized `x` value represents. -// min_y: The float value that the lowest quantized `y` value represents. -// max_y: The float value that the highest quantized `y` value represents. +// This op creates a mutable hash table, specifying the type of its keys and +// values. Each value must be a scalar. Data can be inserted into the table using +// the insert operations. It does not support the initialization operation. // -// Returns The float value that the lowest quantized output value represents.The float value that the highest quantized output value represents. +// Arguments: +// key_dtype: Type of the table keys. +// value_dtype: Type of the table values. // -// *NOTE*: `QuantizedMul` supports limited forms of broadcasting. More about -// broadcasting [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func QuantizedMul(scope *Scope, x tf.Output, y tf.Output, min_x tf.Output, max_x tf.Output, min_y tf.Output, max_y tf.Output, optional ...QuantizedMulAttr) (z tf.Output, min_z tf.Output, max_z tf.Output) { +// Returns Handle to a table. 
+func MutableHashTableV2(scope *Scope, key_dtype tf.DataType, value_dtype tf.DataType, optional ...MutableHashTableV2Attr) (table_handle tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"key_dtype": key_dtype, "value_dtype": value_dtype} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "QuantizedMul", - Input: []tf.Input{ - x, y, min_x, max_x, min_y, max_y, - }, + Type: "MutableHashTableV2", + Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return op.Output(0) } -// QuantizedAddAttr is an optional argument to QuantizedAdd. -type QuantizedAddAttr func(optionalAttr) +// DequantizeAttr is an optional argument to Dequantize. +type DequantizeAttr func(optionalAttr) -// QuantizedAddToutput sets the optional Toutput attribute to value. -// If not specified, defaults to DT_QINT32 -func QuantizedAddToutput(value tf.DataType) QuantizedAddAttr { +// DequantizeMode sets the optional mode attribute to value. +// If not specified, defaults to "MIN_COMBINED" +func DequantizeMode(value string) DequantizeAttr { return func(m optionalAttr) { - m["Toutput"] = value + m["mode"] = value } } -// Returns x + y element-wise, working on quantized buffers. +// Dequantize the 'input' tensor into a float Tensor. // -// Arguments: +// [min_range, max_range] are scalar floats that specify the range for +// the 'input' data. The 'mode' attribute controls exactly which calculations are +// used to convert the float values to their quantized equivalents. // +// In 'MIN_COMBINED' mode, each value of the tensor will undergo the following: // -// min_x: The float value that the lowest quantized `x` value represents. -// max_x: The float value that the highest quantized `x` value represents. -// min_y: The float value that the lowest quantized `y` value represents. -// max_y: The float value that the highest quantized `y` value represents. 
+// ``` +// if T == qint8, in[i] += (range(T) + 1)/ 2.0 +// out[i] = min_range + (in[i]* (max_range - min_range) / range(T)) +// ``` +// here `range(T) = numeric_limits::max() - numeric_limits::min()` // -// Returns The float value that the lowest quantized output value represents.The float value that the highest quantized output value represents. +// *MIN_COMBINED Mode Example* // -// *NOTE*: `QuantizedAdd` supports limited forms of broadcasting. More about -// broadcasting [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func QuantizedAdd(scope *Scope, x tf.Output, y tf.Output, min_x tf.Output, max_x tf.Output, min_y tf.Output, max_y tf.Output, optional ...QuantizedAddAttr) (z tf.Output, min_z tf.Output, max_z tf.Output) { +// If the input comes from a QuantizedRelu6, the output type is +// quint8 (range of 0-255) but the possible range of QuantizedRelu6 is +// 0-6. The min_range and max_range values are therefore 0.0 and 6.0. +// Dequantize on quint8 will take each value, cast to float, and multiply +// by 6 / 255. +// Note that if quantizedtype is qint8, the operation will additionally add +// each value by 128 prior to casting. +// +// If the mode is 'MIN_FIRST', then this approach is used: +// +// ```c++ +// num_discrete_values = 1 << (# of bits in T) +// range_adjust = num_discrete_values / (num_discrete_values - 1) +// range = (range_max - range_min) * range_adjust +// range_scale = range / num_discrete_values +// const double offset_input = static_cast(input) - lowest_quantized; +// result = range_min + ((input - numeric_limits::min()) * range_scale) +// ``` +// +// *SCALED mode Example* +// +// `SCALED` mode matches the quantization approach used in +// `QuantizeAndDequantize{V2|V3}`. 
+// +// If the mode is `SCALED`, we do not use the full range of the output type, +// choosing to elide the lowest possible value for symmetry (e.g., output range is +// -127 to 127, not -128 to 127 for signed 8 bit quantization), so that 0.0 maps to +// 0. +// +// We first find the range of values in our tensor. The +// range we use is always centered on 0, so we find m such that +// ```c++ +// m = max(abs(input_min), abs(input_max)) +// ``` +// +// Our input tensor range is then `[-m, m]`. +// +// Next, we choose our fixed-point quantization buckets, `[min_fixed, max_fixed]`. +// If T is signed, this is +// ``` +// num_bits = sizeof(T) * 8 +// [min_fixed, max_fixed] = +// [-(1 << (num_bits - 1) - 1), (1 << (num_bits - 1)) - 1] +// ``` +// +// Otherwise, if T is unsigned, the fixed-point range is +// ``` +// [min_fixed, max_fixed] = [0, (1 << num_bits) - 1] +// ``` +// +// From this we compute our scaling factor, s: +// ```c++ +// s = (2 * m) / (max_fixed - min_fixed) +// ``` +// +// Now we can dequantize the elements of our tensor: +// ```c++ +// result = input * s +// ``` +// +// Arguments: +// +// min_range: The minimum scalar value possibly produced for the input. +// max_range: The maximum scalar value possibly produced for the input. +func Dequantize(scope *Scope, input tf.Output, min_range tf.Output, max_range tf.Output, optional ...DequantizeAttr) (output tf.Output) { if scope.Err() != nil { return } @@ -18043,250 +20714,199 @@ func QuantizedAdd(scope *Scope, x tf.Output, y tf.Output, min_x tf.Output, max_x a(attrs) } opspec := tf.OpSpec{ - Type: "QuantizedAdd", + Type: "Dequantize", Input: []tf.Input{ - x, y, min_x, max_x, min_y, max_y, + input, min_range, max_range, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return op.Output(0) } -// MfccAttr is an optional argument to Mfcc. 
-type MfccAttr func(optionalAttr) - -// MfccUpperFrequencyLimit sets the optional upper_frequency_limit attribute to value. +// Flips all bits elementwise. // -// value: The highest frequency to use when calculating the -// ceptstrum. -// If not specified, defaults to 4000 -func MfccUpperFrequencyLimit(value float32) MfccAttr { - return func(m optionalAttr) { - m["upper_frequency_limit"] = value +// The result will have exactly those bits set, that are not set in `x`. The +// computation is performed on the underlying representation of x. +func Invert(scope *Scope, x tf.Output) (y tf.Output) { + if scope.Err() != nil { + return } -} - -// MfccLowerFrequencyLimit sets the optional lower_frequency_limit attribute to value. -// -// value: The lowest frequency to use when calculating the -// ceptstrum. -// If not specified, defaults to 20 -func MfccLowerFrequencyLimit(value float32) MfccAttr { - return func(m optionalAttr) { - m["lower_frequency_limit"] = value + opspec := tf.OpSpec{ + Type: "Invert", + Input: []tf.Input{ + x, + }, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// MfccFilterbankChannelCount sets the optional filterbank_channel_count attribute to value. +// Inverse 3D fast Fourier transform. // -// value: Resolution of the Mel bank used internally. -// If not specified, defaults to 40 -func MfccFilterbankChannelCount(value int64) MfccAttr { - return func(m optionalAttr) { - m["filterbank_channel_count"] = value - } -} - -// MfccDctCoefficientCount sets the optional dct_coefficient_count attribute to value. +// Computes the inverse 3-dimensional discrete Fourier transform over the +// inner-most 3 dimensions of `input`. // -// value: How many output channels to produce per time slice. -// If not specified, defaults to 13 -func MfccDctCoefficientCount(value int64) MfccAttr { - return func(m optionalAttr) { - m["dct_coefficient_count"] = value +// Arguments: +// input: A complex64 tensor. 
+// +// Returns A complex64 tensor of the same shape as `input`. The inner-most 3 +// dimensions of `input` are replaced with their inverse 3D Fourier transform. +// +// @compatibility(numpy) +// Equivalent to np.fft.ifftn with 3 dimensions. +// @end_compatibility +func IFFT3D(scope *Scope, input tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "IFFT3D", + Input: []tf.Input{ + input, + }, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// Transforms a spectrogram into a form that's useful for speech recognition. -// -// Mel Frequency Cepstral Coefficients are a way of representing audio data that's -// been effective as an input feature for machine learning. They are created by -// taking the spectrum of a spectrogram (a 'cepstrum'), and discarding some of the -// higher frequencies that are less significant to the human ear. They have a long -// history in the speech recognition world, and https://en.wikipedia.org/wiki/Mel-frequency_cepstrum -// is a good resource to learn more. +// Deprecated. Disallowed in GraphDef version >= 2. // -// Arguments: -// spectrogram: Typically produced by the Spectrogram op, with magnitude_squared -// set to true. -// sample_rate: How many samples per second the source audio used. 
-func Mfcc(scope *Scope, spectrogram tf.Output, sample_rate tf.Output, optional ...MfccAttr) (output tf.Output) { +// DEPRECATED at GraphDef version 2: Use AdjustContrastv2 instead +func AdjustContrast(scope *Scope, images tf.Output, contrast_factor tf.Output, min_value tf.Output, max_value tf.Output) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "Mfcc", + Type: "AdjustContrast", Input: []tf.Input{ - spectrogram, sample_rate, + images, contrast_factor, min_value, max_value, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Given a quantized tensor described by (input, input_min, input_max), outputs a -// -// range that covers the actual values present in that tensor. This op is -// typically used to produce the requested_output_min and requested_output_max for -// Requantize. +// Table initializer that takes two tensors for keys and values respectively. // // Arguments: +// table_handle: Handle to a table which will be initialized. +// keys: Keys of type Tkey. +// values: Values of type Tval. // -// input_min: The float value that the minimum quantized input value represents. -// input_max: The float value that the maximum quantized input value represents. -// -// Returns The computed min output.the computed max output. -func RequantizationRange(scope *Scope, input tf.Output, input_min tf.Output, input_max tf.Output) (output_min tf.Output, output_max tf.Output) { +// Returns the created operation. 
+func InitializeTableV2(scope *Scope, table_handle tf.Output, keys tf.Output, values tf.Output) (o *tf.Operation) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "RequantizationRange", + Type: "InitializeTableV2", Input: []tf.Input{ - input, input_min, input_max, + table_handle, keys, values, }, } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) + return scope.AddOperation(opspec) } -// MapPeekAttr is an optional argument to MapPeek. -type MapPeekAttr func(optionalAttr) +// PrintAttr is an optional argument to Print. +type PrintAttr func(optionalAttr) -// MapPeekCapacity sets the optional capacity attribute to value. -// If not specified, defaults to 0 +// PrintMessage sets the optional message attribute to value. // -// REQUIRES: value >= 0 -func MapPeekCapacity(value int64) MapPeekAttr { +// value: A string, prefix of the error message. +// If not specified, defaults to "" +func PrintMessage(value string) PrintAttr { return func(m optionalAttr) { - m["capacity"] = value + m["message"] = value } } -// MapPeekMemoryLimit sets the optional memory_limit attribute to value. -// If not specified, defaults to 0 +// PrintFirstN sets the optional first_n attribute to value. // -// REQUIRES: value >= 0 -func MapPeekMemoryLimit(value int64) MapPeekAttr { - return func(m optionalAttr) { - m["memory_limit"] = value - } -} - -// MapPeekContainer sets the optional container attribute to value. -// If not specified, defaults to "" -func MapPeekContainer(value string) MapPeekAttr { +// value: Only log `first_n` number of times. -1 disables logging. +// If not specified, defaults to -1 +func PrintFirstN(value int64) PrintAttr { return func(m optionalAttr) { - m["container"] = value + m["first_n"] = value } } -// MapPeekSharedName sets the optional shared_name attribute to value. -// If not specified, defaults to "" -func MapPeekSharedName(value string) MapPeekAttr { +// PrintSummarize sets the optional summarize attribute to value. 
+// +// value: Only print this many entries of each tensor. +// If not specified, defaults to 3 +func PrintSummarize(value int64) PrintAttr { return func(m optionalAttr) { - m["shared_name"] = value + m["summarize"] = value } } -// Op peeks at the values at the specified key. If the +// Prints a list of tensors. // -// underlying container does not contain this key -// this op will block until it does. -func MapPeek(scope *Scope, key tf.Output, indices tf.Output, dtypes []tf.DataType, optional ...MapPeekAttr) (values []tf.Output) { +// Passes `input` through to `output` and prints `data` when evaluating. +// +// Arguments: +// input: The tensor passed to `output` +// data: A list of tensors to print out when op is evaluated. +// +// Returns = The unmodified `input` tensor +func Print(scope *Scope, input tf.Output, data []tf.Output, optional ...PrintAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"dtypes": dtypes} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "MapPeek", + Type: "Print", Input: []tf.Input{ - key, indices, + input, tf.OutputList(data), }, Attrs: attrs, } op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if values, idx, err = makeOutputList(op, idx, "values"); err != nil { - scope.UpdateErr("MapPeek", err) - return - } - return values + return op.Output(0) } -// Looks up keys in a table, outputs the corresponding values. -// -// The tensor `keys` must of the same type as the keys of the table. -// The output `values` is of the type of the table values. -// -// The scalar `default_value` is the value output for keys not present in the -// table. It must also be of the same type as the table values. +// Outputs a `Summary` protocol buffer with a tensor and per-plugin data. // // Arguments: -// table_handle: Handle to the table. -// keys: Any shape. Keys to look up. 
-// -// -// Returns Same shape as `keys`. Values found in the table, or `default_values` -// for missing keys. -func LookupTableFindV2(scope *Scope, table_handle tf.Output, keys tf.Output, default_value tf.Output) (values tf.Output) { +// tag: A string attached to this summary. Used for organization in TensorBoard. +// tensor: A tensor to serialize. +// serialized_summary_metadata: A serialized SummaryMetadata proto. Contains plugin +// data. +func TensorSummaryV2(scope *Scope, tag tf.Output, tensor tf.Output, serialized_summary_metadata tf.Output) (summary tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "LookupTableFindV2", + Type: "TensorSummaryV2", Input: []tf.Input{ - table_handle, keys, default_value, + tag, tensor, serialized_summary_metadata, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Bucketizes 'input' based on 'boundaries'. -// -// For example, if the inputs are -// boundaries = [0, 10, 100] -// input = [[-5, 10000] -// [150, 10] -// [5, 100]] -// -// then the output will be -// output = [[0, 3] -// [3, 2] -// [1, 3]] +// Creates a dataset that asynchronously prefetches elements from `input_dataset`. // // Arguments: -// input: Any shape of Tensor contains with int or float type. -// boundaries: A sorted list of floats gives the boundary of the buckets. // -// Returns Same shape with 'input', each value of input replaced with bucket index. +// buffer_size: The maximum number of elements to buffer in an iterator over +// this dataset. // -// @compatibility(numpy) -// Equivalent to np.digitize. 
-// @end_compatibility -func Bucketize(scope *Scope, input tf.Output, boundaries []float32) (output tf.Output) { +// +func PrefetchDataset(scope *Scope, input_dataset tf.Output, buffer_size tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"boundaries": boundaries} + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} opspec := tf.OpSpec{ - Type: "Bucketize", + Type: "PrefetchDataset", Input: []tf.Input{ - input, + input_dataset, buffer_size, }, Attrs: attrs, } @@ -18294,38 +20914,48 @@ func Bucketize(scope *Scope, input tf.Output, boundaries []float32) (output tf.O return op.Output(0) } -// EncodePngAttr is an optional argument to EncodePng. -type EncodePngAttr func(optionalAttr) +// TensorSummaryAttr is an optional argument to TensorSummary. +type TensorSummaryAttr func(optionalAttr) -// EncodePngCompression sets the optional compression attribute to value. +// TensorSummaryDescription sets the optional description attribute to value. // -// value: Compression level. -// If not specified, defaults to -1 -func EncodePngCompression(value int64) EncodePngAttr { +// value: A json-encoded SummaryDescription proto. +// If not specified, defaults to "" +func TensorSummaryDescription(value string) TensorSummaryAttr { return func(m optionalAttr) { - m["compression"] = value + m["description"] = value } } -// PNG-encode an image. +// TensorSummaryLabels sets the optional labels attribute to value. // -// `image` is a 3-D uint8 or uint16 Tensor of shape `[height, width, channels]` -// where `channels` is: +// value: An unused list of strings. +// If not specified, defaults to <> +func TensorSummaryLabels(value []string) TensorSummaryAttr { + return func(m optionalAttr) { + m["labels"] = value + } +} + +// TensorSummaryDisplayName sets the optional display_name attribute to value. // -// * 1: for grayscale. 
-// * 2: for grayscale + alpha. -// * 3: for RGB. -// * 4: for RGBA. +// value: An unused string. +// If not specified, defaults to "" +func TensorSummaryDisplayName(value string) TensorSummaryAttr { + return func(m optionalAttr) { + m["display_name"] = value + } +} + +// Outputs a `Summary` protocol buffer with a tensor. // -// The ZLIB compression level, `compression`, can be -1 for the PNG-encoder -// default or a value from 0 to 9. 9 is the highest compression level, generating -// the smallest output, but is slower. +// This op is being phased out in favor of TensorSummaryV2, which lets callers pass +// a tag as well as a serialized SummaryMetadata proto string that contains +// plugin-specific data. We will keep this op to maintain backwards compatibility. // // Arguments: -// image: 3-D with shape `[height, width, channels]`. -// -// Returns 0-D. PNG-encoded image. -func EncodePng(scope *Scope, image tf.Output, optional ...EncodePngAttr) (contents tf.Output) { +// tensor: A tensor to serialize. +func TensorSummary(scope *Scope, tensor tf.Output, optional ...TensorSummaryAttr) (summary tf.Output) { if scope.Err() != nil { return } @@ -18334,9 +20964,9 @@ func EncodePng(scope *Scope, image tf.Output, optional ...EncodePngAttr) (conten a(attrs) } opspec := tf.OpSpec{ - Type: "EncodePng", + Type: "TensorSummary", Input: []tf.Input{ - image, + tensor, }, Attrs: attrs, } @@ -18344,449 +20974,431 @@ func EncodePng(scope *Scope, image tf.Output, optional ...EncodePngAttr) (conten return op.Output(0) } -// Updates the table to associates keys with values. -// -// The tensor `keys` must be of the same type as the keys of the table. -// The tensor `values` must be of the type of the table values. -// -// Arguments: -// table_handle: Handle to the table. -// keys: Any shape. Keys to look up. -// values: Values to associate with keys. +// Computes the gradient for the tanh of `x` wrt its input. // -// Returns the created operation. 
-func LookupTableInsertV2(scope *Scope, table_handle tf.Output, keys tf.Output, values tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "LookupTableInsertV2", - Input: []tf.Input{ - table_handle, keys, values, - }, - } - return scope.AddOperation(opspec) -} - -// Returns element-wise smallest integer in not less than x. -func Ceil(scope *Scope, x tf.Output) (y tf.Output) { +// Specifically, `grad = dy * (1 - y*y)`, where `y = tanh(x)`, and `dy` +// is the corresponding input gradient. +func TanhGrad(scope *Scope, y tf.Output, dy tf.Output) (z tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Ceil", + Type: "TanhGrad", Input: []tf.Input{ - x, + y, dy, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Computes the number of elements in the given table. +// Reduces sparse updates into the variable referenced by `resource` using the `max` operation. +// +// This operation computes +// +// # Scalar indices +// ref[indices, ...] = max(ref[indices, ...], updates[...]) +// +// # Vector indices (for each i) +// ref[indices[i], ...] = max(ref[indices[i], ...], updates[i, ...]) +// +// # High rank indices (for each i, ..., j) +// ref[indices[i, ..., j], ...] = max(ref[indices[i, ..., j], ...], updates[i, ..., j, ...]) +// +// Duplicate entries are handled correctly: if multiple `indices` reference +// the same location, their contributions are combined. +// +// Requires `updates.shape = indices.shape + ref.shape[1:]` or `updates.shape = []`. +// +//
+// +//
// // Arguments: -// table_handle: Handle to the table. +// resource: Should be from a `Variable` node. +// indices: A tensor of indices into the first dimension of `ref`. +// updates: A tensor of updated values to add to `ref`. // -// Returns Scalar that contains number of elements in the table. -func LookupTableSizeV2(scope *Scope, table_handle tf.Output) (size tf.Output) { +// Returns the created operation. +func ResourceScatterMax(scope *Scope, resource tf.Output, indices tf.Output, updates tf.Output) (o *tf.Operation) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "LookupTableSizeV2", + Type: "ResourceScatterMax", Input: []tf.Input{ - table_handle, + resource, indices, updates, }, } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } -// ResizeBilinearGradAttr is an optional argument to ResizeBilinearGrad. -type ResizeBilinearGradAttr func(optionalAttr) - -// ResizeBilinearGradAlignCorners sets the optional align_corners attribute to value. +// Outputs a `Summary` protocol buffer with scalar values. // -// value: If true, rescale grads by (orig_height - 1) / (height - 1), which -// exactly aligns the 4 corners of grads and original_image. If false, rescale by -// orig_height / height. Treat similarly the width dimension. -// If not specified, defaults to false -func ResizeBilinearGradAlignCorners(value bool) ResizeBilinearGradAttr { - return func(m optionalAttr) { - m["align_corners"] = value - } -} - -// Computes the gradient of bilinear interpolation. +// The input `tags` and `values` must have the same shape. The generated summary +// has a summary value for each tag-value pair in `tags` and `values`. // // Arguments: -// grads: 4-D with shape `[batch, height, width, channels]`. -// original_image: 4-D with shape `[batch, orig_height, orig_width, channels]`, -// The image tensor that was resized. +// tags: Tags for the summary. +// values: Same shape as `tags. Values for the summary. 
// -// Returns 4-D with shape `[batch, orig_height, orig_width, channels]`. -// Gradients with respect to the input image. Input image must have been -// float or double. -func ResizeBilinearGrad(scope *Scope, grads tf.Output, original_image tf.Output, optional ...ResizeBilinearGradAttr) (output tf.Output) { +// Returns Scalar. Serialized `Summary` protocol buffer. +func ScalarSummary(scope *Scope, tags tf.Output, values tf.Output) (summary tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "ResizeBilinearGrad", + Type: "ScalarSummary", Input: []tf.Input{ - grads, original_image, + tags, values, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Outputs all keys and values in the table. +// Outputs a `Summary` protocol buffer with a histogram. // -// Arguments: -// table_handle: Handle to the table. +// The generated +// [`Summary`](https://www.tensorflow.org/code/tensorflow/core/framework/summary.proto) +// has one summary value containing a histogram for `values`. // +// This op reports an `InvalidArgument` error if any value is not finite. // +// Arguments: +// tag: Scalar. Tag to use for the `Summary.Value`. +// values: Any shape. Values to use to build the histogram. // -// Returns Vector of all keys present in the table.Tensor of all values in the table. Indexed in parallel with `keys`. -func LookupTableExportV2(scope *Scope, table_handle tf.Output, Tkeys tf.DataType, Tvalues tf.DataType) (keys tf.Output, values tf.Output) { +// Returns Scalar. Serialized `Summary` protocol buffer. 
+func HistogramSummary(scope *Scope, tag tf.Output, values tf.Output) (summary tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"Tkeys": Tkeys, "Tvalues": Tvalues} opspec := tf.OpSpec{ - Type: "LookupTableExportV2", + Type: "HistogramSummary", Input: []tf.Input{ - table_handle, + tag, values, }, - Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) + return op.Output(0) } -// Replaces the contents of the table with the specified keys and values. -// -// The tensor `keys` must be of the same type as the keys of the table. -// The tensor `values` must be of the type of the table values. +// Computes the number of elements in the given queue. // // Arguments: -// table_handle: Handle to the table. -// keys: Any shape. Keys to look up. -// values: Values to associate with keys. +// handle: The handle to a queue. // -// Returns the created operation. -func LookupTableImportV2(scope *Scope, table_handle tf.Output, keys tf.Output, values tf.Output) (o *tf.Operation) { +// Returns The number of elements in the given queue. +func QueueSizeV2(scope *Scope, handle tf.Output) (size tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "LookupTableImportV2", + Type: "QueueSizeV2", Input: []tf.Input{ - table_handle, keys, values, + handle, }, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0) } -// MapUnstageNoKeyAttr is an optional argument to MapUnstageNoKey. -type MapUnstageNoKeyAttr func(optionalAttr) +// ImageSummaryAttr is an optional argument to ImageSummary. +type ImageSummaryAttr func(optionalAttr) -// MapUnstageNoKeyCapacity sets the optional capacity attribute to value. -// If not specified, defaults to 0 +// ImageSummaryMaxImages sets the optional max_images attribute to value. 
// -// REQUIRES: value >= 0 -func MapUnstageNoKeyCapacity(value int64) MapUnstageNoKeyAttr { - return func(m optionalAttr) { - m["capacity"] = value - } -} - -// MapUnstageNoKeyMemoryLimit sets the optional memory_limit attribute to value. -// If not specified, defaults to 0 +// value: Max number of batch elements to generate images for. +// If not specified, defaults to 3 // -// REQUIRES: value >= 0 -func MapUnstageNoKeyMemoryLimit(value int64) MapUnstageNoKeyAttr { - return func(m optionalAttr) { - m["memory_limit"] = value - } -} - -// MapUnstageNoKeyContainer sets the optional container attribute to value. -// If not specified, defaults to "" -func MapUnstageNoKeyContainer(value string) MapUnstageNoKeyAttr { +// REQUIRES: value >= 1 +func ImageSummaryMaxImages(value int64) ImageSummaryAttr { return func(m optionalAttr) { - m["container"] = value + m["max_images"] = value } } -// MapUnstageNoKeySharedName sets the optional shared_name attribute to value. -// If not specified, defaults to "" -func MapUnstageNoKeySharedName(value string) MapUnstageNoKeyAttr { +// ImageSummaryBadColor sets the optional bad_color attribute to value. +// +// value: Color to use for pixels with non-finite values. +// If not specified, defaults to > int_val:255 int_val:0 int_val:0 int_val:255 > +func ImageSummaryBadColor(value tf.Tensor) ImageSummaryAttr { return func(m optionalAttr) { - m["shared_name"] = value + m["bad_color"] = value } } -// Op removes and returns a random (key, value) +// Outputs a `Summary` protocol buffer with images. // -// from the underlying container. If the underlying container -// does not contain elements, the op will block until it does. -func MapUnstageNoKey(scope *Scope, indices tf.Output, dtypes []tf.DataType, optional ...MapUnstageNoKeyAttr) (key tf.Output, values []tf.Output) { +// The summary has up to `max_images` summary values containing images. 
The +// images are built from `tensor` which must be 4-D with shape `[batch_size, +// height, width, channels]` and where `channels` can be: +// +// * 1: `tensor` is interpreted as Grayscale. +// * 3: `tensor` is interpreted as RGB. +// * 4: `tensor` is interpreted as RGBA. +// +// The images have the same number of channels as the input tensor. For float +// input, the values are normalized one image at a time to fit in the range +// `[0, 255]`. `uint8` values are unchanged. The op uses two different +// normalization algorithms: +// +// * If the input values are all positive, they are rescaled so the largest one +// is 255. +// +// * If any input value is negative, the values are shifted so input value 0.0 +// is at 127. They are then rescaled so that either the smallest value is 0, +// or the largest one is 255. +// +// The `tag` argument is a scalar `Tensor` of type `string`. It is used to +// build the `tag` of the summary values: +// +// * If `max_images` is 1, the summary value tag is '*tag*/image'. +// * If `max_images` is greater than 1, the summary value tags are +// generated sequentially as '*tag*/image/0', '*tag*/image/1', etc. +// +// The `bad_color` argument is the color to use in the generated images for +// non-finite input values. It is a `uint8` 1-D tensor of length `channels`. +// Each element must be in the range `[0, 255]` (It represents the value of a +// pixel in the output image). Non-finite values in the input tensor are +// replaced by this tensor in the output image. The default value is the color +// red. +// +// Arguments: +// tag: Scalar. Used to build the `tag` attribute of the summary values. +// tensor: 4-D of shape `[batch_size, height, width, channels]` where +// `channels` is 1, 3, or 4. +// +// Returns Scalar. Serialized `Summary` protocol buffer. 
+func ImageSummary(scope *Scope, tag tf.Output, tensor tf.Output, optional ...ImageSummaryAttr) (summary tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"dtypes": dtypes} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "MapUnstageNoKey", + Type: "ImageSummary", Input: []tf.Input{ - indices, + tag, tensor, }, Attrs: attrs, } op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - key = op.Output(idx) - if values, idx, err = makeOutputList(op, idx, "values"); err != nil { - scope.UpdateErr("MapUnstageNoKey", err) - return - } - return key, values + return op.Output(0) } -// HashTableV2Attr is an optional argument to HashTableV2. -type HashTableV2Attr func(optionalAttr) +// AudioSummaryV2Attr is an optional argument to AudioSummaryV2. +type AudioSummaryV2Attr func(optionalAttr) -// HashTableV2Container sets the optional container attribute to value. +// AudioSummaryV2MaxOutputs sets the optional max_outputs attribute to value. // -// value: If non-empty, this table is placed in the given container. -// Otherwise, a default container is used. -// If not specified, defaults to "" -func HashTableV2Container(value string) HashTableV2Attr { +// value: Max number of batch elements to generate audio for. +// If not specified, defaults to 3 +// +// REQUIRES: value >= 1 +func AudioSummaryV2MaxOutputs(value int64) AudioSummaryV2Attr { return func(m optionalAttr) { - m["container"] = value + m["max_outputs"] = value + } +} + +// Outputs a `Summary` protocol buffer with audio. +// +// The summary has up to `max_outputs` summary values containing audio. The +// audio is built from `tensor` which must be 3-D with shape `[batch_size, +// frames, channels]` or 2-D with shape `[batch_size, frames]`. The values are +// assumed to be in the range of `[-1.0, 1.0]` with a sample rate of `sample_rate`. 
+// +// The `tag` argument is a scalar `Tensor` of type `string`. It is used to +// build the `tag` of the summary values: +// +// * If `max_outputs` is 1, the summary value tag is '*tag*/audio'. +// * If `max_outputs` is greater than 1, the summary value tags are +// generated sequentially as '*tag*/audio/0', '*tag*/audio/1', etc. +// +// Arguments: +// tag: Scalar. Used to build the `tag` attribute of the summary values. +// tensor: 2-D of shape `[batch_size, frames]`. +// sample_rate: The sample rate of the signal in hertz. +// +// Returns Scalar. Serialized `Summary` protocol buffer. +func AudioSummaryV2(scope *Scope, tag tf.Output, tensor tf.Output, sample_rate tf.Output, optional ...AudioSummaryV2Attr) (summary tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "AudioSummaryV2", + Input: []tf.Input{ + tag, tensor, sample_rate, + }, + Attrs: attrs, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// HashTableV2SharedName sets the optional shared_name attribute to value. -// -// value: If non-empty, this table is shared under the given name across -// multiple sessions. -// If not specified, defaults to "" -func HashTableV2SharedName(value string) HashTableV2Attr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} +// AvgPoolAttr is an optional argument to AvgPool. +type AvgPoolAttr func(optionalAttr) -// HashTableV2UseNodeNameSharing sets the optional use_node_name_sharing attribute to value. +// AvgPoolDataFormat sets the optional data_format attribute to value. // -// value: If true and shared_name is empty, the table is shared -// using the node name. -// If not specified, defaults to false -func HashTableV2UseNodeNameSharing(value bool) HashTableV2Attr { +// value: Specify the data format of the input and output data. 
With the +// default format "NHWC", the data is stored in the order of: +// [batch, in_height, in_width, in_channels]. +// Alternatively, the format could be "NCHW", the data storage order of: +// [batch, in_channels, in_height, in_width]. +// If not specified, defaults to "NHWC" +func AvgPoolDataFormat(value string) AvgPoolAttr { return func(m optionalAttr) { - m["use_node_name_sharing"] = value + m["data_format"] = value } } -// Creates a non-initialized hash table. +// Performs average pooling on the input. // -// This op creates a hash table, specifying the type of its keys and values. -// Before using the table you will have to initialize it. After initialization the -// table will be immutable. +// Each entry in `output` is the mean of the corresponding size `ksize` +// window in `value`. // // Arguments: -// key_dtype: Type of the table keys. -// value_dtype: Type of the table values. +// value: 4-D with shape `[batch, height, width, channels]`. +// ksize: The size of the sliding window for each dimension of `value`. +// strides: The stride of the sliding window for each dimension of `value`. +// padding: The type of padding algorithm to use. // -// Returns Handle to a table. -func HashTableV2(scope *Scope, key_dtype tf.DataType, value_dtype tf.DataType, optional ...HashTableV2Attr) (table_handle tf.Output) { +// Returns The average pooled output tensor. 
+func AvgPool(scope *Scope, value tf.Output, ksize []int64, strides []int64, padding string, optional ...AvgPoolAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"key_dtype": key_dtype, "value_dtype": value_dtype} + attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "HashTableV2", - + Type: "AvgPool", + Input: []tf.Input{ + value, + }, Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// MutableHashTableV2Attr is an optional argument to MutableHashTableV2. -type MutableHashTableV2Attr func(optionalAttr) - -// MutableHashTableV2Container sets the optional container attribute to value. +// Merges summaries. // -// value: If non-empty, this table is placed in the given container. -// Otherwise, a default container is used. -// If not specified, defaults to "" -func MutableHashTableV2Container(value string) MutableHashTableV2Attr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// MutableHashTableV2SharedName sets the optional shared_name attribute to value. +// This op creates a +// [`Summary`](https://www.tensorflow.org/code/tensorflow/core/framework/summary.proto) +// protocol buffer that contains the union of all the values in the input +// summaries. // -// value: If non-empty, this table is shared under the given name across -// multiple sessions. -// If not specified, defaults to "" -func MutableHashTableV2SharedName(value string) MutableHashTableV2Attr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// MutableHashTableV2UseNodeNameSharing sets the optional use_node_name_sharing attribute to value. +// When the Op is run, it reports an `InvalidArgument` error if multiple values +// in the summaries to merge use the same tag. // -// value: If true and shared_name is empty, the table is shared -// using the node name. 
-// If not specified, defaults to false -func MutableHashTableV2UseNodeNameSharing(value bool) MutableHashTableV2Attr { - return func(m optionalAttr) { - m["use_node_name_sharing"] = value +// Arguments: +// inputs: Can be of any shape. Each must contain serialized `Summary` protocol +// buffers. +// +// Returns Scalar. Serialized `Summary` protocol buffer. +func MergeSummary(scope *Scope, inputs []tf.Output) (summary tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "MergeSummary", + Input: []tf.Input{ + tf.OutputList(inputs), + }, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// Creates an empty hash table. -// -// This op creates a mutable hash table, specifying the type of its keys and -// values. Each value must be a scalar. Data can be inserted into the table using -// the insert operations. It does not support the initialization operation. +// Computes the gradient of morphological 2-D dilation with respect to the filter. // // Arguments: -// key_dtype: Type of the table keys. -// value_dtype: Type of the table values. +// input: 4-D with shape `[batch, in_height, in_width, depth]`. +// filter: 3-D with shape `[filter_height, filter_width, depth]`. +// out_backprop: 4-D with shape `[batch, out_height, out_width, depth]`. +// strides: 1-D of length 4. The stride of the sliding window for each dimension of +// the input tensor. Must be: `[1, stride_height, stride_width, 1]`. +// rates: 1-D of length 4. The input stride for atrous morphological dilation. +// Must be: `[1, rate_height, rate_width, 1]`. +// padding: The type of padding algorithm to use. // -// Returns Handle to a table. -func MutableHashTableV2(scope *Scope, key_dtype tf.DataType, value_dtype tf.DataType, optional ...MutableHashTableV2Attr) (table_handle tf.Output) { +// Returns 3-D with shape `[filter_height, filter_width, depth]`. 
+func Dilation2DBackpropFilter(scope *Scope, input tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, rates []int64, padding string) (filter_backprop tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"key_dtype": key_dtype, "value_dtype": value_dtype} - for _, a := range optional { - a(attrs) - } + attrs := map[string]interface{}{"strides": strides, "rates": rates, "padding": padding} opspec := tf.OpSpec{ - Type: "MutableHashTableV2", - + Type: "Dilation2DBackpropFilter", + Input: []tf.Input{ + input, filter, out_backprop, + }, Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// DequantizeAttr is an optional argument to Dequantize. -type DequantizeAttr func(optionalAttr) +// AddSparseToTensorsMapAttr is an optional argument to AddSparseToTensorsMap. +type AddSparseToTensorsMapAttr func(optionalAttr) -// DequantizeMode sets the optional mode attribute to value. -// If not specified, defaults to "MIN_COMBINED" -func DequantizeMode(value string) DequantizeAttr { +// AddSparseToTensorsMapContainer sets the optional container attribute to value. +// +// value: The container name for the `SparseTensorsMap` created by this op. +// If not specified, defaults to "" +func AddSparseToTensorsMapContainer(value string) AddSparseToTensorsMapAttr { return func(m optionalAttr) { - m["mode"] = value + m["container"] = value } } -// Dequantize the 'input' tensor into a float Tensor. -// -// [min_range, max_range] are scalar floats that specify the range for -// the 'input' data. The 'mode' attribute controls exactly which calculations are -// used to convert the float values to their quantized equivalents. 
-// -// In 'MIN_COMBINED' mode, each value of the tensor will undergo the following: -// -// ``` -// if T == qint8, in[i] += (range(T) + 1)/ 2.0 -// out[i] = min_range + (in[i]* (max_range - min_range) / range(T)) -// ``` -// here `range(T) = numeric_limits::max() - numeric_limits::min()` -// -// *MIN_COMBINED Mode Example* -// -// If the input comes from a QuantizedRelu6, the output type is -// quint8 (range of 0-255) but the possible range of QuantizedRelu6 is -// 0-6. The min_range and max_range values are therefore 0.0 and 6.0. -// Dequantize on quint8 will take each value, cast to float, and multiply -// by 6 / 255. -// Note that if quantizedtype is qint8, the operation will additionally add -// each value by 128 prior to casting. -// -// If the mode is 'MIN_FIRST', then this approach is used: -// -// ```c++ -// num_discrete_values = 1 << (# of bits in T) -// range_adjust = num_discrete_values / (num_discrete_values - 1) -// range = (range_max - range_min) * range_adjust -// range_scale = range / num_discrete_values -// const double offset_input = static_cast(input) - lowest_quantized; -// result = range_min + ((input - numeric_limits::min()) * range_scale) -// ``` -// -// *SCALED mode Example* -// -// `SCALED` mode matches the quantization approach used in -// `QuantizeAndDequantize{V2|V3}`. -// -// If the mode is `SCALED`, we do not use the full range of the output type, -// choosing to elide the lowest possible value for symmetry (e.g., output range is -// -127 to 127, not -128 to 127 for signed 8 bit quantization), so that 0.0 maps to -// 0. -// -// We first find the range of values in our tensor. The -// range we use is always centered on 0, so we find m such that -// ```c++ -// m = max(abs(input_min), abs(input_max)) -// ``` -// -// Our input tensor range is then `[-m, m]`. +// AddSparseToTensorsMapSharedName sets the optional shared_name attribute to value. // -// Next, we choose our fixed-point quantization buckets, `[min_fixed, max_fixed]`. 
-// If T is signed, this is -// ``` -// num_bits = sizeof(T) * 8 -// [min_fixed, max_fixed] = -// [-(1 << (num_bits - 1) - 1), (1 << (num_bits - 1)) - 1] -// ``` +// value: The shared name for the `SparseTensorsMap` created by this op. +// If blank, the new Operation's unique name is used. +// If not specified, defaults to "" +func AddSparseToTensorsMapSharedName(value string) AddSparseToTensorsMapAttr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// Add a `SparseTensor` to a `SparseTensorsMap` return its handle. // -// Otherwise, if T is unsigned, the fixed-point range is -// ``` -// [min_fixed, max_fixed] = [0, (1 << num_bits) - 1] -// ``` +// A `SparseTensor` is represented by three tensors: `sparse_indices`, +// `sparse_values`, and `sparse_shape`. // -// From this we compute our scaling factor, s: -// ```c++ -// s = (2 * m) / (max_fixed - min_fixed) -// ``` +// This operator takes the given `SparseTensor` and adds it to a container +// object (a `SparseTensorsMap`). A unique key within this container is generated +// in the form of an `int64`, and this is the value that is returned. // -// Now we can dequantize the elements of our tensor: -// ```c++ -// result = input * s -// ``` +// The `SparseTensor` can then be read out as part of a minibatch by passing +// the key as a vector element to `TakeManySparseFromTensorsMap`. To ensure +// the correct `SparseTensorsMap` is accessed, ensure that the same +// `container` and `shared_name` are passed to that Op. If no `shared_name` +// is provided here, instead use the *name* of the Operation created by calling +// `AddSparseToTensorsMap` as the `shared_name` passed to +// `TakeManySparseFromTensorsMap`. Ensure the Operations are colocated. // // Arguments: -// -// min_range: The minimum scalar value possibly produced for the input. -// max_range: The maximum scalar value possibly produced for the input. 
-func Dequantize(scope *Scope, input tf.Output, min_range tf.Output, max_range tf.Output, optional ...DequantizeAttr) (output tf.Output) { +// sparse_indices: 2-D. The `indices` of the `SparseTensor`. +// sparse_values: 1-D. The `values` of the `SparseTensor`. +// sparse_shape: 1-D. The `shape` of the `SparseTensor`. +// +// Returns 0-D. The handle of the `SparseTensor` now stored in the +// `SparseTensorsMap`. +func AddSparseToTensorsMap(scope *Scope, sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output, optional ...AddSparseToTensorsMapAttr) (sparse_handle tf.Output) { if scope.Err() != nil { return } @@ -18795,9 +21407,9 @@ func Dequantize(scope *Scope, input tf.Output, min_range tf.Output, max_range tf a(attrs) } opspec := tf.OpSpec{ - Type: "Dequantize", + Type: "AddSparseToTensorsMap", Input: []tf.Input{ - input, min_range, max_range, + sparse_indices, sparse_values, sparse_shape, }, Attrs: attrs, } @@ -18805,116 +21417,81 @@ func Dequantize(scope *Scope, input tf.Output, min_range tf.Output, max_range tf return op.Output(0) } -// Flips all bits elementwise. +// Returns a list list which has the passed-in `Tensor` as last element and the other elements of the given list in `input_handle`. // -// The result will have exactly those bits set, that are not set in `x`. The -// computation is performed on the underlying representation of x. -func Invert(scope *Scope, x tf.Output) (y tf.Output) { +// tensor: The tensor to put on the list. +// input_handle: The old list. +// output_handle: A list with the elements of the old list followed by tensor. +// element_dtype: the type of elements in the list. +// element_shape: a shape compatible with that of elements in the list. 
+func TensorListPushBack(scope *Scope, input_handle tf.Output, tensor tf.Output) (output_handle tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Invert", + Type: "TensorListPushBack", Input: []tf.Input{ - x, + input_handle, tensor, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Deprecated. Disallowed in GraphDef version >= 2. +// Returns the number of tensors in the input tensor list. // -// DEPRECATED at GraphDef version 2: Use AdjustContrastv2 instead -func AdjustContrast(scope *Scope, images tf.Output, contrast_factor tf.Output, min_value tf.Output, max_value tf.Output) (output tf.Output) { +// input_handle: the input list +// length: the number of tensors in the list +func TensorListLength(scope *Scope, input_handle tf.Output) (length tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "AdjustContrast", + Type: "TensorListLength", Input: []tf.Input{ - images, contrast_factor, min_value, max_value, + input_handle, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Table initializer that takes two tensors for keys and values respectively. -// -// Arguments: -// table_handle: Handle to a table which will be initialized. -// keys: Keys of type Tkey. -// values: Values of type Tval. +// The shape of the elements of the given list, as a tensor. // -// Returns the created operation. 
-func InitializeTableV2(scope *Scope, table_handle tf.Output, keys tf.Output, values tf.Output) (o *tf.Operation) { +// input_handle: the list +// element_shape: the shape of elements of the list +func TensorListElementShape(scope *Scope, input_handle tf.Output, shape_type tf.DataType) (element_shape tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"shape_type": shape_type} opspec := tf.OpSpec{ - Type: "InitializeTableV2", + Type: "TensorListElementShape", Input: []tf.Input{ - table_handle, keys, values, + input_handle, }, + Attrs: attrs, } - return scope.AddOperation(opspec) -} - -// PrintAttr is an optional argument to Print. -type PrintAttr func(optionalAttr) - -// PrintMessage sets the optional message attribute to value. -// -// value: A string, prefix of the error message. -// If not specified, defaults to "" -func PrintMessage(value string) PrintAttr { - return func(m optionalAttr) { - m["message"] = value - } -} - -// PrintFirstN sets the optional first_n attribute to value. -// -// value: Only log `first_n` number of times. -1 disables logging. -// If not specified, defaults to -1 -func PrintFirstN(value int64) PrintAttr { - return func(m optionalAttr) { - m["first_n"] = value - } -} - -// PrintSummarize sets the optional summarize attribute to value. -// -// value: Only print this many entries of each tensor. -// If not specified, defaults to 3 -func PrintSummarize(value int64) PrintAttr { - return func(m optionalAttr) { - m["summarize"] = value - } + op := scope.AddOperation(opspec) + return op.Output(0) } -// Prints a list of tensors. +// Returns the item in the list with the given index. // -// Passes `input` through to `output` and prints `data` when evaluating. +// input_handle: the list +// index: the position in the list from which an element will be retrieved +// item: the element at that position // -// Arguments: -// input: The tensor passed to `output` -// data: A list of tensors to print out when op is evaluated. 
// -// Returns = The unmodified `input` tensor -func Print(scope *Scope, input tf.Output, data []tf.Output, optional ...PrintAttr) (output tf.Output) { +func TensorListGetItem(scope *Scope, input_handle tf.Output, index tf.Output, element_dtype tf.DataType) (item tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } + attrs := map[string]interface{}{"element_dtype": element_dtype} opspec := tf.OpSpec{ - Type: "Print", + Type: "TensorListGetItem", Input: []tf.Input{ - input, tf.OutputList(data), + input_handle, index, }, Attrs: attrs, } @@ -18922,268 +21499,335 @@ func Print(scope *Scope, input tf.Output, data []tf.Output, optional ...PrintAtt return op.Output(0) } -// Outputs a `Summary` protocol buffer with a tensor and per-plugin data. +// Computes the matrix exponential of one or more square matrices: +// +// exp(A) = \sum_{n=0}^\infty A^n/n! +// +// The exponential is computed using a combination of the scaling and squaring +// method and the Pade approximation. Details can be found in: +// Nicholas J. Higham, "The scaling and squaring method for the matrix exponential +// revisited," SIAM J. Matrix Anal. Applic., 26:1179-1193, 2005. +// +// The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions +// form square matrices. The output is a tensor of the same shape as the input +// containing the exponential for all input submatrices `[..., :, :]`. // // Arguments: -// tag: A string attached to this summary. Used for organization in TensorBoard. -// tensor: A tensor to serialize. -// serialized_summary_metadata: A serialized SummaryMetadata proto. Contains plugin -// data. -func TensorSummaryV2(scope *Scope, tag tf.Output, tensor tf.Output, serialized_summary_metadata tf.Output) (summary tf.Output) { +// input: Shape is `[..., M, M]`. +// +// Returns Shape is `[..., M, M]`.
+// +// @compatibility(scipy) +// Equivalent to scipy.linalg.expm +// @end_compatibility +func MatrixExponential(scope *Scope, input tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "TensorSummaryV2", + Type: "MatrixExponential", Input: []tf.Input{ - tag, tensor, serialized_summary_metadata, + input, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Creates a dataset that asynchronously prefetches elements from `input_dataset`. +// Computes the matrix logarithm of one or more square matrices: // -// Arguments: // -// buffer_size: The maximum number of elements to buffer in an iterator over -// this dataset. +// log(exp(A)) = A // +// This op is only defined for complex matrices. If A is positive-definite and +// real, then casting to a complex matrix, taking the logarithm and casting back +// to a real matrix will give the correct result. // -func PrefetchDataset(scope *Scope, input_dataset tf.Output, buffer_size tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { +// This function computes the matrix logarithm using the Schur-Parlett algorithm. +// Details of the algorithm can be found in Section 11.6.2 of: +// Nicholas J. Higham, Functions of Matrices: Theory and Computation, SIAM 2008. +// ISBN 978-0-898716-46-7. +// +// The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions +// form square matrices. The output is a tensor of the same shape as the input +// containing the logarithm for all input submatrices `[..., :, :]`. +// +// Arguments: +// input: Shape is `[..., M, M]`. +// +// Returns Shape is `[..., M, M]`.
+// +// @compatibility(scipy) +// Equivalent to scipy.linalg.logm +// @end_compatibility +func MatrixLogarithm(scope *Scope, input tf.Output) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} opspec := tf.OpSpec{ - Type: "PrefetchDataset", + Type: "MatrixLogarithm", Input: []tf.Input{ - input_dataset, buffer_size, + input, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// TensorSummaryAttr is an optional argument to TensorSummary. -type TensorSummaryAttr func(optionalAttr) +// QueueDequeueUpToV2Attr is an optional argument to QueueDequeueUpToV2. +type QueueDequeueUpToV2Attr func(optionalAttr) -// TensorSummaryDescription sets the optional description attribute to value. +// QueueDequeueUpToV2TimeoutMs sets the optional timeout_ms attribute to value. // -// value: A json-encoded SummaryDescription proto. -// If not specified, defaults to "" -func TensorSummaryDescription(value string) TensorSummaryAttr { +// value: If the queue has fewer than n elements, this operation +// will block for up to timeout_ms milliseconds. +// Note: This option is not supported yet. +// If not specified, defaults to -1 +func QueueDequeueUpToV2TimeoutMs(value int64) QueueDequeueUpToV2Attr { return func(m optionalAttr) { - m["description"] = value + m["timeout_ms"] = value } } -// TensorSummaryLabels sets the optional labels attribute to value. +// Dequeues `n` tuples of one or more tensors from the given queue. // -// value: An unused list of strings. -// If not specified, defaults to <> -func TensorSummaryLabels(value []string) TensorSummaryAttr { - return func(m optionalAttr) { - m["labels"] = value +// This operation is not supported by all queues. If a queue does not support +// DequeueUpTo, then an Unimplemented error is returned. 
+// +// If the queue is closed and there are more than 0 but less than `n` +// elements remaining, then instead of returning an OutOfRange error like +// QueueDequeueMany, less than `n` elements are returned immediately. If +// the queue is closed and there are 0 elements left in the queue, then +// an OutOfRange error is returned just like in QueueDequeueMany. +// Otherwise the behavior is identical to QueueDequeueMany: +// +// This operation concatenates queue-element component tensors along the +// 0th dimension to make a single component tensor. All of the components +// in the dequeued tuple will have size n in the 0th dimension. +// +// This operation has `k` outputs, where `k` is the number of components in +// the tuples stored in the given queue, and output `i` is the ith +// component of the dequeued tuple. +// +// Arguments: +// handle: The handle to a queue. +// n: The number of tuples to dequeue. +// component_types: The type of each component in a tuple. +// +// Returns One or more tensors that were dequeued as a tuple. +func QueueDequeueUpToV2(scope *Scope, handle tf.Output, n tf.Output, component_types []tf.DataType, optional ...QueueDequeueUpToV2Attr) (components []tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"component_types": component_types} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "QueueDequeueUpToV2", + Input: []tf.Input{ + handle, n, + }, + Attrs: attrs, } + op := scope.AddOperation(opspec) + if scope.Err() != nil { + return + } + var idx int + var err error + if components, idx, err = makeOutputList(op, idx, "components"); err != nil { + scope.UpdateErr("QueueDequeueUpToV2", err) + return + } + return components } -// TensorSummaryDisplayName sets the optional display_name attribute to value. +// Computes the Cholesky decomposition of one or more square matrices. // -// value: An unused string. 
-// If not specified, defaults to "" -func TensorSummaryDisplayName(value string) TensorSummaryAttr { - return func(m optionalAttr) { - m["display_name"] = value +// The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions +// form square matrices. +// +// The input has to be symmetric and positive definite. Only the lower-triangular +// part of the input will be used for this operation. The upper-triangular part +// will not be read. +// +// The output is a tensor of the same shape as the input +// containing the Cholesky decompositions for all input submatrices `[..., :, :]`. +// +// **Note**: The gradient computation on GPU is faster for large matrices but +// not for large batch dimensions when the submatrices are small. In this +// case it might be faster to use the CPU. +// +// Arguments: +// input: Shape is `[..., M, M]`. +// +// Returns Shape is `[..., M, M]`. +func Cholesky(scope *Scope, input tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Cholesky", + Input: []tf.Input{ + input, + }, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// Outputs a `Summary` protocol buffer with a tensor. +// Writes contents to the file at input filename. Creates file and recursively // -// This op is being phased out in favor of TensorSummaryV2, which lets callers pass -// a tag as well as a serialized SummaryMetadata proto string that contains -// plugin-specific data. We will keep this op to maintain backwards compatibility. +// creates directory if not existing. // // Arguments: -// tensor: A tensor to serialize. -func TensorSummary(scope *Scope, tensor tf.Output, optional ...TensorSummaryAttr) (summary tf.Output) { +// filename: scalar. The name of the file to which we write the contents. +// contents: scalar. The content to be written to the output file. +// +// Returns the created operation. 
+func WriteFile(scope *Scope, filename tf.Output, contents tf.Output) (o *tf.Operation) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "TensorSummary", + Type: "WriteFile", Input: []tf.Input{ - tensor, + filename, contents, }, - Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } -// Computes the gradient for the tanh of `x` wrt its input. +// AllAttr is an optional argument to All. +type AllAttr func(optionalAttr) + +// AllKeepDims sets the optional keep_dims attribute to value. // -// Specifically, `grad = dy * (1 - y*y)`, where `y = tanh(x)`, and `dy` -// is the corresponding input gradient. -func TanhGrad(scope *Scope, y tf.Output, dy tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "TanhGrad", - Input: []tf.Input{ - y, dy, - }, +// value: If true, retain reduced dimensions with length 1. +// If not specified, defaults to false +func AllKeepDims(value bool) AllAttr { + return func(m optionalAttr) { + m["keep_dims"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// Outputs a `Summary` protocol buffer with scalar values. +// Computes the "logical and" of elements across dimensions of a tensor. // -// The input `tags` and `values` must have the same shape. The generated summary -// has a summary value for each tag-value pair in `tags` and `values`. +// Reduces `input` along the dimensions given in `axis`. Unless +// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in +// `axis`. If `keep_dims` is true, the reduced dimensions are +// retained with length 1. // // Arguments: -// tags: Tags for the summary. -// values: Same shape as `tags. Values for the summary. +// input: The tensor to reduce. +// axis: The dimensions to reduce. Must be in the range +// `[-rank(input), rank(input))`. // -// Returns Scalar. 
Serialized `Summary` protocol buffer. -func ScalarSummary(scope *Scope, tags tf.Output, values tf.Output) (summary tf.Output) { +// Returns The reduced tensor. +func All(scope *Scope, input tf.Output, axis tf.Output, optional ...AllAttr) (output tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "ScalarSummary", + Type: "All", Input: []tf.Input{ - tags, values, + input, axis, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Outputs a `Summary` protocol buffer with a histogram. +// Computes the Eigen Decomposition of a batch of square self-adjoint matrices. // -// The generated -// [`Summary`](https://www.tensorflow.org/code/tensorflow/core/framework/summary.proto) -// has one summary value containing a histogram for `values`. +// DEPRECATED at GraphDef version 11: Use SelfAdjointEigV2 instead. // -// This op reports an `InvalidArgument` error if any value is not finite. +// The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions +// form square matrices, with the same constraints as the single matrix +// SelfAdjointEig. +// +// The result is a [..., M+1, M] matrix with [..., 0,:] containing the +// eigenvalues, and subsequent [...,1:, :] containing the eigenvectors. The eigenvalues +// are sorted in non-decreasing order. // // Arguments: -// tag: Scalar. Tag to use for the `Summary.Value`. -// values: Any shape. Values to use to build the histogram. +// input: Shape is `[..., M, M]`. // -// Returns Scalar. Serialized `Summary` protocol buffer. -func HistogramSummary(scope *Scope, tag tf.Output, values tf.Output) (summary tf.Output) { +// Returns Shape is `[..., M+1, M]`. 
+func SelfAdjointEig(scope *Scope, input tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "HistogramSummary", + Type: "SelfAdjointEig", Input: []tf.Input{ - tag, values, + input, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Computes the number of elements in the given queue. +// Computes softplus gradients for a softplus operation. // // Arguments: -// handle: The handle to a queue. +// gradients: The backpropagated gradients to the corresponding softplus operation. +// features: The features passed as input to the corresponding softplus operation. // -// Returns The number of elements in the given queue. -func QueueSizeV2(scope *Scope, handle tf.Output) (size tf.Output) { +// Returns The gradients: `gradients / (1 + exp(-features))`. +func SoftplusGrad(scope *Scope, gradients tf.Output, features tf.Output) (backprops tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "QueueSizeV2", + Type: "SoftplusGrad", Input: []tf.Input{ - handle, + gradients, features, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// ImageSummaryAttr is an optional argument to ImageSummary. -type ImageSummaryAttr func(optionalAttr) - -// ImageSummaryMaxImages sets the optional max_images attribute to value. -// -// value: Max number of batch elements to generate images for. -// If not specified, defaults to 3 -// -// REQUIRES: value >= 1 -func ImageSummaryMaxImages(value int64) ImageSummaryAttr { - return func(m optionalAttr) { - m["max_images"] = value - } -} +// SelfAdjointEigV2Attr is an optional argument to SelfAdjointEigV2. +type SelfAdjointEigV2Attr func(optionalAttr) -// ImageSummaryBadColor sets the optional bad_color attribute to value. +// SelfAdjointEigV2ComputeV sets the optional compute_v attribute to value. // -// value: Color to use for pixels with non-finite values. 
-// If not specified, defaults to > int_val:255 int_val:0 int_val:0 int_val:255 > -func ImageSummaryBadColor(value tf.Tensor) ImageSummaryAttr { +// value: If `True` then eigenvectors will be computed and returned in `v`. +// Otherwise, only the eigenvalues will be computed. +// If not specified, defaults to true +func SelfAdjointEigV2ComputeV(value bool) SelfAdjointEigV2Attr { return func(m optionalAttr) { - m["bad_color"] = value + m["compute_v"] = value } } -// Outputs a `Summary` protocol buffer with images. -// -// The summary has up to `max_images` summary values containing images. The -// images are built from `tensor` which must be 4-D with shape `[batch_size, -// height, width, channels]` and where `channels` can be: -// -// * 1: `tensor` is interpreted as Grayscale. -// * 3: `tensor` is interpreted as RGB. -// * 4: `tensor` is interpreted as RGBA. -// -// The images have the same number of channels as the input tensor. For float -// input, the values are normalized one image at a time to fit in the range -// `[0, 255]`. `uint8` values are unchanged. The op uses two different -// normalization algorithms: -// -// * If the input values are all positive, they are rescaled so the largest one -// is 255. -// -// * If any input value is negative, the values are shifted so input value 0.0 -// is at 127. They are then rescaled so that either the smallest value is 0, -// or the largest one is 255. -// -// The `tag` argument is a scalar `Tensor` of type `string`. It is used to -// build the `tag` of the summary values: +// Computes the eigen decomposition of one or more square self-adjoint matrices. // -// * If `max_images` is 1, the summary value tag is '*tag*/image'. -// * If `max_images` is greater than 1, the summary value tags are -// generated sequentially as '*tag*/image/0', '*tag*/image/1', etc. +// Computes the eigenvalues and (optionally) eigenvectors of each inner matrix in +// `input` such that `input[..., :, :] = v[..., :, :] * diag(e[..., :])`. 
The eigenvalues +// are sorted in non-decreasing order. // -// The `bad_color` argument is the color to use in the generated images for -// non-finite input values. It is a `unit8` 1-D tensor of length `channels`. -// Each element must be in the range `[0, 255]` (It represents the value of a -// pixel in the output image). Non-finite values in the input tensor are -// replaced by this tensor in the output image. The default value is the color -// red. +// ```python +// # a is a tensor. +// # e is a tensor of eigenvalues. +// # v is a tensor of eigenvectors. +// e, v = self_adjoint_eig(a) +// e = self_adjoint_eig(a, compute_v=False) +// ``` // // Arguments: -// tag: Scalar. Used to build the `tag` attribute of the summary values. -// tensor: 4-D of shape `[batch_size, height, width, channels]` where -// `channels` is 1, 3, or 4. +// input: `Tensor` input of shape `[N, N]`. // -// Returns Scalar. Serialized `Summary` protocol buffer. -func ImageSummary(scope *Scope, tag tf.Output, tensor tf.Output, optional ...ImageSummaryAttr) (summary tf.Output) { +// Returns Eigenvalues. Shape is `[N]`.Eigenvectors. Shape is `[N, N]`. +func SelfAdjointEigV2(scope *Scope, input tf.Output, optional ...SelfAdjointEigV2Attr) (e tf.Output, v tf.Output) { if scope.Err() != nil { return } @@ -19192,52 +21836,95 @@ func ImageSummary(scope *Scope, tag tf.Output, tensor tf.Output, optional ...Ima a(attrs) } opspec := tf.OpSpec{ - Type: "ImageSummary", + Type: "SelfAdjointEigV2", Input: []tf.Input{ - tag, tensor, + input, }, Attrs: attrs, } op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1) +} + +// Adjust the saturation of one or more images. +// +// `images` is a tensor of at least 3 dimensions. The last dimension is +// interpretted as channels, and must be three. +// +// The input image is considered in the RGB colorspace. Conceptually, the RGB +// colors are first mapped into HSV. 
A scale is then applied to all the saturation +// values, and then remapped back to RGB colorspace. +// +// Arguments: +// images: Images to adjust. At least 3-D. +// scale: A float scale to add to the saturation. +// +// Returns The saturation-adjusted image or images. +func AdjustSaturation(scope *Scope, images tf.Output, scale tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "AdjustSaturation", + Input: []tf.Input{ + images, scale, + }, + } + op := scope.AddOperation(opspec) return op.Output(0) } -// AudioSummaryV2Attr is an optional argument to AudioSummaryV2. -type AudioSummaryV2Attr func(optionalAttr) +// SvdAttr is an optional argument to Svd. +type SvdAttr func(optionalAttr) -// AudioSummaryV2MaxOutputs sets the optional max_outputs attribute to value. -// -// value: Max number of batch elements to generate audio for. -// If not specified, defaults to 3 +// SvdComputeUv sets the optional compute_uv attribute to value. // -// REQUIRES: value >= 1 -func AudioSummaryV2MaxOutputs(value int64) AudioSummaryV2Attr { +// value: If true, left and right singular vectors will be +// computed and returned in `u` and `v`, respectively. +// If false, `u` and `v` are not set and should never be referenced. +// If not specified, defaults to true +func SvdComputeUv(value bool) SvdAttr { return func(m optionalAttr) { - m["max_outputs"] = value + m["compute_uv"] = value } } -// Outputs a `Summary` protocol buffer with audio. +// SvdFullMatrices sets the optional full_matrices attribute to value. // -// The summary has up to `max_outputs` summary values containing audio. The -// audio is built from `tensor` which must be 3-D with shape `[batch_size, -// frames, channels]` or 2-D with shape `[batch_size, frames]`. The values are -// assumed to be in the range of `[-1.0, 1.0]` with a sample rate of `sample_rate`. +// value: If true, compute full-sized `u` and `v`. If false +// (the default), compute only the leading `P` singular vectors.
+// Ignored if `compute_uv` is `False`. +// If not specified, defaults to false +func SvdFullMatrices(value bool) SvdAttr { + return func(m optionalAttr) { + m["full_matrices"] = value + } +} + +// Computes the singular value decompositions of one or more matrices. // -// The `tag` argument is a scalar `Tensor` of type `string`. It is used to -// build the `tag` of the summary values: +// Computes the SVD of each inner matrix in `input` such that +// `input[..., :, :] = u[..., :, :] * diag(s[..., :, :]) * transpose(v[..., :, :])` // -// * If `max_outputs` is 1, the summary value tag is '*tag*/audio'. -// * If `max_outputs` is greater than 1, the summary value tags are -// generated sequentially as '*tag*/audio/0', '*tag*/audio/1', etc. +// ```python +// # a is a tensor containing a batch of matrices. +// # s is a tensor of singular values for each matrix. +// # u is the tensor containing the left singular vectors for each matrix. +// # v is the tensor containing the right singular vectors for each matrix. +// s, u, v = svd(a) +// s, _, _ = svd(a, compute_uv=False) +// ``` // // Arguments: -// tag: Scalar. Used to build the `tag` attribute of the summary values. -// tensor: 2-D of shape `[batch_size, frames]`. -// sample_rate: The sample rate of the signal in hertz. +// input: A tensor of shape `[..., M, N]` whose inner-most 2 dimensions +// form matrices of size `[M, N]`. Let `P` be the minimum of `M` and `N`. // -// Returns Scalar. Serialized `Summary` protocol buffer. -func AudioSummaryV2(scope *Scope, tag tf.Output, tensor tf.Output, sample_rate tf.Output, optional ...AudioSummaryV2Attr) (summary tf.Output) { +// Returns Singular values. Shape is `[..., P]`. Left singular vectors. If `full_matrices` is `False` then shape is +// `[..., M, P]`; if `full_matrices` is `True` then shape is +// `[..., M, M]`. Undefined if `compute_uv` is `False`. Right singular vectors. If `full_matrices` is `False` then shape is +// `[..., N, P]`.
If `full_matrices` is `True` then shape is `[..., N, N]`. +// Undefined if `compute_uv` is false. +func Svd(scope *Scope, input tf.Output, optional ...SvdAttr) (s tf.Output, u tf.Output, v tf.Output) { if scope.Err() != nil { return } @@ -19246,625 +21933,602 @@ func AudioSummaryV2(scope *Scope, tag tf.Output, tensor tf.Output, sample_rate t a(attrs) } opspec := tf.OpSpec{ - Type: "AudioSummaryV2", + Type: "Svd", Input: []tf.Input{ - tag, tensor, sample_rate, + input, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1), op.Output(2) } -// AvgPoolAttr is an optional argument to AvgPool. -type AvgPoolAttr func(optionalAttr) +// QueueEnqueueManyV2Attr is an optional argument to QueueEnqueueManyV2. +type QueueEnqueueManyV2Attr func(optionalAttr) -// AvgPoolDataFormat sets the optional data_format attribute to value. +// QueueEnqueueManyV2TimeoutMs sets the optional timeout_ms attribute to value. // -// value: Specify the data format of the input and output data. With the -// default format "NHWC", the data is stored in the order of: -// [batch, in_height, in_width, in_channels]. -// Alternatively, the format could be "NCHW", the data storage order of: -// [batch, in_channels, in_height, in_width]. -// If not specified, defaults to "NHWC" -func AvgPoolDataFormat(value string) AvgPoolAttr { +// value: If the queue is too full, this operation will block for up +// to timeout_ms milliseconds. +// Note: This option is not supported yet. +// If not specified, defaults to -1 +func QueueEnqueueManyV2TimeoutMs(value int64) QueueEnqueueManyV2Attr { return func(m optionalAttr) { - m["data_format"] = value + m["timeout_ms"] = value } } -// Performs average pooling on the input. +// Enqueues zero or more tuples of one or more tensors in the given queue. // -// Each entry in `output` is the mean of the corresponding size `ksize` -// window in `value`. 
+// This operation slices each component tensor along the 0th dimension to +// make multiple queue elements. All of the tuple components must have the +// same size in the 0th dimension. +// +// The components input has k elements, which correspond to the components of +// tuples stored in the given queue. +// +// N.B. If the queue is full, this operation will block until the given +// elements have been enqueued (or 'timeout_ms' elapses, if specified). // // Arguments: -// value: 4-D with shape `[batch, height, width, channels]`. -// ksize: The size of the sliding window for each dimension of `value`. -// strides: The stride of the sliding window for each dimension of `value`. -// padding: The type of padding algorithm to use. +// handle: The handle to a queue. +// components: One or more tensors from which the enqueued tensors should +// be taken. // -// Returns The average pooled output tensor. -func AvgPool(scope *Scope, value tf.Output, ksize []int64, strides []int64, padding string, optional ...AvgPoolAttr) (output tf.Output) { +// Returns the created operation. +func QueueEnqueueManyV2(scope *Scope, handle tf.Output, components []tf.Output, optional ...QueueEnqueueManyV2Attr) (o *tf.Operation) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "AvgPool", + Type: "QueueEnqueueManyV2", Input: []tf.Input{ - value, + handle, tf.OutputList(components), }, Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } -// Merges summaries. +// Computes the product along segments of a tensor. // -// This op creates a -// [`Summary`](https://www.tensorflow.org/code/tensorflow/core/framework/summary.proto) -// protocol buffer that contains the union of all the values in the input -// summaries. 
+// Read @{$math_ops#Segmentation$the section on segmentation} for an explanation of +// segments. // -// When the Op is run, it reports an `InvalidArgument` error if multiple values -// in the summaries to merge use the same tag. +// Computes a tensor such that +// \\(output_i = \prod_j data_j\\) where the product is over `j` such +// that `segment_ids[j] == i`. // -// Arguments: -// inputs: Can be of any shape. Each must contain serialized `Summary` protocol -// buffers. +// If the product is empty for a given segment ID `i`, `output[i] = 1`. // -// Returns Scalar. Serialized `Summary` protocol buffer. -func MergeSummary(scope *Scope, inputs []tf.Output) (summary tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "MergeSummary", - Input: []tf.Input{ - tf.OutputList(inputs), - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes the gradient of morphological 2-D dilation with respect to the filter. +//
+// +//
// // Arguments: -// input: 4-D with shape `[batch, in_height, in_width, depth]`. -// filter: 3-D with shape `[filter_height, filter_width, depth]`. -// out_backprop: 4-D with shape `[batch, out_height, out_width, depth]`. -// strides: 1-D of length 4. The stride of the sliding window for each dimension of -// the input tensor. Must be: `[1, stride_height, stride_width, 1]`. -// rates: 1-D of length 4. The input stride for atrous morphological dilation. -// Must be: `[1, rate_height, rate_width, 1]`. -// padding: The type of padding algorithm to use. // -// Returns 3-D with shape `[filter_height, filter_width, depth]`. -func Dilation2DBackpropFilter(scope *Scope, input tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, rates []int64, padding string) (filter_backprop tf.Output) { +// segment_ids: A 1-D tensor whose rank is equal to the rank of `data`'s +// first dimension. Values should be sorted and can be repeated. +// +// Returns Has same shape as data, except for dimension 0 which +// has size `k`, the number of segments. +func SegmentProd(scope *Scope, data tf.Output, segment_ids tf.Output) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"strides": strides, "rates": rates, "padding": padding} opspec := tf.OpSpec{ - Type: "Dilation2DBackpropFilter", + Type: "SegmentProd", Input: []tf.Input{ - input, filter, out_backprop, + data, segment_ids, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// AddSparseToTensorsMapAttr is an optional argument to AddSparseToTensorsMap. -type AddSparseToTensorsMapAttr func(optionalAttr) - -// AddSparseToTensorsMapContainer sets the optional container attribute to value. -// -// value: The container name for the `SparseTensorsMap` created by this op. 
-// If not specified, defaults to "" -func AddSparseToTensorsMapContainer(value string) AddSparseToTensorsMapAttr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// AddSparseToTensorsMapSharedName sets the optional shared_name attribute to value. -// -// value: The shared name for the `SparseTensorsMap` created by this op. -// If blank, the new Operation's unique name is used. -// If not specified, defaults to "" -func AddSparseToTensorsMapSharedName(value string) AddSparseToTensorsMapAttr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// Add a `SparseTensor` to a `SparseTensorsMap` return its handle. -// -// A `SparseTensor` is represented by three tensors: `sparse_indices`, -// `sparse_values`, and `sparse_shape`. +// Converts one or more images from RGB to HSV. // -// This operator takes the given `SparseTensor` and adds it to a container -// object (a `SparseTensorsMap`). A unique key within this container is generated -// in the form of an `int64`, and this is the value that is returned. +// Outputs a tensor of the same shape as the `images` tensor, containing the HSV +// value of the pixels. The output is only well defined if the value in `images` +// are in `[0,1]`. // -// The `SparseTensor` can then be read out as part of a minibatch by passing -// the key as a vector element to `TakeManySparseFromTensorsMap`. To ensure -// the correct `SparseTensorsMap` is accessed, ensure that the same -// `container` and `shared_name` are passed to that Op. If no `shared_name` -// is provided here, instead use the *name* of the Operation created by calling -// `AddSparseToTensorsMap` as the `shared_name` passed to -// `TakeManySparseFromTensorsMap`. Ensure the Operations are colocated. +// `output[..., 0]` contains hue, `output[..., 1]` contains saturation, and +// `output[..., 2]` contains value. All HSV values are in `[0,1]`. A hue of 0 +// corresponds to pure red, hue 1/3 is pure green, and 2/3 is pure blue. 
// // Arguments: -// sparse_indices: 2-D. The `indices` of the `SparseTensor`. -// sparse_values: 1-D. The `values` of the `SparseTensor`. -// sparse_shape: 1-D. The `shape` of the `SparseTensor`. +// images: 1-D or higher rank. RGB data to convert. Last dimension must be size 3. // -// Returns 0-D. The handle of the `SparseTensor` now stored in the -// `SparseTensorsMap`. -func AddSparseToTensorsMap(scope *Scope, sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output, optional ...AddSparseToTensorsMapAttr) (sparse_handle tf.Output) { +// Returns `images` converted to HSV. +func RGBToHSV(scope *Scope, images tf.Output) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "AddSparseToTensorsMap", + Type: "RGBToHSV", Input: []tf.Input{ - sparse_indices, sparse_values, sparse_shape, + images, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Computes the matrix exponential of one or more square matrices: -// -// exp(A) = \sum_{n=0}^\infty A^n/n! -// -// The exponential is computed using a combination of the scaling and squaring -// method and the Pade approximation. Details can be founds in: -// Nicholas J. Higham, "The scaling and squaring method for the matrix exponential -// revisited," SIAM J. Matrix Anal. Applic., 26:1179-1193, 2005. -// -// The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions -// form square matrices. The output is a tensor of the same shape as the input -// containing the exponential for all input submatrices `[..., :, :]`. -// -// Arguments: -// input: Shape is `[..., M, M]`. -// -// Returns Shape is `[..., M, M]`. +// Does nothing. Only useful as a placeholder for control edges. 
// -// @compatibility(scipy) -// Equivalent to scipy.linalg.expm -// @end_compatibility -func MatrixExponential(scope *Scope, input tf.Output) (output tf.Output) { +// Returns the created operation. +func NoOp(scope *Scope) (o *tf.Operation) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "MatrixExponential", - Input: []tf.Input{ - input, - }, + Type: "NoOp", } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } -// QueueDequeueUpToV2Attr is an optional argument to QueueDequeueUpToV2. -type QueueDequeueUpToV2Attr func(optionalAttr) +// MergeV2CheckpointsAttr is an optional argument to MergeV2Checkpoints. +type MergeV2CheckpointsAttr func(optionalAttr) -// QueueDequeueUpToV2TimeoutMs sets the optional timeout_ms attribute to value. +// MergeV2CheckpointsDeleteOldDirs sets the optional delete_old_dirs attribute to value. // -// value: If the queue has fewer than n elements, this operation -// will block for up to timeout_ms milliseconds. -// Note: This option is not supported yet. -// If not specified, defaults to -1 -func QueueDequeueUpToV2TimeoutMs(value int64) QueueDequeueUpToV2Attr { +// value: see above. +// If not specified, defaults to true +func MergeV2CheckpointsDeleteOldDirs(value bool) MergeV2CheckpointsAttr { return func(m optionalAttr) { - m["timeout_ms"] = value + m["delete_old_dirs"] = value } } -// Dequeues `n` tuples of one or more tensors from the given queue. -// -// This operation is not supported by all queues. If a queue does not support -// DequeueUpTo, then an Unimplemented error is returned. -// -// If the queue is closed and there are more than 0 but less than `n` -// elements remaining, then instead of returning an OutOfRange error like -// QueueDequeueMany, less than `n` elements are returned immediately. If -// the queue is closed and there are 0 elements left in the queue, then -// an OutOfRange error is returned just like in QueueDequeueMany. 
-// Otherwise the behavior is identical to QueueDequeueMany: +// V2 format specific: merges the metadata files of sharded checkpoints. The // -// This operation concatenates queue-element component tensors along the -// 0th dimension to make a single component tensor. All of the components -// in the dequeued tuple will have size n in the 0th dimension. +// result is one logical checkpoint, with one physical metadata file and renamed +// data files. // -// This operation has `k` outputs, where `k` is the number of components in -// the tuples stored in the given queue, and output `i` is the ith -// component of the dequeued tuple. +// Intended for "grouping" multiple checkpoints in a sharded checkpoint setup. +// +// If delete_old_dirs is true, attempts to delete recursively the dirname of each +// path in the input checkpoint_prefixes. This is useful when those paths are non +// user-facing temporary locations. // // Arguments: -// handle: The handle to a queue. -// n: The number of tuples to dequeue. -// component_types: The type of each component in a tuple. +// checkpoint_prefixes: prefixes of V2 checkpoints to merge. +// destination_prefix: scalar. The desired final prefix. Allowed to be the same +// as one of the checkpoint_prefixes. // -// Returns One or more tensors that were dequeued as a tuple. -func QueueDequeueUpToV2(scope *Scope, handle tf.Output, n tf.Output, component_types []tf.DataType, optional ...QueueDequeueUpToV2Attr) (components []tf.Output) { +// Returns the created operation. 
+func MergeV2Checkpoints(scope *Scope, checkpoint_prefixes tf.Output, destination_prefix tf.Output, optional ...MergeV2CheckpointsAttr) (o *tf.Operation) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"component_types": component_types} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "QueueDequeueUpToV2", + Type: "MergeV2Checkpoints", Input: []tf.Input{ - handle, n, + checkpoint_prefixes, destination_prefix, }, Attrs: attrs, } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if components, idx, err = makeOutputList(op, idx, "components"); err != nil { - scope.UpdateErr("QueueDequeueUpToV2", err) - return - } - return components + return scope.AddOperation(opspec) } -// Computes the Cholesky decomposition of one or more square matrices. -// -// The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions -// form square matrices. +// Saves input tensors slices to disk. // -// The input has to be symmetric and positive definite. Only the lower-triangular -// part of the input will be used for this operation. The upper-triangular part -// will not be read. +// This is like `Save` except that tensors can be listed in the saved file as being +// a slice of a larger tensor. `shapes_and_slices` specifies the shape of the +// larger tensor and the slice that this tensor covers. `shapes_and_slices` must +// have as many elements as `tensor_names`. // -// The output is a tensor of the same shape as the input -// containing the Cholesky decompositions for all input submatrices `[..., :, :]`. +// Elements of the `shapes_and_slices` input must either be: // -// **Note**: The gradient computation on GPU is faster for large matrices but -// not for large batch dimensions when the submatrices are small. In this -// case it might be faster to use the CPU. 
+// * The empty string, in which case the corresponding tensor is +// saved normally. +// * A string of the form `dim0 dim1 ... dimN-1 slice-spec` where the +// `dimI` are the dimensions of the larger tensor and `slice-spec` +// specifies what part is covered by the tensor to save. // -// Arguments: -// input: Shape is `[..., M, M]`. +// `slice-spec` itself is a `:`-separated list: `slice0:slice1:...:sliceN-1` +// where each `sliceI` is either: // -// Returns Shape is `[..., M, M]`. -func Cholesky(scope *Scope, input tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Cholesky", - Input: []tf.Input{ - input, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Writes contents to the file at input filename. Creates file and recursively +// * The string `-` meaning that the slice covers all indices of this dimension +// * `start,length` where `start` and `length` are integers. In that +// case the slice covers `length` indices starting at `start`. // -// creates directory if not existing. +// See also `Save`. // // Arguments: -// filename: scalar. The name of the file to which we write the contents. -// contents: scalar. The content to be written to the output file. +// filename: Must have a single element. The name of the file to which we write the +// tensor. +// tensor_names: Shape `[N]`. The names of the tensors to be saved. +// shapes_and_slices: Shape `[N]`. The shapes and slice specifications to use when +// saving the tensors. +// data: `N` tensors to save. // // Returns the created operation. 
-func WriteFile(scope *Scope, filename tf.Output, contents tf.Output) (o *tf.Operation) { +func SaveSlices(scope *Scope, filename tf.Output, tensor_names tf.Output, shapes_and_slices tf.Output, data []tf.Output) (o *tf.Operation) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "WriteFile", + Type: "SaveSlices", Input: []tf.Input{ - filename, contents, + filename, tensor_names, shapes_and_slices, tf.OutputList(data), }, } return scope.AddOperation(opspec) } -// AllAttr is an optional argument to All. -type AllAttr func(optionalAttr) +// DenseToDenseSetOperationAttr is an optional argument to DenseToDenseSetOperation. +type DenseToDenseSetOperationAttr func(optionalAttr) -// AllKeepDims sets the optional keep_dims attribute to value. -// -// value: If true, retain reduced dimensions with length 1. -// If not specified, defaults to false -func AllKeepDims(value bool) AllAttr { +// DenseToDenseSetOperationValidateIndices sets the optional validate_indices attribute to value. +// If not specified, defaults to true +func DenseToDenseSetOperationValidateIndices(value bool) DenseToDenseSetOperationAttr { return func(m optionalAttr) { - m["keep_dims"] = value + m["validate_indices"] = value } } -// Computes the "logical and" of elements across dimensions of a tensor. +// Applies set operation along last dimension of 2 `Tensor` inputs. // -// Reduces `input` along the dimensions given in `axis`. Unless -// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in -// `axis`. If `keep_dims` is true, the reduced dimensions are -// retained with length 1. +// See SetOperationOp::SetOperationFromContext for values of `set_operation`. +// +// Output `result` is a `SparseTensor` represented by `result_indices`, +// `result_values`, and `result_shape`. For `set1` and `set2` ranked `n`, this +// has rank `n` and the same 1st `n-1` dimensions as `set1` and `set2`. 
The `nth` +// dimension contains the result of `set_operation` applied to the corresponding +// `[0...n-1]` dimension of `set`. // // Arguments: -// input: The tensor to reduce. -// axis: The dimensions to reduce. Must be in the range -// `[-rank(input), rank(input))`. +// set1: `Tensor` with rank `n`. 1st `n-1` dimensions must be the same as `set2`. +// Dimension `n` contains values in a set, duplicates are allowed but ignored. +// set2: `Tensor` with rank `n`. 1st `n-1` dimensions must be the same as `set1`. +// Dimension `n` contains values in a set, duplicates are allowed but ignored. // -// Returns The reduced tensor. -func All(scope *Scope, input tf.Output, axis tf.Output, optional ...AllAttr) (output tf.Output) { +// +// Returns 2D indices of a `SparseTensor`.1D values of a `SparseTensor`.1D `Tensor` shape of a `SparseTensor`. `result_shape[0...n-1]` is +// the same as the 1st `n-1` dimensions of `set1` and `set2`, `result_shape[n]` +// is the max result set size across all `0...n-1` dimensions. +func DenseToDenseSetOperation(scope *Scope, set1 tf.Output, set2 tf.Output, set_operation string, optional ...DenseToDenseSetOperationAttr) (result_indices tf.Output, result_values tf.Output, result_shape tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"set_operation": set_operation} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "All", + Type: "DenseToDenseSetOperation", Input: []tf.Input{ - input, axis, + set1, set2, }, Attrs: attrs, } op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2) +} + +// Generate a sharded filename. The filename is printf formatted as +// +// %s-%05d-of-%05d, basename, shard, num_shards. 
+func ShardedFilename(scope *Scope, basename tf.Output, shard tf.Output, num_shards tf.Output) (filename tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "ShardedFilename", + Input: []tf.Input{ + basename, shard, num_shards, + }, + } + op := scope.AddOperation(opspec) return op.Output(0) } -// Computes the Eigen Decomposition of a batch of square self-adjoint matrices. +// BatchToSpace for N-D tensors of type T. // -// DEPRECATED at GraphDef version 11: Use SelfAdjointEigV2 instead. +// This operation reshapes the "batch" dimension 0 into `M + 1` dimensions of shape +// `block_shape + [batch]`, interleaves these blocks back into the grid defined by +// the spatial dimensions `[1, ..., M]`, to obtain a result with the same rank as +// the input. The spatial dimensions of this intermediate result are then +// optionally cropped according to `crops` to produce the output. This is the +// reverse of SpaceToBatch. See below for a precise description. // -// The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions -// form square matrices, with the same constraints as the single matrix -// SelfAdjointEig. +// Arguments: +// input: N-D with shape `input_shape = [batch] + spatial_shape + remaining_shape`, +// where spatial_shape has M dimensions. +// block_shape: 1-D with shape `[M]`, all values must be >= 1. +// crops: 2-D with shape `[M, 2]`, all values must be >= 0. +// `crops[i] = [crop_start, crop_end]` specifies the amount to crop from input +// dimension `i + 1`, which corresponds to spatial dimension `i`. It is +// required that +// `crop_start[i] + crop_end[i] <= block_shape[i] * input_shape[i + 1]`. +// +// This operation is equivalent to the following steps: +// +// 1. Reshape `input` to `reshaped` of shape: +// [block_shape[0], ..., block_shape[M-1], +// batch / prod(block_shape), +// input_shape[1], ..., input_shape[N-1]] +// +// 2. 
Permute dimensions of `reshaped` to produce `permuted` of shape +// [batch / prod(block_shape), +// +// input_shape[1], block_shape[0], +// ..., +// input_shape[M], block_shape[M-1], +// +// input_shape[M+1], ..., input_shape[N-1]] +// +// 3. Reshape `permuted` to produce `reshaped_permuted` of shape +// [batch / prod(block_shape), +// +// input_shape[1] * block_shape[0], +// ..., +// input_shape[M] * block_shape[M-1], +// +// input_shape[M+1], +// ..., +// input_shape[N-1]] +// +// 4. Crop the start and end of dimensions `[1, ..., M]` of +// `reshaped_permuted` according to `crops` to produce the output of shape: +// [batch / prod(block_shape), +// +// input_shape[1] * block_shape[0] - crops[0,0] - crops[0,1], +// ..., +// input_shape[M] * block_shape[M-1] - crops[M-1,0] - crops[M-1,1], +// +// input_shape[M+1], ..., input_shape[N-1]] +// +// Some examples: +// +// (1) For the following input of shape `[4, 1, 1, 1]`, `block_shape = [2, 2]`, and +// `crops = [[0, 0], [0, 0]]`: +// +// ``` +// [[[[1]]], [[[2]]], [[[3]]], [[[4]]]] +// ``` +// +// The output tensor has shape `[1, 2, 2, 1]` and value: +// +// ``` +// x = [[[[1], [2]], [[3], [4]]]] +// ``` +// +// (2) For the following input of shape `[4, 1, 1, 3]`, `block_shape = [2, 2]`, and +// `crops = [[0, 0], [0, 0]]`: +// +// ``` +// [[[1, 2, 3]], [[4, 5, 6]], [[7, 8, 9]], [[10, 11, 12]]] +// ``` +// +// The output tensor has shape `[1, 2, 2, 3]` and value: +// +// ``` +// x = [[[[1, 2, 3], [4, 5, 6]], +// [[7, 8, 9], [10, 11, 12]]]] +// ``` +// +// (3) For the following input of shape `[4, 2, 2, 1]`, `block_shape = [2, 2]`, and +// `crops = [[0, 0], [0, 0]]`: +// +// ``` +// x = [[[[1], [3]], [[9], [11]]], +// [[[2], [4]], [[10], [12]]], +// [[[5], [7]], [[13], [15]]], +// [[[6], [8]], [[14], [16]]]] +// ``` +// +// The output tensor has shape `[1, 4, 4, 1]` and value: // -// The result is a [..., M+1, M] matrix with [..., 0,:] containing the -// eigenvalues, and subsequent [...,1:, :] containing the 
eigenvectors. +// ``` +// x = [[[1], [2], [3], [4]], +// [[5], [6], [7], [8]], +// [[9], [10], [11], [12]], +// [[13], [14], [15], [16]]] +// ``` // -// Arguments: -// input: Shape is `[..., M, M]`. +// (4) For the following input of shape `[8, 1, 3, 1]`, `block_shape = [2, 2]`, and +// `crops = [[0, 0], [2, 0]]`: // -// Returns Shape is `[..., M+1, M]`. -func SelfAdjointEig(scope *Scope, input tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SelfAdjointEig", - Input: []tf.Input{ - input, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes softplus gradients for a softplus operation. +// ``` +// x = [[[[0], [1], [3]]], [[[0], [9], [11]]], +// [[[0], [2], [4]]], [[[0], [10], [12]]], +// [[[0], [5], [7]]], [[[0], [13], [15]]], +// [[[0], [6], [8]]], [[[0], [14], [16]]]] +// ``` // -// Arguments: -// gradients: The backpropagated gradients to the corresponding softplus operation. -// features: The features passed as input to the corresponding softplus operation. +// The output tensor has shape `[2, 2, 4, 1]` and value: // -// Returns The gradients: `gradients / (1 + exp(-features))`. -func SoftplusGrad(scope *Scope, gradients tf.Output, features tf.Output) (backprops tf.Output) { +// ``` +// x = [[[[1], [2], [3], [4]], +// [[5], [6], [7], [8]]], +// [[[9], [10], [11], [12]], +// [[13], [14], [15], [16]]]] +// ``` +func BatchToSpaceND(scope *Scope, input tf.Output, block_shape tf.Output, crops tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "SoftplusGrad", + Type: "BatchToSpaceND", Input: []tf.Input{ - gradients, features, + input, block_shape, crops, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// SelfAdjointEigV2Attr is an optional argument to SelfAdjointEigV2. -type SelfAdjointEigV2Attr func(optionalAttr) +// UnpackAttr is an optional argument to Unpack. 
+type UnpackAttr func(optionalAttr) -// SelfAdjointEigV2ComputeV sets the optional compute_v attribute to value. +// UnpackAxis sets the optional axis attribute to value. // -// value: If `True` then eigenvectors will be computed and returned in `v`. -// Otherwise, only the eigenvalues will be computed. -// If not specified, defaults to true -func SelfAdjointEigV2ComputeV(value bool) SelfAdjointEigV2Attr { +// value: Dimension along which to unpack. Negative values wrap around, so the +// valid range is `[-R, R)`. +// If not specified, defaults to 0 +func UnpackAxis(value int64) UnpackAttr { return func(m optionalAttr) { - m["compute_v"] = value + m["axis"] = value } } -// Computes the eigen decomposition of one or more square self-adjoint matrices. +// Unpacks a given dimension of a rank-`R` tensor into `num` rank-`(R-1)` tensors. // -// Computes the eigenvalues and (optionally) eigenvectors of each inner matrix in -// `input` such that `input[..., :, :] = v[..., :, :] * diag(e[..., :])`. +// Unpacks `num` tensors from `value` by chipping it along the `axis` dimension. +// For example, given a tensor of shape `(A, B, C, D)`; // -// ```python -// # a is a tensor. -// # e is a tensor of eigenvalues. -// # v is a tensor of eigenvectors. -// e, v = self_adjoint_eig(a) -// e = self_adjoint_eig(a, compute_v=False) -// ``` +// If `axis == 0` then the i'th tensor in `output` is the slice `value[i, :, :, :]` +// and each tensor in `output` will have shape `(B, C, D)`. (Note that the +// dimension unpacked along is gone, unlike `split`). +// +// If `axis == 1` then the i'th tensor in `output` is the slice `value[:, i, :, :]` +// and each tensor in `output` will have shape `(A, C, D)`. +// Etc. +// +// This is the opposite of `pack`. // // Arguments: -// input: `Tensor` input of shape `[N, N]`. +// value: 1-D or higher, with `axis` dimension size equal to `num`. // -// Returns Eigenvalues. Shape is `[N]`.Eigenvectors. Shape is `[N, N]`. 
-func SelfAdjointEigV2(scope *Scope, input tf.Output, optional ...SelfAdjointEigV2Attr) (e tf.Output, v tf.Output) { +// +// Returns The list of tensors unpacked from `value`. +func Unpack(scope *Scope, value tf.Output, num int64, optional ...UnpackAttr) (output []tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"num": num} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "SelfAdjointEigV2", + Type: "Unpack", Input: []tf.Input{ - input, + value, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) + if scope.Err() != nil { + return + } + var idx int + var err error + if output, idx, err = makeOutputList(op, idx, "output"); err != nil { + scope.UpdateErr("Unpack", err) + return + } + return output } -// Adjust the saturation of one or more images. -// -// `images` is a tensor of at least 3 dimensions. The last dimension is -// interpretted as channels, and must be three. -// -// The input image is considered in the RGB colorspace. Conceptually, the RGB -// colors are first mapped into HSV. A scale is then applied all the saturation -// values, and then remapped back to RGB colorspace. +// Increments variable pointed to by 'resource' until it reaches 'limit'. // // Arguments: -// images: Images to adjust. At least 3-D. -// scale: A float scale to add to the saturation. +// resource: Should be from a scalar `Variable` node. +// limit: If incrementing ref would bring it above limit, instead generates an +// 'OutOfRange' error. // -// Returns The hue-adjusted image or images. -func AdjustSaturation(scope *Scope, images tf.Output, scale tf.Output) (output tf.Output) { +// +// Returns A copy of the input before increment. If nothing else modifies the +// input, the values produced will all be distinct. 
+func ResourceCountUpTo(scope *Scope, resource tf.Output, limit int64, T tf.DataType) (output tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"limit": limit, "T": T} opspec := tf.OpSpec{ - Type: "AdjustSaturation", + Type: "ResourceCountUpTo", Input: []tf.Input{ - images, scale, + resource, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// SvdAttr is an optional argument to Svd. -type SvdAttr func(optionalAttr) - -// SvdComputeUv sets the optional compute_uv attribute to value. +// Delete the stack from its resource container. // -// value: If true, left and right singular vectors will be -// computed and returned in `u` and `v`, respectively. -// If false, `u` and `v` are not set and should never referenced. -// If not specified, defaults to true -func SvdComputeUv(value bool) SvdAttr { - return func(m optionalAttr) { - m["compute_uv"] = value - } -} - -// SvdFullMatrices sets the optional full_matrices attribute to value. +// Arguments: +// handle: The handle to a stack. // -// value: If true, compute full-sized `u` and `v`. If false -// (the default), compute only the leading `P` singular vectors. -// Ignored if `compute_uv` is `False`. -// If not specified, defaults to false -func SvdFullMatrices(value bool) SvdAttr { - return func(m optionalAttr) { - m["full_matrices"] = value +// Returns the created operation. +func StackCloseV2(scope *Scope, handle tf.Output) (o *tf.Operation) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "StackCloseV2", + Input: []tf.Input{ + handle, + }, } + return scope.AddOperation(opspec) } -// Computes the singular value decompositions of one or more matrices. -// -// Computes the SVD of each inner matrix in `input` such that -// `input[..., :, :] = u[..., :, :] * diag(s[..., :, :]) * transpose(v[..., :, :])` -// -// ```python -// # a is a tensor containing a batch of matrices. -// # s is a tensor of singular values for each matrix. 
-// # u is the tensor containing of left singular vectors for each matrix. -// # v is the tensor containing of right singular vectors for each matrix. -// s, u, v = svd(a) -// s, _, _ = svd(a, compute_uv=False) -// ``` -// -// Arguments: -// input: A tensor of shape `[..., M, N]` whose inner-most 2 dimensions -// form matrices of size `[M, N]`. Let `P` be the minimum of `M` and `N`. -// -// Returns Singular values. Shape is `[..., P]`.Left singular vectors. If `full_matrices` is `False` then shape is -// `[..., M, P]`; if `full_matrices` is `True` then shape is -// `[..., M, M]`. Undefined if `compute_uv` is `False`.Left singular vectors. If `full_matrices` is `False` then shape is -// `[..., N, P]`. If `full_matrices` is `True` then shape is `[..., N, N]`. -// Undefined if `compute_uv` is false. -func Svd(scope *Scope, input tf.Output, optional ...SvdAttr) (s tf.Output, u tf.Output, v tf.Output) { +// Generate a glob pattern matching all sharded file names. +func ShardedFilespec(scope *Scope, basename tf.Output, num_shards tf.Output) (filename tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "Svd", + Type: "ShardedFilespec", Input: []tf.Input{ - input, + basename, num_shards, }, - Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return op.Output(0) } -// QueueEnqueueManyV2Attr is an optional argument to QueueEnqueueManyV2. -type QueueEnqueueManyV2Attr func(optionalAttr) +// TextLineReaderV2Attr is an optional argument to TextLineReaderV2. +type TextLineReaderV2Attr func(optionalAttr) -// QueueEnqueueManyV2TimeoutMs sets the optional timeout_ms attribute to value. +// TextLineReaderV2SkipHeaderLines sets the optional skip_header_lines attribute to value. // -// value: If the queue is too full, this operation will block for up -// to timeout_ms milliseconds. -// Note: This option is not supported yet. 
-// If not specified, defaults to -1 -func QueueEnqueueManyV2TimeoutMs(value int64) QueueEnqueueManyV2Attr { +// value: Number of lines to skip from the beginning of every file. +// If not specified, defaults to 0 +func TextLineReaderV2SkipHeaderLines(value int64) TextLineReaderV2Attr { return func(m optionalAttr) { - m["timeout_ms"] = value + m["skip_header_lines"] = value } } -// Enqueues zero or more tuples of one or more tensors in the given queue. -// -// This operation slices each component tensor along the 0th dimension to -// make multiple queue elements. All of the tuple components must have the -// same size in the 0th dimension. -// -// The components input has k elements, which correspond to the components of -// tuples stored in the given queue. +// TextLineReaderV2Container sets the optional container attribute to value. // -// N.B. If the queue is full, this operation will block until the given -// elements have been enqueued (or 'timeout_ms' elapses, if specified). +// value: If non-empty, this reader is placed in the given container. +// Otherwise, a default container is used. +// If not specified, defaults to "" +func TextLineReaderV2Container(value string) TextLineReaderV2Attr { + return func(m optionalAttr) { + m["container"] = value + } +} + +// TextLineReaderV2SharedName sets the optional shared_name attribute to value. // -// Arguments: -// handle: The handle to a queue. -// components: One or more tensors from which the enqueued tensors should -// be taken. +// value: If non-empty, this reader is named in the given bucket +// with this shared_name. Otherwise, the node name is used instead. +// If not specified, defaults to "" +func TextLineReaderV2SharedName(value string) TextLineReaderV2Attr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// A Reader that outputs the lines of a file delimited by '\n'. // -// Returns the created operation. 
-func QueueEnqueueManyV2(scope *Scope, handle tf.Output, components []tf.Output, optional ...QueueEnqueueManyV2Attr) (o *tf.Operation) { +// Returns The handle to reference the Reader. +func TextLineReaderV2(scope *Scope, optional ...TextLineReaderV2Attr) (reader_handle tf.Output) { if scope.Err() != nil { return } @@ -19873,123 +22537,181 @@ func QueueEnqueueManyV2(scope *Scope, handle tf.Output, components []tf.Output, a(attrs) } opspec := tf.OpSpec{ - Type: "QueueEnqueueManyV2", - Input: []tf.Input{ - handle, tf.OutputList(components), - }, + Type: "TextLineReaderV2", + Attrs: attrs, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0) } -// Computes the product along segments of a tensor. +// LoadAndRemapMatrixAttr is an optional argument to LoadAndRemapMatrix. +type LoadAndRemapMatrixAttr func(optionalAttr) + +// LoadAndRemapMatrixMaxRowsInMemory sets the optional max_rows_in_memory attribute to value. // -// Read @{$math_ops#segmentation$the section on segmentation} for an explanation of -// segments. +// value: The maximum number of rows to load from the checkpoint at +// once. If less than or equal to 0, the entire matrix will be loaded into +// memory. Setting this arg trades increased disk reads for lower memory usage. +// If not specified, defaults to -1 +func LoadAndRemapMatrixMaxRowsInMemory(value int64) LoadAndRemapMatrixAttr { + return func(m optionalAttr) { + m["max_rows_in_memory"] = value + } +} + +// Loads a 2-D (matrix) `Tensor` with name `old_tensor_name` from the checkpoint // -// Computes a tensor such that -// \\(output_i = \prod_j data_j\\) where the product is over `j` such -// that `segment_ids[j] == i`. +// at `ckpt_path` and potentially reorders its rows and columns using the +// specified remappings. // -// If the product is empty for a given segment ID `i`, `output[i] = 1`. 
+// Most users should use one of the wrapper initializers (such as +// `tf.contrib.framework.load_and_remap_matrix_initializer`) instead of this +// function directly. // -//
-// -//
+// The remappings are 1-D tensors with the following properties: // -// Arguments: +// * `row_remapping` must have exactly `num_rows` entries. Row `i` of the output +// matrix will be initialized from the row corresponding to index +// `row_remapping[i]` in the old `Tensor` from the checkpoint. +// * `col_remapping` must have either 0 entries (indicating that no column +// reordering is needed) or `num_cols` entries. If specified, column `j` of the +// output matrix will be initialized from the column corresponding to index +// `col_remapping[j]` in the old `Tensor` from the checkpoint. +// * A value of -1 in either of the remappings signifies a "missing" entry. In that +// case, values from the `initializing_values` tensor will be used to fill that +// missing row or column. If `row_remapping` has `r` missing entries and +// `col_remapping` has `c` missing entries, then the following condition must be +// true: // -// segment_ids: A 1-D tensor whose rank is equal to the rank of `data`'s -// first dimension. Values should be sorted and can be repeated. +// `(r * num_cols) + (c * num_rows) - (r * c) == len(initializing_values)` // -// Returns Has same shape as data, except for dimension 0 which -// has size `k`, the number of segments. -func SegmentProd(scope *Scope, data tf.Output, segment_ids tf.Output) (output tf.Output) { +// The remapping tensors can be generated using the GenerateVocabRemapping op. +// +// As an example, with row_remapping = [1, 0, -1], col_remapping = [0, 2, -1], +// initializing_values = [0.5, -0.5, 0.25, -0.25, 42], and w(i, j) representing +// the value from row i, column j of the old tensor in the checkpoint, the output +// matrix will look like the following: +// +// [[w(1, 0), w(1, 2), 0.5], +// [w(0, 0), w(0, 2), -0.5], +// [0.25, -0.25, 42]] +// +// Arguments: +// ckpt_path: Path to the TensorFlow checkpoint (version 2, `TensorBundle`) from +// which the old matrix `Tensor` will be loaded. 
+// old_tensor_name: Name of the 2-D `Tensor` to load from checkpoint. +// row_remapping: An int `Tensor` of row remappings (generally created by +// `generate_vocab_remapping`). Even if no row remapping is needed, this must +// still be an index-valued Tensor (e.g. [0, 1, 2, ...]), or a shifted +// index-valued `Tensor` (e.g. [8, 9, 10, ...], for partitioned `Variables`). +// col_remapping: An int `Tensor` of column remappings (generally created by +// `generate_vocab_remapping`). May be a size-0 `Tensor` if only row remapping +// is to be done (e.g. column ordering is the same). +// initializing_values: A float `Tensor` containing values to fill in for cells +// in the output matrix that are not loaded from the checkpoint. Length must be +// exactly the same as the number of missing / new cells. +// num_rows: Number of rows (length of the 1st dimension) in the output matrix. +// num_cols: Number of columns (length of the 2nd dimension) in the output matrix. +// +// Returns Output matrix containing existing values loaded from the +// checkpoint, and with any missing values filled in from initializing_values. +func LoadAndRemapMatrix(scope *Scope, ckpt_path tf.Output, old_tensor_name tf.Output, row_remapping tf.Output, col_remapping tf.Output, initializing_values tf.Output, num_rows int64, num_cols int64, optional ...LoadAndRemapMatrixAttr) (output_matrix tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"num_rows": num_rows, "num_cols": num_cols} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "SegmentProd", + Type: "LoadAndRemapMatrix", Input: []tf.Input{ - data, segment_ids, + ckpt_path, old_tensor_name, row_remapping, col_remapping, initializing_values, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Converts one or more images from RGB to HSV. -// -// Outputs a tensor of the same shape as the `images` tensor, containing the HSV -// value of the pixels. 
The output is only well defined if the value in `images` -// are in `[0,1]`. -// -// `output[..., 0]` contains hue, `output[..., 1]` contains saturation, and -// `output[..., 2]` contains value. All HSV values are in `[0,1]`. A hue of 0 -// corresponds to pure red, hue 1/3 is pure green, and 2/3 is pure blue. +// TFRecordReaderV2Attr is an optional argument to TFRecordReaderV2. +type TFRecordReaderV2Attr func(optionalAttr) + +// TFRecordReaderV2Container sets the optional container attribute to value. // -// Arguments: -// images: 1-D or higher rank. RGB data to convert. Last dimension must be size 3. +// value: If non-empty, this reader is placed in the given container. +// Otherwise, a default container is used. +// If not specified, defaults to "" +func TFRecordReaderV2Container(value string) TFRecordReaderV2Attr { + return func(m optionalAttr) { + m["container"] = value + } +} + +// TFRecordReaderV2SharedName sets the optional shared_name attribute to value. // -// Returns `images` converted to HSV. -func RGBToHSV(scope *Scope, images tf.Output) (output tf.Output) { - if scope.Err() != nil { - return +// value: If non-empty, this reader is named in the given bucket +// with this shared_name. Otherwise, the node name is used instead. +// If not specified, defaults to "" +func TFRecordReaderV2SharedName(value string) TFRecordReaderV2Attr { + return func(m optionalAttr) { + m["shared_name"] = value } - opspec := tf.OpSpec{ - Type: "RGBToHSV", - Input: []tf.Input{ - images, - }, +} + +// TFRecordReaderV2CompressionType sets the optional compression_type attribute to value. +// If not specified, defaults to "" +func TFRecordReaderV2CompressionType(value string) TFRecordReaderV2Attr { + return func(m optionalAttr) { + m["compression_type"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// Does nothing. Only useful as a placeholder for control edges. +// A Reader that outputs the records from a TensorFlow Records file. 
// -// Returns the created operation. -func NoOp(scope *Scope) (o *tf.Operation) { +// Returns The handle to reference the Reader. +func TFRecordReaderV2(scope *Scope, optional ...TFRecordReaderV2Attr) (reader_handle tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "NoOp", + Type: "TFRecordReaderV2", + + Attrs: attrs, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0) } -// MergeV2CheckpointsAttr is an optional argument to MergeV2Checkpoints. -type MergeV2CheckpointsAttr func(optionalAttr) +// QuantizeAndDequantizeV3Attr is an optional argument to QuantizeAndDequantizeV3. +type QuantizeAndDequantizeV3Attr func(optionalAttr) -// MergeV2CheckpointsDeleteOldDirs sets the optional delete_old_dirs attribute to value. -// -// value: see above. +// QuantizeAndDequantizeV3SignedInput sets the optional signed_input attribute to value. // If not specified, defaults to true -func MergeV2CheckpointsDeleteOldDirs(value bool) MergeV2CheckpointsAttr { +func QuantizeAndDequantizeV3SignedInput(value bool) QuantizeAndDequantizeV3Attr { return func(m optionalAttr) { - m["delete_old_dirs"] = value + m["signed_input"] = value } } -// V2 format specific: merges the metadata files of sharded checkpoints. The -// -// result is one logical checkpoint, with one physical metadata file and renamed -// data files. -// -// Intended for "grouping" multiple checkpoints in a sharded checkpoint setup. -// -// If delete_old_dirs is true, attempts to delete recursively the dirname of each -// path in the input checkpoint_prefixes. This is useful when those paths are non -// user-facing temporary locations. -// -// Arguments: -// checkpoint_prefixes: prefixes of V2 checkpoints to merge. -// destination_prefix: scalar. The desired final prefix. Allowed to be the same -// as one of the checkpoint_prefixes. 
+// QuantizeAndDequantizeV3RangeGiven sets the optional range_given attribute to value. +// If not specified, defaults to true +func QuantizeAndDequantizeV3RangeGiven(value bool) QuantizeAndDequantizeV3Attr { + return func(m optionalAttr) { + m["range_given"] = value + } +} + +// Quantizes then dequantizes a tensor. // -// Returns the created operation. -func MergeV2Checkpoints(scope *Scope, checkpoint_prefixes tf.Output, destination_prefix tf.Output, optional ...MergeV2CheckpointsAttr) (o *tf.Operation) { +// This is almost identical to QuantizeAndDequantizeV2, except that num_bits is a +// tensor, so its value can change during training. +func QuantizeAndDequantizeV3(scope *Scope, input tf.Output, input_min tf.Output, input_max tf.Output, num_bits tf.Output, optional ...QuantizeAndDequantizeV3Attr) (output tf.Output) { if scope.Err() != nil { return } @@ -19998,425 +22720,349 @@ func MergeV2Checkpoints(scope *Scope, checkpoint_prefixes tf.Output, destination a(attrs) } opspec := tf.OpSpec{ - Type: "MergeV2Checkpoints", + Type: "QuantizeAndDequantizeV3", Input: []tf.Input{ - checkpoint_prefixes, destination_prefix, + input, input_min, input_max, num_bits, }, Attrs: attrs, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0) } -// Saves input tensors slices to disk. -// -// This is like `Save` except that tensors can be listed in the saved file as being -// a slice of a larger tensor. `shapes_and_slices` specifies the shape of the -// larger tensor and the slice that this tensor covers. `shapes_and_slices` must -// have as many elements as `tensor_names`. -// -// Elements of the `shapes_and_slices` input must either be: -// -// * The empty string, in which case the corresponding tensor is -// saved normally. -// * A string of the form `dim0 dim1 ... dimN-1 slice-spec` where the -// `dimI` are the dimensions of the larger tensor and `slice-spec` -// specifies what part is covered by the tensor to save. 
-// -// `slice-spec` itself is a `:`-separated list: `slice0:slice1:...:sliceN-1` -// where each `sliceI` is either: +// IdentityReaderV2Attr is an optional argument to IdentityReaderV2. +type IdentityReaderV2Attr func(optionalAttr) + +// IdentityReaderV2Container sets the optional container attribute to value. // -// * The string `-` meaning that the slice covers all indices of this dimension -// * `start,length` where `start` and `length` are integers. In that -// case the slice covers `length` indices starting at `start`. +// value: If non-empty, this reader is placed in the given container. +// Otherwise, a default container is used. +// If not specified, defaults to "" +func IdentityReaderV2Container(value string) IdentityReaderV2Attr { + return func(m optionalAttr) { + m["container"] = value + } +} + +// IdentityReaderV2SharedName sets the optional shared_name attribute to value. // -// See also `Save`. +// value: If non-empty, this reader is named in the given bucket +// with this shared_name. Otherwise, the node name is used instead. +// If not specified, defaults to "" +func IdentityReaderV2SharedName(value string) IdentityReaderV2Attr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// A Reader that outputs the queued work as both the key and value. // -// Arguments: -// filename: Must have a single element. The name of the file to which we write the -// tensor. -// tensor_names: Shape `[N]`. The names of the tensors to be saved. -// shapes_and_slices: Shape `[N]`. The shapes and slice specifications to use when -// saving the tensors. -// data: `N` tensors to save. +// To use, enqueue strings in a Queue. ReaderRead will take the front +// work string and output (work, work). // -// Returns the created operation. -func SaveSlices(scope *Scope, filename tf.Output, tensor_names tf.Output, shapes_and_slices tf.Output, data []tf.Output) (o *tf.Operation) { +// Returns The handle to reference the Reader. 
+func IdentityReaderV2(scope *Scope, optional ...IdentityReaderV2Attr) (reader_handle tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "SaveSlices", - Input: []tf.Input{ - filename, tensor_names, shapes_and_slices, tf.OutputList(data), - }, + Type: "IdentityReaderV2", + + Attrs: attrs, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0) } -// DenseToDenseSetOperationAttr is an optional argument to DenseToDenseSetOperation. -type DenseToDenseSetOperationAttr func(optionalAttr) +// ResourceApplyGradientDescentAttr is an optional argument to ResourceApplyGradientDescent. +type ResourceApplyGradientDescentAttr func(optionalAttr) -// DenseToDenseSetOperationValidateIndices sets the optional validate_indices attribute to value. -// If not specified, defaults to true -func DenseToDenseSetOperationValidateIndices(value bool) DenseToDenseSetOperationAttr { +// ResourceApplyGradientDescentUseLocking sets the optional use_locking attribute to value. +// +// value: If `True`, the subtraction will be protected by a lock; +// otherwise the behavior is undefined, but may exhibit less contention. +// If not specified, defaults to false +func ResourceApplyGradientDescentUseLocking(value bool) ResourceApplyGradientDescentAttr { return func(m optionalAttr) { - m["validate_indices"] = value + m["use_locking"] = value } } -// Applies set operation along last dimension of 2 `Tensor` inputs. -// -// See SetOperationOp::SetOperationFromContext for values of `set_operation`. -// -// Output `result` is a `SparseTensor` represented by `result_indices`, -// `result_values`, and `result_shape`. For `set1` and `set2` ranked `n`, this -// has rank `n` and the same 1st `n-1` dimensions as `set1` and `set2`. The `nth` -// dimension contains the result of `set_operation` applied to the corresponding -// `[0...n-1]` dimension of `set`. 
+// Update '*var' by subtracting 'alpha' * 'delta' from it. // // Arguments: -// set1: `Tensor` with rank `n`. 1st `n-1` dimensions must be the same as `set2`. -// Dimension `n` contains values in a set, duplicates are allowed but ignored. -// set2: `Tensor` with rank `n`. 1st `n-1` dimensions must be the same as `set1`. -// Dimension `n` contains values in a set, duplicates are allowed but ignored. -// +// var_: Should be from a Variable(). +// alpha: Scaling factor. Must be a scalar. +// delta: The change. // -// Returns 2D indices of a `SparseTensor`.1D values of a `SparseTensor`.1D `Tensor` shape of a `SparseTensor`. `result_shape[0...n-1]` is -// the same as the 1st `n-1` dimensions of `set1` and `set2`, `result_shape[n]` -// is the max result set size across all `0...n-1` dimensions. -func DenseToDenseSetOperation(scope *Scope, set1 tf.Output, set2 tf.Output, set_operation string, optional ...DenseToDenseSetOperationAttr) (result_indices tf.Output, result_values tf.Output, result_shape tf.Output) { +// Returns the created operation. +func ResourceApplyGradientDescent(scope *Scope, var_ tf.Output, alpha tf.Output, delta tf.Output, optional ...ResourceApplyGradientDescentAttr) (o *tf.Operation) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"set_operation": set_operation} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "DenseToDenseSetOperation", + Type: "ResourceApplyGradientDescent", Input: []tf.Input{ - set1, set2, + var_, alpha, delta, }, Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return scope.AddOperation(opspec) } -// Generate a sharded filename. The filename is printf formatted as +// Returns the next record (key, value pair) produced by a Reader. // -// %s-%05d-of-%05d, basename, shard, num_shards. 
-func ShardedFilename(scope *Scope, basename tf.Output, shard tf.Output, num_shards tf.Output) (filename tf.Output) { +// Will dequeue from the input queue if necessary (e.g. when the +// Reader needs to start reading from a new file since it has finished +// with the previous file). +// +// Arguments: +// reader_handle: Handle to a Reader. +// queue_handle: Handle to a Queue, with string work items. +// +// Returns A scalar.A scalar. +func ReaderReadV2(scope *Scope, reader_handle tf.Output, queue_handle tf.Output) (key tf.Output, value tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "ShardedFilename", + Type: "ReaderReadV2", Input: []tf.Input{ - basename, shard, num_shards, + reader_handle, queue_handle, }, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1) } -// BatchToSpace for N-D tensors of type T. +// Returns up to `num_records` (key, value) pairs produced by a Reader. // -// This operation reshapes the "batch" dimension 0 into `M + 1` dimensions of shape -// `block_shape + [batch]`, interleaves these blocks back into the grid defined by -// the spatial dimensions `[1, ..., M]`, to obtain a result with the same rank as -// the input. The spatial dimensions of this intermediate result are then -// optionally cropped according to `crops` to produce the output. This is the -// reverse of SpaceToBatch. See below for a precise description. +// Will dequeue from the input queue if necessary (e.g. when the +// Reader needs to start reading from a new file since it has finished +// with the previous file). +// It may return less than `num_records` even before the last batch. // // Arguments: -// input: N-D with shape `input_shape = [batch] + spatial_shape + remaining_shape`, -// where spatial_shape has M dimensions. -// block_shape: 1-D with shape `[M]`, all values must be >= 1. -// crops: 2-D with shape `[M, 2]`, all values must be >= 0. 
-// `crops[i] = [crop_start, crop_end]` specifies the amount to crop from input -// dimension `i + 1`, which corresponds to spatial dimension `i`. It is -// required that -// `crop_start[i] + crop_end[i] <= block_shape[i] * input_shape[i + 1]`. -// -// This operation is equivalent to the following steps: -// -// 1. Reshape `input` to `reshaped` of shape: -// [block_shape[0], ..., block_shape[M-1], -// batch / prod(block_shape), -// input_shape[1], ..., input_shape[N-1]] -// -// 2. Permute dimensions of `reshaped` to produce `permuted` of shape -// [batch / prod(block_shape), -// -// input_shape[1], block_shape[0], -// ..., -// input_shape[M], block_shape[M-1], -// -// input_shape[M+1], ..., input_shape[N-1]] -// -// 3. Reshape `permuted` to produce `reshaped_permuted` of shape -// [batch / prod(block_shape), -// -// input_shape[1] * block_shape[0], -// ..., -// input_shape[M] * block_shape[M-1], -// -// input_shape[M+1], -// ..., -// input_shape[N-1]] -// -// 4. Crop the start and end of dimensions `[1, ..., M]` of -// `reshaped_permuted` according to `crops` to produce the output of shape: -// [batch / prod(block_shape), -// -// input_shape[1] * block_shape[0] - crops[0,0] - crops[0,1], -// ..., -// input_shape[M] * block_shape[M-1] - crops[M-1,0] - crops[M-1,1], -// -// input_shape[M+1], ..., input_shape[N-1]] -// -// Some examples: -// -// (1) For the following input of shape `[4, 1, 1, 1]`, `block_shape = [2, 2]`, and -// `crops = [[0, 0], [0, 0]]`: -// -// ``` -// [[[[1]]], [[[2]]], [[[3]]], [[[4]]]] -// ``` -// -// The output tensor has shape `[1, 2, 2, 1]` and value: -// -// ``` -// x = [[[[1], [2]], [[3], [4]]]] -// ``` -// -// (2) For the following input of shape `[4, 1, 1, 3]`, `block_shape = [2, 2]`, and -// `crops = [[0, 0], [0, 0]]`: -// -// ``` -// [[[1, 2, 3]], [[4, 5, 6]], [[7, 8, 9]], [[10, 11, 12]]] -// ``` -// -// The output tensor has shape `[1, 2, 2, 3]` and value: -// -// ``` -// x = [[[[1, 2, 3], [4, 5, 6]], -// [[7, 8, 9], [10, 11, 12]]]] 
-// ``` -// -// (3) For the following input of shape `[4, 2, 2, 1]`, `block_shape = [2, 2]`, and -// `crops = [[0, 0], [0, 0]]`: -// -// ``` -// x = [[[[1], [3]], [[9], [11]]], -// [[[2], [4]], [[10], [12]]], -// [[[5], [7]], [[13], [15]]], -// [[[6], [8]], [[14], [16]]]] -// ``` -// -// The output tensor has shape `[1, 4, 4, 1]` and value: -// -// ``` -// x = [[[1], [2], [3], [4]], -// [[5], [6], [7], [8]], -// [[9], [10], [11], [12]], -// [[13], [14], [15], [16]]] -// ``` -// -// (4) For the following input of shape `[8, 1, 3, 1]`, `block_shape = [2, 2]`, and -// `crops = [[0, 0], [2, 0]]`: -// -// ``` -// x = [[[[0], [1], [3]]], [[[0], [9], [11]]], -// [[[0], [2], [4]]], [[[0], [10], [12]]], -// [[[0], [5], [7]]], [[[0], [13], [15]]], -// [[[0], [6], [8]]], [[[0], [14], [16]]]] -// ``` -// -// The output tensor has shape `[2, 2, 4, 1]` and value: +// reader_handle: Handle to a `Reader`. +// queue_handle: Handle to a `Queue`, with string work items. +// num_records: number of records to read from `Reader`. // -// ``` -// x = [[[[1], [2], [3], [4]], -// [[5], [6], [7], [8]]], -// [[[9], [10], [11], [12]], -// [[13], [14], [15], [16]]]] -// ``` -func BatchToSpaceND(scope *Scope, input tf.Output, block_shape tf.Output, crops tf.Output) (output tf.Output) { +// Returns A 1-D tensor.A 1-D tensor. +func ReaderReadUpToV2(scope *Scope, reader_handle tf.Output, queue_handle tf.Output, num_records tf.Output) (keys tf.Output, values tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "BatchToSpaceND", + Type: "ReaderReadUpToV2", Input: []tf.Input{ - input, block_shape, crops, + reader_handle, queue_handle, num_records, }, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1) } -// UnpackAttr is an optional argument to Unpack. -type UnpackAttr func(optionalAttr) +// ResourceApplyAdamAttr is an optional argument to ResourceApplyAdam. 
+type ResourceApplyAdamAttr func(optionalAttr) + +// ResourceApplyAdamUseLocking sets the optional use_locking attribute to value. +// +// value: If `True`, updating of the var, m, and v tensors will be protected +// by a lock; otherwise the behavior is undefined, but may exhibit less +// contention. +// If not specified, defaults to false +func ResourceApplyAdamUseLocking(value bool) ResourceApplyAdamAttr { + return func(m optionalAttr) { + m["use_locking"] = value + } +} -// UnpackAxis sets the optional axis attribute to value. +// ResourceApplyAdamUseNesterov sets the optional use_nesterov attribute to value. // -// value: Dimension along which to unpack. Negative values wrap around, so the -// valid range is `[-R, R)`. -// If not specified, defaults to 0 -func UnpackAxis(value int64) UnpackAttr { +// value: If `True`, uses the nesterov update. +// If not specified, defaults to false +func ResourceApplyAdamUseNesterov(value bool) ResourceApplyAdamAttr { return func(m optionalAttr) { - m["axis"] = value + m["use_nesterov"] = value } } -// Unpacks a given dimension of a rank-`R` tensor into `num` rank-`(R-1)` tensors. -// -// Unpacks `num` tensors from `value` by chipping it along the `axis` dimension. -// For example, given a tensor of shape `(A, B, C, D)`; -// -// If `axis == 0` then the i'th tensor in `output` is the slice `value[i, :, :, :]` -// and each tensor in `output` will have shape `(B, C, D)`. (Note that the -// dimension unpacked along is gone, unlike `split`). -// -// If `axis == 1` then the i'th tensor in `output` is the slice `value[:, i, :, :]` -// and each tensor in `output` will have shape `(A, C, D)`. -// Etc. +// Update '*var' according to the Adam algorithm. // -// This is the opposite of `pack`. 
+// lr_t <- learning_rate * sqrt(1 - beta2^t) / (1 - beta1^t) +// m_t <- beta1 * m_{t-1} + (1 - beta1) * g_t +// v_t <- beta2 * v_{t-1} + (1 - beta2) * g_t * g_t +// variable <- variable - lr_t * m_t / (sqrt(v_t) + epsilon) // // Arguments: -// value: 1-D or higher, with `axis` dimension size equal to `num`. -// +// var_: Should be from a Variable(). +// m: Should be from a Variable(). +// v: Should be from a Variable(). +// beta1_power: Must be a scalar. +// beta2_power: Must be a scalar. +// lr: Scaling factor. Must be a scalar. +// beta1: Momentum factor. Must be a scalar. +// beta2: Momentum factor. Must be a scalar. +// epsilon: Ridge term. Must be a scalar. +// grad: The gradient. // -// Returns The list of tensors unpacked from `value`. -func Unpack(scope *Scope, value tf.Output, num int64, optional ...UnpackAttr) (output []tf.Output) { +// Returns the created operation. +func ResourceApplyAdam(scope *Scope, var_ tf.Output, m tf.Output, v tf.Output, beta1_power tf.Output, beta2_power tf.Output, lr tf.Output, beta1 tf.Output, beta2 tf.Output, epsilon tf.Output, grad tf.Output, optional ...ResourceApplyAdamAttr) (o *tf.Operation) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"num": num} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "Unpack", + Type: "ResourceApplyAdam", Input: []tf.Input{ - value, + var_, m, v, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad, }, Attrs: attrs, } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if output, idx, err = makeOutputList(op, idx, "output"); err != nil { - scope.UpdateErr("Unpack", err) - return - } - return output + return scope.AddOperation(opspec) } -// Increments variable pointed to by 'resource' until it reaches 'limit'. +// Store the input tensor in the state of the current session. // // Arguments: -// resource: Should be from a scalar `Variable` node. 
-// limit: If incrementing ref would bring it above limit, instead generates an -// 'OutOfRange' error. -// +// value: The tensor to be stored. // -// Returns A copy of the input before increment. If nothing else modifies the -// input, the values produced will all be distinct. -func ResourceCountUpTo(scope *Scope, resource tf.Output, limit int64, T tf.DataType) (output tf.Output) { +// Returns The handle for the tensor stored in the session state, represented +// as a ResourceHandle object. +func GetSessionHandleV2(scope *Scope, value tf.Output) (handle tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"limit": limit, "T": T} opspec := tf.OpSpec{ - Type: "ResourceCountUpTo", + Type: "GetSessionHandleV2", Input: []tf.Input{ - resource, + value, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Delete the stack from its resource container. -// -// Arguments: -// handle: The handle to a stack. +// ResizeBicubicGradAttr is an optional argument to ResizeBicubicGrad. +type ResizeBicubicGradAttr func(optionalAttr) + +// ResizeBicubicGradAlignCorners sets the optional align_corners attribute to value. // -// Returns the created operation. -func StackCloseV2(scope *Scope, handle tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "StackCloseV2", - Input: []tf.Input{ - handle, - }, +// value: If true, the centers of the 4 corner pixels of the input and grad tensors are +// aligned. Defaults to false. +// If not specified, defaults to false +func ResizeBicubicGradAlignCorners(value bool) ResizeBicubicGradAttr { + return func(m optionalAttr) { + m["align_corners"] = value } - return scope.AddOperation(opspec) } -// Generate a glob pattern matching all sharded file names. -func ShardedFilespec(scope *Scope, basename tf.Output, num_shards tf.Output) (filename tf.Output) { +// Computes the gradient of bicubic interpolation. 
+// +// Arguments: +// grads: 4-D with shape `[batch, height, width, channels]`. +// original_image: 4-D with shape `[batch, orig_height, orig_width, channels]`, +// The image tensor that was resized. +// +// Returns 4-D with shape `[batch, orig_height, orig_width, channels]`. +// Gradients with respect to the input image. Input image must have been +// float or double. +func ResizeBicubicGrad(scope *Scope, grads tf.Output, original_image tf.Output, optional ...ResizeBicubicGradAttr) (output tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "ShardedFilespec", + Type: "ResizeBicubicGrad", Input: []tf.Input{ - basename, num_shards, + grads, original_image, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// TextLineReaderV2Attr is an optional argument to TextLineReaderV2. -type TextLineReaderV2Attr func(optionalAttr) +// ResizeNearestNeighborAttr is an optional argument to ResizeNearestNeighbor. +type ResizeNearestNeighborAttr func(optionalAttr) -// TextLineReaderV2SkipHeaderLines sets the optional skip_header_lines attribute to value. +// ResizeNearestNeighborAlignCorners sets the optional align_corners attribute to value. // -// value: Number of lines to skip from the beginning of every file. -// If not specified, defaults to 0 -func TextLineReaderV2SkipHeaderLines(value int64) TextLineReaderV2Attr { +// value: If true, the centers of the 4 corner pixels of the input and output tensors are +// aligned, preserving the values at the corner pixels. Defaults to false. +// If not specified, defaults to false +func ResizeNearestNeighborAlignCorners(value bool) ResizeNearestNeighborAttr { return func(m optionalAttr) { - m["skip_header_lines"] = value + m["align_corners"] = value } } -// TextLineReaderV2Container sets the optional container attribute to value. +// Resize `images` to `size` using nearest neighbor interpolation. 
// -// value: If non-empty, this reader is placed in the given container. -// Otherwise, a default container is used. -// If not specified, defaults to "" -func TextLineReaderV2Container(value string) TextLineReaderV2Attr { - return func(m optionalAttr) { - m["container"] = value +// Arguments: +// images: 4-D with shape `[batch, height, width, channels]`. +// size: = A 1-D int32 Tensor of 2 elements: `new_height, new_width`. The +// new size for the images. +// +// Returns 4-D with shape +// `[batch, new_height, new_width, channels]`. +func ResizeNearestNeighbor(scope *Scope, images tf.Output, size tf.Output, optional ...ResizeNearestNeighborAttr) (resized_images tf.Output) { + if scope.Err() != nil { + return } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "ResizeNearestNeighbor", + Input: []tf.Input{ + images, size, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) } -// TextLineReaderV2SharedName sets the optional shared_name attribute to value. +// ResizeNearestNeighborGradAttr is an optional argument to ResizeNearestNeighborGrad. +type ResizeNearestNeighborGradAttr func(optionalAttr) + +// ResizeNearestNeighborGradAlignCorners sets the optional align_corners attribute to value. // -// value: If non-empty, this reader is named in the given bucket -// with this shared_name. Otherwise, the node name is used instead. -// If not specified, defaults to "" -func TextLineReaderV2SharedName(value string) TextLineReaderV2Attr { +// value: If true, the centers of the 4 corner pixels of the input and grad tensors are +// aligned. Defaults to false. +// If not specified, defaults to false +func ResizeNearestNeighborGradAlignCorners(value bool) ResizeNearestNeighborGradAttr { return func(m optionalAttr) { - m["shared_name"] = value + m["align_corners"] = value } } -// A Reader that outputs the lines of a file delimited by '\n'. 
+// Computes the gradient of nearest neighbor interpolation. // -// Returns The handle to reference the Reader. -func TextLineReaderV2(scope *Scope, optional ...TextLineReaderV2Attr) (reader_handle tf.Output) { +// Arguments: +// grads: 4-D with shape `[batch, height, width, channels]`. +// size: = A 1-D int32 Tensor of 2 elements: `orig_height, orig_width`. The +// original input size. +// +// Returns 4-D with shape `[batch, orig_height, orig_width, channels]`. Gradients +// with respect to the input image. +func ResizeNearestNeighborGrad(scope *Scope, grads tf.Output, size tf.Output, optional ...ResizeNearestNeighborGradAttr) (output tf.Output) { if scope.Err() != nil { return } @@ -20425,97 +23071,50 @@ func TextLineReaderV2(scope *Scope, optional ...TextLineReaderV2Attr) (reader_ha a(attrs) } opspec := tf.OpSpec{ - Type: "TextLineReaderV2", - + Type: "ResizeNearestNeighborGrad", + Input: []tf.Input{ + grads, size, + }, Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// LoadAndRemapMatrixAttr is an optional argument to LoadAndRemapMatrix. -type LoadAndRemapMatrixAttr func(optionalAttr) +// ExtractJpegShapeAttr is an optional argument to ExtractJpegShape. +type ExtractJpegShapeAttr func(optionalAttr) -// LoadAndRemapMatrixMaxRowsInMemory sets the optional max_rows_in_memory attribute to value. +// ExtractJpegShapeOutputType sets the optional output_type attribute to value. // -// value: The maximum number of rows to load from the checkpoint at -// once. If less than or equal to 0, the entire matrix will be loaded into -// memory. Setting this arg trades increased disk reads for lower memory usage. -// If not specified, defaults to -1 -func LoadAndRemapMatrixMaxRowsInMemory(value int64) LoadAndRemapMatrixAttr { +// value: (Optional) The output type of the operation (int32 or int64). +// Defaults to int32. 
+// If not specified, defaults to DT_INT32 +func ExtractJpegShapeOutputType(value tf.DataType) ExtractJpegShapeAttr { return func(m optionalAttr) { - m["max_rows_in_memory"] = value - } -} - -// Loads a 2-D (matrix) `Tensor` with name `old_tensor_name` from the checkpoint -// -// at `ckpt_path` and potentially reorders its rows and columns using the -// specified remappings. -// -// Most users should use one of the wrapper initializers (such as -// `tf.contrib.framework.load_and_remap_matrix_initializer`) instead of this -// function directly. -// -// The remappings are 1-D tensors with the following properties: -// -// * `row_remapping` must have exactly `num_rows` entries. Row `i` of the output -// matrix will be initialized from the row corresponding to index -// `row_remapping[i]` in the old `Tensor` from the checkpoint. -// * `col_remapping` must have either 0 entries (indicating that no column -// reordering is needed) or `num_cols` entries. If specified, column `j` of the -// output matrix will be initialized from the column corresponding to index -// `col_remapping[j]` in the old `Tensor` from the checkpoint. -// * A value of -1 in either of the remappings signifies a "missing" entry. In that -// case, values from the `initializing_values` tensor will be used to fill that -// missing row or column. If `row_remapping` has `r` missing entries and -// `col_remapping` has `c` missing entries, then the following condition must be -// true: -// -// `(r * num_cols) + (c * num_rows) - (r * c) == len(initializing_values)` -// -// The remapping tensors can be generated using the GenerateVocabRemapping op. 
-// -// As an example, with row_remapping = [1, 0, -1], col_remapping = [0, 2, -1], -// initializing_values = [0.5, -0.5, 0.25, -0.25, 42], and w(i, j) representing -// the value from row i, column j of the old tensor in the checkpoint, the output -// matrix will look like the following: + m["output_type"] = value + } +} + +// Extract the shape information of a JPEG-encoded image. // -// [[w(1, 0), w(1, 2), 0.5], -// [w(0, 0), w(0, 2), -0.5], -// [0.25, -0.25, 42]] +// This op only parses the image header, so it is much faster than DecodeJpeg. // // Arguments: -// ckpt_path: Path to the TensorFlow checkpoint (version 2, `TensorBundle`) from -// which the old matrix `Tensor` will be loaded. -// old_tensor_name: Name of the 2-D `Tensor` to load from checkpoint. -// row_remapping: An int `Tensor` of row remappings (generally created by -// `generate_vocab_remapping`). Even if no row remapping is needed, this must -// still be an index-valued Tensor (e.g. [0, 1, 2, ...]), or a shifted -// index-valued `Tensor` (e.g. [8, 9, 10, ...], for partitioned `Variables`). -// col_remapping: An int `Tensor` of column remappings (generally created by -// `generate_vocab_remapping`). May be a size-0 `Tensor` if only row remapping -// is to be done (e.g. column ordering is the same). -// initializing_values: A float `Tensor` containing values to fill in for cells -// in the output matrix that are not loaded from the checkpoint. Length must be -// exactly the same as the number of missing / new cells. -// num_rows: Number of rows (length of the 1st dimension) in the output matrix. -// num_cols: Number of columns (length of the 2nd dimension) in the output matrix. +// contents: 0-D. The JPEG-encoded image. // -// Returns Output matrix containing existing values loaded from the -// checkpoint, and with any missing values filled in from initializing_values. 
-func LoadAndRemapMatrix(scope *Scope, ckpt_path tf.Output, old_tensor_name tf.Output, row_remapping tf.Output, col_remapping tf.Output, initializing_values tf.Output, num_rows int64, num_cols int64, optional ...LoadAndRemapMatrixAttr) (output_matrix tf.Output) { +// Returns 1-D. The image shape with format [height, width, channels]. +func ExtractJpegShape(scope *Scope, contents tf.Output, optional ...ExtractJpegShapeAttr) (image_shape tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"num_rows": num_rows, "num_cols": num_cols} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "LoadAndRemapMatrix", + Type: "ExtractJpegShape", Input: []tf.Input{ - ckpt_path, old_tensor_name, row_remapping, col_remapping, initializing_values, + contents, }, Attrs: attrs, } @@ -20523,52 +23122,81 @@ func LoadAndRemapMatrix(scope *Scope, ckpt_path tf.Output, old_tensor_name tf.Ou return op.Output(0) } -// TFRecordReaderV2Attr is an optional argument to TFRecordReaderV2. -type TFRecordReaderV2Attr func(optionalAttr) +// PaddingFIFOQueueV2Attr is an optional argument to PaddingFIFOQueueV2. +type PaddingFIFOQueueV2Attr func(optionalAttr) -// TFRecordReaderV2Container sets the optional container attribute to value. +// PaddingFIFOQueueV2Shapes sets the optional shapes attribute to value. // -// value: If non-empty, this reader is placed in the given container. -// Otherwise, a default container is used. -// If not specified, defaults to "" -func TFRecordReaderV2Container(value string) TFRecordReaderV2Attr { +// value: The shape of each component in a value. The length of this attr must +// be either 0 or the same as the length of component_types. +// Shapes of fixed rank but variable size are allowed by setting +// any shape dimension to -1. 
In this case, the inputs' shape may vary along +// the given dimension, and DequeueMany will pad the given dimension with +// zeros up to the maximum shape of all elements in the given batch. +// If the length of this attr is 0, different queue elements may have +// different ranks and shapes, but only one element may be dequeued at a time. +// If not specified, defaults to <> +// +// REQUIRES: len(value) >= 0 +func PaddingFIFOQueueV2Shapes(value []tf.Shape) PaddingFIFOQueueV2Attr { return func(m optionalAttr) { - m["container"] = value + m["shapes"] = value } } -// TFRecordReaderV2SharedName sets the optional shared_name attribute to value. +// PaddingFIFOQueueV2Capacity sets the optional capacity attribute to value. // -// value: If non-empty, this reader is named in the given bucket -// with this shared_name. Otherwise, the node name is used instead. +// value: The upper bound on the number of elements in this queue. +// Negative numbers mean no limit. +// If not specified, defaults to -1 +func PaddingFIFOQueueV2Capacity(value int64) PaddingFIFOQueueV2Attr { + return func(m optionalAttr) { + m["capacity"] = value + } +} + +// PaddingFIFOQueueV2Container sets the optional container attribute to value. +// +// value: If non-empty, this queue is placed in the given container. +// Otherwise, a default container is used. // If not specified, defaults to "" -func TFRecordReaderV2SharedName(value string) TFRecordReaderV2Attr { +func PaddingFIFOQueueV2Container(value string) PaddingFIFOQueueV2Attr { return func(m optionalAttr) { - m["shared_name"] = value + m["container"] = value } } -// TFRecordReaderV2CompressionType sets the optional compression_type attribute to value. +// PaddingFIFOQueueV2SharedName sets the optional shared_name attribute to value. +// +// value: If non-empty, this queue will be shared under the given name +// across multiple sessions. 
// If not specified, defaults to "" -func TFRecordReaderV2CompressionType(value string) TFRecordReaderV2Attr { +func PaddingFIFOQueueV2SharedName(value string) PaddingFIFOQueueV2Attr { return func(m optionalAttr) { - m["compression_type"] = value + m["shared_name"] = value } } -// A Reader that outputs the records from a TensorFlow Records file. +// A queue that produces elements in first-in first-out order. // -// Returns The handle to reference the Reader. -func TFRecordReaderV2(scope *Scope, optional ...TFRecordReaderV2Attr) (reader_handle tf.Output) { +// Variable-size shapes are allowed by setting the corresponding shape dimensions +// to 0 in the shape attr. In this case DequeueMany will pad up to the maximum +// size of any given element in the minibatch. See below for details. +// +// Arguments: +// component_types: The type of each component in a value. +// +// Returns The handle to the queue. +func PaddingFIFOQueueV2(scope *Scope, component_types []tf.DataType, optional ...PaddingFIFOQueueV2Attr) (handle tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"component_types": component_types} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "TFRecordReaderV2", + Type: "PaddingFIFOQueueV2", Attrs: attrs, } @@ -20576,30 +23204,50 @@ func TFRecordReaderV2(scope *Scope, optional ...TFRecordReaderV2Attr) (reader_ha return op.Output(0) } -// QuantizeAndDequantizeV3Attr is an optional argument to QuantizeAndDequantizeV3. -type QuantizeAndDequantizeV3Attr func(optionalAttr) +// DecodePngAttr is an optional argument to DecodePng. +type DecodePngAttr func(optionalAttr) -// QuantizeAndDequantizeV3SignedInput sets the optional signed_input attribute to value. -// If not specified, defaults to true -func QuantizeAndDequantizeV3SignedInput(value bool) QuantizeAndDequantizeV3Attr { +// DecodePngChannels sets the optional channels attribute to value. 
+// +// value: Number of color channels for the decoded image. +// If not specified, defaults to 0 +func DecodePngChannels(value int64) DecodePngAttr { return func(m optionalAttr) { - m["signed_input"] = value + m["channels"] = value } } -// QuantizeAndDequantizeV3RangeGiven sets the optional range_given attribute to value. -// If not specified, defaults to true -func QuantizeAndDequantizeV3RangeGiven(value bool) QuantizeAndDequantizeV3Attr { +// DecodePngDtype sets the optional dtype attribute to value. +// If not specified, defaults to DT_UINT8 +func DecodePngDtype(value tf.DataType) DecodePngAttr { return func(m optionalAttr) { - m["range_given"] = value + m["dtype"] = value } } -// Quantizes then dequantizes a tensor. +// Decode a PNG-encoded image to a uint8 or uint16 tensor. // -// This is almost identical to QuantizeAndDequantizeV2, except that num_bits is a -// tensor, so its value can change during training. -func QuantizeAndDequantizeV3(scope *Scope, input tf.Output, input_min tf.Output, input_max tf.Output, num_bits tf.Output, optional ...QuantizeAndDequantizeV3Attr) (output tf.Output) { +// The attr `channels` indicates the desired number of color channels for the +// decoded image. +// +// Accepted values are: +// +// * 0: Use the number of channels in the PNG-encoded image. +// * 1: output a grayscale image. +// * 3: output an RGB image. +// * 4: output an RGBA image. +// +// If needed, the PNG-encoded image is transformed to match the requested number +// of color channels. +// +// This op also supports decoding JPEGs and non-animated GIFs since the interface +// is the same, though it is cleaner to use `tf.image.decode_image`. +// +// Arguments: +// contents: 0-D. The PNG-encoded image. +// +// Returns 3-D with shape `[height, width, channels]`. 
+func DecodePng(scope *Scope, contents tf.Output, optional ...DecodePngAttr) (image tf.Output) { if scope.Err() != nil { return } @@ -20608,9 +23256,9 @@ func QuantizeAndDequantizeV3(scope *Scope, input tf.Output, input_min tf.Output, a(attrs) } opspec := tf.OpSpec{ - Type: "QuantizeAndDequantizeV3", + Type: "DecodePng", Input: []tf.Input{ - input, input_min, input_max, num_bits, + contents, }, Attrs: attrs, } @@ -20618,318 +23266,256 @@ func QuantizeAndDequantizeV3(scope *Scope, input tf.Output, input_min tf.Output, return op.Output(0) } -// IdentityReaderV2Attr is an optional argument to IdentityReaderV2. -type IdentityReaderV2Attr func(optionalAttr) - -// IdentityReaderV2Container sets the optional container attribute to value. +// Decode the first frame of a GIF-encoded image to a uint8 tensor. // -// value: If non-empty, this reader is placed in the given container. -// Otherwise, a default container is used. -// If not specified, defaults to "" -func IdentityReaderV2Container(value string) IdentityReaderV2Attr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// IdentityReaderV2SharedName sets the optional shared_name attribute to value. +// GIF with frame or transparency compression are not supported +// convert animated GIF from compressed to uncompressed by: // -// value: If non-empty, this reader is named in the given bucket -// with this shared_name. Otherwise, the node name is used instead. -// If not specified, defaults to "" -func IdentityReaderV2SharedName(value string) IdentityReaderV2Attr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// A Reader that outputs the queued work as both the key and value. +// convert $src.gif -coalesce $dst.gif // -// To use, enqueue strings in a Queue. ReaderRead will take the front -// work string and output (work, work). +// This op also supports decoding JPEGs and PNGs, though it is cleaner to use +// `tf.image.decode_image`. 
// -// Returns The handle to reference the Reader. -func IdentityReaderV2(scope *Scope, optional ...IdentityReaderV2Attr) (reader_handle tf.Output) { +// Arguments: +// contents: 0-D. The GIF-encoded image. +// +// Returns 4-D with shape `[num_frames, height, width, 3]`. RGB order +func DecodeGif(scope *Scope, contents tf.Output) (image tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "IdentityReaderV2", - - Attrs: attrs, + Type: "DecodeGif", + Input: []tf.Input{ + contents, + }, } op := scope.AddOperation(opspec) return op.Output(0) } -// ResourceApplyGradientDescentAttr is an optional argument to ResourceApplyGradientDescent. -type ResourceApplyGradientDescentAttr func(optionalAttr) - -// ResourceApplyGradientDescentUseLocking sets the optional use_locking attribute to value. -// -// value: If `True`, the subtraction will be protected by a lock; -// otherwise the behavior is undefined, but may exhibit less contention. -// If not specified, defaults to false -func ResourceApplyGradientDescentUseLocking(value bool) ResourceApplyGradientDescentAttr { - return func(m optionalAttr) { - m["use_locking"] = value - } -} - -// Update '*var' by subtracting 'alpha' * 'delta' from it. -// -// Arguments: -// var_: Should be from a Variable(). -// alpha: Scaling factor. Must be a scalar. -// delta: The change. +// Computes the gradient of the sigmoid of `x` wrt its input. // -// Returns the created operation. -func ResourceApplyGradientDescent(scope *Scope, var_ tf.Output, alpha tf.Output, delta tf.Output, optional ...ResourceApplyGradientDescentAttr) (o *tf.Operation) { +// Specifically, `grad = dy * y * (1 - y)`, where `y = sigmoid(x)`, and +// `dy` is the corresponding input gradient. 
+func SigmoidGrad(scope *Scope, y tf.Output, dy tf.Output) (z tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "ResourceApplyGradientDescent", + Type: "SigmoidGrad", Input: []tf.Input{ - var_, alpha, delta, + y, dy, }, - Attrs: attrs, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0) } -// Returns the next record (key, value pair) produced by a Reader. +// Convert one or more images from HSV to RGB. // -// Will dequeue from the input queue if necessary (e.g. when the -// Reader needs to start reading from a new file since it has finished -// with the previous file). +// Outputs a tensor of the same shape as the `images` tensor, containing the RGB +// value of the pixels. The output is only well defined if the value in `images` +// are in `[0,1]`. +// +// See `rgb_to_hsv` for a description of the HSV encoding. // // Arguments: -// reader_handle: Handle to a Reader. -// queue_handle: Handle to a Queue, with string work items. +// images: 1-D or higher rank. HSV data to convert. Last dimension must be size 3. // -// Returns A scalar.A scalar. -func ReaderReadV2(scope *Scope, reader_handle tf.Output, queue_handle tf.Output) (key tf.Output, value tf.Output) { +// Returns `images` converted to RGB. +func HSVToRGB(scope *Scope, images tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "ReaderReadV2", + Type: "HSVToRGB", Input: []tf.Input{ - reader_handle, queue_handle, + images, }, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) + return op.Output(0) } -// Returns up to `num_records` (key, value) pairs produced by a Reader. -// -// Will dequeue from the input queue if necessary (e.g. when the -// Reader needs to start reading from a new file since it has finished -// with the previous file). 
-// It may return less than `num_records` even before the last batch. +// Retrieves the tree ensemble resource stamp token, number of trees and growing statistics. // // Arguments: -// reader_handle: Handle to a `Reader`. -// queue_handle: Handle to a `Queue`, with string work items. -// num_records: number of records to read from `Reader`. +// tree_ensemble_handle: Handle to the tree ensemble. // -// Returns A 1-D tensor.A 1-D tensor. -func ReaderReadUpToV2(scope *Scope, reader_handle tf.Output, queue_handle tf.Output, num_records tf.Output) (keys tf.Output, values tf.Output) { +// Returns Stamp token of the tree ensemble resource.The number of trees in the tree ensemble resource.The number of trees that were finished successfully.The number of layers we attempted to build (but not necessarily succeeded).Rank size 2 tensor that contains start and end ids of the nodes in the latest +// layer. +func BoostedTreesGetEnsembleStates(scope *Scope, tree_ensemble_handle tf.Output) (stamp_token tf.Output, num_trees tf.Output, num_finalized_trees tf.Output, num_attempted_layers tf.Output, last_layer_nodes_range tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "ReaderReadUpToV2", + Type: "BoostedTreesGetEnsembleStates", Input: []tf.Input{ - reader_handle, queue_handle, num_records, + tree_ensemble_handle, }, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) + return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4) } -// Restore a Reader to its initial clean state. -// -// Arguments: -// reader_handle: Handle to a Reader. +// Gets the next output from the given iterator. // -// Returns the created operation. -func ReaderResetV2(scope *Scope, reader_handle tf.Output) (o *tf.Operation) { +// This operation is a synchronous version IteratorGetNext. 
It should only be used +// in situations where the iterator does not block the calling thread, or where +// the calling thread is not a member of the thread pool used to execute parallel +// operations (e.g. in eager mode). +func IteratorGetNextSync(scope *Scope, iterator tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (components []tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} opspec := tf.OpSpec{ - Type: "ReaderResetV2", + Type: "IteratorGetNextSync", Input: []tf.Input{ - reader_handle, + iterator, }, + Attrs: attrs, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + if scope.Err() != nil { + return + } + var idx int + var err error + if components, idx, err = makeOutputList(op, idx, "components"); err != nil { + scope.UpdateErr("IteratorGetNextSync", err) + return + } + return components } -// ResourceApplyAdamAttr is an optional argument to ResourceApplyAdam. -type ResourceApplyAdamAttr func(optionalAttr) +// SampleDistortedBoundingBoxV2Attr is an optional argument to SampleDistortedBoundingBoxV2. +type SampleDistortedBoundingBoxV2Attr func(optionalAttr) -// ResourceApplyAdamUseLocking sets the optional use_locking attribute to value. +// SampleDistortedBoundingBoxV2Seed sets the optional seed attribute to value. // -// value: If `True`, updating of the var, m, and v tensors will be protected -// by a lock; otherwise the behavior is undefined, but may exhibit less -// contention. -// If not specified, defaults to false -func ResourceApplyAdamUseLocking(value bool) ResourceApplyAdamAttr { +// value: If either `seed` or `seed2` are set to non-zero, the random number +// generator is seeded by the given `seed`. Otherwise, it is seeded by a random +// seed. 
+// If not specified, defaults to 0 +func SampleDistortedBoundingBoxV2Seed(value int64) SampleDistortedBoundingBoxV2Attr { return func(m optionalAttr) { - m["use_locking"] = value + m["seed"] = value } } -// ResourceApplyAdamUseNesterov sets the optional use_nesterov attribute to value. +// SampleDistortedBoundingBoxV2Seed2 sets the optional seed2 attribute to value. // -// value: If `True`, uses the nesterov update. -// If not specified, defaults to false -func ResourceApplyAdamUseNesterov(value bool) ResourceApplyAdamAttr { +// value: A second seed to avoid seed collision. +// If not specified, defaults to 0 +func SampleDistortedBoundingBoxV2Seed2(value int64) SampleDistortedBoundingBoxV2Attr { return func(m optionalAttr) { - m["use_nesterov"] = value - } -} - -// Update '*var' according to the Adam algorithm. -// -// lr_t <- learning_rate * sqrt(1 - beta2^t) / (1 - beta1^t) -// m_t <- beta1 * m_{t-1} + (1 - beta1) * g_t -// v_t <- beta2 * v_{t-1} + (1 - beta2) * g_t * g_t -// variable <- variable - lr_t * m_t / (sqrt(v_t) + epsilon) -// -// Arguments: -// var_: Should be from a Variable(). -// m: Should be from a Variable(). -// v: Should be from a Variable(). -// beta1_power: Must be a scalar. -// beta2_power: Must be a scalar. -// lr: Scaling factor. Must be a scalar. -// beta1: Momentum factor. Must be a scalar. -// beta2: Momentum factor. Must be a scalar. -// epsilon: Ridge term. Must be a scalar. -// grad: The gradient. -// -// Returns the created operation. 
-func ResourceApplyAdam(scope *Scope, var_ tf.Output, m tf.Output, v tf.Output, beta1_power tf.Output, beta2_power tf.Output, lr tf.Output, beta1 tf.Output, beta2 tf.Output, epsilon tf.Output, grad tf.Output, optional ...ResourceApplyAdamAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ResourceApplyAdam", - Input: []tf.Input{ - var_, m, v, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad, - }, - Attrs: attrs, + m["seed2"] = value } - return scope.AddOperation(opspec) } -// Store the input tensor in the state of the current session. -// -// Arguments: -// value: The tensor to be stored. +// SampleDistortedBoundingBoxV2AspectRatioRange sets the optional aspect_ratio_range attribute to value. // -// Returns The handle for the tensor stored in the session state, represented -// as a ResourceHandle object. -func GetSessionHandleV2(scope *Scope, value tf.Output) (handle tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "GetSessionHandleV2", - Input: []tf.Input{ - value, - }, +// value: The cropped area of the image must have an aspect ratio = +// width / height within this range. +// If not specified, defaults to +func SampleDistortedBoundingBoxV2AspectRatioRange(value []float32) SampleDistortedBoundingBoxV2Attr { + return func(m optionalAttr) { + m["aspect_ratio_range"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// ResizeBicubicGradAttr is an optional argument to ResizeBicubicGrad. -type ResizeBicubicGradAttr func(optionalAttr) - -// ResizeBicubicGradAlignCorners sets the optional align_corners attribute to value. +// SampleDistortedBoundingBoxV2AreaRange sets the optional area_range attribute to value. // -// value: If true, rescale grads by (orig_height - 1) / (height - 1), which -// exactly aligns the 4 corners of grads and original_image. 
If false, rescale by -// orig_height / height. Treat similarly the width dimension. -// If not specified, defaults to false -func ResizeBicubicGradAlignCorners(value bool) ResizeBicubicGradAttr { +// value: The cropped area of the image must contain a fraction of the +// supplied image within in this range. +// If not specified, defaults to +func SampleDistortedBoundingBoxV2AreaRange(value []float32) SampleDistortedBoundingBoxV2Attr { return func(m optionalAttr) { - m["align_corners"] = value + m["area_range"] = value } } -// Computes the gradient of bicubic interpolation. -// -// Arguments: -// grads: 4-D with shape `[batch, height, width, channels]`. -// original_image: 4-D with shape `[batch, orig_height, orig_width, channels]`, -// The image tensor that was resized. +// SampleDistortedBoundingBoxV2MaxAttempts sets the optional max_attempts attribute to value. // -// Returns 4-D with shape `[batch, orig_height, orig_width, channels]`. -// Gradients with respect to the input image. Input image must have been -// float or double. -func ResizeBicubicGrad(scope *Scope, grads tf.Output, original_image tf.Output, optional ...ResizeBicubicGradAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ResizeBicubicGrad", - Input: []tf.Input{ - grads, original_image, - }, - Attrs: attrs, +// value: Number of attempts at generating a cropped region of the image +// of the specified constraints. After `max_attempts` failures, return the entire +// image. +// If not specified, defaults to 100 +func SampleDistortedBoundingBoxV2MaxAttempts(value int64) SampleDistortedBoundingBoxV2Attr { + return func(m optionalAttr) { + m["max_attempts"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// ResizeNearestNeighborAttr is an optional argument to ResizeNearestNeighbor. 
-type ResizeNearestNeighborAttr func(optionalAttr) - -// ResizeNearestNeighborAlignCorners sets the optional align_corners attribute to value. +// SampleDistortedBoundingBoxV2UseImageIfNoBoundingBoxes sets the optional use_image_if_no_bounding_boxes attribute to value. // -// value: If true, rescale input by (new_height - 1) / (height - 1), which -// exactly aligns the 4 corners of images and resized images. If false, rescale -// by new_height / height. Treat similarly the width dimension. +// value: Controls behavior if no bounding boxes supplied. +// If true, assume an implicit bounding box covering the whole input. If false, +// raise an error. // If not specified, defaults to false -func ResizeNearestNeighborAlignCorners(value bool) ResizeNearestNeighborAttr { +func SampleDistortedBoundingBoxV2UseImageIfNoBoundingBoxes(value bool) SampleDistortedBoundingBoxV2Attr { return func(m optionalAttr) { - m["align_corners"] = value + m["use_image_if_no_bounding_boxes"] = value } } -// Resize `images` to `size` using nearest neighbor interpolation. +// Generate a single randomly distorted bounding box for an image. +// +// Bounding box annotations are often supplied in addition to ground-truth labels +// in image recognition or object localization tasks. A common technique for +// training such a system is to randomly distort an image while preserving +// its content, i.e. *data augmentation*. This Op outputs a randomly distorted +// localization of an object, i.e. bounding box, given an `image_size`, +// `bounding_boxes` and a series of constraints. +// +// The output of this Op is a single bounding box that may be used to crop the +// original image. The output is returned as 3 tensors: `begin`, `size` and +// `bboxes`. The first 2 tensors can be fed directly into `tf.slice` to crop the +// image. The latter may be supplied to `tf.image.draw_bounding_boxes` to visualize +// what the bounding box looks like. 
+// +// Bounding boxes are supplied and returned as `[y_min, x_min, y_max, x_max]`. The +// bounding box coordinates are floats in `[0.0, 1.0]` relative to the width and +// height of the underlying image. +// +// For example, +// +// ```python +// # Generate a single distorted bounding box. +// begin, size, bbox_for_draw = tf.image.sample_distorted_bounding_box( +// tf.shape(image), +// bounding_boxes=bounding_boxes) +// +// # Draw the bounding box in an image summary. +// image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0), +// bbox_for_draw) +// tf.summary.image('images_with_box', image_with_box) +// +// # Employ the bounding box to distort the image. +// distorted_image = tf.slice(image, begin, size) +// ``` +// +// Note that if no bounding box information is available, setting +// `use_image_if_no_bounding_boxes = true` will assume there is a single implicit +// bounding box covering the whole image. If `use_image_if_no_bounding_boxes` is +// false and no bounding boxes are supplied, an error is raised. // // Arguments: -// images: 4-D with shape `[batch, height, width, channels]`. -// size: = A 1-D int32 Tensor of 2 elements: `new_height, new_width`. The -// new size for the images. +// image_size: 1-D, containing `[height, width, channels]`. +// bounding_boxes: 3-D with shape `[batch, N, 4]` describing the N bounding boxes +// associated with the image. +// min_object_covered: The cropped area of the image must contain at least this +// fraction of any bounding box supplied. The value of this parameter should be +// non-negative. In the case of 0, the cropped area does not need to overlap +// any of the bounding boxes supplied. // -// Returns 4-D with shape -// `[batch, new_height, new_width, channels]`. -func ResizeNearestNeighbor(scope *Scope, images tf.Output, size tf.Output, optional ...ResizeNearestNeighborAttr) (resized_images tf.Output) { +// Returns 1-D, containing `[offset_height, offset_width, 0]`. 
Provide as input to +// `tf.slice`.1-D, containing `[target_height, target_width, -1]`. Provide as input to +// `tf.slice`.3-D with shape `[1, 1, 4]` containing the distorted bounding box. +// Provide as input to `tf.image.draw_bounding_boxes`. +func SampleDistortedBoundingBoxV2(scope *Scope, image_size tf.Output, bounding_boxes tf.Output, min_object_covered tf.Output, optional ...SampleDistortedBoundingBoxV2Attr) (begin tf.Output, size tf.Output, bboxes tf.Output) { if scope.Err() != nil { return } @@ -20938,82 +23524,88 @@ func ResizeNearestNeighbor(scope *Scope, images tf.Output, size tf.Output, optio a(attrs) } opspec := tf.OpSpec{ - Type: "ResizeNearestNeighbor", + Type: "SampleDistortedBoundingBoxV2", Input: []tf.Input{ - images, size, + image_size, bounding_boxes, min_object_covered, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1), op.Output(2) } -// ResizeNearestNeighborGradAttr is an optional argument to ResizeNearestNeighborGrad. -type ResizeNearestNeighborGradAttr func(optionalAttr) +// ExtractGlimpseAttr is an optional argument to ExtractGlimpse. +type ExtractGlimpseAttr func(optionalAttr) -// ResizeNearestNeighborGradAlignCorners sets the optional align_corners attribute to value. +// ExtractGlimpseCentered sets the optional centered attribute to value. // -// value: If true, rescale grads by (orig_height - 1) / (height - 1), which -// exactly aligns the 4 corners of grads and original_image. If false, rescale by -// orig_height / height. Treat similarly the width dimension. -// If not specified, defaults to false -func ResizeNearestNeighborGradAlignCorners(value bool) ResizeNearestNeighborGradAttr { +// value: indicates if the offset coordinates are centered relative to +// the image, in which case the (0, 0) offset is relative to the center +// of the input images. If false, the (0,0) offset corresponds to the +// upper left corner of the input images. 
+// If not specified, defaults to true +func ExtractGlimpseCentered(value bool) ExtractGlimpseAttr { return func(m optionalAttr) { - m["align_corners"] = value + m["centered"] = value } } -// Computes the gradient of nearest neighbor interpolation. -// -// Arguments: -// grads: 4-D with shape `[batch, height, width, channels]`. -// size: = A 1-D int32 Tensor of 2 elements: `orig_height, orig_width`. The -// original input size. +// ExtractGlimpseNormalized sets the optional normalized attribute to value. // -// Returns 4-D with shape `[batch, orig_height, orig_width, channels]`. Gradients -// with respect to the input image. -func ResizeNearestNeighborGrad(scope *Scope, grads tf.Output, size tf.Output, optional ...ResizeNearestNeighborGradAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ResizeNearestNeighborGrad", - Input: []tf.Input{ - grads, size, - }, - Attrs: attrs, +// value: indicates if the offset coordinates are normalized. +// If not specified, defaults to true +func ExtractGlimpseNormalized(value bool) ExtractGlimpseAttr { + return func(m optionalAttr) { + m["normalized"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// ExtractJpegShapeAttr is an optional argument to ExtractJpegShape. -type ExtractJpegShapeAttr func(optionalAttr) - -// ExtractJpegShapeOutputType sets the optional output_type attribute to value. +// ExtractGlimpseUniformNoise sets the optional uniform_noise attribute to value. // -// value: (Optional) The output type of the operation (int32 or int64). -// Defaults to int32. -// If not specified, defaults to DT_INT32 -func ExtractJpegShapeOutputType(value tf.DataType) ExtractJpegShapeAttr { +// value: indicates if the noise should be generated using a +// uniform distribution or a Gaussian distribution. 
+// If not specified, defaults to true +func ExtractGlimpseUniformNoise(value bool) ExtractGlimpseAttr { return func(m optionalAttr) { - m["output_type"] = value + m["uniform_noise"] = value } } -// Extract the shape information of a JPEG-encoded image. +// Extracts a glimpse from the input tensor. // -// This op only parses the image header, so it is much faster than DecodeJpeg. +// Returns a set of windows called glimpses extracted at location +// `offsets` from the input tensor. If the windows only partially +// overlaps the inputs, the non overlapping areas will be filled with +// random noise. +// +// The result is a 4-D tensor of shape `[batch_size, glimpse_height, +// glimpse_width, channels]`. The channels and batch dimensions are the +// same as that of the input tensor. The height and width of the output +// windows are specified in the `size` parameter. +// +// The argument `normalized` and `centered` controls how the windows are built: +// +// * If the coordinates are normalized but not centered, 0.0 and 1.0 +// correspond to the minimum and maximum of each height and width +// dimension. +// * If the coordinates are both normalized and centered, they range from +// -1.0 to 1.0. The coordinates (-1.0, -1.0) correspond to the upper +// left corner, the lower right corner is located at (1.0, 1.0) and the +// center is at (0, 0). +// * If the coordinates are not normalized they are interpreted as +// numbers of pixels. // // Arguments: -// contents: 0-D. The JPEG-encoded image. +// input: A 4-D float tensor of shape `[batch_size, height, width, channels]`. +// size: A 1-D tensor of 2 elements containing the size of the glimpses +// to extract. The glimpse height must be specified first, following +// by the glimpse width. +// offsets: A 2-D integer tensor of shape `[batch_size, 2]` containing +// the y, x locations of the center of each window. // -// Returns 1-D. The image shape with format [height, width, channels]. 
-func ExtractJpegShape(scope *Scope, contents tf.Output, optional ...ExtractJpegShapeAttr) (image_shape tf.Output) { +// Returns A tensor representing the glimpses `[batch_size, +// glimpse_height, glimpse_width, channels]`. +func ExtractGlimpse(scope *Scope, input tf.Output, size tf.Output, offsets tf.Output, optional ...ExtractGlimpseAttr) (glimpse tf.Output) { if scope.Err() != nil { return } @@ -21022,9 +23614,9 @@ func ExtractJpegShape(scope *Scope, contents tf.Output, optional ...ExtractJpegS a(attrs) } opspec := tf.OpSpec{ - Type: "ExtractJpegShape", + Type: "ExtractGlimpse", Input: []tf.Input{ - contents, + input, size, offsets, }, Attrs: attrs, } @@ -21032,143 +23624,121 @@ func ExtractJpegShape(scope *Scope, contents tf.Output, optional ...ExtractJpegS return op.Output(0) } -// PaddingFIFOQueueV2Attr is an optional argument to PaddingFIFOQueueV2. -type PaddingFIFOQueueV2Attr func(optionalAttr) - -// PaddingFIFOQueueV2Shapes sets the optional shapes attribute to value. -// -// value: The shape of each component in a value. The length of this attr must -// be either 0 or the same as the length of component_types. -// Shapes of fixed rank but variable size are allowed by setting -// any shape dimension to -1. In this case, the inputs' shape may vary along -// the given dimension, and DequeueMany will pad the given dimension with -// zeros up to the maximum shape of all elements in the given batch. -// If the length of this attr is 0, different queue elements may have -// different ranks and shapes, but only one element may be dequeued at a time. -// If not specified, defaults to <> +// A container for an iterator resource. // -// REQUIRES: len(value) >= 0 -func PaddingFIFOQueueV2Shapes(value []tf.Shape) PaddingFIFOQueueV2Attr { - return func(m optionalAttr) { - m["shapes"] = value +// Returns A handle to the iterator that can be passed to a "MakeIterator" +// or "IteratorGetNext" op. 
+func Iterator(scope *Scope, shared_name string, container string, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { + if scope.Err() != nil { + return } -} + attrs := map[string]interface{}{"shared_name": shared_name, "container": container, "output_types": output_types, "output_shapes": output_shapes} + opspec := tf.OpSpec{ + Type: "Iterator", -// PaddingFIFOQueueV2Capacity sets the optional capacity attribute to value. -// -// value: The upper bound on the number of elements in this queue. -// Negative numbers mean no limit. -// If not specified, defaults to -1 -func PaddingFIFOQueueV2Capacity(value int64) PaddingFIFOQueueV2Attr { - return func(m optionalAttr) { - m["capacity"] = value + Attrs: attrs, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// PaddingFIFOQueueV2Container sets the optional container attribute to value. -// -// value: If non-empty, this queue is placed in the given container. -// Otherwise, a default container is used. -// If not specified, defaults to "" -func PaddingFIFOQueueV2Container(value string) PaddingFIFOQueueV2Attr { - return func(m optionalAttr) { - m["container"] = value - } -} +// CropAndResizeGradImageAttr is an optional argument to CropAndResizeGradImage. +type CropAndResizeGradImageAttr func(optionalAttr) -// PaddingFIFOQueueV2SharedName sets the optional shared_name attribute to value. +// CropAndResizeGradImageMethod sets the optional method attribute to value. // -// value: If non-empty, this queue will be shared under the given name -// across multiple sessions. -// If not specified, defaults to "" -func PaddingFIFOQueueV2SharedName(value string) PaddingFIFOQueueV2Attr { +// value: A string specifying the interpolation method. Only 'bilinear' is +// supported for now. 
+// If not specified, defaults to "bilinear" +func CropAndResizeGradImageMethod(value string) CropAndResizeGradImageAttr { return func(m optionalAttr) { - m["shared_name"] = value + m["method"] = value } } -// A queue that produces elements in first-in first-out order. -// -// Variable-size shapes are allowed by setting the corresponding shape dimensions -// to 0 in the shape attr. In this case DequeueMany will pad up to the maximum -// size of any given element in the minibatch. See below for details. +// Computes the gradient of the crop_and_resize op wrt the input image tensor. // // Arguments: -// component_types: The type of each component in a value. +// grads: A 4-D tensor of shape `[num_boxes, crop_height, crop_width, depth]`. +// boxes: A 2-D tensor of shape `[num_boxes, 4]`. The `i`-th row of the tensor +// specifies the coordinates of a box in the `box_ind[i]` image and is specified +// in normalized coordinates `[y1, x1, y2, x2]`. A normalized coordinate value of +// `y` is mapped to the image coordinate at `y * (image_height - 1)`, so as the +// `[0, 1]` interval of normalized image height is mapped to +// `[0, image_height - 1] in image height coordinates. We do allow y1 > y2, in +// which case the sampled crop is an up-down flipped version of the original +// image. The width dimension is treated similarly. Normalized coordinates +// outside the `[0, 1]` range are allowed, in which case we use +// `extrapolation_value` to extrapolate the input image values. +// box_ind: A 1-D tensor of shape `[num_boxes]` with int32 values in `[0, batch)`. +// The value of `box_ind[i]` specifies the image that the `i`-th box refers to. +// image_size: A 1-D tensor with value `[batch, image_height, image_width, depth]` +// containing the original image size. Both `image_height` and `image_width` need +// to be positive. // -// Returns The handle to the queue. 
-func PaddingFIFOQueueV2(scope *Scope, component_types []tf.DataType, optional ...PaddingFIFOQueueV2Attr) (handle tf.Output) { +// +// Returns A 4-D tensor of shape `[batch, image_height, image_width, depth]`. +func CropAndResizeGradImage(scope *Scope, grads tf.Output, boxes tf.Output, box_ind tf.Output, image_size tf.Output, T tf.DataType, optional ...CropAndResizeGradImageAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"component_types": component_types} + attrs := map[string]interface{}{"T": T} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "PaddingFIFOQueueV2", - + Type: "CropAndResizeGradImage", + Input: []tf.Input{ + grads, boxes, box_ind, image_size, + }, Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// DecodePngAttr is an optional argument to DecodePng. -type DecodePngAttr func(optionalAttr) +// ShuffleDatasetAttr is an optional argument to ShuffleDataset. +type ShuffleDatasetAttr func(optionalAttr) -// DecodePngChannels sets the optional channels attribute to value. +// ShuffleDatasetReshuffleEachIteration sets the optional reshuffle_each_iteration attribute to value. // -// value: Number of color channels for the decoded image. -// If not specified, defaults to 0 -func DecodePngChannels(value int64) DecodePngAttr { - return func(m optionalAttr) { - m["channels"] = value - } -} - -// DecodePngDtype sets the optional dtype attribute to value. -// If not specified, defaults to DT_UINT8 -func DecodePngDtype(value tf.DataType) DecodePngAttr { +// value: If true, each iterator over this dataset will be given +// a different pseudorandomly generated seed, based on a sequence seeded by the +// `seed` and `seed2` inputs. If false, each iterator will be given the same +// seed, and repeated iteration over this dataset will yield the exact same +// sequence of results. 
+// If not specified, defaults to true +func ShuffleDatasetReshuffleEachIteration(value bool) ShuffleDatasetAttr { return func(m optionalAttr) { - m["dtype"] = value + m["reshuffle_each_iteration"] = value } } -// Decode a PNG-encoded image to a uint8 or uint16 tensor. -// -// The attr `channels` indicates the desired number of color channels for the -// decoded image. -// -// Accepted values are: -// -// * 0: Use the number of channels in the PNG-encoded image. -// * 1: output a grayscale image. -// * 3: output an RGB image. -// * 4: output an RGBA image. +// Creates a dataset that shuffles elements from `input_dataset` pseudorandomly. // -// If needed, the PNG-encoded image is transformed to match the requested number -// of color channels. +// Arguments: // -// This op also supports decoding JPEGs and non-animated GIFs since the interface -// is the same, though it is cleaner to use `tf.image.decode_image`. +// buffer_size: The number of output elements to buffer in an iterator over +// this dataset. Compare with the `min_after_dequeue` attr when creating a +// `RandomShuffleQueue`. +// seed: A scalar seed for the random number generator. If either `seed` or +// `seed2` is set to be non-zero, the random number generator is seeded +// by the given seed. Otherwise, a random seed is used. +// seed2: A second scalar seed to avoid seed collision. // -// Arguments: -// contents: 0-D. The PNG-encoded image. // -// Returns 3-D with shape `[height, width, channels]`. 
-func DecodePng(scope *Scope, contents tf.Output, optional ...DecodePngAttr) (image tf.Output) { +func ShuffleDataset(scope *Scope, input_dataset tf.Output, buffer_size tf.Output, seed tf.Output, seed2 tf.Output, output_types []tf.DataType, output_shapes []tf.Shape, optional ...ShuffleDatasetAttr) (handle tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "DecodePng", + Type: "ShuffleDataset", Input: []tf.Input{ - contents, + input_dataset, buffer_size, seed, seed2, }, Attrs: attrs, } @@ -21176,83 +23746,69 @@ func DecodePng(scope *Scope, contents tf.Output, optional ...DecodePngAttr) (ima return op.Output(0) } -// Decode the first frame of a GIF-encoded image to a uint8 tensor. -// -// GIF with frame or transparency compression are not supported -// convert animated GIF from compressed to uncompressed by: -// -// convert $src.gif -coalesce $dst.gif +// 3D fast Fourier transform. // -// This op also supports decoding JPEGs and PNGs, though it is cleaner to use -// `tf.image.decode_image`. +// Computes the 3-dimensional discrete Fourier transform over the inner-most 3 +// dimensions of `input`. // // Arguments: -// contents: 0-D. The GIF-encoded image. +// input: A complex64 tensor. // -// Returns 4-D with shape `[num_frames, height, width, 3]`. RGB order -func DecodeGif(scope *Scope, contents tf.Output) (image tf.Output) { +// Returns A complex64 tensor of the same shape as `input`. The inner-most 3 +// dimensions of `input` are replaced with their 3D Fourier transform. +// +// @compatibility(numpy) +// Equivalent to np.fft.fftn with 3 dimensions. 
+// @end_compatibility +func FFT3D(scope *Scope, input tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "DecodeGif", + Type: "FFT3D", Input: []tf.Input{ - contents, + input, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// ResourceApplyCenteredRMSPropAttr is an optional argument to ResourceApplyCenteredRMSProp. -type ResourceApplyCenteredRMSPropAttr func(optionalAttr) +// CropAndResizeGradBoxesAttr is an optional argument to CropAndResizeGradBoxes. +type CropAndResizeGradBoxesAttr func(optionalAttr) -// ResourceApplyCenteredRMSPropUseLocking sets the optional use_locking attribute to value. +// CropAndResizeGradBoxesMethod sets the optional method attribute to value. // -// value: If `True`, updating of the var, mg, ms, and mom tensors is -// protected by a lock; otherwise the behavior is undefined, but may exhibit less -// contention. -// If not specified, defaults to false -func ResourceApplyCenteredRMSPropUseLocking(value bool) ResourceApplyCenteredRMSPropAttr { +// value: A string specifying the interpolation method. Only 'bilinear' is +// supported for now. +// If not specified, defaults to "bilinear" +func CropAndResizeGradBoxesMethod(value string) CropAndResizeGradBoxesAttr { return func(m optionalAttr) { - m["use_locking"] = value + m["method"] = value } } -// Update '*var' according to the centered RMSProp algorithm. -// -// The centered RMSProp algorithm uses an estimate of the centered second moment -// (i.e., the variance) for normalization, as opposed to regular RMSProp, which -// uses the (uncentered) second moment. This often helps with training, but is -// slightly more expensive in terms of computation and memory. -// -// Note that in dense implementation of this algorithm, mg, ms, and mom will -// update even if the grad is zero, but in this sparse implementation, mg, ms, -// and mom will not update in iterations during which the grad is zero. 
-// -// mean_square = decay * mean_square + (1-decay) * gradient ** 2 -// mean_grad = decay * mean_grad + (1-decay) * gradient -// -// Delta = learning_rate * gradient / sqrt(mean_square + epsilon - mean_grad ** 2) -// -// mg <- rho * mg_{t-1} + (1-rho) * grad -// ms <- rho * ms_{t-1} + (1-rho) * grad * grad -// mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms - mg * mg + epsilon) -// var <- var - mom +// Computes the gradient of the crop_and_resize op wrt the input boxes tensor. // // Arguments: -// var_: Should be from a Variable(). -// mg: Should be from a Variable(). -// ms: Should be from a Variable(). -// mom: Should be from a Variable(). -// lr: Scaling factor. Must be a scalar. -// rho: Decay rate. Must be a scalar. -// -// epsilon: Ridge term. Must be a scalar. -// grad: The gradient. +// grads: A 4-D tensor of shape `[num_boxes, crop_height, crop_width, depth]`. +// image: A 4-D tensor of shape `[batch, image_height, image_width, depth]`. +// Both `image_height` and `image_width` need to be positive. +// boxes: A 2-D tensor of shape `[num_boxes, 4]`. The `i`-th row of the tensor +// specifies the coordinates of a box in the `box_ind[i]` image and is specified +// in normalized coordinates `[y1, x1, y2, x2]`. A normalized coordinate value of +// `y` is mapped to the image coordinate at `y * (image_height - 1)`, so as the +// `[0, 1]` interval of normalized image height is mapped to +// `[0, image_height - 1] in image height coordinates. We do allow y1 > y2, in +// which case the sampled crop is an up-down flipped version of the original +// image. The width dimension is treated similarly. Normalized coordinates +// outside the `[0, 1]` range are allowed, in which case we use +// `extrapolation_value` to extrapolate the input image values. +// box_ind: A 1-D tensor of shape `[num_boxes]` with int32 values in `[0, batch)`. +// The value of `box_ind[i]` specifies the image that the `i`-th box refers to. // -// Returns the created operation. 
-func ResourceApplyCenteredRMSProp(scope *Scope, var_ tf.Output, mg tf.Output, ms tf.Output, mom tf.Output, lr tf.Output, rho tf.Output, momentum tf.Output, epsilon tf.Output, grad tf.Output, optional ...ResourceApplyCenteredRMSPropAttr) (o *tf.Operation) { +// Returns A 2-D tensor of shape `[num_boxes, 4]`. +func CropAndResizeGradBoxes(scope *Scope, grads tf.Output, image tf.Output, boxes tf.Output, box_ind tf.Output, optional ...CropAndResizeGradBoxesAttr) (output tf.Output) { if scope.Err() != nil { return } @@ -21261,753 +23817,698 @@ func ResourceApplyCenteredRMSProp(scope *Scope, var_ tf.Output, mg tf.Output, ms a(attrs) } opspec := tf.OpSpec{ - Type: "ResourceApplyCenteredRMSProp", + Type: "CropAndResizeGradBoxes", Input: []tf.Input{ - var_, mg, ms, mom, lr, rho, momentum, epsilon, grad, + grads, image, boxes, box_ind, }, Attrs: attrs, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0) } -// Returns a list of tensors with the same shapes and contents as the input -// -// tensors. +// Saves tensors in V2 checkpoint format. // -// This op can be used to override the gradient for complicated functions. For -// example, suppose y = f(x) and we wish to apply a custom function g for backprop -// such that dx = g(dy). In Python, +// By default, saves the named tensors in full. If the caller wishes to save +// specific slices of full tensors, "shape_and_slices" should be non-empty strings +// and correspondingly well-formed. // -// ```python -// with tf.get_default_graph().gradient_override_map( -// {'IdentityN': 'OverrideGradientWithG'}): -// y, _ = identity_n([f(x), x]) +// Arguments: +// prefix: Must have a single element. The prefix of the V2 checkpoint to which we +// write the tensors. +// tensor_names: shape {N}. The names of the tensors to be saved. +// shape_and_slices: shape {N}. The slice specs of the tensors to be saved. +// Empty strings indicate that they are non-partitioned tensors. 
+// tensors: `N` tensors to save. // -// @tf.RegisterGradient('OverrideGradientWithG') -// def ApplyG(op, dy, _): -// return [None, g(dy)] # Do not backprop to f(x). -// ``` -func IdentityN(scope *Scope, input []tf.Output) (output []tf.Output) { +// Returns the created operation. +func SaveV2(scope *Scope, prefix tf.Output, tensor_names tf.Output, shape_and_slices tf.Output, tensors []tf.Output) (o *tf.Operation) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "IdentityN", + Type: "SaveV2", Input: []tf.Input{ - tf.OutputList(input), + prefix, tensor_names, shape_and_slices, tf.OutputList(tensors), }, } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return + return scope.AddOperation(opspec) +} + +// StatsAggregatorHandleAttr is an optional argument to StatsAggregatorHandle. +type StatsAggregatorHandleAttr func(optionalAttr) + +// StatsAggregatorHandleContainer sets the optional container attribute to value. +// If not specified, defaults to "" +func StatsAggregatorHandleContainer(value string) StatsAggregatorHandleAttr { + return func(m optionalAttr) { + m["container"] = value } - var idx int - var err error - if output, idx, err = makeOutputList(op, idx, "output"); err != nil { - scope.UpdateErr("IdentityN", err) - return +} + +// StatsAggregatorHandleSharedName sets the optional shared_name attribute to value. +// If not specified, defaults to "" +func StatsAggregatorHandleSharedName(value string) StatsAggregatorHandleAttr { + return func(m optionalAttr) { + m["shared_name"] = value } - return output } -// Computes the gradient of the sigmoid of `x` wrt its input. -// -// Specifically, `grad = dy * y * (1 - y)`, where `y = sigmoid(x)`, and -// `dy` is the corresponding input gradient. -func SigmoidGrad(scope *Scope, y tf.Output, dy tf.Output) (z tf.Output) { +// Creates a statistics manager resource. 
+func StatsAggregatorHandle(scope *Scope, optional ...StatsAggregatorHandleAttr) (handle tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "SigmoidGrad", - Input: []tf.Input{ - y, dy, - }, + Type: "StatsAggregatorHandle", + + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Convert one or more images from HSV to RGB. +// Greedily selects a subset of bounding boxes in descending order of score, // -// Outputs a tensor of the same shape as the `images` tensor, containing the RGB -// value of the pixels. The output is only well defined if the value in `images` -// are in `[0,1]`. +// pruning away boxes that have high intersection-over-union (IOU) overlap +// with previously selected boxes. Bounding boxes are supplied as +// [y1, x1, y2, x2], where (y1, x1) and (y2, x2) are the coordinates of any +// diagonal pair of box corners and the coordinates can be provided as normalized +// (i.e., lying in the interval [0, 1]) or absolute. Note that this algorithm +// is agnostic to where the origin is in the coordinate system. Note that this +// algorithm is invariant to orthogonal transformations and translations +// of the coordinate system; thus translating or reflections of the coordinate +// system result in the same boxes being selected by the algorithm. // -// See `rgb_to_hsv` for a description of the HSV encoding. +// The output of this operation is a set of integers indexing into the input +// collection of bounding boxes representing the selected boxes. The bounding +// box coordinates corresponding to the selected indices can then be obtained +// using the `tf.gather operation`. For example: +// +// selected_indices = tf.image.non_max_suppression_v2( +// boxes, scores, max_output_size, iou_threshold) +// selected_boxes = tf.gather(boxes, selected_indices) // // Arguments: -// images: 1-D or higher rank. HSV data to convert. 
Last dimension must be size 3. +// boxes: A 2-D float tensor of shape `[num_boxes, 4]`. +// scores: A 1-D float tensor of shape `[num_boxes]` representing a single +// score corresponding to each box (each row of boxes). +// max_output_size: A scalar integer tensor representing the maximum number of +// boxes to be selected by non max suppression. +// iou_threshold: A 0-D float tensor representing the threshold for deciding whether +// boxes overlap too much with respect to IOU. // -// Returns `images` converted to RGB. -func HSVToRGB(scope *Scope, images tf.Output) (output tf.Output) { +// Returns A 1-D integer tensor of shape `[M]` representing the selected +// indices from the boxes tensor, where `M <= max_output_size`. +func NonMaxSuppressionV2(scope *Scope, boxes tf.Output, scores tf.Output, max_output_size tf.Output, iou_threshold tf.Output) (selected_indices tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "HSVToRGB", + Type: "NonMaxSuppressionV2", Input: []tf.Input{ - images, + boxes, scores, max_output_size, iou_threshold, }, } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// SampleDistortedBoundingBoxV2Attr is an optional argument to SampleDistortedBoundingBoxV2. -type SampleDistortedBoundingBoxV2Attr func(optionalAttr) - -// SampleDistortedBoundingBoxV2Seed sets the optional seed attribute to value. -// -// value: If either `seed` or `seed2` are set to non-zero, the random number -// generator is seeded by the given `seed`. Otherwise, it is seeded by a random -// seed. -// If not specified, defaults to 0 -func SampleDistortedBoundingBoxV2Seed(value int64) SampleDistortedBoundingBoxV2Attr { - return func(m optionalAttr) { - m["seed"] = value - } -} - -// SampleDistortedBoundingBoxV2Seed2 sets the optional seed2 attribute to value. -// -// value: A second seed to avoid seed collision. 
-// If not specified, defaults to 0 -func SampleDistortedBoundingBoxV2Seed2(value int64) SampleDistortedBoundingBoxV2Attr { - return func(m optionalAttr) { - m["seed2"] = value - } + op := scope.AddOperation(opspec) + return op.Output(0) } -// SampleDistortedBoundingBoxV2AspectRatioRange sets the optional aspect_ratio_range attribute to value. -// -// value: The cropped area of the image must have an aspect ratio = -// width / height within this range. -// If not specified, defaults to -func SampleDistortedBoundingBoxV2AspectRatioRange(value []float32) SampleDistortedBoundingBoxV2Attr { - return func(m optionalAttr) { - m["aspect_ratio_range"] = value - } -} +// EncodeProtoAttr is an optional argument to EncodeProto. +type EncodeProtoAttr func(optionalAttr) -// SampleDistortedBoundingBoxV2AreaRange sets the optional area_range attribute to value. -// -// value: The cropped area of the image must contain a fraction of the -// supplied image within in this range. -// If not specified, defaults to -func SampleDistortedBoundingBoxV2AreaRange(value []float32) SampleDistortedBoundingBoxV2Attr { +// EncodeProtoDescriptorSource sets the optional descriptor_source attribute to value. +// If not specified, defaults to "local://" +func EncodeProtoDescriptorSource(value string) EncodeProtoAttr { return func(m optionalAttr) { - m["area_range"] = value + m["descriptor_source"] = value } } -// SampleDistortedBoundingBoxV2MaxAttempts sets the optional max_attempts attribute to value. +// The op serializes protobuf messages provided in the input tensors. // -// value: Number of attempts at generating a cropped region of the image -// of the specified constraints. After `max_attempts` failures, return the entire -// image. 
-// If not specified, defaults to 100 -func SampleDistortedBoundingBoxV2MaxAttempts(value int64) SampleDistortedBoundingBoxV2Attr { - return func(m optionalAttr) { - m["max_attempts"] = value - } -} - -// SampleDistortedBoundingBoxV2UseImageIfNoBoundingBoxes sets the optional use_image_if_no_bounding_boxes attribute to value. +// The types of the tensors in `values` must match the schema for the +// fields specified in `field_names`. All the tensors in `values` must +// have a common shape prefix, *batch_shape*. // -// value: Controls behavior if no bounding boxes supplied. -// If true, assume an implicit bounding box covering the whole input. If false, -// raise an error. -// If not specified, defaults to false -func SampleDistortedBoundingBoxV2UseImageIfNoBoundingBoxes(value bool) SampleDistortedBoundingBoxV2Attr { - return func(m optionalAttr) { - m["use_image_if_no_bounding_boxes"] = value - } -} - -// Generate a single randomly distorted bounding box for an image. +// The `sizes` tensor specifies repeat counts for each field. The repeat +// count (last dimension) of a each tensor in `values` must be greater +// than or equal to corresponding repeat count in `sizes`. // -// Bounding box annotations are often supplied in addition to ground-truth labels -// in image recognition or object localization tasks. A common technique for -// training such a system is to randomly distort an image while preserving -// its content, i.e. *data augmentation*. This Op outputs a randomly distorted -// localization of an object, i.e. bounding box, given an `image_size`, -// `bounding_boxes` and a series of constraints. +// A `message_type` name must be provided to give context for the field +// names. The actual message descriptor can be looked up either in the +// linked-in descriptor pool or a filename provided by the caller using +// the `descriptor_source` attribute. // -// The output of this Op is a single bounding box that may be used to crop the -// original image. 
The output is returned as 3 tensors: `begin`, `size` and -// `bboxes`. The first 2 tensors can be fed directly into `tf.slice` to crop the -// image. The latter may be supplied to `tf.image.draw_bounding_boxes` to visualize -// what the bounding box looks like. +// The `descriptor_source` attribute selects a source of protocol +// descriptors to consult when looking up `message_type`. This may be a +// filename containing a serialized `FileDescriptorSet` message, +// or the special value `local://`, in which case only descriptors linked +// into the code will be searched; the filename can be on any filesystem +// accessible to TensorFlow. // -// Bounding boxes are supplied and returned as `[y_min, x_min, y_max, x_max]`. The -// bounding box coordinates are floats in `[0.0, 1.0]` relative to the width and -// height of the underlying image. +// You can build a `descriptor_source` file using the `--descriptor_set_out` +// and `--include_imports` options to the protocol compiler `protoc`. // -// For example, +// The `local://` database only covers descriptors linked into the +// code via C++ libraries, not Python imports. You can link in a proto descriptor +// by creating a cc_library target with alwayslink=1. // -// ```python -// # Generate a single distorted bounding box. -// begin, size, bbox_for_draw = tf.image.sample_distorted_bounding_box( -// tf.shape(image), -// bounding_boxes=bounding_boxes) +// There are a few special cases in the value mapping: // -// # Draw the bounding box in an image summary. -// image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0), -// bbox_for_draw) -// tf.summary.image('images_with_box', image_with_box) +// Submessage and group fields must be pre-serialized as TensorFlow strings. // -// # Employ the bounding box to distort the image. 
-// distorted_image = tf.slice(image, begin, size) -// ``` +// TensorFlow lacks support for unsigned int64s, so they must be +// represented as `tf.int64` with the same twos-complement bit pattern +// (the obvious way). // -// Note that if no bounding box information is available, setting -// `use_image_if_no_bounding_boxes = true` will assume there is a single implicit -// bounding box covering the whole image. If `use_image_if_no_bounding_boxes` is -// false and no bounding boxes are supplied, an error is raised. +// Unsigned int32 values can be represented exactly with `tf.int64`, or +// with sign wrapping if the input is of type `tf.int32`. // // Arguments: -// image_size: 1-D, containing `[height, width, channels]`. -// bounding_boxes: 3-D with shape `[batch, N, 4]` describing the N bounding boxes -// associated with the image. -// min_object_covered: The cropped area of the image must contain at least this -// fraction of any bounding box supplied. The value of this parameter should be -// non-negative. In the case of 0, the cropped area does not need to overlap -// any of the bounding boxes supplied. +// sizes: Tensor of int32 with shape `[batch_shape, len(field_names)]`. +// values: List of tensors containing values for the corresponding field. +// field_names: List of strings containing proto field names. +// message_type: Name of the proto message type to decode. // -// Returns 1-D, containing `[offset_height, offset_width, 0]`. Provide as input to -// `tf.slice`.1-D, containing `[target_height, target_width, -1]`. Provide as input to -// `tf.slice`.3-D with shape `[1, 1, 4]` containing the distorted bounding box. -// Provide as input to `tf.image.draw_bounding_boxes`. 
-func SampleDistortedBoundingBoxV2(scope *Scope, image_size tf.Output, bounding_boxes tf.Output, min_object_covered tf.Output, optional ...SampleDistortedBoundingBoxV2Attr) (begin tf.Output, size tf.Output, bboxes tf.Output) { +// Returns Tensor of serialized protos with shape `batch_shape`. +func EncodeProto(scope *Scope, sizes tf.Output, values []tf.Output, field_names []string, message_type string, optional ...EncodeProtoAttr) (bytes tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"field_names": field_names, "message_type": message_type} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "SampleDistortedBoundingBoxV2", + Type: "EncodeProto", Input: []tf.Input{ - image_size, bounding_boxes, min_object_covered, + sizes, tf.OutputList(values), }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return op.Output(0) } -// ExtractGlimpseAttr is an optional argument to ExtractGlimpse. -type ExtractGlimpseAttr func(optionalAttr) - -// ExtractGlimpseCentered sets the optional centered attribute to value. +// Creates a TensorArray for storing the gradients of values in the given handle. // -// value: indicates if the offset coordinates are centered relative to -// the image, in which case the (0, 0) offset is relative to the center -// of the input images. If false, the (0,0) offset corresponds to the -// upper left corner of the input images. -// If not specified, defaults to true -func ExtractGlimpseCentered(value bool) ExtractGlimpseAttr { - return func(m optionalAttr) { - m["centered"] = value - } -} - -// ExtractGlimpseNormalized sets the optional normalized attribute to value. +// If the given TensorArray gradient already exists, returns a reference to it. // -// value: indicates if the offset coordinates are normalized. 
-// If not specified, defaults to true -func ExtractGlimpseNormalized(value bool) ExtractGlimpseAttr { - return func(m optionalAttr) { - m["normalized"] = value - } -} - -// ExtractGlimpseUniformNoise sets the optional uniform_noise attribute to value. +// Locks the size of the original TensorArray by disabling its dynamic size flag. // -// value: indicates if the noise should be generated using a -// uniform distribution or a Gaussian distribution. -// If not specified, defaults to true -func ExtractGlimpseUniformNoise(value bool) ExtractGlimpseAttr { - return func(m optionalAttr) { - m["uniform_noise"] = value - } -} - -// Extracts a glimpse from the input tensor. +// **A note about the input flow_in:** // -// Returns a set of windows called glimpses extracted at location -// `offsets` from the input tensor. If the windows only partially -// overlaps the inputs, the non overlapping areas will be filled with -// random noise. +// The handle flow_in forces the execution of the gradient lookup to occur +// only after certain other operations have occurred. For example, when +// the forward TensorArray is dynamically sized, writes to this TensorArray +// may resize the object. The gradient TensorArray is statically sized based +// on the size of the forward TensorArray when this operation executes. +// Furthermore, the size of the forward TensorArray is frozen by this call. +// As a result, the flow is used to ensure that the call to generate the gradient +// TensorArray only happens after all writes are executed. // -// The result is a 4-D tensor of shape `[batch_size, glimpse_height, -// glimpse_width, channels]`. The channels and batch dimensions are the -// same as that of the input tensor. The height and width of the output -// windows are specified in the `size` parameter. 
+// In the case of dynamically sized TensorArrays, gradient computation should +// only be performed on read operations that have themselves been chained via +// flow to occur only after all writes have executed. That way the final size +// of the forward TensorArray is known when this operation is called. // -// The argument `normalized` and `centered` controls how the windows are built: +// **A note about the source attribute:** // -// * If the coordinates are normalized but not centered, 0.0 and 1.0 -// correspond to the minimum and maximum of each height and width -// dimension. -// * If the coordinates are both normalized and centered, they range from -// -1.0 to 1.0. The coordinates (-1.0, -1.0) correspond to the upper -// left corner, the lower right corner is located at (1.0, 1.0) and the -// center is at (0, 0). -// * If the coordinates are not normalized they are interpreted as -// numbers of pixels. +// TensorArray gradient calls use an accumulator TensorArray object. If +// multiple gradients are calculated and run in the same session, the multiple +// gradient nodes may accidentally flow through the same accumulator TensorArray. +// This double counts and generally breaks the TensorArray gradient flow. // -// Arguments: -// input: A 4-D float tensor of shape `[batch_size, height, width, channels]`. -// size: A 1-D tensor of 2 elements containing the size of the glimpses -// to extract. The glimpse height must be specified first, following -// by the glimpse width. -// offsets: A 2-D integer tensor of shape `[batch_size, 2]` containing -// the y, x locations of the center of each window. +// The solution is to identify which gradient call this particular +// TensorArray gradient is being called in. This is performed by identifying +// a unique string (e.g. "gradients", "gradients_1", ...) from the input +// gradient Tensor's name. This string is used as a suffix when creating +// the TensorArray gradient object here (the attribute `source`). 
// -// Returns A tensor representing the glimpses `[batch_size, -// glimpse_height, glimpse_width, channels]`. -func ExtractGlimpse(scope *Scope, input tf.Output, size tf.Output, offsets tf.Output, optional ...ExtractGlimpseAttr) (glimpse tf.Output) { +// The attribute `source` is added as a suffix to the forward TensorArray's +// name when performing the creation / lookup, so that each separate gradient +// calculation gets its own TensorArray accumulator. +// +// Arguments: +// handle: The handle to the forward TensorArray. +// flow_in: A float scalar that enforces proper chaining of operations. +// source: The gradient source string, used to decide which gradient TensorArray +// to return. +func TensorArrayGradV3(scope *Scope, handle tf.Output, flow_in tf.Output, source string) (grad_handle tf.Output, flow_out tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } + attrs := map[string]interface{}{"source": source} opspec := tf.OpSpec{ - Type: "ExtractGlimpse", + Type: "TensorArrayGradV3", Input: []tf.Input{ - input, size, offsets, + handle, flow_in, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1) } -// A container for an iterator resource. +// DecodeProtoV2Attr is an optional argument to DecodeProtoV2. +type DecodeProtoV2Attr func(optionalAttr) + +// DecodeProtoV2DescriptorSource sets the optional descriptor_source attribute to value. // -// Returns A handle to the iterator that can be passed to a "MakeIterator" -// or "IteratorGetNext" op. -func Iterator(scope *Scope, shared_name string, container string, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { - if scope.Err() != nil { - return +// value: Either the special value `local://` or a path to a file containing +// a serialized `FileDescriptorSet`. 
+// If not specified, defaults to "local://" +func DecodeProtoV2DescriptorSource(value string) DecodeProtoV2Attr { + return func(m optionalAttr) { + m["descriptor_source"] = value } - attrs := map[string]interface{}{"shared_name": shared_name, "container": container, "output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "Iterator", +} - Attrs: attrs, +// DecodeProtoV2MessageFormat sets the optional message_format attribute to value. +// +// value: Either `binary` or `text`. +// If not specified, defaults to "binary" +func DecodeProtoV2MessageFormat(value string) DecodeProtoV2Attr { + return func(m optionalAttr) { + m["message_format"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// CropAndResizeGradImageAttr is an optional argument to CropAndResizeGradImage. -type CropAndResizeGradImageAttr func(optionalAttr) - -// CropAndResizeGradImageMethod sets the optional method attribute to value. +// DecodeProtoV2Sanitize sets the optional sanitize attribute to value. // -// value: A string specifying the interpolation method. Only 'bilinear' is -// supported for now. -// If not specified, defaults to "bilinear" -func CropAndResizeGradImageMethod(value string) CropAndResizeGradImageAttr { +// value: Whether to sanitize the result or not. +// If not specified, defaults to false +func DecodeProtoV2Sanitize(value bool) DecodeProtoV2Attr { return func(m optionalAttr) { - m["method"] = value + m["sanitize"] = value } } -// Computes the gradient of the crop_and_resize op wrt the input image tensor. +// The op extracts fields from a serialized protocol buffers message into tensors. // -// Arguments: -// grads: A 4-D tensor of shape `[num_boxes, crop_height, crop_width, depth]`. -// boxes: A 2-D tensor of shape `[num_boxes, 4]`. The `i`-th row of the tensor -// specifies the coordinates of a box in the `box_ind[i]` image and is specified -// in normalized coordinates `[y1, x1, y2, x2]`. 
A normalized coordinate value of -// `y` is mapped to the image coordinate at `y * (image_height - 1)`, so as the -// `[0, 1]` interval of normalized image height is mapped to -// `[0, image_height - 1] in image height coordinates. We do allow y1 > y2, in -// which case the sampled crop is an up-down flipped version of the original -// image. The width dimension is treated similarly. Normalized coordinates -// outside the `[0, 1]` range are allowed, in which case we use -// `extrapolation_value` to extrapolate the input image values. -// box_ind: A 1-D tensor of shape `[num_boxes]` with int32 values in `[0, batch)`. -// The value of `box_ind[i]` specifies the image that the `i`-th box refers to. -// image_size: A 1-D tensor with value `[batch, image_height, image_width, depth]` -// containing the original image size. Both `image_height` and `image_width` need -// to be positive. +// The `decode_proto` op extracts fields from a serialized protocol buffers +// message into tensors. The fields in `field_names` are decoded and converted +// to the corresponding `output_types` if possible. // +// A `message_type` name must be provided to give context for the field +// names. The actual message descriptor can be looked up either in the +// linked-in descriptor pool or a filename provided by the caller using +// the `descriptor_source` attribute. // -// Returns A 4-D tensor of shape `[batch, image_height, image_width, depth]`. -func CropAndResizeGradImage(scope *Scope, grads tf.Output, boxes tf.Output, box_ind tf.Output, image_size tf.Output, T tf.DataType, optional ...CropAndResizeGradImageAttr) (output tf.Output) { +// Each output tensor is a dense tensor. This means that it is padded to +// hold the largest number of repeated elements seen in the input +// minibatch. (The shape is also padded by one to prevent zero-sized +// dimensions). The actual repeat counts for each example in the +// minibatch can be found in the `sizes` output. 
In many cases the output +// of `decode_proto` is fed immediately into tf.squeeze if missing values +// are not a concern. When using tf.squeeze, always pass the squeeze +// dimension explicitly to avoid surprises. +// +// For the most part, the mapping between Proto field types and +// TensorFlow dtypes is straightforward. However, there are a few +// special cases: +// +// - A proto field that contains a submessage or group can only be converted +// to `DT_STRING` (the serialized submessage). This is to reduce the +// complexity of the API. The resulting string can be used as input +// to another instance of the decode_proto op. +// +// - TensorFlow lacks support for unsigned integers. The ops represent uint64 +// types as a `DT_INT64` with the same twos-complement bit pattern +// (the obvious way). Unsigned int32 values can be represented exactly by +// specifying type `DT_INT64`, or using twos-complement if the caller +// specifies `DT_INT32` in the `output_types` attribute. +// +// The `descriptor_source` attribute selects a source of protocol +// descriptors to consult when looking up `message_type`. This may be a +// filename containing a serialized `FileDescriptorSet` message, +// or the special value `local://`, in which case only descriptors linked +// into the code will be searched; the filename can be on any filesystem +// accessible to TensorFlow. +// +// You can build a `descriptor_source` file using the `--descriptor_set_out` +// and `--include_imports` options to the protocol compiler `protoc`. +// +// The `local://` database only covers descriptors linked into the +// code via C++ libraries, not Python imports. You can link in a proto descriptor +// by creating a cc_library target with alwayslink=1. +// +// Both binary and text proto serializations are supported, and can be +// chosen using the `format` attribute. +// +// Arguments: +// bytes: Tensor of serialized protos with shape `batch_shape`. 
+// message_type: Name of the proto message type to decode. +// field_names: List of strings containing proto field names. +// output_types: List of TF types to use for the respective field in field_names. +// +// Returns Tensor of int32 with shape `[batch_shape, len(field_names)]`. +// Each entry is the number of values found for the corresponding field. +// Optional fields may have 0 or 1 values.List of tensors containing values for the corresponding field. +// `values[i]` has datatype `output_types[i]` +// and shape `[batch_shape, max(sizes[...,i])]`. +func DecodeProtoV2(scope *Scope, bytes tf.Output, message_type string, field_names []string, output_types []tf.DataType, optional ...DecodeProtoV2Attr) (sizes tf.Output, values []tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"T": T} + attrs := map[string]interface{}{"message_type": message_type, "field_names": field_names, "output_types": output_types} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "CropAndResizeGradImage", + Type: "DecodeProtoV2", Input: []tf.Input{ - grads, boxes, box_ind, image_size, + bytes, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) -} - -// ShuffleDatasetAttr is an optional argument to ShuffleDataset. -type ShuffleDatasetAttr func(optionalAttr) - -// ShuffleDatasetReshuffleEachIteration sets the optional reshuffle_each_iteration attribute to value. -// -// value: If true, each iterator over this dataset will be given -// a different pseudorandomly generated seed, based on a sequence seeded by the -// `seed` and `seed2` inputs. If false, each iterator will be given the same -// seed, and repeated iteration over this dataset will yield the exact same -// sequence of results. 
-// If not specified, defaults to true -func ShuffleDatasetReshuffleEachIteration(value bool) ShuffleDatasetAttr { - return func(m optionalAttr) { - m["reshuffle_each_iteration"] = value + if scope.Err() != nil { + return } + var idx int + var err error + sizes = op.Output(idx) + if values, idx, err = makeOutputList(op, idx, "values"); err != nil { + scope.UpdateErr("DecodeProtoV2", err) + return + } + return sizes, values } -// Creates a dataset that shuffles elements from `input_dataset` pseudorandomly. -// -// Arguments: -// -// buffer_size: The number of output elements to buffer in an iterator over -// this dataset. Compare with the `min_after_dequeue` attr when creating a -// `RandomShuffleQueue`. -// seed: A scalar seed for the random number generator. If either `seed` or -// `seed2` is set to be non-zero, the random number generator is seeded -// by the given seed. Otherwise, a random seed is used. -// seed2: A second scalar seed to avoid seed collision. -// -// -func ShuffleDataset(scope *Scope, input_dataset tf.Output, buffer_size tf.Output, seed tf.Output, seed2 tf.Output, output_types []tf.DataType, output_shapes []tf.Shape, optional ...ShuffleDatasetAttr) (handle tf.Output) { +// Creates a dataset that splits a SparseTensor into elements row-wise. +func SparseTensorSliceDataset(scope *Scope, indices tf.Output, values tf.Output, dense_shape tf.Output) (handle tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "ShuffleDataset", + Type: "SparseTensorSliceDataset", Input: []tf.Input{ - input_dataset, buffer_size, seed, seed2, + indices, values, dense_shape, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// 3D fast Fourier transform. -// -// Computes the 3-dimensional discrete Fourier transform over the inner-most 3 -// dimensions of `input`. 
-// -// Arguments: -// input: A complex64 tensor. +// Returns x / y element-wise for real types. // -// Returns A complex64 tensor of the same shape as `input`. The inner-most 3 -// dimensions of `input` are replaced with their 3D Fourier transform. +// If `x` and `y` are reals, this will return the floating-point division. // -// @compatibility(numpy) -// Equivalent to np.fft.fftn with 3 dimensions. -// @end_compatibility -func FFT3D(scope *Scope, input tf.Output) (output tf.Output) { +// *NOTE*: `Div` supports broadcasting. More about broadcasting +// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +func RealDiv(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "FFT3D", + Type: "RealDiv", Input: []tf.Input{ - input, + x, y, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// CropAndResizeGradBoxesAttr is an optional argument to CropAndResizeGradBoxes. -type CropAndResizeGradBoxesAttr func(optionalAttr) - -// CropAndResizeGradBoxesMethod sets the optional method attribute to value. +// Adds v into specified rows of x. // -// value: A string specifying the interpolation method. Only 'bilinear' is -// supported for now. -// If not specified, defaults to "bilinear" -func CropAndResizeGradBoxesMethod(value string) CropAndResizeGradBoxesAttr { - return func(m optionalAttr) { - m["method"] = value - } -} - -// Computes the gradient of the crop_and_resize op wrt the input boxes tensor. +// Computes y = x; y[i, :] += v; return y. // // Arguments: -// grads: A 4-D tensor of shape `[num_boxes, crop_height, crop_width, depth]`. -// image: A 4-D tensor of shape `[batch, image_height, image_width, depth]`. -// Both `image_height` and `image_width` need to be positive. -// boxes: A 2-D tensor of shape `[num_boxes, 4]`. The `i`-th row of the tensor -// specifies the coordinates of a box in the `box_ind[i]` image and is specified -// in normalized coordinates `[y1, x1, y2, x2]`. 
A normalized coordinate value of -// `y` is mapped to the image coordinate at `y * (image_height - 1)`, so as the -// `[0, 1]` interval of normalized image height is mapped to -// `[0, image_height - 1] in image height coordinates. We do allow y1 > y2, in -// which case the sampled crop is an up-down flipped version of the original -// image. The width dimension is treated similarly. Normalized coordinates -// outside the `[0, 1]` range are allowed, in which case we use -// `extrapolation_value` to extrapolate the input image values. -// box_ind: A 1-D tensor of shape `[num_boxes]` with int32 values in `[0, batch)`. -// The value of `box_ind[i]` specifies the image that the `i`-th box refers to. +// x: A `Tensor` of type T. +// i: A vector. Indices into the left-most dimension of `x`. +// v: A `Tensor` of type T. Same dimension sizes as x except the first dimension, which must be the same as i's size. // -// Returns A 2-D tensor of shape `[num_boxes, 4]`. -func CropAndResizeGradBoxes(scope *Scope, grads tf.Output, image tf.Output, boxes tf.Output, box_ind tf.Output, optional ...CropAndResizeGradBoxesAttr) (output tf.Output) { +// Returns A `Tensor` of type T. An alias of `x`. The content of `y` is undefined if there are duplicates in `i`. +func InplaceAdd(scope *Scope, x tf.Output, i tf.Output, v tf.Output) (y tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "CropAndResizeGradBoxes", + Type: "InplaceAdd", Input: []tf.Input{ - grads, image, boxes, box_ind, + x, i, v, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Saves tensors in V2 checkpoint format. -// -// By default, saves the named tensors in full. If the caller wishes to save -// specific slices of full tensors, "shape_and_slices" should be non-empty strings -// and correspondingly well-formed. +// Restore a Reader to its initial clean state. 
// // Arguments: -// prefix: Must have a single element. The prefix of the V2 checkpoint to which we -// write the tensors. -// tensor_names: shape {N}. The names of the tensors to be saved. -// shape_and_slices: shape {N}. The slice specs of the tensors to be saved. -// Empty strings indicate that they are non-partitioned tensors. -// tensors: `N` tensors to save. +// reader_handle: Handle to a Reader. // // Returns the created operation. -func SaveV2(scope *Scope, prefix tf.Output, tensor_names tf.Output, shape_and_slices tf.Output, tensors []tf.Output) (o *tf.Operation) { +func ReaderResetV2(scope *Scope, reader_handle tf.Output) (o *tf.Operation) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "SaveV2", + Type: "ReaderResetV2", Input: []tf.Input{ - prefix, tensor_names, shape_and_slices, tf.OutputList(tensors), + reader_handle, }, } return scope.AddOperation(opspec) } -// StatsAggregatorHandleAttr is an optional argument to StatsAggregatorHandle. -type StatsAggregatorHandleAttr func(optionalAttr) +// RpcAttr is an optional argument to Rpc. +type RpcAttr func(optionalAttr) -// StatsAggregatorHandleContainer sets the optional container attribute to value. +// RpcProtocol sets the optional protocol attribute to value. +// +// value: RPC protocol to use. Empty string means use the default protocol. +// Options include 'grpc'. // If not specified, defaults to "" -func StatsAggregatorHandleContainer(value string) StatsAggregatorHandleAttr { +func RpcProtocol(value string) RpcAttr { return func(m optionalAttr) { - m["container"] = value + m["protocol"] = value } } -// StatsAggregatorHandleSharedName sets the optional shared_name attribute to value. -// If not specified, defaults to "" -func StatsAggregatorHandleSharedName(value string) StatsAggregatorHandleAttr { +// RpcFailFast sets the optional fail_fast attribute to value. +// +// value: `boolean`. 
If `true` (default), then failures to connect +// (i.e., the server does not immediately respond) cause an RPC failure. +// If not specified, defaults to true +func RpcFailFast(value bool) RpcAttr { return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// Creates a statistics manager resource. -func StatsAggregatorHandle(scope *Scope, optional ...StatsAggregatorHandleAttr) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "StatsAggregatorHandle", - - Attrs: attrs, + m["fail_fast"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// Greedily selects a subset of bounding boxes in descending order of score, -// -// pruning away boxes that have high intersection-over-union (IOU) overlap -// with previously selected boxes. Bounding boxes are supplied as -// [y1, x1, y2, x2], where (y1, x1) and (y2, x2) are the coordinates of any -// diagonal pair of box corners and the coordinates can be provided as normalized -// (i.e., lying in the interval [0, 1]) or absolute. Note that this algorithm -// is agnostic to where the origin is in the coordinate system. Note that this -// algorithm is invariant to orthogonal transformations and translations -// of the coordinate system; thus translating or reflections of the coordinate -// system result in the same boxes being selected by the algorithm. -// -// The output of this operation is a set of integers indexing into the input -// collection of bounding boxes representing the selected boxes. The bounding -// box coordinates corresponding to the selected indices can then be obtained -// using the `tf.gather operation`. For example: -// -// selected_indices = tf.image.non_max_suppression_v2( -// boxes, scores, max_output_size, iou_threshold) -// selected_boxes = tf.gather(boxes, selected_indices) -// -// Arguments: -// boxes: A 2-D float tensor of shape `[num_boxes, 4]`. 
-// scores: A 1-D float tensor of shape `[num_boxes]` representing a single -// score corresponding to each box (each row of boxes). -// max_output_size: A scalar integer tensor representing the maximum number of -// boxes to be selected by non max suppression. -// iou_threshold: A 0-D float tensor representing the threshold for deciding whether -// boxes overlap too much with respect to IOU. +// RpcTimeoutInMs sets the optional timeout_in_ms attribute to value. // -// Returns A 1-D integer tensor of shape `[M]` representing the selected -// indices from the boxes tensor, where `M <= max_output_size`. -func NonMaxSuppressionV2(scope *Scope, boxes tf.Output, scores tf.Output, max_output_size tf.Output, iou_threshold tf.Output) (selected_indices tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "NonMaxSuppressionV2", - Input: []tf.Input{ - boxes, scores, max_output_size, iou_threshold, - }, +// value: `int`. If `0` (default), then the kernel will run the RPC +// request and only time out if the RPC deadline passes or the session times out. +// If this value is greater than `0`, then the op will raise an exception if +// the RPC takes longer than `timeout_in_ms`. +// If not specified, defaults to 0 +func RpcTimeoutInMs(value int64) RpcAttr { + return func(m optionalAttr) { + m["timeout_in_ms"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// Reshapes a tensor. -// -// Given `tensor`, this operation returns a tensor that has the same values -// as `tensor` with shape `shape`. -// -// If one component of `shape` is the special value -1, the size of that dimension -// is computed so that the total size remains constant. In particular, a `shape` -// of `[-1]` flattens into 1-D. At most one component of `shape` can be -1. -// -// If `shape` is 1-D or higher, then the operation returns a tensor with shape -// `shape` filled with the values of `tensor`. 
In this case, the number of elements -// implied by `shape` must be the same as the number of elements in `tensor`. -// -// For example: -// -// ``` -// # tensor 't' is [1, 2, 3, 4, 5, 6, 7, 8, 9] -// # tensor 't' has shape [9] -// reshape(t, [3, 3]) ==> [[1, 2, 3], -// [4, 5, 6], -// [7, 8, 9]] +// Perform batches of RPC requests. // -// # tensor 't' is [[[1, 1], [2, 2]], -// # [[3, 3], [4, 4]]] -// # tensor 't' has shape [2, 2, 2] -// reshape(t, [2, 4]) ==> [[1, 1, 2, 2], -// [3, 3, 4, 4]] +// This op asynchronously performs either a single RPC request, or a batch +// of requests. RPC requests are defined by three main parameters: // -// # tensor 't' is [[[1, 1, 1], -// # [2, 2, 2]], -// # [[3, 3, 3], -// # [4, 4, 4]], -// # [[5, 5, 5], -// # [6, 6, 6]]] -// # tensor 't' has shape [3, 2, 3] -// # pass '[-1]' to flatten 't' -// reshape(t, [-1]) ==> [1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6] +// - `address` (the host+port or BNS address of the request) +// - `method` (the RPC method name for the request) +// - `request` (the serialized proto string, or vector of strings, +// of the RPC request argument). 
// -// # -1 can also be used to infer the shape +// For example, if you have an RPC service running on port localhost:2345, +// and its interface is configured with the following proto declaration: // -// # -1 is inferred to be 9: -// reshape(t, [2, -1]) ==> [[1, 1, 1, 2, 2, 2, 3, 3, 3], -// [4, 4, 4, 5, 5, 5, 6, 6, 6]] -// # -1 is inferred to be 2: -// reshape(t, [-1, 9]) ==> [[1, 1, 1, 2, 2, 2, 3, 3, 3], -// [4, 4, 4, 5, 5, 5, 6, 6, 6]] -// # -1 is inferred to be 3: -// reshape(t, [ 2, -1, 3]) ==> [[[1, 1, 1], -// [2, 2, 2], -// [3, 3, 3]], -// [[4, 4, 4], -// [5, 5, 5], -// [6, 6, 6]]] +// ``` +// service MyService { +// rpc MyMethod(MyRequestProto) returns (MyResponseProto) { +// } +// }; +// ``` +// +// then call this op with arguments: // -// # tensor 't' is [7] -// # shape `[]` reshapes to a scalar -// reshape(t, []) ==> 7 // ``` +// address = "localhost:2345" +// method = "MyService/MyMethod" +// ``` +// +// The `request` tensor is a string tensor representing serialized `MyRequestProto` +// strings; and the output string tensor `response` will have the same shape +// and contain (upon successful completion) corresponding serialized +// `MyResponseProto` strings. +// +// For example, to send a single, empty, `MyRequestProto`, call +// this op with `request = ""`. To send 5 **parallel** empty requests, +// call this op with `request = ["", "", "", "", ""]`. +// +// More generally, one can create a batch of `MyRequestProto` serialized protos +// from regular batched tensors using the `encode_proto` op, and convert +// the response `MyResponseProto` serialized protos to batched tensors +// using the `decode_proto` op. +// +// **NOTE** Working with serialized proto strings is faster than instantiating +// actual proto objects in memory, so no performance degradation is expected +// compared to writing custom kernels for this workflow. +// +// If the connection fails or the remote worker returns an error +// status, the op reraises this exception locally. 
+// +// See the `TryRpc` op if you prefer to handle RPC failures manually in the graph. // // Arguments: +// address: `0-D` or `1-D`. The address (i.e. host_name:port) of the RPC server. +// If this tensor has more than 1 element, then multiple parallel rpc requests +// are sent. This argument broadcasts with `method` and `request`. +// method: `0-D` or `1-D`. The method address on the RPC server. +// If this tensor has more than 1 element, then multiple parallel rpc requests +// are sent. This argument broadcasts with `address` and `request`. +// request: `0-D` or `1-D`. Serialized proto strings: the rpc request argument. +// If this tensor has more than 1 element, then multiple parallel rpc requests +// are sent. This argument broadcasts with `address` and `method`. // -// shape: Defines the shape of the output tensor. -func Reshape(scope *Scope, tensor tf.Output, shape tf.Output) (output tf.Output) { +// Returns Same shape as `request`. Serialized proto strings: the rpc responses. +func Rpc(scope *Scope, address tf.Output, method tf.Output, request tf.Output, optional ...RpcAttr) (response tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "Reshape", + Type: "Rpc", Input: []tf.Input{ - tensor, shape, + address, method, request, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Creates a dataset that splits a SparseTensor into elements row-wise. -func SparseTensorSliceDataset(scope *Scope, indices tf.Output, values tf.Output, dense_shape tf.Output) (handle tf.Output) { +// OrderedMapStageAttr is an optional argument to OrderedMapStage. +type OrderedMapStageAttr func(optionalAttr) + +// OrderedMapStageCapacity sets the optional capacity attribute to value. +// +// value: Maximum number of elements in the Staging Area. If > 0, inserts +// on the container will block when the capacity is reached. 
+// If not specified, defaults to 0 +// +// REQUIRES: value >= 0 +func OrderedMapStageCapacity(value int64) OrderedMapStageAttr { + return func(m optionalAttr) { + m["capacity"] = value + } +} + +// OrderedMapStageMemoryLimit sets the optional memory_limit attribute to value. +// If not specified, defaults to 0 +// +// REQUIRES: value >= 0 +func OrderedMapStageMemoryLimit(value int64) OrderedMapStageAttr { + return func(m optionalAttr) { + m["memory_limit"] = value + } +} + +// OrderedMapStageContainer sets the optional container attribute to value. +// +// value: If non-empty, this queue is placed in the given container. Otherwise, +// a default container is used. +// If not specified, defaults to "" +func OrderedMapStageContainer(value string) OrderedMapStageAttr { + return func(m optionalAttr) { + m["container"] = value + } +} + +// OrderedMapStageSharedName sets the optional shared_name attribute to value. +// +// value: It is necessary to match this name to the matching Unstage Op. +// If not specified, defaults to "" +func OrderedMapStageSharedName(value string) OrderedMapStageAttr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// Stage (key, values) in the underlying container which behaves like a ordered +// +// associative container. Elements are ordered by key. +// +// Arguments: +// key: int64 +// +// values: a list of tensors +// dtypes A list of data types that inserted values should adhere to. +// +// +// Returns the created operation. 
+func OrderedMapStage(scope *Scope, key tf.Output, indices tf.Output, values []tf.Output, dtypes []tf.DataType, optional ...OrderedMapStageAttr) (o *tf.Operation) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"dtypes": dtypes} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "SparseTensorSliceDataset", + Type: "OrderedMapStage", Input: []tf.Input{ - indices, values, dense_shape, + key, indices, tf.OutputList(values), }, + Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } -// Returns x / y element-wise for real types. +// StackPushV2Attr is an optional argument to StackPushV2. +type StackPushV2Attr func(optionalAttr) + +// StackPushV2SwapMemory sets the optional swap_memory attribute to value. // -// If `x` and `y` are reals, this will return the floating-point division. +// value: Swap `elem` to CPU. Default to false. +// If not specified, defaults to false +func StackPushV2SwapMemory(value bool) StackPushV2Attr { + return func(m optionalAttr) { + m["swap_memory"] = value + } +} + +// Push an element onto the stack. // -// *NOTE*: `Div` supports broadcasting. More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func RealDiv(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { +// Arguments: +// handle: The handle to a stack. +// elem: The tensor to be pushed onto the stack. +// +// Returns The same tensor as the input 'elem'. 
+func StackPushV2(scope *Scope, handle tf.Output, elem tf.Output, optional ...StackPushV2Attr) (output tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "RealDiv", + Type: "StackPushV2", Input: []tf.Input{ - x, y, + handle, elem, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) @@ -22032,11 +24533,8 @@ func ConcatenateDataset(scope *Scope, input_dataset tf.Output, another_dataset t // Adds a value to the current value of a variable. // -// Any ReadVariableOp which depends directly or indirectly on this assign is -// guaranteed to see the incremented value or a subsequent newer one. -// -// Outputs the incremented value, which can be used to totally order the -// increments to this variable. +// Any ReadVariableOp with a control dependency on this op is guaranteed to +// see the incremented value or a subsequent newer one. // // Arguments: // resource: handle to the resource in which to store the variable. @@ -22073,6 +24571,63 @@ func LatencyStatsDataset(scope *Scope, input_dataset tf.Output, tag tf.Output, o return op.Output(0) } +// MapSizeAttr is an optional argument to MapSize. +type MapSizeAttr func(optionalAttr) + +// MapSizeCapacity sets the optional capacity attribute to value. +// If not specified, defaults to 0 +// +// REQUIRES: value >= 0 +func MapSizeCapacity(value int64) MapSizeAttr { + return func(m optionalAttr) { + m["capacity"] = value + } +} + +// MapSizeMemoryLimit sets the optional memory_limit attribute to value. +// If not specified, defaults to 0 +// +// REQUIRES: value >= 0 +func MapSizeMemoryLimit(value int64) MapSizeAttr { + return func(m optionalAttr) { + m["memory_limit"] = value + } +} + +// MapSizeContainer sets the optional container attribute to value. 
+// If not specified, defaults to "" +func MapSizeContainer(value string) MapSizeAttr { + return func(m optionalAttr) { + m["container"] = value + } +} + +// MapSizeSharedName sets the optional shared_name attribute to value. +// If not specified, defaults to "" +func MapSizeSharedName(value string) MapSizeAttr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// Op returns the number of elements in the underlying container. +func MapSize(scope *Scope, dtypes []tf.DataType, optional ...MapSizeAttr) (size tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"dtypes": dtypes} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "MapSize", + + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Convert JSON-encoded Example records to binary protocol buffer strings. // // This op translates a tensor containing Example records, encoded using @@ -22246,100 +24801,36 @@ func PaddedBatchDataset(scope *Scope, input_dataset tf.Output, batch_size tf.Out // that a particular dimension should use the maximum size of all batch elements. // // -func DenseToSparseBatchDataset(scope *Scope, input_dataset tf.Output, batch_size tf.Output, row_shape tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "DenseToSparseBatchDataset", - Input: []tf.Input{ - input_dataset, batch_size, row_shape, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Deprecated. 
Use TensorArrayGradV3 -// -// DEPRECATED at GraphDef version 26: Use TensorArrayGradV3 -func TensorArrayGradV2(scope *Scope, handle tf.Output, flow_in tf.Output, source string) (grad_handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"source": source} - opspec := tf.OpSpec{ - Type: "TensorArrayGradV2", - Input: []tf.Input{ - handle, flow_in, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// ResourceSparseApplyAdadeltaAttr is an optional argument to ResourceSparseApplyAdadelta. -type ResourceSparseApplyAdadeltaAttr func(optionalAttr) - -// ResourceSparseApplyAdadeltaUseLocking sets the optional use_locking attribute to value. -// -// value: If True, updating of the var and accum tensors will be protected by -// a lock; otherwise the behavior is undefined, but may exhibit less contention. -// If not specified, defaults to false -func ResourceSparseApplyAdadeltaUseLocking(value bool) ResourceSparseApplyAdadeltaAttr { - return func(m optionalAttr) { - m["use_locking"] = value - } -} - -// var: Should be from a Variable(). -// -// Arguments: -// -// accum: Should be from a Variable(). -// accum_update: : Should be from a Variable(). -// lr: Learning rate. Must be a scalar. -// rho: Decay factor. Must be a scalar. -// epsilon: Constant factor. Must be a scalar. -// grad: The gradient. -// indices: A vector of indices into the first dimension of var and accum. -// -// Returns the created operation. 
-func ResourceSparseApplyAdadelta(scope *Scope, var_ tf.Output, accum tf.Output, accum_update tf.Output, lr tf.Output, rho tf.Output, epsilon tf.Output, grad tf.Output, indices tf.Output, optional ...ResourceSparseApplyAdadeltaAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) +func DenseToSparseBatchDataset(scope *Scope, input_dataset tf.Output, batch_size tf.Output, row_shape tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { + if scope.Err() != nil { + return } + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} opspec := tf.OpSpec{ - Type: "ResourceSparseApplyAdadelta", + Type: "DenseToSparseBatchDataset", Input: []tf.Input{ - var_, accum, accum_update, lr, rho, epsilon, grad, indices, + input_dataset, batch_size, row_shape, }, Attrs: attrs, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0) } -// Identity op for gradient debugging. +// Deprecated. Use TensorArrayGradV3 // -// This op is hidden from public in Python. It is used by TensorFlow Debugger to -// register gradient tensors for gradient debugging. -// This op operates on non-reference-type tensors. 
-func DebugGradientIdentity(scope *Scope, input tf.Output) (output tf.Output) { +// DEPRECATED at GraphDef version 26: Use TensorArrayGradV3 +func TensorArrayGradV2(scope *Scope, handle tf.Output, flow_in tf.Output, source string) (grad_handle tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"source": source} opspec := tf.OpSpec{ - Type: "DebugGradientIdentity", + Type: "TensorArrayGradV2", Input: []tf.Input{ - input, + handle, flow_in, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) @@ -22763,6 +25254,35 @@ func MakeIterator(scope *Scope, dataset tf.Output, iterator tf.Output) (o *tf.Op return scope.AddOperation(opspec) } +// Makes the summary of accumulated stats for the batch. +// +// The summary stats contains gradients and hessians accumulated into the corresponding node and bucket for each example. +// +// Arguments: +// node_ids: int32 Rank 1 Tensor containing node ids, which each example falls into for the requested layer. +// gradients: float32; Rank 2 Tensor (shape=[#examples, 1]) for gradients. +// hessians: float32; Rank 2 Tensor (shape=[#examples, 1]) for hessians. +// bucketized_features_list: int32 list of Rank 1 Tensors, each containing the bucketized feature (for each feature column). +// max_splits: int; the maximum number of splits possible in the whole tree. +// num_buckets: int; equals to the maximum possible value of bucketized feature. +// +// Returns output Rank 4 Tensor (shape=[#features, #splits, #buckets, 2]) containing accumulated stats put into the corresponding node and bucket. The first index of 4th dimension refers to gradients, and the second to hessians. 
+func BoostedTreesMakeStatsSummary(scope *Scope, node_ids tf.Output, gradients tf.Output, hessians tf.Output, bucketized_features_list []tf.Output, max_splits int64, num_buckets int64) (stats_summary tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"max_splits": max_splits, "num_buckets": num_buckets} + opspec := tf.OpSpec{ + Type: "BoostedTreesMakeStatsSummary", + Input: []tf.Input{ + node_ids, gradients, hessians, tf.OutputList(bucketized_features_list), + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Adjust the contrast of one or more images. // // `images` is a tensor of at least 3 dimensions. The last 3 dimensions are @@ -22927,42 +25447,6 @@ func IteratorFromStringHandle(scope *Scope, string_handle tf.Output, optional .. return op.Output(0) } -// Computes arctangent of `y/x` element-wise, respecting signs of the arguments. -// -// This is the angle \( \theta \in [-\pi, \pi] \) such that -// \[ x = r \cos(\theta) \] -// and -// \[ y = r \sin(\theta) \] -// where \(r = \sqrt(x^2 + y^2) \). -func Atan2(scope *Scope, y tf.Output, x tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Atan2", - Input: []tf.Input{ - y, x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Return a tensor with the same shape and contents as the input tensor or value. -func Identity(scope *Scope, input tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Identity", - Input: []tf.Input{ - input, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // Gather slices from `params` axis `axis` according to `indices`. // // `indices` must be an integer tensor of any dimension (usually 0-D or 1-D). @@ -22987,6 +25471,10 @@ func Identity(scope *Scope, input tf.Output) (output tf.Output) { // // // +// Note that on CPU, if an out of bound index is found, an error is returned. 
+// On GPU, if an out of bound index is found, a 0 is stored in the +// corresponding output value. +// // Arguments: // params: The tensor from which to gather values. Must be at least rank // `axis + 1`. @@ -23483,6 +25971,28 @@ func InvGrad(scope *Scope, y tf.Output, dy tf.Output) (z tf.Output) { return op.Output(0) } +// List of the given size with empty elements. +// +// element_shape: the shape of the future elements of the list +// num_elements: the number of elements to reserve +// handle: the output list +// element_dtype: the desired type of elements in the list. +func TensorListReserve(scope *Scope, element_shape tf.Output, num_elements tf.Output, element_dtype tf.DataType) (handle tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"element_dtype": element_dtype} + opspec := tf.OpSpec{ + Type: "TensorListReserve", + Input: []tf.Input{ + element_shape, num_elements, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // PriorityQueueV2Attr is an optional argument to PriorityQueueV2. type PriorityQueueV2Attr func(optionalAttr) @@ -24022,122 +26532,6 @@ func StackV2(scope *Scope, max_size tf.Output, elem_type tf.DataType, optional . return op.Output(0) } -// OrderedMapStageAttr is an optional argument to OrderedMapStage. -type OrderedMapStageAttr func(optionalAttr) - -// OrderedMapStageCapacity sets the optional capacity attribute to value. -// -// value: Maximum number of elements in the Staging Area. If > 0, inserts -// on the container will block when the capacity is reached. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func OrderedMapStageCapacity(value int64) OrderedMapStageAttr { - return func(m optionalAttr) { - m["capacity"] = value - } -} - -// OrderedMapStageMemoryLimit sets the optional memory_limit attribute to value. 
-// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func OrderedMapStageMemoryLimit(value int64) OrderedMapStageAttr { - return func(m optionalAttr) { - m["memory_limit"] = value - } -} - -// OrderedMapStageContainer sets the optional container attribute to value. -// -// value: If non-empty, this queue is placed in the given container. Otherwise, -// a default container is used. -// If not specified, defaults to "" -func OrderedMapStageContainer(value string) OrderedMapStageAttr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// OrderedMapStageSharedName sets the optional shared_name attribute to value. -// -// value: It is necessary to match this name to the matching Unstage Op. -// If not specified, defaults to "" -func OrderedMapStageSharedName(value string) OrderedMapStageAttr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// Stage (key, values) in the underlying container which behaves like a ordered -// -// associative container. Elements are ordered by key. -// -// Arguments: -// key: int64 -// -// values: a list of tensors -// dtypes A list of data types that inserted values should adhere to. -// -// -// Returns the created operation. -func OrderedMapStage(scope *Scope, key tf.Output, indices tf.Output, values []tf.Output, dtypes []tf.DataType, optional ...OrderedMapStageAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtypes": dtypes} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "OrderedMapStage", - Input: []tf.Input{ - key, indices, tf.OutputList(values), - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// StackPushV2Attr is an optional argument to StackPushV2. -type StackPushV2Attr func(optionalAttr) - -// StackPushV2SwapMemory sets the optional swap_memory attribute to value. -// -// value: Swap `elem` to CPU. Default to false. 
-// If not specified, defaults to false -func StackPushV2SwapMemory(value bool) StackPushV2Attr { - return func(m optionalAttr) { - m["swap_memory"] = value - } -} - -// Push an element onto the stack. -// -// Arguments: -// handle: The handle to a stack. -// elem: The tensor to be pushed onto the stack. -// -// Returns The same tensor as the input 'elem'. -func StackPushV2(scope *Scope, handle tf.Output, elem tf.Output, optional ...StackPushV2Attr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "StackPushV2", - Input: []tf.Input{ - handle, elem, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // FusedBatchNormGradV2Attr is an optional argument to FusedBatchNormGradV2. type FusedBatchNormGradV2Attr func(optionalAttr) @@ -24260,64 +26654,114 @@ func DecodeCompressed(scope *Scope, bytes tf.Output, optional ...DecodeCompresse return op.Output(0) } -// Creates a TensorArray for storing the gradients of values in the given handle. -// -// If the given TensorArray gradient already exists, returns a reference to it. -// -// Locks the size of the original TensorArray by disabling its dynamic size flag. -// -// **A note about the input flow_in:** -// -// The handle flow_in forces the execution of the gradient lookup to occur -// only after certain other operations have occurred. For example, when -// the forward TensorArray is dynamically sized, writes to this TensorArray -// may resize the object. The gradient TensorArray is statically sized based -// on the size of the forward TensorArray when this operation executes. -// Furthermore, the size of the forward TensorArray is frozen by this call. -// As a result, the flow is used to ensure that the call to generate the gradient -// TensorArray only happens after all writes are executed. 
-// -// In the case of dynamically sized TensorArrays, gradient computation should -// only be performed on read operations that have themselves been chained via -// flow to occur only after all writes have executed. That way the final size -// of the forward TensorArray is known when this operation is called. -// -// **A note about the source attribute:** -// -// TensorArray gradient calls use an accumulator TensorArray object. If -// multiple gradients are calculated and run in the same session, the multiple -// gradient nodes may accidentally flow through the same accumulator TensorArray. -// This double counts and generally breaks the TensorArray gradient flow. -// -// The solution is to identify which gradient call this particular -// TensorArray gradient is being called in. This is performed by identifying -// a unique string (e.g. "gradients", "gradients_1", ...) from the input -// gradient Tensor's name. This string is used as a suffix when creating -// the TensorArray gradient object here (the attribute `source`). -// -// The attribute `source` is added as a suffix to the forward TensorArray's -// name when performing the creation / lookup, so that each separate gradient -// calculation gets its own TensorArray accumulator. -// -// Arguments: -// handle: The handle to the forward TensorArray. -// flow_in: A float scalar that enforces proper chaining of operations. -// source: The gradient source string, used to decide which gradient TensorArray -// to return. -func TensorArrayGradV3(scope *Scope, handle tf.Output, flow_in tf.Output, source string) (grad_handle tf.Output, flow_out tf.Output) { +// CudnnRNNAttr is an optional argument to CudnnRNN. +type CudnnRNNAttr func(optionalAttr) + +// CudnnRNNRnnMode sets the optional rnn_mode attribute to value. 
+// If not specified, defaults to "lstm" +func CudnnRNNRnnMode(value string) CudnnRNNAttr { + return func(m optionalAttr) { + m["rnn_mode"] = value + } +} + +// CudnnRNNInputMode sets the optional input_mode attribute to value. +// If not specified, defaults to "linear_input" +func CudnnRNNInputMode(value string) CudnnRNNAttr { + return func(m optionalAttr) { + m["input_mode"] = value + } +} + +// CudnnRNNDirection sets the optional direction attribute to value. +// If not specified, defaults to "unidirectional" +func CudnnRNNDirection(value string) CudnnRNNAttr { + return func(m optionalAttr) { + m["direction"] = value + } +} + +// CudnnRNNDropout sets the optional dropout attribute to value. +// If not specified, defaults to 0 +func CudnnRNNDropout(value float32) CudnnRNNAttr { + return func(m optionalAttr) { + m["dropout"] = value + } +} + +// CudnnRNNSeed sets the optional seed attribute to value. +// If not specified, defaults to 0 +func CudnnRNNSeed(value int64) CudnnRNNAttr { + return func(m optionalAttr) { + m["seed"] = value + } +} + +// CudnnRNNSeed2 sets the optional seed2 attribute to value. +// If not specified, defaults to 0 +func CudnnRNNSeed2(value int64) CudnnRNNAttr { + return func(m optionalAttr) { + m["seed2"] = value + } +} + +// CudnnRNNIsTraining sets the optional is_training attribute to value. +// If not specified, defaults to true +func CudnnRNNIsTraining(value bool) CudnnRNNAttr { + return func(m optionalAttr) { + m["is_training"] = value + } +} + +// A RNN backed by cuDNN. +// +// Computes the RNN from the input and initial states, with respect to the params +// buffer. +// +// rnn_mode: Indicates the type of the RNN model. +// input_mode: Indicate whether there is a linear projection between the input and +// The actual computation before the first layer. 'skip_input' is only allowed +// when input_size == num_units; 'auto_select' implies 'skip_input' when +// input_size == num_units; otherwise, it implies 'linear_input'. 
+// direction: Indicates whether a bidirectional model will be used. +// dir = (direction == bidirectional) ? 2 : 1 +// dropout: dropout probability. When set to 0., dropout is disabled. +// seed: the 1st part of a seed to initialize dropout. +// seed2: the 2nd part of a seed to initialize dropout. +// input: a 3-D tensor with the shape of [seq_length, batch_size, input_size]. +// input_h: a 3-D tensor with the shape of [num_layer * dir, batch_size, +// num_units]. +// input_c: For LSTM, a 3-D tensor with the shape of +// [num_layer * dir, batch, num_units]. For other models, it is ignored. +// params: a 1-D tensor that contains the weights and biases in an opaque layout. +// The size must be created through CudnnRNNParamsSize, and initialized +// separately. Note that they might not be compatible across different +// generations. So it is a good idea to save and restore +// output: a 3-D tensor with the shape of [seq_length, batch_size, +// dir * num_units]. +// output_h: the same shape has input_h. +// output_c: the same shape as input_c for LSTM. An empty tensor for other models. +// is_training: Indicates whether this operation is used for inferenece or +// training. +// reserve_space: an opaque tensor that can be used in backprop calculation. It +// is only produced if is_training is false. 
+func CudnnRNN(scope *Scope, input tf.Output, input_h tf.Output, input_c tf.Output, params tf.Output, optional ...CudnnRNNAttr) (output tf.Output, output_h tf.Output, output_c tf.Output, reserve_space tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"source": source} + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "TensorArrayGradV3", + Type: "CudnnRNN", Input: []tf.Input{ - handle, flow_in, + input, input_h, input_c, params, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) + return op.Output(0), op.Output(1), op.Output(2), op.Output(3) } // Compare values of `input` to `threshold` and pack resulting bits into a `uint8`. @@ -24413,6 +26857,47 @@ func TensorArrayScatterV3(scope *Scope, handle tf.Output, indices tf.Output, val return op.Output(0) } +// EmptyAttr is an optional argument to Empty. +type EmptyAttr func(optionalAttr) + +// EmptyInit sets the optional init attribute to value. +// +// value: If True, initialize the returned tensor with the default value of dtype. Otherwise, the implementation is free not to initializethe tensor's content. +// If not specified, defaults to false +func EmptyInit(value bool) EmptyAttr { + return func(m optionalAttr) { + m["init"] = value + } +} + +// Creates a tensor with the given shape. +// +// This operation creates a tensor of `shape` and `dtype`. +// +// Arguments: +// shape: 1-D. Represents the shape of the output tensor. +// +// +// Returns A `Tensor` of type `T`. 
+func Empty(scope *Scope, shape tf.Output, dtype tf.DataType, optional ...EmptyAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"dtype": dtype} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "Empty", + Input: []tf.Input{ + shape, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // TensorArrayConcatV3Attr is an optional argument to TensorArrayConcatV3. type TensorArrayConcatV3Attr func(optionalAttr) @@ -24530,6 +27015,27 @@ func ParameterizedTruncatedNormal(scope *Scope, shape tf.Output, means tf.Output return op.Output(0) } +// Sets the index-th position of the list to contain the given tensor. +// +// input_handle: the list +// index: the position in the list to which the tensor will be assigned +// item: the element to be assigned to that position +// output_handle: the new list, with the element in the proper position +// +func TensorListSetItem(scope *Scope, input_handle tf.Output, index tf.Output, item tf.Output) (output_handle tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "TensorListSetItem", + Input: []tf.Input{ + input_handle, index, item, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Returns a diagonal tensor with a given diagonal values. // // Given a `diagonal`, this operation returns a tensor with the `diagonal` and @@ -25074,6 +27580,27 @@ func TensorArrayScatterV2(scope *Scope, handle tf.Output, indices tf.Output, val return op.Output(0) } +// Creates a tree ensemble model and returns a handle to it. +// +// Arguments: +// tree_ensemble_handle: Handle to the tree ensemble resource to be created. +// stamp_token: Token to use as the initial value of the resource stamp. +// tree_ensemble_serialized: Serialized proto of the tree ensemble. +// +// Returns the created operation. 
+func BoostedTreesCreateEnsemble(scope *Scope, tree_ensemble_handle tf.Output, stamp_token tf.Output, tree_ensemble_serialized tf.Output) (o *tf.Operation) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "BoostedTreesCreateEnsemble", + Input: []tf.Input{ + tree_ensemble_handle, stamp_token, tree_ensemble_serialized, + }, + } + return scope.AddOperation(opspec) +} + // Applies sparse addition to `input` using individual values or slices // // from `updates` according to indices `indices`. The updates are non-aliasing: @@ -25670,63 +28197,6 @@ func MapUnstage(scope *Scope, key tf.Output, indices tf.Output, dtypes []tf.Data return values } -// MapSizeAttr is an optional argument to MapSize. -type MapSizeAttr func(optionalAttr) - -// MapSizeCapacity sets the optional capacity attribute to value. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func MapSizeCapacity(value int64) MapSizeAttr { - return func(m optionalAttr) { - m["capacity"] = value - } -} - -// MapSizeMemoryLimit sets the optional memory_limit attribute to value. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func MapSizeMemoryLimit(value int64) MapSizeAttr { - return func(m optionalAttr) { - m["memory_limit"] = value - } -} - -// MapSizeContainer sets the optional container attribute to value. -// If not specified, defaults to "" -func MapSizeContainer(value string) MapSizeAttr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// MapSizeSharedName sets the optional shared_name attribute to value. -// If not specified, defaults to "" -func MapSizeSharedName(value string) MapSizeAttr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// Op returns the number of elements in the underlying container. 
-func MapSize(scope *Scope, dtypes []tf.DataType, optional ...MapSizeAttr) (size tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtypes": dtypes} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "MapSize", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // MapIncompleteSizeAttr is an optional argument to MapIncompleteSize. type MapIncompleteSizeAttr func(optionalAttr) @@ -25954,6 +28424,120 @@ func ShapeN(scope *Scope, input []tf.Output, optional ...ShapeNAttr) (output []t return output } +// CudnnRNNParamsToCanonicalAttr is an optional argument to CudnnRNNParamsToCanonical. +type CudnnRNNParamsToCanonicalAttr func(optionalAttr) + +// CudnnRNNParamsToCanonicalRnnMode sets the optional rnn_mode attribute to value. +// If not specified, defaults to "lstm" +func CudnnRNNParamsToCanonicalRnnMode(value string) CudnnRNNParamsToCanonicalAttr { + return func(m optionalAttr) { + m["rnn_mode"] = value + } +} + +// CudnnRNNParamsToCanonicalInputMode sets the optional input_mode attribute to value. +// If not specified, defaults to "linear_input" +func CudnnRNNParamsToCanonicalInputMode(value string) CudnnRNNParamsToCanonicalAttr { + return func(m optionalAttr) { + m["input_mode"] = value + } +} + +// CudnnRNNParamsToCanonicalDirection sets the optional direction attribute to value. +// If not specified, defaults to "unidirectional" +func CudnnRNNParamsToCanonicalDirection(value string) CudnnRNNParamsToCanonicalAttr { + return func(m optionalAttr) { + m["direction"] = value + } +} + +// CudnnRNNParamsToCanonicalDropout sets the optional dropout attribute to value. +// If not specified, defaults to 0 +func CudnnRNNParamsToCanonicalDropout(value float32) CudnnRNNParamsToCanonicalAttr { + return func(m optionalAttr) { + m["dropout"] = value + } +} + +// CudnnRNNParamsToCanonicalSeed sets the optional seed attribute to value. 
+// If not specified, defaults to 0 +func CudnnRNNParamsToCanonicalSeed(value int64) CudnnRNNParamsToCanonicalAttr { + return func(m optionalAttr) { + m["seed"] = value + } +} + +// CudnnRNNParamsToCanonicalSeed2 sets the optional seed2 attribute to value. +// If not specified, defaults to 0 +func CudnnRNNParamsToCanonicalSeed2(value int64) CudnnRNNParamsToCanonicalAttr { + return func(m optionalAttr) { + m["seed2"] = value + } +} + +// Retrieves CudnnRNN params in canonical form. +// +// Retrieves a set of weights from the opaque params buffer that can be saved and +// restored in a way compatible with future runs. +// +// Note that the params buffer may not be compatible across different GPUs. So any +// save and restoration should be converted to and from the canonical weights and +// biases. +// +// num_layers: Specifies the number of layers in the RNN model. +// num_units: Specifies the size of the hidden state. +// input_size: Specifies the size of the input state. +// num_params: number of parameter sets for all layers. +// Each layer may contain multiple parameter sets, with each set consisting of +// a weight matrix and a bias vector. +// weights: the canonical form of weights that can be used for saving +// and restoration. They are more likely to be compatible across different +// generations. +// biases: the canonical form of biases that can be used for saving +// and restoration. They are more likely to be compatible across different +// generations. +// rnn_mode: Indicates the type of the RNN model. +// input_mode: Indicate whether there is a linear projection between the input and +// The actual computation before the first layer. 'skip_input' is only allowed +// when input_size == num_units; 'auto_select' implies 'skip_input' when +// input_size == num_units; otherwise, it implies 'linear_input'. +// direction: Indicates whether a bidirectional model will be used. +// dir = (direction == bidirectional) ? 2 : 1 +// dropout: dropout probability. 
When set to 0., dropout is disabled. +// seed: the 1st part of a seed to initialize dropout. +// seed2: the 2nd part of a seed to initialize dropout. +func CudnnRNNParamsToCanonical(scope *Scope, num_layers tf.Output, num_units tf.Output, input_size tf.Output, params tf.Output, num_params int64, optional ...CudnnRNNParamsToCanonicalAttr) (weights []tf.Output, biases []tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"num_params": num_params} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "CudnnRNNParamsToCanonical", + Input: []tf.Input{ + num_layers, num_units, input_size, params, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + if scope.Err() != nil { + return + } + var idx int + var err error + if weights, idx, err = makeOutputList(op, idx, "weights"); err != nil { + scope.UpdateErr("CudnnRNNParamsToCanonical", err) + return + } + if biases, idx, err = makeOutputList(op, idx, "biases"); err != nil { + scope.UpdateErr("CudnnRNNParamsToCanonical", err) + return + } + return weights, biases +} + // UniformCandidateSamplerAttr is an optional argument to UniformCandidateSampler. type UniformCandidateSamplerAttr func(optionalAttr) @@ -26181,19 +28765,141 @@ func Switch(scope *Scope, data tf.Output, pred tf.Output) (output_false tf.Outpu // Add all input tensors element wise. // // Arguments: -// inputs: Must all be the same size and shape. -func AddN(scope *Scope, inputs []tf.Output) (sum tf.Output) { +// inputs: Must all be the same size and shape. +func AddN(scope *Scope, inputs []tf.Output) (sum tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "AddN", + Input: []tf.Input{ + tf.OutputList(inputs), + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// TryRpcAttr is an optional argument to TryRpc. +type TryRpcAttr func(optionalAttr) + +// TryRpcProtocol sets the optional protocol attribute to value. +// +// value: RPC protocol to use. 
Empty string means use the default protocol. +// Options include 'grpc'. +// If not specified, defaults to "" +func TryRpcProtocol(value string) TryRpcAttr { + return func(m optionalAttr) { + m["protocol"] = value + } +} + +// TryRpcFailFast sets the optional fail_fast attribute to value. +// +// value: `boolean`. If `true` (default), then failures to connect +// (i.e., the server does not immediately respond) cause an RPC failure. +// If not specified, defaults to true +func TryRpcFailFast(value bool) TryRpcAttr { + return func(m optionalAttr) { + m["fail_fast"] = value + } +} + +// TryRpcTimeoutInMs sets the optional timeout_in_ms attribute to value. +// +// value: `int`. If `0` (default), then the kernel will run the RPC +// request and only time out if the RPC deadline passes or the session times out. +// If this value is greater than `0`, then the op will raise an exception if +// the RPC takes longer than `timeout_in_ms`. +// If not specified, defaults to 0 +func TryRpcTimeoutInMs(value int64) TryRpcAttr { + return func(m optionalAttr) { + m["timeout_in_ms"] = value + } +} + +// Perform batches of RPC requests. +// +// This op asynchronously performs either a single RPC request, or a batch +// of requests. RPC requests are defined by three main parameters: +// +// - `address` (the host+port or BNS address of the request) +// - `method` (the method name for the request) +// - `request` (the serialized proto string, or vector of strings, +// of the RPC request argument). 
+// +// For example, if you have an RPC service running on port localhost:2345, +// and its interface is configured with the following proto declaration: +// +// ``` +// service MyService { +// rpc MyMethod(MyRequestProto) returns (MyResponseProto) { +// } +// }; +// ``` +// +// then call this op with arguments: +// +// ``` +// address = "localhost:2345" +// method = "MyService/MyMethod" +// ``` +// +// The `request` tensor is a string tensor representing serialized `MyRequestProto` +// strings; and the output string tensor `response` will have the same shape +// and contain (upon successful completion) corresponding serialized +// `MyResponseProto` strings. +// +// For example, to send a single, empty, `MyRequestProto`, call +// this op with `request = ""`. To send 5 **parallel** empty requests, +// call this op with `request = ["", "", "", "", ""]`. +// +// More generally, one can create a batch of `MyRequestProto` serialized protos +// from regular batched tensors using the `encode_proto` op, and convert +// the response `MyResponseProto` serialized protos to batched tensors +// using the `decode_proto` op. +// +// **NOTE** Working with serialized proto strings is faster than instantiating +// actual proto objects in memory, so no performance degradation is expected +// compared to writing custom kernels for this workflow. +// +// Unlike the standard `Rpc` op, if the connection fails or the remote worker +// returns an error status, this op does **not** reraise the exception. +// Instead, the `status_code` and `status_message` entry for the corresponding RPC +// call is set with the error returned from the RPC call. The `response` tensor +// will contain valid response values for those minibatch entries whose RPCs did +// not fail; the rest of the entries will have empty strings. +// +// Arguments: +// address: `0-D` or `1-D`. The address (i.e. host_name:port) of the RPC server. 
+// If this tensor has more than 1 element, then multiple parallel rpc requests +// are sent. This argument broadcasts with `method` and `request`. +// method: `0-D` or `1-D`. The method address on the RPC server. +// If this tensor has more than 1 element, then multiple parallel rpc requests +// are sent. This argument broadcasts with `address` and `request`. +// request: `0-D` or `1-D`. Serialized proto strings: the rpc request argument. +// If this tensor has more than 1 element, then multiple parallel rpc requests +// are sent. This argument broadcasts with `address` and `method`. +// +// Returns response: Same shape as `request`. Serialized proto strings: the rpc responses. status_code: Same shape as `request`. Values correspond to tensorflow Status enum codes. status_message: Same shape as `request`. Values correspond to Status messages +// returned from the RPC calls. +func TryRpc(scope *Scope, address tf.Output, method tf.Output, request tf.Output, optional ...TryRpcAttr) (response tf.Output, status_code tf.Output, status_message tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "AddN", + Type: "TryRpc", Input: []tf.Input{ - tf.OutputList(inputs), + address, method, request, }, + Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1), op.Output(2) } // EnterAttr is an optional argument to Enter. @@ -26631,6 +29337,64 @@ func ParseSingleExample(scope *Scope, serialized tf.Output, dense_defaults []tf. return sparse_indices, sparse_values, sparse_shapes, dense_values } +// Deserializes a serialized tree ensemble config and replaces current tree +// +// ensemble. +// +// Arguments: +// tree_ensemble_handle: Handle to the tree ensemble. +// stamp_token: Token to use as the new value of the resource stamp. +// tree_ensemble_serialized: Serialized proto of the ensemble. +// +// Returns the created operation.
+func BoostedTreesDeserializeEnsemble(scope *Scope, tree_ensemble_handle tf.Output, stamp_token tf.Output, tree_ensemble_serialized tf.Output) (o *tf.Operation) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "BoostedTreesDeserializeEnsemble", + Input: []tf.Input{ + tree_ensemble_handle, stamp_token, tree_ensemble_serialized, + }, + } + return scope.AddOperation(opspec) +} + +// Runs multiple additive regression ensemble predictors on input instances and +// +// computes the update to cached logits. It is designed to be used during training. +// It traverses the trees starting from cached tree id and cached node id and +// calculates the updates to be pushed to the cache. +// +// Arguments: +// +// cached_tree_ids: Rank 1 Tensor containing cached tree ids which is the starting +// tree of prediction. +// cached_node_ids: Rank 1 Tensor containing cached node id which is the starting +// node of prediction. +// bucketized_features: A list of rank 1 Tensors containing bucket id for each +// feature. +// logits_dimension: scalar, dimension of the logits, to be used for partial logits +// shape. +// +// Returns partial_logits: Rank 2 Tensor containing logits update (with respect to cached +// values stored) for each example. tree_ids: Rank 1 Tensor containing new tree ids for each example. node_ids: Rank 1 Tensor containing new node ids in the new tree_ids.
+func BoostedTreesTrainingPredict(scope *Scope, tree_ensemble_handle tf.Output, cached_tree_ids tf.Output, cached_node_ids tf.Output, bucketized_features []tf.Output, logits_dimension int64) (partial_logits tf.Output, tree_ids tf.Output, node_ids tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"logits_dimension": logits_dimension} + opspec := tf.OpSpec{ + Type: "BoostedTreesTrainingPredict", + Input: []tf.Input{ + tree_ensemble_handle, cached_tree_ids, cached_node_ids, tf.OutputList(bucketized_features), + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2) +} + // Elementwise computes the bitwise AND of `x` and `y`. // // The result will have those bits set, that are set in both `x` and `y`. The @@ -26667,6 +29431,44 @@ func LeftShift(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { return op.Output(0) } +// TensorListStackAttr is an optional argument to TensorListStack. +type TensorListStackAttr func(optionalAttr) + +// TensorListStackNumElements sets the optional num_elements attribute to value. +// If not specified, defaults to -1 +func TensorListStackNumElements(value int64) TensorListStackAttr { + return func(m optionalAttr) { + m["num_elements"] = value + } +} + +// Stacks all tensors in the list. +// +// Requires that all tensors have the same shape. +// +// input_handle: the input list +// tensor: the gathered result +// num_elements: optional. If not -1, the number of elements in the list. 
+// +func TensorListStack(scope *Scope, input_handle tf.Output, element_dtype tf.DataType, optional ...TensorListStackAttr) (tensor tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"element_dtype": element_dtype} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "TensorListStack", + Input: []tf.Input{ + input_handle, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Elementwise computes the bitwise right-shift of `x` and `y`. // // Performs a logical shift for unsigned integer types, and an arithmetic shift @@ -26716,6 +29518,175 @@ func AdjustHue(scope *Scope, images tf.Output, delta tf.Output) (output tf.Outpu return op.Output(0) } +// BatchAttr is an optional argument to Batch. +type BatchAttr func(optionalAttr) + +// BatchMaxEnqueuedBatches sets the optional max_enqueued_batches attribute to value. +// If not specified, defaults to 10 +func BatchMaxEnqueuedBatches(value int64) BatchAttr { + return func(m optionalAttr) { + m["max_enqueued_batches"] = value + } +} + +// BatchAllowedBatchSizes sets the optional allowed_batch_sizes attribute to value. +// If not specified, defaults to <> +func BatchAllowedBatchSizes(value []int64) BatchAttr { + return func(m optionalAttr) { + m["allowed_batch_sizes"] = value + } +} + +// BatchContainer sets the optional container attribute to value. +// If not specified, defaults to "" +func BatchContainer(value string) BatchAttr { + return func(m optionalAttr) { + m["container"] = value + } +} + +// BatchSharedName sets the optional shared_name attribute to value. +// If not specified, defaults to "" +func BatchSharedName(value string) BatchAttr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// BatchBatchingQueue sets the optional batching_queue attribute to value. 
+// If not specified, defaults to "" +func BatchBatchingQueue(value string) BatchAttr { + return func(m optionalAttr) { + m["batching_queue"] = value + } +} + +// Batches all input tensors nondeterministically. +// +// When many instances of this Op are being run concurrently with the same +// container/shared_name in the same device, some will output zero-shaped Tensors +// and others will output Tensors of size up to max_batch_size. +// +// All Tensors in in_tensors are batched together (so, for example, labels and +// features should be batched with a single instance of this operation). +// +// Each invocation of batch emits an `id` scalar which will be used to identify +// this particular invocation when doing unbatch or its gradient. +// +// Each op which emits a non-empty batch will also emit a non-empty batch_index +// Tensor, which is a [K, 3] matrix where each row contains the invocation's id, +// start, and length of elements of each set of Tensors present in batched_tensors. +// +// Batched tensors are concatenated along the first dimension, and all tensors in +// in_tensors must have the first dimension of the same size. +// +// in_tensors: The tensors to be batched. +// num_batch_threads: Number of scheduling threads for processing batches of work. +// Determines the number of batches processed in parallel. +// max_batch_size: Batch sizes will never be bigger than this. +// batch_timeout_micros: Maximum number of microseconds to wait before outputting +// an incomplete batch. +// allowed_batch_sizes: Optional list of allowed batch sizes. If left empty, does +// nothing. Otherwise, supplies a list of batch sizes, causing the op to pad +// batches up to one of those sizes. The entries must increase monotonically, and +// the final entry must equal max_batch_size. +// grad_timeout_micros: The timeout to use for the gradient. See Unbatch. +// batched_tensors: Either empty tensors or a batch of concatenated Tensors.
+// batch_index: If out_tensors is non-empty, has information to invert it. +// container: Controls the scope of sharing of this batch. +// id: always contains a scalar with a unique ID for this invocation of Batch. +// shared_name: Concurrently running instances of batch in the same device with the +// same container and shared_name will batch their elements together. If left +// empty, the op name will be used as the shared name. +// T: the types of tensors to be batched. +func Batch(scope *Scope, in_tensors []tf.Output, num_batch_threads int64, max_batch_size int64, batch_timeout_micros int64, grad_timeout_micros int64, optional ...BatchAttr) (batched_tensors []tf.Output, batch_index tf.Output, id tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"num_batch_threads": num_batch_threads, "max_batch_size": max_batch_size, "batch_timeout_micros": batch_timeout_micros, "grad_timeout_micros": grad_timeout_micros} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "Batch", + Input: []tf.Input{ + tf.OutputList(in_tensors), + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + if scope.Err() != nil { + return + } + var idx int + var err error + if batched_tensors, idx, err = makeOutputList(op, idx, "batched_tensors"); err != nil { + scope.UpdateErr("Batch", err) + return + } + batch_index = op.Output(idx) + id = op.Output(idx + 1) // id is the output after batch_index; reusing idx would alias batch_index + return batched_tensors, batch_index, id +} + +// UnbatchAttr is an optional argument to Unbatch. +type UnbatchAttr func(optionalAttr) + +// UnbatchContainer sets the optional container attribute to value. +// If not specified, defaults to "" +func UnbatchContainer(value string) UnbatchAttr { + return func(m optionalAttr) { + m["container"] = value + } +} + +// UnbatchSharedName sets the optional shared_name attribute to value.
+// If not specified, defaults to "" +func UnbatchSharedName(value string) UnbatchAttr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// Reverses the operation of Batch for a single output Tensor. +// +// An instance of Unbatch either receives an empty batched_tensor, in which case it +// asynchronously waits until the values become available from a concurrently +// running instance of Unbatch with the same container and shared_name, or receives +// a non-empty batched_tensor in which case it finalizes all other concurrently +// running instances and outputs its own element from the batch. +// +// batched_tensor: The possibly transformed output of Batch. The size of the first +// dimension should remain unchanged by the transformations for the operation to +// work. +// batch_index: The matching batch_index obtained from Batch. +// id: The id scalar emitted by Batch. +// unbatched_tensor: The Tensor corresponding to this execution. +// timeout_micros: Maximum amount of time (in microseconds) to wait to receive the +// batched input tensor associated with a given invocation of the op. +// container: Container to control resource sharing. +// shared_name: Instances of Unbatch with the same container and shared_name are +// assumed to possibly belong to the same batch. If left empty, the op name will +// be used as the shared name. +func Unbatch(scope *Scope, batched_tensor tf.Output, batch_index tf.Output, id tf.Output, timeout_micros int64, optional ...UnbatchAttr) (unbatched_tensor tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"timeout_micros": timeout_micros} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "Unbatch", + Input: []tf.Input{ + batched_tensor, batch_index, id, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // AvgPool3DGradAttr is an optional argument to AvgPool3DGrad. 
type AvgPool3DGradAttr func(optionalAttr) @@ -26928,6 +29899,60 @@ func ParseSingleSequenceExample(scope *Scope, serialized tf.Output, feature_list return context_sparse_indices, context_sparse_values, context_sparse_shapes, context_dense_values, feature_list_sparse_indices, feature_list_sparse_values, feature_list_sparse_shapes, feature_list_dense_values } +// UnbatchGradAttr is an optional argument to UnbatchGrad. +type UnbatchGradAttr func(optionalAttr) + +// UnbatchGradContainer sets the optional container attribute to value. +// If not specified, defaults to "" +func UnbatchGradContainer(value string) UnbatchGradAttr { + return func(m optionalAttr) { + m["container"] = value + } +} + +// UnbatchGradSharedName sets the optional shared_name attribute to value. +// If not specified, defaults to "" +func UnbatchGradSharedName(value string) UnbatchGradAttr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// Gradient of Unbatch. +// +// Acts like Batch but using the given batch_index index of batching things as they +// become available. This ensures that the gradients are propagated back in the +// same session which did the forward pass. +// +// original_input: The input to the Unbatch operation this is the gradient of. +// batch_index: The batch_index given to the Unbatch operation this is the gradient +// of. +// grad: The downstream gradient. +// id: The id scalar emitted by Batch. +// batched_grad: The return value, either an empty tensor or the batched gradient. +// container: Container to control resource sharing. +// shared_name: Instances of UnbatchGrad with the same container and shared_name +// are assumed to possibly belong to the same batch. If left empty, the op name +// will be used as the shared name. 
+func UnbatchGrad(scope *Scope, original_input tf.Output, batch_index tf.Output, grad tf.Output, id tf.Output, optional ...UnbatchGradAttr) (batched_grad tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "UnbatchGrad", + Input: []tf.Input{ + original_input, batch_index, grad, id, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // DecodeWavAttr is an optional argument to DecodeWav. type DecodeWavAttr func(optionalAttr) @@ -26991,95 +30016,97 @@ func DecodeWav(scope *Scope, contents tf.Output, optional ...DecodeWavAttr) (aud return op.Output(0), op.Output(1) } -// UniqueAttr is an optional argument to Unique. -type UniqueAttr func(optionalAttr) - -// UniqueOutIdx sets the optional out_idx attribute to value. -// If not specified, defaults to DT_INT32 -func UniqueOutIdx(value tf.DataType) UniqueAttr { - return func(m optionalAttr) { - m["out_idx"] = value - } -} - -// Finds unique elements in a 1-D tensor. -// -// This operation returns a tensor `y` containing all of the unique elements of `x` -// sorted in the same order that they occur in `x`. This operation also returns a -// tensor `idx` the same size as `x` that contains the index of each value of `x` -// in the unique output `y`. In other words: +// Concatenates a list of `N` tensors along the first dimension. // -// `y[idx[i]] = x[i] for i in [0, 1,...,rank(x) - 1]` +// The input tensors are all required to have size 1 in the first dimension. // // For example: // // ``` -// # tensor 'x' is [1, 1, 2, 4, 4, 4, 7, 8, 8] -// y, idx = unique(x) -// y ==> [1, 2, 4, 7, 8] -// idx ==> [0, 0, 1, 2, 2, 2, 3, 4, 4] +// # 'x' is [[1, 4]] +// # 'y' is [[2, 5]] +// # 'z' is [[3, 6]] +// parallel_concat([x, y, z]) => [[1, 4], [2, 5], [3, 6]] # Pack along first dim. 
// ``` // +// The difference between concat and parallel_concat is that concat requires all +// of the inputs be computed before the operation will begin but doesn't require +// that the input shapes be known during graph construction. Parallel concat +// will copy pieces of the input into the output as they become available, in +// some situations this can provide a performance benefit. +// // Arguments: -// x: 1-D. +// values: Tensors to be concatenated. All must have size 1 in the first dimension +// and same shape. +// shape: the final shape of the result; should be equal to the shapes of any input +// but with the number of input values in the first dimension. // -// Returns 1-D.1-D. -func Unique(scope *Scope, x tf.Output, optional ...UniqueAttr) (y tf.Output, idx tf.Output) { +// Returns The concatenated tensor. +func ParallelConcat(scope *Scope, values []tf.Output, shape tf.Shape) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) + attrs := map[string]interface{}{"shape": shape} + opspec := tf.OpSpec{ + Type: "ParallelConcat", + Input: []tf.Input{ + tf.OutputList(values), + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Subtracts `v` into specified rows of `x`. +// +// Computes y = x; y[i, :] -= v; return y. +// +// Arguments: +// x: A `Tensor` of type T. +// i: A vector. Indices into the left-most dimension of `x`. +// v: A `Tensor` of type T. Same dimension sizes as x except the first dimension, which must be the same as i's size. +// +// Returns A `Tensor` of type T. An alias of `x`. The content of `y` is undefined if there are duplicates in `i`. 
+func InplaceSub(scope *Scope, x tf.Output, i tf.Output, v tf.Output) (y tf.Output) { + if scope.Err() != nil { + return } opspec := tf.OpSpec{ - Type: "Unique", + Type: "InplaceSub", Input: []tf.Input{ - x, + x, i, v, }, - Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) + return op.Output(0) } -// Concatenates a list of `N` tensors along the first dimension. -// -// The input tensors are all required to have size 1 in the first dimension. -// -// For example: +// Converts a flat index or array of flat indices into a tuple of // -// ``` -// # 'x' is [[1, 4]] -// # 'y' is [[2, 5]] -// # 'z' is [[3, 6]] -// parallel_concat([x, y, z]) => [[1, 4], [2, 5], [3, 6]] # Pack along first dim. -// ``` +// coordinate arrays. // -// The difference between concat and parallel_concat is that concat requires all -// of the inputs be computed before the operation will begin but doesn't require -// that the input shapes be known during graph construction. Parallel concat -// will copy pieces of the input into the output as they become available, in -// some situations this can provide a performance benefit. +// @compatibility(numpy) +// Equivalent to np.unravel_index +// @end_compatibility // // Arguments: -// values: Tensors to be concatenated. All must have size 1 in the first dimension -// and same shape. -// shape: the final shape of the result; should be equal to the shapes of any input -// but with the number of input values in the first dimension. +// indices: An 0-D or 1-D `int` Tensor whose elements are indices into the +// flattened version of an array of dimensions dims. +// dims: An 1-D `int` Tensor. The shape of the array to use for unraveling +// indices. // -// Returns The concatenated tensor. -func ParallelConcat(scope *Scope, values []tf.Output, shape tf.Shape) (output tf.Output) { +// Returns An 2-D (or 1-D if indices is 0-D) tensor where each row has the +// same shape as the indices array. 
+func UnravelIndex(scope *Scope, indices tf.Output, dims tf.Output) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"shape": shape} opspec := tf.OpSpec{ - Type: "ParallelConcat", + Type: "UnravelIndex", Input: []tf.Input{ - tf.OutputList(values), + indices, dims, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) @@ -27567,6 +30594,10 @@ func EditDistance(scope *Scope, hypothesis_indices tf.Output, hypothesis_values // // indices.shape[:-1] + params.shape[indices.shape[-1]:] // +// Note that on CPU, if an out of bound index is found, an error is returned. +// On GPU, if an out of bound index is found, a 0 is stored in the +// corresponding output value. +// // Some examples below. // // Simple indexing into a matrix: @@ -27663,227 +30694,3 @@ func GatherNd(scope *Scope, params tf.Output, indices tf.Output) (output tf.Outp op := scope.AddOperation(opspec) return op.Output(0) } - -// Eagerly executes a python function to compute func(input)->output. The -// -// semantics of the input, output, and attributes are the same as those for -// PyFunc. -func EagerPyFunc(scope *Scope, input []tf.Output, token string, Tout []tf.DataType) (output []tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"token": token, "Tout": Tout} - opspec := tf.OpSpec{ - Type: "EagerPyFunc", - Input: []tf.Input{ - tf.OutputList(input), - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if output, idx, err = makeOutputList(op, idx, "output"); err != nil { - scope.UpdateErr("EagerPyFunc", err) - return - } - return output -} - -// Stops gradient computation. -// -// When executed in a graph, this op outputs its input tensor as-is. -// -// When building ops to compute gradients, this op prevents the contribution of -// its inputs to be taken into account. 
Normally, the gradient generator adds ops -// to a graph to compute the derivatives of a specified 'loss' by recursively -// finding out inputs that contributed to its computation. If you insert this op -// in the graph it inputs are masked from the gradient generator. They are not -// taken into account for computing gradients. -// -// This is useful any time you want to compute a value with TensorFlow but need -// to pretend that the value was a constant. Some examples include: -// -// * The *EM* algorithm where the *M-step* should not involve backpropagation -// through the output of the *E-step*. -// * Contrastive divergence training of Boltzmann machines where, when -// differentiating the energy function, the training must not backpropagate -// through the graph that generated the samples from the model. -// * Adversarial training, where no backprop should happen through the adversarial -// example generation process. -func StopGradient(scope *Scope, input tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "StopGradient", - Input: []tf.Input{ - input, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes asin of x element-wise. -func Asin(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Asin", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// PreventGradientAttr is an optional argument to PreventGradient. -type PreventGradientAttr func(optionalAttr) - -// PreventGradientMessage sets the optional message attribute to value. -// -// value: Will be printed in the error when anyone tries to differentiate -// this operation. -// If not specified, defaults to "" -func PreventGradientMessage(value string) PreventGradientAttr { - return func(m optionalAttr) { - m["message"] = value - } -} - -// An identity op that triggers an error if a gradient is requested. 
-// -// When executed in a graph, this op outputs its input tensor as-is. -// -// When building ops to compute gradients, the TensorFlow gradient system -// will return an error when trying to lookup the gradient of this op, -// because no gradient must ever be registered for this function. This -// op exists to prevent subtle bugs from silently returning unimplemented -// gradients in some corner cases. -// -// Arguments: -// input: any tensor. -// -// Returns the same input tensor. -func PreventGradient(scope *Scope, input tf.Output, optional ...PreventGradientAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "PreventGradient", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Checks a tensor for NaN and Inf values. -// -// When run, reports an `InvalidArgument` error if `tensor` has any values -// that are not a number (NaN) or infinity (Inf). Otherwise, passes `tensor` as-is. -// -// Arguments: -// -// message: Prefix of the error message. -func CheckNumerics(scope *Scope, tensor tf.Output, message string) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"message": message} - opspec := tf.OpSpec{ - Type: "CheckNumerics", - Input: []tf.Input{ - tensor, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Shuffle dimensions of x according to a permutation and conjugate the result. -// -// The output `y` has the same rank as `x`. 
The shapes of `x` and `y` satisfy: -// `y.shape[i] == x.shape[perm[i]] for i in [0, 1, ..., rank(x) - 1]` -// `y[i,j,k,...,s,t,u] == conj(x[perm[i], perm[j], perm[k],...,perm[s], perm[t], perm[u]])` -func ConjugateTranspose(scope *Scope, x tf.Output, perm tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ConjugateTranspose", - Input: []tf.Input{ - x, perm, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// UniqueV2Attr is an optional argument to UniqueV2. -type UniqueV2Attr func(optionalAttr) - -// UniqueV2OutIdx sets the optional out_idx attribute to value. -// If not specified, defaults to DT_INT32 -func UniqueV2OutIdx(value tf.DataType) UniqueV2Attr { - return func(m optionalAttr) { - m["out_idx"] = value - } -} - -// Finds unique elements in a 1-D tensor. -// -// This operation returns a tensor `y` containing all of the unique elements of `x` -// sorted in the same order that they occur in `x`. This operation also returns a -// tensor `idx` the same size as `x` that contains the index of each value of `x` -// in the unique output `y`. In other words: -// -// `y[idx[i]] = x[i] for i in [0, 1,...,rank(x) - 1]` -// -// For example: -// -// ``` -// # tensor 'x' is [1, 1, 2, 4, 4, 4, 7, 8, 8] -// y, idx = unique(x) -// y ==> [1, 2, 4, 7, 8] -// idx ==> [0, 0, 1, 2, 2, 2, 3, 4, 4] -// ``` -// -// Arguments: -// x: A `Tensor`. -// axis: A `Tensor` of type `int64` (default: 0). The axis of the Tensor to -// find the unique elements. -// -// Returns A `Tensor`. Unique elements along the `axis` of `Tensor` x.A 1-D Tensor. Has the same type as x that contains the index of each -// value of x in the output y. 
-func UniqueV2(scope *Scope, x tf.Output, axis tf.Output, optional ...UniqueV2Attr) (y tf.Output, idx tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "UniqueV2", - Input: []tf.Input{ - x, axis, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} diff --git a/tensorflow/java/BUILD b/tensorflow/java/BUILD index 1be4c838f3526bcdf32d8bda5a1ada776a8c1b21..ab7d698a45b7fc0cd498f8367fc1cecf07e4ba3c 100644 --- a/tensorflow/java/BUILD +++ b/tensorflow/java/BUILD @@ -312,7 +312,7 @@ tf_cc_test( "src/gen/cc/source_writer_test.cc", ], data = [ - "src/gen/resources/test.snippet.java", + "src/gen/resources/test.java.snippet", ], deps = [ ":java_op_gen_lib", diff --git a/tensorflow/java/maven/libtensorflow/pom.xml b/tensorflow/java/maven/libtensorflow/pom.xml index 0b69a8cbe530a13dc35aad3a5c859f77f0deca2a..9c1601753bdf5076ccb14423d339b760ae784f8d 100644 --- a/tensorflow/java/maven/libtensorflow/pom.xml +++ b/tensorflow/java/maven/libtensorflow/pom.xml @@ -6,7 +6,7 @@ org.tensorflow parentpom - 1.7.0-rc1 + 1.8.0-rc0 ../ libtensorflow diff --git a/tensorflow/java/maven/libtensorflow_jni/pom.xml b/tensorflow/java/maven/libtensorflow_jni/pom.xml index 541876f7f5e4fadcbc9336f15b319389dcddbf51..3d013e12b0d2abdd64beb50b58b562dd2265f3f3 100644 --- a/tensorflow/java/maven/libtensorflow_jni/pom.xml +++ b/tensorflow/java/maven/libtensorflow_jni/pom.xml @@ -6,7 +6,7 @@ org.tensorflow parentpom - 1.7.0-rc1 + 1.8.0-rc0 ../ libtensorflow_jni diff --git a/tensorflow/java/maven/libtensorflow_jni_gpu/pom.xml b/tensorflow/java/maven/libtensorflow_jni_gpu/pom.xml index d8933e5238149337b08e70b3f407385887aef0a0..40e44af1f537c0b2e905b164d880fcc9461373d4 100644 --- a/tensorflow/java/maven/libtensorflow_jni_gpu/pom.xml +++ b/tensorflow/java/maven/libtensorflow_jni_gpu/pom.xml @@ -6,7 +6,7 @@ org.tensorflow parentpom - 1.7.0-rc1 + 1.8.0-rc0 ../ 
libtensorflow_jni_gpu diff --git a/tensorflow/java/maven/pom.xml b/tensorflow/java/maven/pom.xml index 6286fd73df6dec5643fceda8f6f652220d75e1a7..82bfd0c73aece8f4c265d9e1344ff8edb4764b32 100644 --- a/tensorflow/java/maven/pom.xml +++ b/tensorflow/java/maven/pom.xml @@ -6,7 +6,7 @@ 4.0.0 org.tensorflow parentpom - 1.7.0-rc1 + 1.8.0-rc0 pom https://www.tensorflow.org diff --git a/tensorflow/java/maven/proto/pom.xml b/tensorflow/java/maven/proto/pom.xml index 4e881f5a631f0b2e389b31a9b24028902eac6301..0a2775a500cb90500a408f12c4ed042bff0efa02 100644 --- a/tensorflow/java/maven/proto/pom.xml +++ b/tensorflow/java/maven/proto/pom.xml @@ -6,7 +6,7 @@ org.tensorflow parentpom - 1.7.0-rc1 + 1.8.0-rc0 ../ proto diff --git a/tensorflow/java/maven/tensorflow/pom.xml b/tensorflow/java/maven/tensorflow/pom.xml index d512a7eda9638d428e02beda442ba4d4db9adf62..61961432a7e4a478a0e80c1e3eea0db12fae6868 100644 --- a/tensorflow/java/maven/tensorflow/pom.xml +++ b/tensorflow/java/maven/tensorflow/pom.xml @@ -6,7 +6,7 @@ org.tensorflow parentpom - 1.7.0-rc1 + 1.8.0-rc0 ../ tensorflow diff --git a/tensorflow/java/src/gen/cc/source_writer.cc b/tensorflow/java/src/gen/cc/source_writer.cc index 214999af9a6f9ee244d336a64830238e6b7ea872..a02f75ad6e7f5f1a9f22ad976e488ae5bf02a731 100644 --- a/tensorflow/java/src/gen/cc/source_writer.cc +++ b/tensorflow/java/src/gen/cc/source_writer.cc @@ -23,10 +23,20 @@ namespace tensorflow { namespace java { SourceWriter::SourceWriter() { - // push an empty generic namespace at start, for simplification + // Push an empty generic namespace at start, for simplification. generic_namespaces_.push(new GenericNamespace()); } +SourceWriter::~SourceWriter() { + // Remove empty generic namespace added at start as well as any other + // namespace objects that haven't been removed. 
+ while (!generic_namespaces_.empty()) { + GenericNamespace* generic_namespace = generic_namespaces_.top(); + generic_namespaces_.pop(); + delete generic_namespace; + } +} + SourceWriter& SourceWriter::Indent(int tab) { left_margin_.resize( std::max(static_cast(left_margin_.size() + tab), 0), ' '); diff --git a/tensorflow/java/src/gen/cc/source_writer.h b/tensorflow/java/src/gen/cc/source_writer.h index 6abe13b5d217b30d826d013e14a590eeb91719fb..f011acd30aae3963c7f6ac995838c401f54fde54 100644 --- a/tensorflow/java/src/gen/cc/source_writer.h +++ b/tensorflow/java/src/gen/cc/source_writer.h @@ -32,7 +32,8 @@ namespace java { class SourceWriter { public: SourceWriter(); - virtual ~SourceWriter() = default; + + virtual ~SourceWriter(); // Indents following lines with white spaces. // @@ -60,7 +61,7 @@ class SourceWriter { // The data might potentially contain newline characters, therefore it will // be scanned to ensure that each line is indented and prefixed properly, // making it a bit slower than Append(). - SourceWriter& Write(const StringPiece& text); + SourceWriter& Write(const StringPiece& str); // Writes a source code snippet read from a file. 
// diff --git a/tensorflow/java/src/gen/cc/source_writer_test.cc b/tensorflow/java/src/gen/cc/source_writer_test.cc index 6926a5a411d070e25f2382c72589d879d3ca2180..4bce2fea7040a0e5cb9256dc2672399c3af8a03d 100644 --- a/tensorflow/java/src/gen/cc/source_writer_test.cc +++ b/tensorflow/java/src/gen/cc/source_writer_test.cc @@ -259,7 +259,9 @@ TEST(StreamTest, Types) { TEST(StreamTest, FileSnippet) { SourceBufferWriter writer; - const string& fname = "tensorflow/java/src/gen/resources/test.snippet.java"; + const string fname = tensorflow::io::JoinPath( + tensorflow::testing::TensorFlowSrcRoot(), + "java/src/gen/resources/test.java.snippet"); writer.WriteFromFile(fname) .BeginBlock() diff --git a/tensorflow/java/src/gen/resources/test.snippet.java b/tensorflow/java/src/gen/resources/test.java.snippet similarity index 100% rename from tensorflow/java/src/gen/resources/test.snippet.java rename to tensorflow/java/src/gen/resources/test.java.snippet diff --git a/tensorflow/java/src/main/java/org/tensorflow/examples/LabelImage.java b/tensorflow/java/src/main/java/org/tensorflow/examples/LabelImage.java index 489e95c3102557d7a75d83789c46106aa5aa3ed4..3948991c84d35009217f7c05844551fdcc49fb22 100644 --- a/tensorflow/java/src/main/java/org/tensorflow/examples/LabelImage.java +++ b/tensorflow/java/src/main/java/org/tensorflow/examples/LabelImage.java @@ -101,6 +101,7 @@ public class LabelImage { b.constant("mean", mean)), b.constant("scale", scale)); try (Session s = new Session(g)) { + // Generally, there may be multiple output tensors, all of them must be closed to prevent resource leaks. return s.runner().fetch(output.op().name()).run().get(0).expect(Float.class); } } @@ -110,6 +111,7 @@ public class LabelImage { try (Graph g = new Graph()) { g.importGraphDef(graphDef); try (Session s = new Session(g); + // Generally, there may be multiple output tensors, all of them must be closed to prevent resource leaks. 
Tensor result = s.runner().feed("input", image).fetch("output").run().get(0).expect(Float.class)) { final long[] rshape = result.shape(); diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index a3e79d46d89989c9b3b60dfd86b8796c671fb7c0..bb32f4bbe0fb8379d4d752d0e9e1a4c1ff07495e 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -4,16 +4,14 @@ # Public targets: # ":platform" - Low-level and platform-specific Python code. -package( - default_visibility = [ - "//engedu/ml/tf_from_scratch:__pkg__", - "//tensorflow:internal", - "//tensorflow/contrib/lite/toco/python:__pkg__", - "//tensorflow_models:__subpackages__", - # TODO(aselle): to pass open source test. - "//bazel_pip/tensorflow/contrib/lite/toco/python:__pkg__", - ], -) +package(default_visibility = [ + "//engedu/ml/tf_from_scratch:__pkg__", + "//tensorflow:internal", + "//tensorflow/contrib/lite/toco/python:__pkg__", + "//tensorflow_models:__subpackages__", + # TODO(aselle): to pass open source test. + "//bazel_pip/tensorflow/contrib/lite/toco/python:__pkg__", +]) licenses(["notice"]) # Apache 2.0 @@ -28,6 +26,8 @@ load("//tensorflow:tensorflow.bzl", "py_tests") load("//tensorflow:tensorflow.bzl", "tf_py_build_info_genrule") load("//tensorflow:tensorflow.bzl", "tf_py_wrap_cc") load("//tensorflow:tensorflow.bzl", "tf_cc_shared_object") +load("//tensorflow:tensorflow.bzl", "tf_native_cc_binary") +load("//tensorflow:tensorflow.bzl", "tf_custom_op_library_additional_deps_impl") load("//tensorflow:tensorflow.bzl", "cuda_py_test") load("//tensorflow:tensorflow.bzl", "cuda_py_tests") load("//tensorflow/core:platform/default/build_config.bzl", "pyx_library") @@ -58,9 +58,10 @@ py_library( "//tensorflow/tools/api/generator:__pkg__", "//tensorflow/tools/quantization:__pkg__", # TODO(b/34059704): remove when fixed ], - deps = [":no_contrib"] + if_not_windows([ + deps = [ + ":no_contrib", "//tensorflow/contrib:contrib_py", - ]), + ], ) py_library( @@ -283,6 +284,17 @@ cc_library( ], ) 
+cc_library( + name = "py_exception_registry", + srcs = ["lib/core/py_exception_registry.cc"], + hdrs = ["lib/core/py_exception_registry.h"], + deps = [ + "//tensorflow/c:c_api", + "//tensorflow/core:lib", + "//util/python:python_headers", + ], +) + cc_library( name = "kernel_registry", srcs = ["util/kernel_registry.cc"], @@ -413,6 +425,7 @@ tf_cc_shared_object( "-lm", ], "//tensorflow:darwin": [], + "//tensorflow:windows": [], }), deps = [ "//tensorflow/core:framework_headers_lib", @@ -820,6 +833,7 @@ py_library( srcs = ["framework/tensor_shape.py"], srcs_version = "PY2AND3", deps = [ + ":dtypes", ":util", "//tensorflow/core:protos_all_py", ], @@ -960,7 +974,6 @@ py_test( srcs = ["framework/contrib_test.py"], main = "framework/contrib_test.py", srcs_version = "PY2AND3", - tags = ["no_windows"], deps = [ "//tensorflow:tensorflow_py", "//tensorflow/python:client_testlib", @@ -1330,7 +1343,6 @@ py_test( srcs = ["framework/dtypes_test.py"], main = "framework/dtypes_test.py", srcs_version = "PY2AND3", - tags = ["no_windows"], deps = [ ":framework_for_generated_wrappers", ":framework_test_lib", @@ -1603,7 +1615,10 @@ py_library( py_library( name = "array_ops", - srcs = ["ops/array_ops.py"], + srcs = [ + "ops/array_ops.py", + "ops/inplace_ops.py", + ], srcs_version = "PY2AND3", deps = [ ":array_ops_gen", @@ -1706,7 +1721,6 @@ py_test( size = "small", srcs = ["ops/clip_ops_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], deps = [ ":client_testlib", ":clip_ops", @@ -1778,6 +1792,16 @@ py_library( ], ) +py_library( + name = "cudnn_rnn_grad", + srcs = ["ops/cudnn_rnn_grad.py"], + srcs_version = "PY2AND3", + deps = [ + ":framework_for_generated_wrappers", + "//tensorflow/python:cudnn_rnn_ops_gen", + ], +) + py_library( name = "data_flow_grad", srcs = ["ops/data_flow_grad.py"], @@ -1921,7 +1945,8 @@ py_library( ":array_ops", ":constant_op", ":dtypes", - ":linalg_ops", + ":linalg_ops_gen", + ":linalg_ops_impl", ":math_ops", ":nn_ops", ":random_ops", @@ -1972,7 
+1997,22 @@ py_library( ":array_ops", ":dtypes", ":framework_ops", + ":functional_ops", ":linalg_ops_gen", + ":linalg_ops_impl", + ":math_ops", + "//third_party/py/numpy", + ], +) + +py_library( + name = "linalg_ops_impl", + srcs = ["ops/linalg_ops_impl.py"], + srcs_version = "PY2AND3", + deps = [ + ":array_ops", + ":dtypes", + ":framework_ops", ":math_ops", "//third_party/py/numpy", ], @@ -2258,7 +2298,6 @@ py_library( ":clip_ops", ":framework_for_generated_wrappers", ":init_ops", - ":layers_base", ":math_ops", ":nn_ops", ":partitioned_variables", @@ -2452,6 +2491,7 @@ py_library( ":clip_ops", ":confusion_matrix", ":control_flow_ops", + ":cudnn_rnn_grad", ":data_flow_grad", ":data_flow_ops", ":framework_for_generated_wrappers", @@ -2535,6 +2575,30 @@ py_library( ], ) +py_library( + name = "summary_ops_v2", + srcs = ["ops/summary_ops_v2.py"], + srcs_version = "PY2AND3", + visibility = ["//tensorflow:internal"], + deps = [ + ":array_ops", + ":constant_op", + ":control_flow_ops", + ":dtypes", + ":framework_ops", + ":math_ops", + ":resource_variable_ops", + ":smart_cond", + ":summary_op_util", + ":summary_ops_gen", + ":training_util", + ":util", + "//tensorflow/core:protos_all_py", + "//tensorflow/python/eager:context", + "@six_archive//:six", + ], +) + py_library( name = "template", srcs = ["ops/template.py"], @@ -2754,7 +2818,7 @@ cuda_py_test( cuda_py_test( name = "image_ops_test", - size = "small", + size = "medium", srcs = ["ops/image_ops_test.py"], additional_deps = [ ":array_ops", @@ -2775,7 +2839,6 @@ cuda_py_test( ], data = ["//tensorflow/core:image_testdata"], shard_count = 5, - tags = ["no_windows"], ) cuda_py_test( @@ -2898,11 +2961,15 @@ py_library( name = "training", srcs = glob( ["training/**/*.py"], - exclude = ["**/*test*"], + exclude = [ + "**/*test*", + "training/training_util.py", # See :training_util + ], ), srcs_version = "PY2AND3", deps = [ ":array_ops", + ":array_ops_gen", ":checkpoint_ops_gen", ":client", ":control_flow_ops", @@ -2914,6 
+2981,7 @@ py_library( ":framework_ops", ":gradients", ":init_ops", + ":distribute", ":io_ops", ":io_ops_gen", ":layers_base", @@ -2932,14 +3000,18 @@ py_library( ":string_ops", ":summary", ":training_ops_gen", + ":training_util", ":util", ":variable_scope", ":variables", + "//third_party/py/numpy", + "@six_archive//:six", + "//tensorflow/core:protos_all_py", "//tensorflow/python/eager:backprop", "//tensorflow/python/eager:context", "//tensorflow/python/ops/losses", - "//third_party/py/numpy", - "@six_archive//:six", + # `layers` dependency only exists due to the use of a small utility. + "//tensorflow/python/keras:layers", ], ) @@ -2967,6 +3039,69 @@ py_test( ], ) +py_library( + name = "device_util", + srcs = ["training/device_util.py"], + srcs_version = "PY2AND3", + deps = [ + ":device", + ":framework_ops", + "//tensorflow/python/eager:context", + ], +) + +py_library( + name = "distribute", + srcs = ["training/distribute.py"], + srcs_version = "PY2AND3", + deps = [ + ":array_ops", + ":control_flow_ops", + ":device_util", + ":framework_ops", + ":platform", + ":resource_variable_ops", + ":state_ops", + ":util", + ":variable_scope", + "//tensorflow/python/data", + "//tensorflow/python/ops/losses", + ], +) + +py_test( + name = "checkpointable_utils_test", + srcs = ["training/checkpointable_utils_test.py"], + srcs_version = "PY2AND3", + tags = [ + "no_windows", # TODO: needs investigation on Windows + "notsan", # b/74395663 + ], + deps = [ + ":checkpointable", + ":constant_op", + ":control_flow_ops", + ":dtypes", + ":framework_ops", + ":framework_test_lib", + ":init_ops", + ":resource_variable_ops", + ":session", + ":state_ops", + ":template", + ":training", + ":training_util", + ":variable_scope", + "//tensorflow/python/eager:backprop", + "//tensorflow/python/eager:context", + "//tensorflow/python/eager:function", + "//tensorflow/python/eager:test", + "//tensorflow/python/keras:engine", + "//tensorflow/python/keras:layers", + "@six_archive//:six", + ], +) + py_test( 
name = "distribute_test", size = "small", @@ -2974,7 +3109,7 @@ py_test( srcs_version = "PY2AND3", deps = [ ":client_testlib", - ":training", + ":distribute", ":variable_scope", ], ) @@ -3313,6 +3448,7 @@ tf_py_wrap_cc( "grappler/model_analyzer.i", "grappler/tf_optimizer.i", "lib/core/bfloat16.i", + "lib/core/py_exception_registry.i", "lib/core/py_func.i", "lib/core/strings.i", "lib/io/file_io.i", @@ -3331,6 +3467,10 @@ tf_py_wrap_cc( "util/transform_graph.i", "util/util.i", ], + win_def_file = select({ + "//tensorflow:windows": ":pywrap_tensorflow_filtered_def_file", + "//conditions:default": None, + }), deps = [ ":bfloat16_lib", ":cost_analyzer_lib", @@ -3340,6 +3480,7 @@ tf_py_wrap_cc( ":kernel_registry", ":numpy_lib", ":safe_ptr", + ":py_exception_registry", ":py_func_lib", ":py_record_reader_lib", ":py_record_writer_lib", @@ -3350,6 +3491,7 @@ tf_py_wrap_cc( "//tensorflow/c:python_api", "//tensorflow/c:tf_status_helper", "//tensorflow/c/eager:c_api", + "//tensorflow/core/distributed_runtime/rpc:grpc_rpc_factory_registration", "//tensorflow/core/distributed_runtime/rpc:grpc_server_lib", "//tensorflow/core/distributed_runtime/rpc:grpc_session", "//tensorflow/core/grappler:grappler_item", @@ -3366,6 +3508,7 @@ tf_py_wrap_cc( "//tensorflow/core/profiler/internal:print_model_analysis", "//tensorflow/tools/graph_transforms:transform_graph_lib", "//tensorflow/python/eager:pywrap_tfe_lib", + "//tensorflow/python/eager:python_eager_op_gen", "//util/python:python_headers", ] + (tf_additional_lib_deps() + tf_additional_plugin_deps() + @@ -3374,6 +3517,65 @@ tf_py_wrap_cc( tf_additional_gdr_deps()), ) +# ** Targets for Windows build (start) ** +# We need the following targets to expose symbols from _pywrap_tensorflow.dll + +# Build a cc_binary from tf_custom_op_library_additional_deps_impl, +# it contains all object code from its dependencies. 
+tf_native_cc_binary( + name = "tf_custom_op_library_additional_deps.so", + linkshared = 1, + linkstatic = 1, + deps = tf_custom_op_library_additional_deps_impl(), +) + +# Get a DEF file generated by parsing all object files +# of tf_custom_op_library_additional_deps.so +filegroup( + name = "pywrap_tensorflow_def_file", + srcs = [":tf_custom_op_library_additional_deps.so"], + output_group = "def_file", +) + +# Filter the DEF file to reduce the number of symbols to 64K or less. +# Note that we also write the name of the pyd file into DEF file so that +# the dynamic libraries of custom ops can find it at runtime. +genrule( + name = "pywrap_tensorflow_filtered_def_file", + srcs = [":pywrap_tensorflow_def_file"], + outs = ["pywrap_tensorflow_filtered_def_file.def"], + cmd = select({ + "//tensorflow:windows": """ + $(location @local_config_def_file_filter//:def_file_filter) \\ + --input $(location :pywrap_tensorflow_def_file) \\ + --output $@ \\ + --target _pywrap_tensorflow_internal.pyd + """, + "//conditions:default": "touch $@", # Just a placeholder for Unix platforms + }), + tools = ["@local_config_def_file_filter//:def_file_filter"], +) + +# Get the import library of _pywrap_tensorflow_internal.dll +filegroup( + name = "pywrap_tensorflow_import_lib_file", + srcs = [":_pywrap_tensorflow_internal.so"], + output_group = "interface_library", +) + +# Create a cc_import rule for the import library of _pywrap_tensorflow_internal.dll +# so that custom ops' dynamic libraries can link against it. 
+cc_import( + name = "pywrap_tensorflow_import_lib", + interface_library = select({ + "//tensorflow:windows": ":pywrap_tensorflow_import_lib_file", + "//conditions:default": "not_exsiting_on_unix.lib", # Just a placeholder for Unix platforms + }), + system_provided = 1, +) + +# ** Targets for Windows build (end) ** + py_library( name = "lib", srcs = [ @@ -3707,6 +3909,7 @@ cuda_py_test( ":math_ops", "//tensorflow/core:protos_all_py", ], + tags = ["no_windows"], ) cuda_py_test( @@ -3746,7 +3949,6 @@ py_test( size = "small", srcs = ["lib/core/bfloat16_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], deps = [ ":client_testlib", ":lib", @@ -3939,6 +4141,7 @@ py_test( srcs = ["training/saver_large_partitioned_variable_test.py"], srcs_version = "PY2AND3", tags = [ + "no_windows", "noasan", # http://b/30782289 "notsan", # http://b/30782289 ], @@ -4054,7 +4257,6 @@ py_test( size = "small", srcs = ["training/checkpoint_ops_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], deps = [ ":checkpoint_ops_gen", ":client", @@ -4095,10 +4297,7 @@ py_test( size = "medium", srcs = ["training/monitored_session_test.py"], srcs_version = "PY2AND3", - tags = [ - "no_windows", - "notsan", # b/67945581 - ], + tags = ["notsan"], # b/67945581 deps = [ ":array_ops", ":client_testlib", @@ -4116,6 +4315,25 @@ py_test( ], ) +py_library( + name = "training_util", + srcs = ["training/training_util.py"], + srcs_version = "PY2AND3", + deps = [ + ":dtypes", + ":framework", + ":framework_ops", + ":init_ops", + ":platform", + ":resource_variable_ops", + ":state_ops", + ":util", + ":variable_scope", + ":variables", + "//tensorflow/python/eager:context", + ], +) + py_test( name = "training_util_test", size = "small", @@ -4126,13 +4344,14 @@ py_test( ":framework", ":platform", ":training", + ":training_util", ":variables", ], ) tf_py_test( name = "input_test", - size = "small", + size = "medium", srcs = ["training/input_test.py"], additional_deps = [ ":array_ops", @@ -4155,6 +4374,7 
@@ py_library( srcs_version = "PY2AND3", visibility = ["//visibility:public"], deps = [ + ":distribute", ":framework", ":framework_for_generated_wrappers", ":platform", @@ -4170,6 +4390,7 @@ py_library( srcs_version = "PY2AND3", visibility = ["//visibility:public"], deps = [ + ":client", ":constant_op", ":errors", ":framework", @@ -4182,6 +4403,7 @@ py_library( ":summary_op_util", ":summary_ops", ":summary_ops_gen", + ":summary_ops_v2", ":util", "//tensorflow/python/eager:context", "//third_party/py/numpy", @@ -4208,7 +4430,7 @@ py_tests( ":platform", ":platform_test", ":summary", - ":training", + ":summary_ops_v2", "//tensorflow/core:protos_all_py", ], ) @@ -4233,6 +4455,7 @@ py_library( ":variables", "//tensorflow/python/eager:context", "//tensorflow/python/estimator:util", + "//tensorflow/python/keras:engine", "//third_party/py/numpy", ], ) @@ -4269,6 +4492,7 @@ py_library( ":variables", "//tensorflow/python/eager:context", "//tensorflow/python/estimator:util", + "//tensorflow/python/keras:layers", "//third_party/py/numpy", "@six_archive//:six", ], @@ -4710,6 +4934,7 @@ py_test( ":client_testlib", ":framework_for_generated_wrappers", ":math_ops", + ":tf_item", ":tf_optimizer", "//tensorflow/core:protos_all_py", "//third_party/py/numpy", @@ -4770,6 +4995,29 @@ py_test( ], ) +cuda_py_test( + name = "constant_folding_test", + size = "medium", + srcs = [ + "grappler/constant_folding_test.py", + ], + additional_deps = [ + ":client_testlib", + ":framework_for_generated_wrappers", + ":array_ops", + ":control_flow_ops", + ":dtypes", + ":functional_ops", + ":math_ops", + ":ops", + "//third_party/py/numpy", + "//tensorflow/core:protos_all_py", + ], + tags = [ + "grappler", + ], +) + cuda_py_test( name = "layout_optimizer_test", size = "medium", diff --git a/tensorflow/python/__init__.py b/tensorflow/python/__init__.py index ab1d01a8351d63544b2c612ad228515d48975aca..13f8420a670fe64615037975139f3ee1f16820b6 100644 --- a/tensorflow/python/__init__.py +++ 
b/tensorflow/python/__init__.py @@ -149,6 +149,19 @@ from tensorflow.python.ops import tensor_array_ops from tensorflow.python.eager.context import executing_eagerly from tensorflow.python.framework.ops import enable_eager_execution +# Necessary for the symbols in this module to be taken into account by +# the namespace management system (API decorators). +from tensorflow.python.ops import rnn +from tensorflow.python.ops import rnn_cell + +# Required due to `rnn` and `rnn_cell` not being imported in `nn` directly +# (due to a circular dependency issue: rnn depends on layers). +nn.dynamic_rnn = rnn.dynamic_rnn +nn.static_rnn = rnn.static_rnn +nn.raw_rnn = rnn.raw_rnn +nn.bidirectional_dynamic_rnn = rnn.bidirectional_dynamic_rnn +nn.rnn_cell = rnn_cell + # Symbols whitelisted for export without documentation. # TODO(cwhipkey): review these and move to contrib, expose through # documentation, or remove. diff --git a/tensorflow/python/client/session.py b/tensorflow/python/client/session.py index 5c9ed9ccafdce9c4dd441c3b045091ca1d70bbdf..5507d011bb0746c84b868ca7efcc3e4f8d2e146a 100644 --- a/tensorflow/python/client/session.py +++ b/tensorflow/python/client/session.py @@ -27,7 +27,6 @@ import numpy as np from tensorflow.core.protobuf import config_pb2 from tensorflow.python import pywrap_tensorflow as tf_session -from tensorflow.python.framework import c_api_util from tensorflow.python.framework import device from tensorflow.python.framework import errors from tensorflow.python.framework import ops @@ -629,14 +628,12 @@ class BaseSession(SessionInterface): self._session = None opts = tf_session.TF_NewSessionOptions(target=self._target, config=config) try: - with errors.raise_exception_on_not_ok_status() as status: - if self._created_with_new_api: - # pylint: disable=protected-access - self._session = tf_session.TF_NewSession(self._graph._c_graph, opts, - status) - # pylint: enable=protected-access - else: - self._session = tf_session.TF_NewDeprecatedSession(opts, status) 
+ if self._created_with_new_api: + # pylint: disable=protected-access + self._session = tf_session.TF_NewSession(self._graph._c_graph, opts) + # pylint: enable=protected-access + else: + self._session = tf_session.TF_NewDeprecatedSession(opts) finally: tf_session.TF_DeleteSessionOptions(opts) @@ -663,22 +660,20 @@ class BaseSession(SessionInterface): Returns: A list of devices in the session. """ - with errors.raise_exception_on_not_ok_status() as status: - if self._created_with_new_api: - raw_device_list = tf_session.TF_SessionListDevices( - self._session, status) - else: - raw_device_list = tf_session.TF_DeprecatedSessionListDevices( - self._session, status) - device_list = [] - size = tf_session.TF_DeviceListCount(raw_device_list) - for i in range(size): - name = tf_session.TF_DeviceListName(raw_device_list, i, status) - device_type = tf_session.TF_DeviceListType(raw_device_list, i, status) - memory = tf_session.TF_DeviceListMemoryBytes(raw_device_list, i, status) - device_list.append(_DeviceAttributes(name, device_type, memory)) - tf_session.TF_DeleteDeviceList(raw_device_list) - return device_list + if self._created_with_new_api: + raw_device_list = tf_session.TF_SessionListDevices(self._session) + else: + raw_device_list = tf_session.TF_DeprecatedSessionListDevices( + self._session) + device_list = [] + size = tf_session.TF_DeviceListCount(raw_device_list) + for i in range(size): + name = tf_session.TF_DeviceListName(raw_device_list, i) + device_type = tf_session.TF_DeviceListType(raw_device_list, i) + memory = tf_session.TF_DeviceListMemoryBytes(raw_device_list, i) + device_list.append(_DeviceAttributes(name, device_type, memory)) + tf_session.TF_DeleteDeviceList(raw_device_list) + return device_list def close(self): """Closes this session. 
@@ -692,15 +687,13 @@ class BaseSession(SessionInterface): if self._created_with_new_api: if self._session and not self._closed: self._closed = True - with errors.raise_exception_on_not_ok_status() as status: - tf_session.TF_CloseSession(self._session, status) + tf_session.TF_CloseSession(self._session) else: with self._extend_lock: if self._opened and not self._closed: self._closed = True - with errors.raise_exception_on_not_ok_status() as status: - tf_session.TF_CloseDeprecatedSession(self._session, status) + tf_session.TF_CloseDeprecatedSession(self._session) def __del__(self): # cleanly ignore all exceptions @@ -710,11 +703,10 @@ class BaseSession(SessionInterface): pass if self._session is not None: try: - status = c_api_util.ScopedTFStatus() if self._created_with_new_api: - tf_session.TF_DeleteSession(self._session, status) + tf_session.TF_DeleteSession(self._session) else: - tf_session.TF_DeleteDeprecatedSession(self._session, status) + tf_session.TF_DeleteDeprecatedSession(self._session) except AttributeError: # At shutdown, `c_api_util` or `tf_session` may have been garbage # collected, causing the above method calls to fail. In this case, @@ -1031,11 +1023,11 @@ class BaseSession(SessionInterface): # Set up a graph with feeds and fetches for partial run. 
def _setup_fn(session, feed_list, fetch_list, target_list): self._extend_graph() - with errors.raise_exception_on_not_ok_status() as status: - if self._created_with_new_api: - return tf_session.TF_SessionPRunSetup_wrapper( - session, feed_list, fetch_list, target_list, status) - else: + if self._created_with_new_api: + return tf_session.TF_SessionPRunSetup_wrapper( + session, feed_list, fetch_list, target_list) + else: + with errors.raise_exception_on_not_ok_status() as status: return tf_session.TF_PRunSetup(session, feed_list, fetch_list, target_list, status) @@ -1345,8 +1337,7 @@ class BaseSession(SessionInterface): def _extend_graph(self): if self._created_with_new_api: with self._graph._lock: # pylint: disable=protected-access - with errors.raise_exception_on_not_ok_status() as status: - tf_session.ExtendSession(self._session, status) + tf_session.ExtendSession(self._session) else: # Ensure any changes to the graph are reflected in the runtime. with self._extend_lock: @@ -1412,22 +1403,22 @@ class BaseSession(SessionInterface): def _call_tf_sessionrun(self, options, feed_dict, fetch_list, target_list, run_metadata): - with errors.raise_exception_on_not_ok_status() as status: - if self._created_with_new_api: - return tf_session.TF_SessionRun_wrapper( - self._session, options, feed_dict, fetch_list, target_list, - run_metadata, status) - else: + if self._created_with_new_api: + return tf_session.TF_SessionRun_wrapper( + self._session, options, feed_dict, fetch_list, target_list, + run_metadata) + else: + with errors.raise_exception_on_not_ok_status() as status: return tf_session.TF_Run( self._session, options, feed_dict, fetch_list, target_list, status, run_metadata) def _call_tf_sessionprun(self, handle, feed_dict, fetch_list): - with errors.raise_exception_on_not_ok_status() as status: - if self._created_with_new_api: - return tf_session.TF_SessionPRun_wrapper( - self._session, handle, feed_dict, fetch_list, status) - else: + if self._created_with_new_api: + 
return tf_session.TF_SessionPRun_wrapper( + self._session, handle, feed_dict, fetch_list) + else: + with errors.raise_exception_on_not_ok_status() as status: return tf_session.TF_PRun( self._session, handle, feed_dict, fetch_list, status) @@ -1463,7 +1454,10 @@ class BaseSession(SessionInterface): self._session._session, self._handle, args, status, None) def __del__(self): - if self._handle is not None: + # NOTE(mrry): It is possible that `self._session.__del__()` could be + # called before this destructor, in which case `self._session._session` + # will be `None`. + if self._handle is not None and self._session._session is not None: with errors.raise_exception_on_not_ok_status() as status: if self._session._created_with_new_api: tf_session.TF_SessionReleaseCallable( diff --git a/tensorflow/python/client/session_list_devices_test.py b/tensorflow/python/client/session_list_devices_test.py index 5a7413c12e9db92cb85d54a69602753ff6476425..38a3acb2dc304968915e84c8054621e441294e61 100644 --- a/tensorflow/python/client/session_list_devices_test.py +++ b/tensorflow/python/client/session_list_devices_test.py @@ -23,7 +23,6 @@ from tensorflow.core.protobuf import cluster_pb2 from tensorflow.core.protobuf import config_pb2 from tensorflow.python import pywrap_tensorflow as tf_session from tensorflow.python.client import session -from tensorflow.python.framework import c_api_util from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import test_util @@ -42,21 +41,13 @@ class SessionListDevicesTestMethods(object): def testInvalidDeviceNumber(self): opts = tf_session.TF_NewSessionOptions() - with errors.raise_exception_on_not_ok_status() as status: - c_session = tf_session.TF_NewSession( - ops.get_default_graph()._c_graph, opts, status) - raw_device_list = tf_session.TF_SessionListDevices( - c_session, status) + c_session = tf_session.TF_NewSession(ops.get_default_graph()._c_graph, opts) + raw_device_list = 
tf_session.TF_SessionListDevices(c_session) size = tf_session.TF_DeviceListCount(raw_device_list) - # Test that invalid device numbers return -1 rather than a Swig-wrapped - # pointer. - status_no_exception = c_api_util.ScopedTFStatus() - memory = tf_session.TF_DeviceListMemoryBytes( - raw_device_list, size, status_no_exception) - self.assertEqual(memory, -1) + with self.assertRaises(errors.InvalidArgumentError): + tf_session.TF_DeviceListMemoryBytes(raw_device_list, size) tf_session.TF_DeleteDeviceList(raw_device_list) - with errors.raise_exception_on_not_ok_status() as status: - tf_session.TF_CloseSession(c_session, status) + tf_session.TF_CloseSession(c_session) def testListDevicesGrpcSession(self): server = server_lib.Server.create_local_server() diff --git a/tensorflow/python/client/tf_session.i b/tensorflow/python/client/tf_session.i index 77ce9195eef06cb29a24834e0e777950369c0053..b82182d5d3690e4601b4fe8423cef972139f2283 100644 --- a/tensorflow/python/client/tf_session.i +++ b/tensorflow/python/client/tf_session.i @@ -18,11 +18,11 @@ limitations under the License. %{ #include "tensorflow/c/python_api.h" -#include "tensorflow/python/client/tf_session_helper.h" #include "tensorflow/core/framework/session_state.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/strings/stringprintf.h" #include "tensorflow/core/public/version.h" +#include "tensorflow/python/client/tf_session_helper.h" // Helper function to convert a Python list of Tensors to a C++ vector of // TF_Outputs. 
@@ -72,7 +72,7 @@ void PyInt64ListToVector(PyObject* py_int_seq, std::vector* vec) { int size = PySequence_Fast_GET_SIZE(py_int_seq); for (int i = 0; i < size; ++i) { PyObject* item = PySequence_Fast_GET_ITEM(py_int_seq, i); - vec->push_back(PyInt_AsLong(item)); + vec->push_back(PyLong_AsLongLong(item)); } } @@ -157,6 +157,25 @@ tensorflow::ImportNumpy(); } } +// We use TF_OperationGetControlOutputs_wrapper instead of +// TF_OperationGetControlOutputs +%ignore TF_OperationGetControlOutputs; +%unignore TF_OperationGetControlOutputs_wrapper; +// See comment for "%noexception TF_SessionRun_wrapper;" +%noexception TF_OperationGetControlOutputs_wrapper; + +// Build a Python list of TF_Operation* and return it. +%typemap(out) std::vector tensorflow::TF_OperationGetControlOutputs_wrapper { + $result = PyList_New($1.size()); + if (!$result) { + SWIG_exception_fail(SWIG_MemoryError, "$symname: couldn't create list"); + } + + for (size_t i = 0; i < $1.size(); ++i) { + PyList_SET_ITEM($result, i, CreateWrappedTFOperation($1[i])); + } +} + %ignore TF_OperationOutputConsumers; %unignore TF_OperationOutputConsumers_wrapper; // See comment for "%noexception TF_SessionRun_wrapper;" @@ -438,6 +457,11 @@ TF_ImportGraphDefResultsMissingUnusedInputMappings_wrapper{ $1 = PyLong_AsLongLong($input); } +// Override default py3 behavior of attempting to encode into Unicode. 
+%typemap(out) std::string tensorflow::ResourceHandleShapeAndType { + $result = PyBytes_FromStringAndSize($1.data(), $1.size()); +} + // TODO(skyewm): SWIG emits a warning for the const char* in TF_WhileParams, // skip for now %ignore TF_WhileParams; @@ -499,9 +523,8 @@ TF_ImportGraphDefResultsMissingUnusedInputMappings_wrapper{ _TF_SetTarget(opts, target) if config is not None: from tensorflow.python.framework import errors - with errors.raise_exception_on_not_ok_status() as status: - config_str = config.SerializeToString() - _TF_SetConfig(opts, config_str, status) + config_str = config.SerializeToString() + _TF_SetConfig(opts, config_str) return opts %} diff --git a/tensorflow/python/client/tf_session_helper.cc b/tensorflow/python/client/tf_session_helper.cc index ca57abd712afc5735d827a4745b5e25cb076e928..b6481e7e29e4057f08e1c78b310bf5581afc5411 100644 --- a/tensorflow/python/client/tf_session_helper.cc +++ b/tensorflow/python/client/tf_session_helper.cc @@ -550,6 +550,15 @@ std::vector TF_OperationGetControlInputs_wrapper( return control_inputs; } +std::vector TF_OperationGetControlOutputs_wrapper( + TF_Operation* oper) { + std::vector control_outputs( + TF_OperationNumControlOutputs(oper)); + TF_OperationGetControlOutputs(oper, control_outputs.data(), + control_outputs.size()); + return control_outputs; +} + std::vector TF_OperationOutputConsumers_wrapper( TF_Output oper_out) { int num_consumers = TF_OperationOutputNumConsumers(oper_out); @@ -620,15 +629,6 @@ void TF_GraphSetTensorShape_wrapper(TF_Graph* graph, TF_Output output, TF_GraphSetTensorShape(graph, output, dims.data(), dims.size(), status); } -std::vector TF_GraphGetTensorShape_wrapper(TF_Graph* graph, - TF_Output output, - int num_dims, - TF_Status* status) { - std::vector dims(num_dims); - TF_GraphGetTensorShape(graph, output, dims.data(), num_dims, status); - return dims; -} - std::vector TF_ImportGraphDefResultsMissingUnusedInputMappings_wrapper( TF_ImportGraphDefResults* results) { int 
num_missing_unused_input_mappings; diff --git a/tensorflow/python/client/tf_session_helper.h b/tensorflow/python/client/tf_session_helper.h index 603d03e3152bbd2f04ab0e431fbc8d4689553985..cfd27c2bee990ab4e2829652a532761e674ed8e0 100644 --- a/tensorflow/python/client/tf_session_helper.h +++ b/tensorflow/python/client/tf_session_helper.h @@ -136,8 +136,7 @@ string EqualAttrValueWrapper(const string& actual, const string& expected); // // If shape is unknown, sets unknown_shape to true. tensorflow::gtl::InlinedVector TF_GraphGetTensorShapeHelper( - TF_Graph* graph, TF_Output output, TF_Status* out_status, - bool* unknown_shape); + TF_Graph* graph, TF_Output output, TF_Status* status, bool* unknown_shape); // Runs the graph associated with the session starting with the supplied inputs. // On success, `py_outputs` is populated with a numpy ndarray for each output @@ -149,7 +148,7 @@ void TF_SessionRun_wrapper(TF_Session* session, const TF_Buffer* run_options, const std::vector& input_ndarrays, const std::vector& outputs, const std::vector& targets, - TF_Buffer* run_metadata, TF_Status* out_status, + TF_Buffer* run_metadata, TF_Status* status, std::vector* py_outputs); // Set up the graph with the intended feeds (inputs) and fetches (output) for @@ -165,8 +164,7 @@ void TF_SessionPRunSetup_wrapper(TF_Session* session, const std::vector& inputs, const std::vector& outputs, const std::vector& targets, - const char** out_handle, - TF_Status* out_status); + const char** out_handle, TF_Status* status); // Continue to run the graph with additional feeds and fetches. The // execution state is uniquely identified by the handle. @@ -182,7 +180,7 @@ void TF_SessionPRun_wrapper(TF_Session* session, const char* handle, const std::vector& inputs, const std::vector& input_ndarrays, const std::vector& outputs, - TF_Status* out_status, + TF_Status* status, std::vector* py_outputs); // Retrieves the inputs of this operation. 
@@ -192,6 +190,10 @@ std::vector GetOperationInputs(TF_Operation* oper); std::vector TF_OperationGetControlInputs_wrapper( TF_Operation* oper); +// Retrieves the control outputs of this operation. +std::vector TF_OperationGetControlOutputs_wrapper( + TF_Operation* oper); + // Retrieves the op names of the consumers of `oper_out`. The returned strings // have the lifetime of the underlying TF_Graph. std::vector TF_OperationOutputConsumers_wrapper( @@ -204,7 +206,7 @@ TF_Function* TF_GraphToFunction_wrapper( const std::vector* opers, const std::vector& inputs, const std::vector& outputs, const NameVector& output_names, const TF_FunctionOptions* opts, - const char* description, TF_Status* out_status); + const char* description, TF_Status* status); // Set the shapes and types for the output's handle. // @@ -227,13 +229,6 @@ void TF_GraphSetTensorShape_wrapper(TF_Graph* graph, TF_Output output, const std::vector& dims, bool unknown_shape, TF_Status* status); -// Return the shape of output. `num_dims` should be the output of -// TF_GraphGetTensorNumDims. If `num_dims = -1`, this should not be called. -std::vector TF_GraphGetTensorShape_wrapper(TF_Graph* graph, - TF_Output output, - int num_dims, - TF_Status* status); - // Returns the string representations of the missing unused input mappings. 
std::vector TF_ImportGraphDefResultsMissingUnusedInputMappings_wrapper( TF_ImportGraphDefResults* results); diff --git a/tensorflow/python/data/kernel_tests/iterator_ops_test.py b/tensorflow/python/data/kernel_tests/iterator_ops_test.py index 4a14a915bdb33f1ac6e8fc1839b32bc81fa8de05..0af282a02475384cb2d0f8e273324d6211e1b50d 100644 --- a/tensorflow/python/data/kernel_tests/iterator_ops_test.py +++ b/tensorflow/python/data/kernel_tests/iterator_ops_test.py @@ -28,6 +28,7 @@ from tensorflow.python.client import session from tensorflow.python.data.ops import dataset_ops from tensorflow.python.data.ops import iterator_ops from tensorflow.python.data.ops import readers +from tensorflow.python.eager import context from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors @@ -717,6 +718,14 @@ class IteratorTest(test.TestCase): self.assertTrue( iterator_ops.GET_NEXT_CALL_WARNING_MESSAGE in str(warning.message)) + def testEagerIteratorAsync(self): + with context.eager_mode(), context.execution_mode(context.ASYNC): + val = 0 + dataset = dataset_ops.Dataset.range(10) + for foo in dataset: + self.assertEqual(val, foo.numpy()) + val += 1 + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/data/kernel_tests/list_files_dataset_op_test.py b/tensorflow/python/data/kernel_tests/list_files_dataset_op_test.py index 6442eb9ff554e61829796fb904342072d1846a32..f7d7d085c974fa217ed30708723cb1b887034ca0 100644 --- a/tensorflow/python/data/kernel_tests/list_files_dataset_op_test.py +++ b/tensorflow/python/data/kernel_tests/list_files_dataset_op_test.py @@ -69,6 +69,54 @@ class ListFilesDatasetOpTest(test.TestCase): with self.assertRaises(errors.OutOfRangeError): sess.run(itr.get_next()) + def testSimpleDirectoryNotShuffled(self): + filenames = ['b', 'c', 'a'] + self._touchTempFiles(filenames) + + dataset = dataset_ops.Dataset.list_files( + path.join(self.tmp_dir, '*'), shuffle=False) + 
with self.test_session() as sess: + itr = dataset.make_one_shot_iterator() + next_element = itr.get_next() + + for filename in sorted(filenames): + self.assertEqual(compat.as_bytes(path.join(self.tmp_dir, filename)), + sess.run(next_element)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(itr.get_next()) + + def testFixedSeedResultsInRepeatableOrder(self): + filenames = ['a', 'b', 'c'] + self._touchTempFiles(filenames) + + dataset = dataset_ops.Dataset.list_files( + path.join(self.tmp_dir, '*'), shuffle=True, seed=37) + with self.test_session() as sess: + itr = dataset.make_initializable_iterator() + next_element = itr.get_next() + + full_filenames = [compat.as_bytes(path.join(self.tmp_dir, filename)) + for filename in filenames] + + all_produced_filenames = [] + for _ in range(3): + produced_filenames = [] + sess.run(itr.initializer) + try: + while True: + produced_filenames.append(sess.run(next_element)) + except errors.OutOfRangeError: + pass + all_produced_filenames.append(produced_filenames) + + # Each run should produce the same set of filenames, which may be + # different from the order of `full_filenames`. + self.assertItemsEqual(full_filenames, all_produced_filenames[0]) + # However, the different runs should produce filenames in the same order + # as each other. 
+ self.assertEqual(all_produced_filenames[0], all_produced_filenames[1]) + self.assertEqual(all_produced_filenames[0], all_produced_filenames[2]) + def testEmptyDirectoryInitializer(self): filename_placeholder = array_ops.placeholder(dtypes.string, shape=[]) dataset = dataset_ops.Dataset.list_files(filename_placeholder) diff --git a/tensorflow/python/data/kernel_tests/map_dataset_op_test.py b/tensorflow/python/data/kernel_tests/map_dataset_op_test.py index 0791c614fa88700fdf2d0d673e168fc9784731a5..1ad0b9de5e76e3edd66303ab4666108f43a27428 100644 --- a/tensorflow/python/data/kernel_tests/map_dataset_op_test.py +++ b/tensorflow/python/data/kernel_tests/map_dataset_op_test.py @@ -624,6 +624,20 @@ class MapDatasetTest(test.TestCase): with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) + def testConstantOutput(self): + iterator = ( + dataset_ops.Dataset.range(10).map(lambda x: [x, "hello", 10]) + .make_initializable_iterator()) + init_op = iterator.initializer + get_next = iterator.get_next() + + with self.test_session() as sess: + sess.run(init_op) + for i in range(10): + self.assertEqual((i, b"hello", 10), sess.run(get_next)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + class MapDatasetBenchmark(test.Benchmark): diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py index 8729e085a32f6df87ba9feb515ccfac6a105cfef..bd9686f692102df4ef64f0e81c33d2dec1b2222c 100644 --- a/tensorflow/python/data/ops/dataset_ops.py +++ b/tensorflow/python/data/ops/dataset_ops.py @@ -121,7 +121,7 @@ class Dataset(object): An `Iterator` over the elements of this dataset. Raises: - RuntimeError: If eager execution is enabled. + RuntimeError: If eager execution is not enabled. 
""" if context.executing_eagerly(): return iterator_ops.EagerIterator(self) @@ -571,9 +571,13 @@ class Dataset(object): return PrefetchDataset(self, buffer_size) @staticmethod - def list_files(file_pattern, shuffle=None): + def list_files(file_pattern, shuffle=None, seed=None): """A dataset of all files matching a pattern. + NOTE: The default behavior of this method is to return filenames in + a non-deterministic random shuffled order. Pass a `seed` or `shuffle=False` + to get results in a deterministic order. + Example: If we had the following files on our filesystem: - /path/to/dir/a.txt @@ -584,20 +588,18 @@ class Dataset(object): - /path/to/dir/b.py - /path/to/dir/c.py - NOTE: The order of the file names returned can be non-deterministic even - when `shuffle` is `False`. - Args: file_pattern: A string or scalar string `tf.Tensor`, representing the filename pattern that will be matched. shuffle: (Optional.) If `True`, the file names will be shuffled randomly. Defaults to `True`. + seed: (Optional.) A `tf.int64` scalar `tf.Tensor`, representing the + random seed that will be used to create the distribution. See + @{tf.set_random_seed} for behavior. Returns: Dataset: A `Dataset` of strings corresponding to file names. """ - # TODO(b/73959787): Add a `seed` argument and make the `shuffle=False` - # behavior deterministic (e.g. by sorting the filenames). if shuffle is None: shuffle = True matching_files = gen_io_ops.matching_files(file_pattern) @@ -607,7 +609,7 @@ class Dataset(object): # list of files might be empty. buffer_size = math_ops.maximum( array_ops.shape(matching_files, out_type=dtypes.int64)[0], 1) - dataset = dataset.shuffle(buffer_size) + dataset = dataset.shuffle(buffer_size, seed=seed) return dataset def repeat(self, count=None): @@ -1155,10 +1157,12 @@ class _GeneratorDataset(Dataset): if isinstance(ret, list): ret = tuple(ret) - # Convert any `SparseTensorValue`s to `SparseTensor`s. 
+ # Convert any `SparseTensorValue`s to `SparseTensor`s and all other + # values to tensors. ret = nest.pack_sequence_as(ret, [ sparse_tensor_lib.SparseTensor.from_value(t) - if sparse_tensor_lib.is_sparse(t) else t for t in nest.flatten(ret) + if sparse_tensor_lib.is_sparse(t) else ops.convert_to_tensor(t) + for t in nest.flatten(ret) ]) self._state_classes = sparse.get_classes(ret) @@ -1167,11 +1171,9 @@ class _GeneratorDataset(Dataset): self._state_types = nest.pack_sequence_as( ret, [t.dtype for t in nest.flatten(ret)]) - # Serialize any sparse tensors and convert result to tensors. - ret = nest.pack_sequence_as(ret, [ - ops.convert_to_tensor(t) - for t in nest.flatten(sparse.serialize_sparse_tensors(ret)) - ]) + # Serialize any sparse tensors. + ret = nest.pack_sequence_as( + ret, [t for t in nest.flatten(sparse.serialize_sparse_tensors(ret))]) return nest.flatten(ret) self._init_func = tf_init_func @@ -1214,10 +1216,12 @@ class _GeneratorDataset(Dataset): if isinstance(ret, list): ret = tuple(ret) - # Convert any `SparseTensorValue`s to `SparseTensor`s. + # Convert any `SparseTensorValue`s to `SparseTensor`s and all other + # values to tensors. ret = nest.pack_sequence_as(ret, [ sparse_tensor_lib.SparseTensor.from_value(t) - if sparse_tensor_lib.is_sparse(t) else t for t in nest.flatten(ret) + if sparse_tensor_lib.is_sparse(t) else ops.convert_to_tensor(t) + for t in nest.flatten(ret) ]) self._output_classes = sparse.get_classes(ret) @@ -1226,11 +1230,9 @@ class _GeneratorDataset(Dataset): self._output_types = nest.pack_sequence_as( ret, [t.dtype for t in nest.flatten(ret)]) - # Serialize any sparse tensors and convert result to tensors. - ret = nest.pack_sequence_as(ret, [ - ops.convert_to_tensor(t) - for t in nest.flatten(sparse.serialize_sparse_tensors(ret)) - ]) + # Serialize any sparse tensors. 
+ ret = nest.pack_sequence_as( + ret, [t for t in nest.flatten(sparse.serialize_sparse_tensors(ret))]) return nest.flatten(ret) self._next_func = tf_next_func @@ -1816,10 +1818,12 @@ class MapDataset(Dataset): if isinstance(ret, list): ret = tuple(ret) - # Convert any `SparseTensorValue`s to `SparseTensor`s. + # Convert any `SparseTensorValue`s to `SparseTensor`s and all other + # values to tensors. ret = nest.pack_sequence_as(ret, [ sparse_tensor_lib.SparseTensor.from_value(t) - if sparse_tensor_lib.is_sparse(t) else t for t in nest.flatten(ret) + if sparse_tensor_lib.is_sparse(t) else ops.convert_to_tensor(t) + for t in nest.flatten(ret) ]) self._output_classes = sparse.get_classes(ret) @@ -1828,11 +1832,9 @@ class MapDataset(Dataset): self._output_types = nest.pack_sequence_as( ret, [t.dtype for t in nest.flatten(ret)]) - # Serialize any sparse tensors and convert result to tensors. - ret = nest.pack_sequence_as(ret, [ - ops.convert_to_tensor(t) - for t in nest.flatten(sparse.serialize_sparse_tensors(ret)) - ]) + # Serialize any sparse tensors. + ret = nest.pack_sequence_as( + ret, [t for t in nest.flatten(sparse.serialize_sparse_tensors(ret))]) return nest.flatten(ret) self._map_func = tf_map_func diff --git a/tensorflow/python/data/ops/iterator_ops.py b/tensorflow/python/data/ops/iterator_ops.py index d79b9d6011b6ebd00a47d572165cdbba8a31bd32..0c76afd29d4626be9120c059d60218daab5cc0ac 100644 --- a/tensorflow/python/data/ops/iterator_ops.py +++ b/tensorflow/python/data/ops/iterator_ops.py @@ -488,23 +488,27 @@ class EagerIterator(object): def _next_internal(self): """Returns a nested structure of `tf.Tensor`s containing the next element. """ - with ops.device(self._device): - # TODO(ashankar): Consider removing this ops.device() contextmanager - # and instead mimic ops placement in graphs: Operations on resource - # handles execute on the same device as where the resource is placed. 
- # NOTE(mrry): Here we use the "_sync" variant of `iterator_get_next` - # because in eager mode this code will run synchronously on the calling - # thread. Therefore we do not need to make a defensive context switch - # to a background thread, and can achieve a small constant performance - # boost by invoking the iterator synchronously. - ret = gen_dataset_ops.iterator_get_next_sync( - self._resource, - output_types=self._flat_output_types, - output_shapes=self._flat_output_shapes) - - return sparse.deserialize_sparse_tensors( - nest.pack_sequence_as(self._output_types, ret), self._output_types, - self._output_shapes, self._output_classes) + # This runs in sync mode as iterators use an error status to communicate + # that there is no more data to iterate over. + # TODO(b/77291417): Fix + with context.execution_mode(context.SYNC): + with ops.device(self._device): + # TODO(ashankar): Consider removing this ops.device() contextmanager + # and instead mimic ops placement in graphs: Operations on resource + # handles execute on the same device as where the resource is placed. + # NOTE(mrry): Here we use the "_sync" variant of `iterator_get_next` + # because in eager mode this code will run synchronously on the calling + # thread. Therefore we do not need to make a defensive context switch + # to a background thread, and can achieve a small constant performance + # boost by invoking the iterator synchronously. + ret = gen_dataset_ops.iterator_get_next_sync( + self._resource, + output_types=self._flat_output_types, + output_shapes=self._flat_output_shapes) + + return sparse.deserialize_sparse_tensors( + nest.pack_sequence_as(self._output_types, ret), self._output_types, + self._output_shapes, self._output_classes) def next(self): """Returns a nested structure of `tf.Tensor`s containing the next element. 
diff --git a/tensorflow/python/debug/BUILD b/tensorflow/python/debug/BUILD index 4195586313a9b0b1f49e957657160f4e7b1d4b63..250b4b1b6ab983c8073b5de3d2d29d02a50c71a8 100644 --- a/tensorflow/python/debug/BUILD +++ b/tensorflow/python/debug/BUILD @@ -913,6 +913,7 @@ cuda_py_test( "//tensorflow/python:util", "//tensorflow/python:variables", ], + tags = ["no_windows"], # TODO: needs investigation on Windows ) py_test( @@ -920,6 +921,7 @@ py_test( size = "small", srcs = ["cli/profile_analyzer_cli_test.py"], srcs_version = "PY2AND3", + tags = ["no_windows"], deps = [ ":debugger_cli_common", ":profile_analyzer_cli", diff --git a/tensorflow/python/debug/cli/readline_ui.py b/tensorflow/python/debug/cli/readline_ui.py index 151638789f7eb509a39a6f066e9ad9a0c92c1fe3..3296e45d07e9f385fdcda3f34804547b049dcf02 100644 --- a/tensorflow/python/debug/cli/readline_ui.py +++ b/tensorflow/python/debug/cli/readline_ui.py @@ -19,6 +19,8 @@ from __future__ import print_function import readline +import six + from tensorflow.python.debug.cli import base_ui from tensorflow.python.debug.cli import debugger_cli_common @@ -39,11 +41,7 @@ class ReadlineUI(base_ui.BaseUI): readline.set_completer(self._readline_complete) readline.parse_and_bind("tab: complete") - # For Python 2-3 compatibility. - try: - self._input = raw_input - except NameError: - self._input = input + self._input = six.moves.input def _readline_complete(self, text, state): context, prefix, except_last_word = self._analyze_tab_complete_input(text) diff --git a/tensorflow/python/debug/wrappers/grpc_wrapper.py b/tensorflow/python/debug/wrappers/grpc_wrapper.py index fb9494f57636e46e54ef230cf4803dbb6ccad0c7..1f9c8fa5a96b4d6826fae0870608e0e737c7cd88 100644 --- a/tensorflow/python/debug/wrappers/grpc_wrapper.py +++ b/tensorflow/python/debug/wrappers/grpc_wrapper.py @@ -21,6 +21,8 @@ import signal import sys import traceback +import six + # Google-internal import(s). 
from tensorflow.python.debug.lib import common from tensorflow.python.debug.wrappers import framework @@ -140,14 +142,9 @@ class GrpcDebugWrapperSession(framework.NonInteractiveDebugWrapperSession): def _signal_handler(unused_signal, unused_frame): - try: - input_func = raw_input - except NameError: - # Python 3 does not have raw_input. - input_func = input - while True: - response = input_func("\nSIGINT received. Quit program? (Y/n): ").strip() + response = six.moves.input( + "\nSIGINT received. Quit program? (Y/n): ").strip() if response in ("", "Y", "y"): sys.exit(0) elif response in ("N", "n"): diff --git a/tensorflow/python/debug/wrappers/hooks.py b/tensorflow/python/debug/wrappers/hooks.py index 6705cd31e291d2eab7aa8179e9b2b829f8970c18..5e4604fda4d7249a1244f12a533e1cb09e16782f 100644 --- a/tensorflow/python/debug/wrappers/hooks.py +++ b/tensorflow/python/debug/wrappers/hooks.py @@ -31,15 +31,18 @@ from tensorflow.python.training import session_run_hook class LocalCLIDebugHook(session_run_hook.SessionRunHook): """Command-line-interface debugger hook. - Can be used as a monitor/hook for `tf.train.MonitoredSession`s and - `tf.contrib.learn`'s `Estimator`s and `Experiment`s. + Can be used as a hook for `tf.train.MonitoredSession`s and + `tf.estimator.Estimator`s. Provides a substitute for + `tfdbg.LocalCLIDebugWrapperSession` in cases where the session is not directly + available. """ def __init__(self, ui_type="curses", dump_root=None, thread_name_filter=None): """Create a local debugger command-line interface (CLI) hook. Args: - ui_type: (str) user-interface type. + ui_type: (`str`) requested user-interface type. Currently supported: + (curses | readline). dump_root: (`str`) optional path to the dump root directory. Must be a directory that does not exist or an empty directory. 
If the directory does not exist, it will be created by the debugger core during debug @@ -153,8 +156,8 @@ class LocalCLIDebugHook(session_run_hook.SessionRunHook): class DumpingDebugHook(session_run_hook.SessionRunHook): """A debugger hook that dumps debug data to filesystem. - Can be used as a monitor/hook for `tf.train.MonitoredSession`s and - `tf.contrib.learn`'s `Estimator`s and `Experiment`s. + Can be used as a hook for `tf.train.MonitoredSession`s and + `tf.estimator.Estimator`s. """ def __init__(self, @@ -229,8 +232,8 @@ class GrpcDebugHook(session_run_hook.SessionRunHook): When the arguments of debug_utils.watch_graph changes, strongly consider changing arguments here too so that features are available to tflearn users. - Can be used as a monitor/hook for `tf.train.MonitoredSession`s and - `tf.contrib.learn`'s `Estimator`s and `Experiment`s. + Can be used as a hook for `tf.train.MonitoredSession`s and + `tf.estimator.Estimator`s. """ def __init__(self, diff --git a/tensorflow/python/eager/BUILD b/tensorflow/python/eager/BUILD index 8c0d3feeceab1bf29c1dabc668176a6ef7806421..b3268c9047e264b8264ae37b404b51be6a88962f 100644 --- a/tensorflow/python/eager/BUILD +++ b/tensorflow/python/eager/BUILD @@ -142,6 +142,8 @@ cuda_py_test( ":tape", ":test", "//tensorflow/python:clip_ops", + "//tensorflow/python:init_ops", + "//tensorflow/python:layers", "//tensorflow/python:math_ops", "//tensorflow/python:resource_variable_ops", ], diff --git a/tensorflow/python/eager/backprop.py b/tensorflow/python/eager/backprop.py index 209b0126214a945194a36977b6c9e29b69cb00e2..92774d4d50e00c85599ceaef1cc99bb062bd3ce3 100644 --- a/tensorflow/python/eager/backprop.py +++ b/tensorflow/python/eager/backprop.py @@ -31,7 +31,6 @@ from tensorflow.python.eager import imperative_grad from tensorflow.python.eager import tape from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes -from tensorflow.python.framework import errors from 
tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import array_ops @@ -50,12 +49,10 @@ def op_attr_type(op_type, attr_name): try: return _op_attr_type_cache[(op_type, attr_name)] except KeyError: - with errors.raise_exception_on_not_ok_status() as status: - h = context.context()._handle # pylint: disable=protected-access - attr_type = pywrap_tensorflow.TFE_OpNameGetAttrType( - h, op_type, attr_name, status) - _op_attr_type_cache[(op_type, attr_name)] = attr_type - return attr_type + h = context.context()._handle # pylint: disable=protected-access + attr_type = pywrap_tensorflow.TFE_OpNameGetAttrType(h, op_type, attr_name) + _op_attr_type_cache[(op_type, attr_name)] = attr_type + return attr_type def make_attr(attr_type, value): diff --git a/tensorflow/python/eager/benchmarks_test.py b/tensorflow/python/eager/benchmarks_test.py index 9ca5041c38ed07b39fd73b9f110ab06e8e903251..3aad4a114a710280b5046666256b6b43dc0d5523 100644 --- a/tensorflow/python/eager/benchmarks_test.py +++ b/tensorflow/python/eager/benchmarks_test.py @@ -201,6 +201,9 @@ class MicroBenchmarks(test.Benchmark): m = self._m_2 self._run(lambda: gen_array_ops.identity(m), 30000) + def benchmark_slowpath_tf_identity(self): + self._run(lambda: gen_array_ops.identity(1), 30000) + def benchmark_tfe_py_execute_identity(self): m = self._m_2 ctx_handle = context.context()._handle @@ -214,10 +217,11 @@ class MicroBenchmarks(test.Benchmark): self._run(f, 30000) def benchmark_tf_gradient_function_identity(self): - m = self._m_2 - self._run( - lambda: backprop.gradients_function(gen_array_ops.identity, [0])(m), - 30000) + with context.device(CPU): + m = gen_array_ops.identity(self._m_2) + self._run( + lambda: backprop.gradients_function(gen_array_ops.identity, [0])(m), + 30000) def benchmark_tf_gradient_forward_identity(self): with backprop.GradientTape() as tape: @@ -233,10 +237,11 @@ class MicroBenchmarks(test.Benchmark): self._run(f, 30000) def 
benchmark_tf_gradient_function_no_op(self): - m = self._m_2 - self._run( - lambda: backprop.gradients_function(lambda x: x, [0])(m), - 30000) + with context.device(CPU): + m = gen_array_ops.identity(self._m_2) + self._run( + lambda: backprop.gradients_function(lambda x: x, [0])(m), + 30000) def _benchmark_np_matmul(self, m, transpose_b, num_iters): a = m.cpu().numpy() @@ -268,11 +273,12 @@ class MicroBenchmarks(test.Benchmark): # pylint: disable=protected-access ctx_handle = context.context()._handle # pylint: enable=protected-access + device = context.context().device_name attrs = ("transpose_a", False, "transpose_b", transpose_b, "T", m.dtype.as_datatype_enum) def func(): - pywrap_tensorflow.TFE_Py_Execute(ctx_handle, None, "MatMul", inputs, - attrs, 1) + pywrap_tensorflow.TFE_Py_Execute(ctx_handle, device, "MatMul", + inputs, attrs, 1) self._run(func, num_iters) diff --git a/tensorflow/python/eager/context.py b/tensorflow/python/eager/context.py index 8c1bb06bc3190ab47cb545d57a3eecd165d02107..9e146f021e813886b42ca72b07122b485901a24b 100644 --- a/tensorflow/python/eager/context.py +++ b/tensorflow/python/eager/context.py @@ -28,7 +28,6 @@ from tensorflow.core.protobuf import config_pb2 from tensorflow.python import pywrap_tensorflow from tensorflow.python.framework import c_api_util from tensorflow.python.framework import device as pydev -from tensorflow.python.framework import errors from tensorflow.python.util import compat from tensorflow.python.util import is_in_graph_mode from tensorflow.python.util import tf_contextlib @@ -86,6 +85,7 @@ class _EagerContext(threading.local): self.device_spec = pydev.DeviceSpec.from_string("") self.device_name = self.device_spec.to_string() self.mode = _default_mode + self.is_eager = _default_mode == EAGER_MODE self.scope_name = "" self.recording_summaries = False self.summary_writer_resource = None @@ -223,34 +223,27 @@ class Context(object): assert self._context_devices is None opts = 
pywrap_tensorflow.TFE_NewContextOptions() try: - with errors.raise_exception_on_not_ok_status() as status: - if self._config is not None: - config_str = self._config.SerializeToString() - pywrap_tensorflow.TFE_ContextOptionsSetConfig( - opts, config_str, len(config_str), status) - if self._device_policy is not None: - pywrap_tensorflow.TFE_ContextOptionsSetDevicePlacementPolicy( - opts, self._device_policy) - if self._execution_mode == ASYNC: - pywrap_tensorflow.TFE_ContextOptionsSetAsync(opts, True) - self._context_handle = pywrap_tensorflow.TFE_NewContext(opts, status) + if self._config is not None: + config_str = self._config.SerializeToString() + pywrap_tensorflow.TFE_ContextOptionsSetConfig(opts, config_str) + if self._device_policy is not None: + pywrap_tensorflow.TFE_ContextOptionsSetDevicePlacementPolicy( + opts, self._device_policy) + if self._execution_mode == ASYNC: + pywrap_tensorflow.TFE_ContextOptionsSetAsync(opts, True) + self._context_handle = pywrap_tensorflow.TFE_NewContext(opts) finally: pywrap_tensorflow.TFE_DeleteContextOptions(opts) # Store list of devices self._context_devices = [] - with errors.raise_exception_on_not_ok_status() as status: - device_list = pywrap_tensorflow.TFE_ContextListDevices( - self._context_handle, status) + device_list = pywrap_tensorflow.TFE_ContextListDevices( + self._context_handle) try: self._num_gpus = 0 for i in range(pywrap_tensorflow.TF_DeviceListCount(device_list)): - with errors.raise_exception_on_not_ok_status() as status: - dev_name = pywrap_tensorflow.TF_DeviceListName( - device_list, i, status) + dev_name = pywrap_tensorflow.TF_DeviceListName(device_list, i) self._context_devices.append(pydev.canonical_name(dev_name)) - with errors.raise_exception_on_not_ok_status() as status: - dev_type = pywrap_tensorflow.TF_DeviceListType( - device_list, i, status) + dev_type = pywrap_tensorflow.TF_DeviceListType(device_list, i) if dev_type == "GPU": self._num_gpus += 1 @@ -287,9 +280,12 @@ class Context(object): 
@tf_contextlib.contextmanager def _mode(self, mode): + """A context manager to allow setting the mode to EAGER/GRAPH.""" ctx = self._eager_context old_mode = ctx.mode + old_is_eager = ctx.is_eager ctx.mode = mode + ctx.is_eager = mode == EAGER_MODE if mode == EAGER_MODE: # Entering graph mode does not provide us with sufficient information to # record a context switch; graph-based context switches are only logged @@ -298,13 +294,14 @@ class Context(object): try: yield finally: + ctx.is_eager = old_is_eager ctx.mode = old_mode if mode == EAGER_MODE: self.context_switches.pop() def executing_eagerly(self): """Returns True if current thread has eager executing enabled.""" - return self._eager_context.mode == EAGER_MODE + return self._eager_context.is_eager def scalar_cache(self): """Per-device cache for scalars.""" @@ -411,9 +408,7 @@ class Context(object): if mode is None: mode = SYNC self._eager_context.execution_mode = mode - with errors.raise_exception_on_not_ok_status() as status: - pywrap_tensorflow.TFE_ContextSetAsyncForThread(self._handle, - mode == ASYNC, status) + pywrap_tensorflow.TFE_ContextSetAsyncForThread(self._handle, mode == ASYNC) @tf_contextlib.contextmanager def execution_mode(self, mode): @@ -427,8 +422,7 @@ class Context(object): def async_wait(self): """Waits for ops dispatched in ASYNC mode to finish.""" - with errors.raise_exception_on_not_ok_status() as status: - pywrap_tensorflow.TFE_ContextAsyncWait(self._handle, status) + pywrap_tensorflow.TFE_ContextAsyncWait(self._handle) def async_clear_error(self): """Clears errors raised during ASYNC execution.""" @@ -448,11 +442,9 @@ class Context(object): Args: fn: A wrapped TF_Function (returned from TF_GraphToFunction_wrapper). 
""" - with errors.raise_exception_on_not_ok_status() as status: - pywrap_tensorflow.TFE_ContextAddFunction( - self._handle, # pylint: disable=protected-access - fn, - status) + pywrap_tensorflow.TFE_ContextAddFunction( + self._handle, # pylint: disable=protected-access + fn) def add_function_def(self, fdef): """Add a function definition to the context. @@ -464,12 +456,10 @@ class Context(object): fdef: A FunctionDef protocol buffer message. """ fdef_string = fdef.SerializeToString() - with errors.raise_exception_on_not_ok_status() as status: - pywrap_tensorflow.TFE_ContextAddFunctionDef( - self._handle, # pylint: disable=protected-access - fdef_string, - len(fdef_string), - status) + pywrap_tensorflow.TFE_ContextAddFunctionDef( + self._handle, # pylint: disable=protected-access + fdef_string, + len(fdef_string)) def add_post_execution_callback(self, callback): """Add a post-execution callback to the context. @@ -512,23 +502,19 @@ class Context(object): To retrieve the accumulated metadata call context.export_run_metadata() and to stop tracing call context.disable_run_metadata(). 
""" - if not self._context_handle: - self._initialize_handle_and_devices() - pywrap_tensorflow.TFE_ContextEnableRunMetadata(self._context_handle) + pywrap_tensorflow.TFE_ContextEnableRunMetadata(self._handle) @tf_contextlib.contextmanager def device_policy(self, policy): - if not self._context_handle: - self._initialize_handle_and_devices() - old = pywrap_tensorflow.TFE_ContextGetDevicePlacementPolicy( - self._context_handle) + handle = self._handle + old = pywrap_tensorflow.TFE_ContextGetDevicePlacementPolicy(handle) pywrap_tensorflow.TFE_ContextSetThreadLocalDevicePlacementPolicy( - self._handle, policy) + handle, policy) try: yield finally: pywrap_tensorflow.TFE_ContextSetThreadLocalDevicePlacementPolicy( - self._handle, old) + handle, old) def disable_run_metadata(self): """Disables tracing of op execution via RunMetadata.""" @@ -548,9 +534,8 @@ class Context(object): if not self._context_handle: return None with c_api_util.tf_buffer() as buffer_: - with errors.raise_exception_on_not_ok_status() as status: - pywrap_tensorflow.TFE_ContextExportRunMetadata( - self._context_handle, buffer_, status) + pywrap_tensorflow.TFE_ContextExportRunMetadata( + self._context_handle, buffer_) proto_data = pywrap_tensorflow.TF_GetBuffer(buffer_) run_metadata = config_pb2.RunMetadata() run_metadata.ParseFromString(compat.as_bytes(proto_data)) @@ -579,6 +564,10 @@ def context(): return _context +def context_safe(): + return _context + + # TODO(agarwal): remove this. def get_default_context(): """Same as context.""" diff --git a/tensorflow/python/eager/execution_callbacks.py b/tensorflow/python/eager/execution_callbacks.py index 535361498a9dd33003d0479051e97d7ff2553067..9a082596535f51e5a4fb6cc2a11a4dd8a422ed44 100644 --- a/tensorflow/python/eager/execution_callbacks.py +++ b/tensorflow/python/eager/execution_callbacks.py @@ -253,7 +253,7 @@ def add_execution_callback(callback): `f(op_type, op_name, attrs, inputs, outputs)`. 
`op_type` is the type of the operation that was just executed (e.g., `MatMul`). - `op_name` is the name of the operation that has was just executed. This + `op_name` is the name of the operation that was just executed. This name is set by the client who created the operation and can be `None` if it is unset. `attrs` contains the attributes of the operation as a `tuple` of diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py index 343012e552592a6f8bb1255118add3e938aa443c..0f1170bb42094f4d6565db15f1203f7d08ba89c7 100644 --- a/tensorflow/python/eager/function.py +++ b/tensorflow/python/eager/function.py @@ -34,11 +34,11 @@ from tensorflow.python.eager.graph_only_ops import graph_placeholder from tensorflow.python.framework import c_api_util from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes as dtypes_module -from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import gradients_impl +from tensorflow.python.ops import resource_variable_ops from tensorflow.python.util import compat from tensorflow.python.util import nest from tensorflow.python.util import tf_decorator @@ -70,23 +70,32 @@ def capture_value(tensor_map, value, dtype, name): captured_value = graph_placeholder( dtype=dtype or value.dtype, shape=value.shape, name=name) if captured_value.dtype == dtypes_module.resource: - handle_data = value._handle_data # pylint: disable=protected-access - captured_value._handle_data = handle_data # pylint: disable=protected-access + if ops._USE_C_SHAPES: # pylint: disable=protected-access + if isinstance(value, ops.EagerTensor): + handle_data = value._handle_data # pylint: disable=protected-access + else: + handle_data = resource_variable_ops.get_resource_handle_data(value) + else: + handle_data = value._handle_data # pylint: 
disable=protected-access if handle_data is not None and handle_data.is_set: + # pylint: disable=protected-access + if ops._USE_C_SHAPES: + pywrap_tensorflow.TFE_SetResourceHandleShapeAndType( + captured_value.graph._c_graph, captured_value._as_tf_output(), + handle_data.SerializeToString()) + else: + captured_value._handle_data = handle_data + # pylint: enable=protected-access # Ensure that shapes and dtypes are propagated. shapes, types = zip(*[(pair.shape, pair.dtype) for pair in handle_data.shape_and_type]) ranks = [len(s.dim) if not s.unknown_rank else -1 for s in shapes] shapes = [[d.size for d in s.dim] if not s.unknown_rank else None for s in shapes] - with errors.raise_exception_on_not_ok_status() as status: - pywrap_tensorflow.TF_GraphSetOutputHandleShapesAndTypes_wrapper( - captured_value._op._graph._c_graph, # pylint: disable=protected-access - captured_value._as_tf_output(), # pylint: disable=protected-access - shapes, - ranks, - types, - status) + pywrap_tensorflow.TF_GraphSetOutputHandleShapesAndTypes_wrapper( + captured_value._op._graph._c_graph, # pylint: disable=protected-access + captured_value._as_tf_output(), # pylint: disable=protected-access + shapes, ranks, types) tensor_map[ops.tensor_id(value)] = (value, captured_value) else: @@ -228,6 +237,16 @@ class HelperContext(object): else: return val + def EnterGradientColocation(self, op, gradient_uid): + """Start building a gradient colocated with an op.""" + if self._outer_context: + self._outer_context.EnterGradientColocation(op, gradient_uid) + + def ExitGradientColocation(self, op, gradient_uid): + """Start building a gradient colocated with an op.""" + if self._outer_context: + self._outer_context.ExitGradientColocation(op, gradient_uid) + def __enter__(self): # pylint: disable=protected-access self._g = ops.get_default_graph() @@ -275,23 +294,20 @@ class _EagerDefinedFunction(object): inputs: the tensors in the graph to be used as inputs to the function outputs: the tensors in the graph 
which will be outputs to the function """ - with errors.raise_exception_on_not_ok_status() as status: - fn = pywrap_tensorflow.TF_GraphToFunction_wrapper( - graph._c_graph, # pylint: disable=protected-access - compat.as_str(name), - False, - [o._c_op for o in operations], # pylint: disable=protected-access - [t._as_tf_output() for t in inputs], # pylint: disable=protected-access - [t._as_tf_output() for t in outputs], # pylint: disable=protected-access - [], - None, - compat.as_str(""), - status) + fn = pywrap_tensorflow.TF_GraphToFunction_wrapper( + graph._c_graph, # pylint: disable=protected-access + compat.as_str(name), + False, + [o._c_op for o in operations], # pylint: disable=protected-access + [t._as_tf_output() for t in inputs], # pylint: disable=protected-access + [t._as_tf_output() for t in outputs], # pylint: disable=protected-access + [], + None, + compat.as_str("")) # TODO(apassos) avoid creating a FunctionDef (specially to grab the # signature, but also in general it's nice not to depend on it. with c_api_util.tf_buffer() as buffer_: - with errors.raise_exception_on_not_ok_status() as status: - pywrap_tensorflow.TF_FunctionToFunctionDef(fn, buffer_, status) + pywrap_tensorflow.TF_FunctionToFunctionDef(fn, buffer_) proto_data = pywrap_tensorflow.TF_GetBuffer(buffer_) function_def = function_pb2.FunctionDef() function_def.ParseFromString(compat.as_bytes(proto_data)) @@ -302,7 +318,7 @@ class _EagerDefinedFunction(object): self.signature = function_def.signature self.grad_func_name = None self.python_grad_func = None - self._c_func = fn + self._c_func = c_api_util.ScopedTFFunction(fn) self._grad_func = None @@ -669,7 +685,7 @@ def _defun_internal(name, func, args, kwds): if context.executing_eagerly(): for f in tmp_graph._functions.values(): # pylint: disable=protected-access # TODO(ashankar): What about the gradient registry? 
- _register(f._c_func) # pylint: disable=protected-access + _register(f._c_func.func) # pylint: disable=protected-access return GraphModeFunction( fname, all_inputs, extra_inputs, tmp_graph, operations, func_def_outputs, func_outputs, output_shapes, variables) diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py index 9af197981bde309160781fa5821152962e5383bb..1828c987f4343015e042359e5ba2f715ee63ad83 100644 --- a/tensorflow/python/eager/function_test.py +++ b/tensorflow/python/eager/function_test.py @@ -29,9 +29,12 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import function as tf_function from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape +from tensorflow.python.framework import test_util +from tensorflow.python.layers import convolutional from tensorflow.python.ops import array_ops from tensorflow.python.ops import clip_ops from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import init_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import resource_variable_ops from tensorflow.python.ops import variable_scope @@ -39,6 +42,7 @@ from tensorflow.python.ops import variables from tensorflow.python.training import gradient_descent +@test_util.with_c_shapes class FunctionTest(test.TestCase): def testBasic(self): @@ -104,6 +108,7 @@ class FunctionTest(test.TestCase): matmul = function.defun(math_ops.matmul) pair = collections.namedtuple('pair', ['a', 'b']) + def a_times_b(inputs): return matmul(inputs.a['a'], inputs.b['b']) @@ -312,6 +317,7 @@ class FunctionTest(test.TestCase): x = variable_scope.get_variable( 'v', initializer=constant_op.constant(1.0)) return x * constant_op.constant(2.0) + with self.assertRaisesRegexp(ValueError, 'No trainable variables were accessed'): backprop.implicit_val_and_grad(f)() @@ -581,6 +587,7 @@ class FunctionTest(test.TestCase): with ops.name_scope('foo'): v = 
resource_variable_ops.ResourceVariable(0.0, name='bar') self.assertEqual(v.name, 'foo/bar:0') + create_variable() def testVariableNamesRespectNameScopesWithDefunInGraph(self): @@ -590,10 +597,27 @@ class FunctionTest(test.TestCase): with ops.name_scope('foo'): v = resource_variable_ops.ResourceVariable([1.0, 2.0], name='bar') self.assertEqual(v.name, 'foo/bar:0') + with ops.get_default_graph().as_default(): create_variable() + def testLayerInDefun(self): + conv = convolutional.Conv2D( + filters=1, + kernel_size=2, + kernel_initializer=init_ops.ones_initializer(), + bias_initializer=init_ops.zeros_initializer()) + + @function.defun + def model(x): + return conv(x) + + x = array_ops.ones([1, 2, 2, 1]) + y = model(x) + self.assertAllEqual([[[[4.0]]]], y.numpy()) + +@test_util.with_c_shapes class AutomaticControlDependenciesTest(test.TestCase): def testBasic(self): diff --git a/tensorflow/python/eager/graph_callable.py b/tensorflow/python/eager/graph_callable.py index ee5d87f0835a8e70e0ce14537a51ea5418db41b9..d40ea982c746593d0eb91cd58f5db3f10b7af687 100644 --- a/tensorflow/python/eager/graph_callable.py +++ b/tensorflow/python/eager/graph_callable.py @@ -325,7 +325,7 @@ def _graph_callable_internal(func, shape_and_dtypes): # Also, what about the gradient registry of these functions? Those need to be # addressed as well. 
for f in tmp_graph._functions.values(): # pylint: disable=protected-access - function._register(f._c_func) # pylint: disable=protected-access + function._register(f._c_func.func) # pylint: disable=protected-access initializer_function = function.GraphModeFunction( initialization_name, placeholder_inputs, diff --git a/tensorflow/python/eager/imperative_grad.py b/tensorflow/python/eager/imperative_grad.py index 837cad974ac6555ef2b13d1a1a5e0e5f5166b01d..000152855d1a90f32936cca40c10f00c2df863a5 100644 --- a/tensorflow/python/eager/imperative_grad.py +++ b/tensorflow/python/eager/imperative_grad.py @@ -21,7 +21,6 @@ from __future__ import print_function import collections from tensorflow.python import pywrap_tensorflow -from tensorflow.python.framework import errors VSpace = collections.namedtuple( @@ -60,6 +59,5 @@ def imperative_grad( or if only non-differentiable functions of the source were used in the computation of target. """ - with errors.raise_exception_on_not_ok_status() as status: - return pywrap_tensorflow.TFE_Py_TapeGradient( - tape._tape, vspace, target, sources, output_gradients, status) # pylint: disable=protected-access + return pywrap_tensorflow.TFE_Py_TapeGradient( + tape._tape, vspace, target, sources, output_gradients) # pylint: disable=protected-access diff --git a/tensorflow/python/eager/python_eager_op_gen.cc b/tensorflow/python/eager/python_eager_op_gen.cc index c2ce8efd7f70c6ba93b6d444f88ddbb9aa51ccdb..9afab0077b666b36d77abea5a7d8c444b6400812 100644 --- a/tensorflow/python/eager/python_eager_op_gen.cc +++ b/tensorflow/python/eager/python_eager_op_gen.cc @@ -117,7 +117,7 @@ class GenEagerPythonOp : public python_op_gen_internal::GenPythonOp { const string& function_name) : python_op_gen_internal::GenPythonOp(op_def, api_def, function_name) { op_name_ = function_name_; - op_name_.Consume("_"); + str_util::ConsumePrefix(&op_name_, "_"); } ~GenEagerPythonOp() override {} @@ -366,8 +366,8 @@ string GenEagerPythonOp::Code() { void 
GenEagerPythonOp::HandleGraphMode(const string& function_setup) { // Handle graph-mode case strings::StrAppend(&result_, - " _ctx = _context.context()\n" - " if not _ctx.executing_eagerly():\n", + " _ctx = _context._context\n" + " if _ctx is None or not _ctx._eager_context.is_eager:\n", function_setup, " _, _, _op = _op_def_lib._apply_op_helper(\n"); AddBodyNoReturn(" "); @@ -492,7 +492,7 @@ bool GenEagerPythonOp::GetEagerFunctionSetup(const string& indentation, strings::StrAppend(function_setup, indentation, " ", attr_api_name, " = ", default_value, "\n"); } - if (attr_type.starts_with("list(")) { + if (str_util::StartsWith(attr_type, "list(")) { ExpectListArg(indentation, attr_api_name, function_setup); } @@ -683,13 +683,14 @@ bool GenEagerPythonOp::AddEagerFallbackCode( return true; } - AddDefLine(strings::StrCat(function_name_, kEagerFallbackSuffix), parameters); + AddDefLine(strings::StrCat(function_name_, kEagerFallbackSuffix), + strings::StrCat(parameters, ", ctx=None")); strings::StrAppend( &result_, " r\"\"\"This is the slowpath function for Eager mode.\n"); strings::StrAppend(&result_, " This is for function ", function_name_, "\n \"\"\"\n"); - strings::StrAppend(&result_, " _ctx = _context.context()\n"); + strings::StrAppend(&result_, " _ctx = ctx if ctx else _context.context()\n"); string function_setup; if (!GetEagerFunctionSetup(" ", &function_setup)) { @@ -712,9 +713,9 @@ bool GenEagerPythonOp::AddEagerFallbackCode( } void GenEagerPythonOp::AddEagerFastPathExecute() { - string fastpath_execute_params = - strings::StrCat("_ctx._handle, _ctx.device_name, \"", op_def_.name(), - "\", ", "name, _ctx._post_execution_callbacks"); + string fastpath_execute_params = strings::StrCat( + "_ctx._context_handle, _ctx._eager_context.device_name, \"", + op_def_.name(), "\", ", "name, _ctx._post_execution_callbacks"); string fallback_params; for (int i = 0; i < api_def_.in_arg_size(); i++) { @@ -755,6 +756,8 @@ void GenEagerPythonOp::AddEagerFastPathExecute() { 
strings::StrAppend(&result_, " ", "return _result\n"); // Handle fallback. + if (!fallback_params.empty()) strings::StrAppend(&fallback_params, ", "); + strings::StrAppend(&fallback_params, "ctx=_ctx"); strings::StrAppend(&result_, " ", "except _core._FallbackException:\n"); strings::StrAppend( &result_, " ", "return ", function_name_, kEagerFallbackSuffix, diff --git a/tensorflow/python/eager/pywrap_tensor.cc b/tensorflow/python/eager/pywrap_tensor.cc index 519814b979e00dd7c9df41eacbe1edc02c9d88e8..b5b4e394e33bd3adcf8e90fa4c35f87fbbb5a155 100644 --- a/tensorflow/python/eager/pywrap_tensor.cc +++ b/tensorflow/python/eager/pywrap_tensor.cc @@ -60,42 +60,6 @@ TFE_TensorHandle* NumpyToTensorHandle(PyObject* obj) { } } -// Casts data referred to by `handle` from type `src_type_enum` to type -// `dst_type_enum`. -TFE_TensorHandle* EagerCast(TFE_Context* ctx, TFE_TensorHandle* handle, - TF_DataType src_type_enum, - TF_DataType dst_type_enum, TF_Status* out_status) { - if (ctx == nullptr) return nullptr; - const char* op_name = "Cast"; - const char* device_name = "/job:localhost/replica:0/task:0/device:CPU:0"; - TFE_Op* op = TFE_NewOp(ctx, op_name, out_status); -#define RETURN_ERROR \ - { \ - TFE_DeleteOp(op); \ - return nullptr; \ - } - if (TF_GetCode(out_status) != TF_OK) RETURN_ERROR - TFE_OpSetDevice(op, device_name, out_status); - if (TF_GetCode(out_status) != TF_OK) RETURN_ERROR - TFE_OpAddInput(op, handle, out_status); - if (TF_GetCode(out_status) != TF_OK) RETURN_ERROR - TFE_OpSetAttrType(op, "SrcT", src_type_enum); - TFE_OpSetAttrType(op, "DstT", dst_type_enum); - TFE_TensorHandle* output = nullptr; - int num_outputs = 1; - TFE_Execute(op, &output, &num_outputs, out_status); - if (TF_GetCode(out_status) != TF_OK || num_outputs != 1 || - output == nullptr) { - if (output != nullptr) { - TFE_DeleteTensorHandle(output); - } - RETURN_ERROR - } - TFE_DeleteOp(op); - return output; -#undef RETURN_ERROR -} - TFE_TensorHandle* CopyToDevice(TFE_TensorHandle* handle, 
PyObject* ctx, PyObject* dev) { const char* device = ""; @@ -161,6 +125,100 @@ PyObject* PyIntFromDataType(TF_DataType l) { } // namespace +namespace tensorflow { +// Casts data referred to by `handle` from type `src_type_enum` to type +// `dst_type_enum`. +TFE_TensorHandle* EagerCast(TFE_Context* ctx, TFE_TensorHandle* handle, + TF_DataType src_type_enum, + TF_DataType dst_type_enum, TF_Status* out_status) { + if (ctx == nullptr) return nullptr; + const char* op_name = "Cast"; + const char* device_name = "/job:localhost/replica:0/task:0/device:CPU:0"; + TFE_Op* op = TFE_NewOp(ctx, op_name, out_status); +#define RETURN_ERROR \ + { \ + TFE_DeleteOp(op); \ + return nullptr; \ + } + if (TF_GetCode(out_status) != TF_OK) RETURN_ERROR + TFE_OpSetDevice(op, device_name, out_status); + if (TF_GetCode(out_status) != TF_OK) RETURN_ERROR + TFE_OpAddInput(op, handle, out_status); + if (TF_GetCode(out_status) != TF_OK) RETURN_ERROR + TFE_OpSetAttrType(op, "SrcT", src_type_enum); + TFE_OpSetAttrType(op, "DstT", dst_type_enum); + TFE_TensorHandle* output = nullptr; + int num_outputs = 1; + TFE_Execute(op, &output, &num_outputs, out_status); + if (TF_GetCode(out_status) != TF_OK || num_outputs != 1 || + output == nullptr) { + if (output != nullptr) { + TFE_DeleteTensorHandle(output); + } + RETURN_ERROR + } + TFE_DeleteOp(op); + return output; +#undef RETURN_ERROR +} + +TFE_TensorHandle* ConvertToEagerTensor(PyObject* value, PyObject* dtype) { + int desired_dtype = -1; + if (dtype != Py_None) { + if (!PyIntToDataType(dtype, &desired_dtype)) { + PyErr_SetString(PyExc_TypeError, + tensorflow::strings::StrCat( + "Expecting a DataType value for dtype. 
Got ", + Py_TYPE(dtype)->tp_name) + .c_str()); + return nullptr; + } + } + if (PyArray_Check(value)) { + int desired_np_dtype = -1; + if (desired_dtype >= 0) { + if (!tensorflow::TF_DataType_to_PyArray_TYPE( + static_cast(desired_dtype), &desired_np_dtype) + .ok()) { + PyErr_SetString(PyExc_TypeError, + tensorflow::strings::StrCat( + "Invalid dtype argument value ", desired_dtype) + .c_str()); + return nullptr; + } + } + PyArrayObject* array = reinterpret_cast(value); + int current_np_dtype = PyArray_TYPE(array); + auto safe_value = tensorflow::make_safe(static_cast(nullptr)); + if ((desired_np_dtype >= 0 && desired_np_dtype != current_np_dtype) || + !PyArray_ISCARRAY(array)) { + int new_dtype = + desired_np_dtype >= 0 ? desired_np_dtype : current_np_dtype; + safe_value = tensorflow::make_safe( + PyArray_FromAny(value, PyArray_DescrFromType(new_dtype), 0, 0, + NPY_ARRAY_CARRAY | NPY_ARRAY_FORCECAST, nullptr)); + if (PyErr_Occurred()) return nullptr; + if (safe_value == nullptr) { + PyErr_SetString(PyExc_ValueError, "Error while casting a numpy value"); + return nullptr; + } + value = safe_value.get(); + } + return NumpyToTensorHandle(value); + } else { + tensorflow::Tensor t; + // TODO(josh11b): Have PySeqToTensor set python errors instead of + // returning Status. 
+ auto cppstatus = tensorflow::PySeqToTensor(value, dtype, &t); + if (!cppstatus.ok()) { + PyErr_SetString(PyExc_ValueError, cppstatus.error_message().c_str()); + return nullptr; + } + return TFE_NewTensorHandle(t); + } +} +} // namespace tensorflow + extern "C" { static const int kMaxEagerTensorParentSize = 64; @@ -230,61 +288,16 @@ int EagerTensor_init(EagerTensor* self, PyObject* args, PyObject* kwds) { return -1; } } - tensorflow::Safe_TFE_TensorHandlePtr handle = - tensorflow::make_safe(static_cast(nullptr)); PyErr_Clear(); - if (PyArray_Check(value)) { - int desired_np_dtype = -1; - if (desired_dtype >= 0) { - if (!tensorflow::TF_DataType_to_PyArray_TYPE( - static_cast(desired_dtype), &desired_np_dtype) - .ok()) { - PyErr_SetString(PyExc_TypeError, - tensorflow::strings::StrCat( - "Invalid dtype argument value ", desired_dtype) - .c_str()); - return -1; - } - } - PyArrayObject* array = reinterpret_cast(value); - int current_np_dtype = PyArray_TYPE(array); - auto safe_value = tensorflow::make_safe(static_cast(nullptr)); - if ((desired_np_dtype >= 0 && desired_np_dtype != current_np_dtype) || - !PyArray_ISCARRAY(array)) { - int new_dtype = - desired_np_dtype >= 0 ? desired_np_dtype : current_np_dtype; - safe_value = tensorflow::make_safe( - PyArray_FromAny(value, PyArray_DescrFromType(new_dtype), 0, 0, - NPY_ARRAY_CARRAY | NPY_ARRAY_FORCECAST, nullptr)); - if (PyErr_Occurred()) return -1; - if (safe_value == nullptr) { - PyErr_SetString(PyExc_ValueError, "Error while casting a numpy value"); - return -1; - } - value = safe_value.get(); - } - handle = tensorflow::make_safe(NumpyToTensorHandle(value)); - } else { - tensorflow::Tensor t; - // TODO(josh11b): Have PySeqToTensor set python errors instead of - // returning Status. 
- auto cppstatus = tensorflow::PySeqToTensor(value, dtype, &t); - if (!cppstatus.ok()) { - PyErr_SetString(PyExc_ValueError, cppstatus.error_message().c_str()); - return -1; - } - handle = tensorflow::make_safe(TFE_NewTensorHandle(t)); - } - if (PyErr_Occurred()) return -1; - if (handle == nullptr) { - PyErr_SetString(PyExc_ValueError, "Error while creating an EagerTensor"); - return -1; - } + tensorflow::Safe_TFE_TensorHandlePtr handle = + tensorflow::make_safe(static_cast( + tensorflow::ConvertToEagerTensor(value, dtype))); + if (handle == nullptr) return -1; TF_DataType handle_dtype = TFE_TensorHandleDataType(handle.get()); if (desired_dtype >= 0 && desired_dtype != handle_dtype) { - handle = tensorflow::make_safe( - EagerCast(GetContext(context), handle.get(), handle_dtype, - static_cast(desired_dtype), self->status)); + handle = tensorflow::make_safe(tensorflow::EagerCast( + GetContext(context), handle.get(), handle_dtype, + static_cast(desired_dtype), self->status)); if (TF_GetCode(self->status) != TF_OK) { PyErr_SetString(PyExc_ValueError, tensorflow::strings::StrCat( @@ -701,12 +714,12 @@ PyObject* TFE_Py_InitEagerTensor(PyObject* base_class) { return reinterpret_cast(EagerTensorType); } -PyObject* TFE_Py_TensorShapeSlice(PyObject* tensor_list, int slice_dim) { - if (!PyList_Check(tensor_list)) { +PyObject* TFE_Py_TensorShapeSlice(PyObject* tensors, int slice_dim) { + if (!PyList_Check(tensors) && !PyTuple_Check(tensors)) { PyErr_SetString(PyExc_TypeError, tensorflow::strings::StrCat( - "tensor_list argument must be a list. Got \"", - Py_TYPE(tensor_list)->tp_name, "\"") + "tensors argument must be a list or a tuple. 
Got \"", + Py_TYPE(tensors)->tp_name, "\"") .c_str()); return nullptr; } @@ -720,14 +733,14 @@ PyObject* TFE_Py_TensorShapeSlice(PyObject* tensor_list, int slice_dim) { return nullptr; } - Py_ssize_t num_tensors = PyList_Size(tensor_list); + Py_ssize_t num_tensors = PySequence_Fast_GET_SIZE(tensors); int64_t num_tensors_int = static_cast(num_tensors); auto tensor = tensorflow::make_safe(TF_AllocateTensor( TF_INT32, &num_tensors_int, /*num_dims=*/1, /*len=*/4 * num_tensors_int)); int32_t* data = reinterpret_cast(TF_TensorData(tensor.get())); auto status = tensorflow::make_safe(TF_NewStatus()); for (Py_ssize_t i = 0; i < num_tensors; ++i) { - PyObject* tensor_obj = PyList_GET_ITEM(tensor_list, i); + PyObject* tensor_obj = PySequence_Fast_GET_ITEM(tensors, i); if (!EagerTensor_CheckExact(tensor_obj)) { PyErr_SetString(PyExc_TypeError, tensorflow::strings::StrCat( diff --git a/tensorflow/python/eager/pywrap_tensor.h b/tensorflow/python/eager/pywrap_tensor.h index aa1efdd1b81cca9df0088c4cecedfe52f258d2bc..63ab1ed84d5ba3f280904be8dd202912e42241d0 100644 --- a/tensorflow/python/eager/pywrap_tensor.h +++ b/tensorflow/python/eager/pywrap_tensor.h @@ -22,4 +22,14 @@ limitations under the License. bool EagerTensor_CheckExact(const PyObject* o); tensorflow::int64 EagerTensor_id(const PyObject* tensor); +namespace tensorflow { +TFE_TensorHandle* ConvertToEagerTensor(PyObject* value, PyObject* dtype); + +// TODO(nareshmodi): Move EagerCast and ReadVariableOp (which use the C API to +// execute TFE Ops) to a separate common library. 
+TFE_TensorHandle* EagerCast(TFE_Context* ctx, TFE_TensorHandle* handle, + TF_DataType src_type_enum, + TF_DataType dst_type_enum, TF_Status* out_status); +} + #endif // TENSORFLOW_PYTHON_EAGER_PYWRAP_TENSOR_H_ diff --git a/tensorflow/python/eager/pywrap_tfe.h b/tensorflow/python/eager/pywrap_tfe.h index 32d731d0f68910b8e41a57cb32ae60c3ea6742f7..691b613e48b217c595fe0f3249c493facf756d47 100644 --- a/tensorflow/python/eager/pywrap_tfe.h +++ b/tensorflow/python/eager/pywrap_tfe.h @@ -186,16 +186,16 @@ PyObject* TFE_Py_RecordGradient(PyObject* op_name, PyObject* inputs, // Returns the set of variables watched by the given tape. PyObject* TFE_Py_TapeWatchedVariables(PyObject* tape); -// Returns an EagerTensor of dimension [len(`tensor_list`)] containing -// the `slice_dim`'th dimension of each tensor in `tensor_list`. In other words, +// Returns an EagerTensor of dimension [len(`tensors`)] containing +// the `slice_dim`'th dimension of each tensor in `tensors`. In other words, // TFE_Py_TensorShapeSlice takes a slice of dimensions of tensors in -// `tensor_list`. For example, if `tensor_list` contains tensors of with shapes +// `tensors`. For example, if `tensors` contains tensors of with shapes // [1, 2, 3], [4, 5], [6, 7, 8, 9], TFE_Py_TensorShapeSlice called with // `slice_dim` equal to 1 will return [2, 5, 7]. // On error, returns nullptr and sets python exception. -// REQUIRES: `tensor_list` is a python list of EagerTensors +// REQUIRES: `tensors` is a python list/tuple of EagerTensors // REQUIRES: `slice_dim` is non-negative and smaller than the rank of all -// tensors in `tensor_list`. -PyObject* TFE_Py_TensorShapeSlice(PyObject* tensor_list, int slice_dim); +// tensors in `tensors`. 
+PyObject* TFE_Py_TensorShapeSlice(PyObject* tensors, int slice_dim); #endif // TENSORFLOW_PYTHON_EAGER_PYWRAP_TFE_H_ diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc index 8a398f6447434ae40917f3fe67d9e6972f2a9174..2bfa1f052cfe6550b25ad3bae7fa5c67a4e45be5 100644 --- a/tensorflow/python/eager/pywrap_tfe_src.cc +++ b/tensorflow/python/eager/pywrap_tfe_src.cc @@ -38,6 +38,54 @@ using tensorflow::strings::Printf; namespace { +struct InputInfo { + InputInfo(int i, bool is_list) : i(i), is_list(is_list) {} + + int i; + bool is_list = false; +}; + +using AttrToInputsMap = + tensorflow::gtl::FlatMap>; + +tensorflow::mutex all_attr_to_input_maps_lock( + tensorflow::LINKER_INITIALIZED); +tensorflow::gtl::FlatMap* GetAllAttrToInputsMaps() { + static auto* all_attr_to_input_maps = + new tensorflow::gtl::FlatMap; + return all_attr_to_input_maps; +} + +AttrToInputsMap* GetAttrToInputsMap(const tensorflow::OpDef& op_def) { + tensorflow::mutex_lock l(all_attr_to_input_maps_lock); + auto* all_attr_to_input_maps = GetAllAttrToInputsMaps(); + + auto* output = + tensorflow::gtl::FindPtrOrNull(*all_attr_to_input_maps, op_def.name()); + if (output != nullptr) { + return output; + } + + std::unique_ptr m(new AttrToInputsMap); + + // Store a list of InputIndex -> List of corresponding inputs. + for (int i = 0; i < op_def.input_arg_size(); i++) { + if (!op_def.input_arg(i).type_attr().empty()) { + auto it = m->find(op_def.input_arg(i).type_attr()); + if (it == m->end()) { + it = m->insert({op_def.input_arg(i).type_attr(), {}}).first; + } + it->second.emplace_back(i, !op_def.input_arg(i).number_attr().empty()); + } + } + + auto* retval = m.get(); + (*all_attr_to_input_maps)[op_def.name()] = m.release(); + + return retval; +} + struct FastPathOpExecInfo { TFE_Context* ctx; const char* device_name; @@ -53,6 +101,14 @@ struct FastPathOpExecInfo { // The op type name of the main op being executed. 
PyObject* op_name; PyObject* callbacks; + + // All the args passed into the FastPathOpExecInfo. + PyObject* args; + + // DTypes can come from another input that has the same attr. So build that + // map. + const AttrToInputsMap* attr_to_inputs_map; + tensorflow::gtl::FlatMap cached_dtypes; }; #define PARSE_VALUE(fn_name, type, check_fn, parse_fn) \ @@ -76,12 +132,29 @@ PARSE_VALUE(ParseIntValue, int, PyLong_Check, PyLong_AsLong) PARSE_VALUE(ParseInt64Value, int64_t, PyLong_Check, PyLong_AsLong) #else PARSE_VALUE(ParseIntValue, int, PyInt_Check, PyInt_AsLong) -PARSE_VALUE(ParseInt64Value, int64_t, PyInt_Check, PyInt_AsLong) -PARSE_VALUE(ParseInt64LongValue, int64_t, PyLong_Check, PyLong_AsLong) #endif PARSE_VALUE(ParseFloatValue, float, PyFloat_Check, PyFloat_AsDouble) #undef PARSE_VALUE +#if PY_MAJOR_VERSION < 3 +bool ParseInt64Value(const string& key, PyObject* py_value, TF_Status* status, + int64_t* value) { + if (PyInt_Check(py_value)) { + *value = static_cast(PyInt_AsLong(py_value)); + return true; + } else if (PyLong_Check(py_value)) { + *value = static_cast(PyLong_AsLong(py_value)); + return true; + } + TF_SetStatus( + status, TF_INVALID_ARGUMENT, + tensorflow::strings::StrCat("Expecting int or long value for attr ", key, + ", got ", py_value->ob_type->tp_name) + .c_str()); + return false; +} +#endif + Py_ssize_t TensorShapeNumDims(PyObject* value) { const auto size = PySequence_Size(value); if (size == -1) { @@ -234,7 +307,7 @@ bool SetOpAttrList( std::unique_ptr buffer(new int64_t[total_dims]); // Copy the input dims into the buffer and set dims to point to // the start of each list's dims. 
- std::unique_ptr dims(new const int64_t*[num_values]); + std::unique_ptr dims(new const int64_t*[num_values]); std::unique_ptr num_dims(new int[num_values]); int64_t* offset = buffer.get(); for (int i = 0; i < num_values; ++i) { @@ -296,7 +369,7 @@ void SetOpAttrListDefault( TF_Status* status) { if (type == TF_ATTR_STRING) { int num_values = attr.default_value().list().s_size(); - std::unique_ptr values(new const char*[num_values]); + std::unique_ptr values(new const char*[num_values]); (*attr_list_sizes)[key] = num_values; for (int i = 0; i < num_values; i++) { values[i] = attr.default_value().list().s(i).data(); @@ -349,7 +422,7 @@ void SetOpAttrListDefault( std::unique_ptr buffer(new int64_t[total_dims]); // Copy the input dims into the buffer and set dims to point to // the start of each list's dims. - std::unique_ptr dims(new const int64_t*[num_values]); + std::unique_ptr dims(new const int64_t*[num_values]); std::unique_ptr num_dims(new int[num_values]); int64_t* offset = buffer.get(); for (int i = 0; i < num_values; ++i) { @@ -369,7 +442,7 @@ void SetOpAttrListDefault( } else if (type == TF_ATTR_FUNC) { int num_values = attr.default_value().list().func_size(); (*attr_list_sizes)[key] = num_values; - std::unique_ptr funcs(new const TFE_Op*[num_values]); + std::unique_ptr funcs(new const TFE_Op*[num_values]); for (int i = 0; i < num_values; i++) { funcs[i] = GetFunc(ctx, attr.default_value().list().func(i), status); } @@ -1399,10 +1472,39 @@ PyObject* GetPythonObjectFromString(const char* s) { #endif } +PyObject* GetPythonObjectFromInt(int num) { +#if PY_MAJOR_VERSION >= 3 + return PyLong_FromLong(num); +#else + return PyInt_FromLong(num); +#endif +} + bool CheckResourceVariable(PyObject* item) { return PyObject_TypeCheck(item, resource_variable_type); } +bool IsNumberType(PyObject* item) { +#if PY_MAJOR_VERSION >= 3 + return PyFloat_Check(item) || PyLong_Check(item); +#else + return PyFloat_Check(item) || PyInt_Check(item) || PyLong_Check(item); +#endif +} + 
+bool CheckOneInput(PyObject* item) { + if (EagerTensor_CheckExact(item) || CheckResourceVariable(item) || + PyArray_Check(item) || IsNumberType(item)) { + return true; + } + + // Sequences are not properly handled. Sequences with purely python numeric + // types work, but sequences with mixes of EagerTensors and python numeric + // types don't work. + // TODO(nareshmodi): fix + return false; +} + bool CheckInputsOk(PyObject* seq, int start_index, const tensorflow::OpDef& op_def) { for (int i = 0; i < op_def.input_arg_size(); i++) { @@ -1419,8 +1521,7 @@ bool CheckInputsOk(PyObject* seq, int start_index, } for (Py_ssize_t j = 0; j < PySequence_Fast_GET_SIZE(item); j++) { PyObject* inner_item = PySequence_Fast_GET_ITEM(item, j); - if (!EagerTensor_CheckExact(inner_item) && - !CheckResourceVariable(inner_item)) { + if (!CheckOneInput(inner_item)) { VLOG(1) << "Falling back to slow path for Op \"" << op_def.name() << "\", Input \"" << op_def.input_arg(i).name() << "\", Index " @@ -1430,7 +1531,7 @@ bool CheckInputsOk(PyObject* seq, int start_index, return false; } } - } else if (!EagerTensor_CheckExact(item) && !CheckResourceVariable(item)) { + } else if (!CheckOneInput(item)) { VLOG(1) << "Falling back to slow path for Op \"" << op_def.name() << "\", Input \"" << op_def.input_arg(i).name() @@ -1443,6 +1544,52 @@ bool CheckInputsOk(PyObject* seq, int start_index, return true; } +PyObject* MaybeGetDType(PyObject* item) { + if (EagerTensor_CheckExact(item)) { + tensorflow::Safe_PyObjectPtr py_dtype( + PyObject_GetAttrString(item, "dtype")); + return PyObject_GetAttrString(py_dtype.get(), "_type_enum"); + } + + if (CheckResourceVariable(item)) { + tensorflow::Safe_PyObjectPtr py_dtype( + PyObject_GetAttrString(item, "_dtype")); + return PyObject_GetAttrString(py_dtype.get(), "_type_enum"); + } + + return nullptr; +} + +PyObject* MaybeGetDTypeForAttr(const string& attr, + FastPathOpExecInfo* op_exec_info) { + auto cached_it = op_exec_info->cached_dtypes.find(attr); + if 
(cached_it != op_exec_info->cached_dtypes.end()) { + return GetPythonObjectFromInt(cached_it->second); + } + + auto it = op_exec_info->attr_to_inputs_map->find(attr); + if (it == op_exec_info->attr_to_inputs_map->end()) { + // No other inputs - this should never happen. + Py_RETURN_NONE; + } + + for (const auto& input_info : it->second) { + PyObject* item = PyTuple_GET_ITEM( + op_exec_info->args, kFastPathExecuteInputStartIndex + input_info.i); + if (input_info.is_list) { + for (int i = 0; i < PySequence_Fast_GET_SIZE(item); i++) { + auto* dtype = MaybeGetDType(PySequence_Fast_GET_ITEM(item, i)); + if (dtype != nullptr) return dtype; + } + } else { + auto* dtype = MaybeGetDType(item); + if (dtype != nullptr) return dtype; + } + } + + Py_RETURN_NONE; +} + bool OpDoesntRequireOutput(const string& op_name) { static tensorflow::gtl::FlatSet* ops_that_dont_require_outputs = new tensorflow::gtl::FlatSet({ @@ -1668,23 +1815,80 @@ bool ReadVariableOp(const FastPathOpExecInfo& parent_op_exec_info, // i) input is an EagerTensor // ii) input is a ResourceVariable - in this case, the is_variable param is set // to true. -bool ConvertToTensor(const FastPathOpExecInfo& op_exec_info, PyObject* input, - tensorflow::Safe_PyObjectPtr* output_handle, - TF_Status* status) { - if (CheckResourceVariable(input)) { +// +// NOTE: dtype_hint_getter must *always* return a PyObject that can be +// decref'd. So if no hint is found, Py_RETURN_NONE (which correctly +// increfs Py_None). +bool ConvertToTensor( + const FastPathOpExecInfo& op_exec_info, PyObject* input, + tensorflow::Safe_PyObjectPtr* output_handle, + // This gets a hint for this particular input. + const std::function& dtype_hint_getter, + // This sets the dtype after conversion is complete. 
+ const std::function& dtype_setter, + TF_Status* status) { + if (EagerTensor_CheckExact(input)) { + Py_INCREF(input); + output_handle->reset(input); + return true; + } else if (CheckResourceVariable(input)) { return ReadVariableOp(op_exec_info, input, output_handle, status); } - Py_INCREF(input); - output_handle->reset(input); + // The hint comes from a supposedly similarly typed tensor. + tensorflow::Safe_PyObjectPtr dtype_hint(dtype_hint_getter()); + if (PyErr_Occurred()) { + return false; + } + + tensorflow::Safe_TFE_TensorHandlePtr handle = + tensorflow::make_safe(static_cast( + tensorflow::ConvertToEagerTensor(input, dtype_hint.get()))); + if (handle == nullptr) { + status->status = tensorflow::errors::InvalidArgument( + "Unable to convert value to tensor"); + return false; + } + + int desired_dtype = -1; + if (dtype_hint.get() != Py_None) { + if (!ParseTypeValue("", dtype_hint.get(), status, &desired_dtype)) { + status->status = tensorflow::errors::InvalidArgument( + "Expecting a DataType value for dtype. Got ", + Py_TYPE(dtype_hint.get())->tp_name); + } + } + + TF_DataType handle_dtype = TFE_TensorHandleDataType(handle.get()); + if (desired_dtype >= 0 && desired_dtype != handle_dtype) { + handle = tensorflow::make_safe( + tensorflow::EagerCast(op_exec_info.ctx, handle.get(), handle_dtype, + static_cast(desired_dtype), status)); + if (!status->status.ok()) return false; + + handle_dtype = TFE_TensorHandleDataType(handle.get()); + } + + if (handle_dtype != TF_INT32) { + // Note that this is a shallow copy and will share the underlying buffer + // if copying to the same device. + handle = tensorflow::make_safe(TFE_TensorHandleCopyToDevice( + handle.get(), op_exec_info.ctx, op_exec_info.device_name, status)); + if (!status->status.ok()) return false; + } + + output_handle->reset(EagerTensorFromHandle(handle.release())); + + dtype_setter(handle_dtype); return true; } // Adds input and type attr to the op, and to the list of flattened // inputs/attrs. 
-bool AddInputToOp(const FastPathOpExecInfo& op_exec_info, PyObject* input, - const tensorflow::OpDef::ArgDef* input_arg, +bool AddInputToOp(FastPathOpExecInfo* op_exec_info, PyObject* input, + const bool add_type_attr, + const tensorflow::OpDef::ArgDef& input_arg, std::vector* flattened_attrs, std::vector* flattened_inputs, TFE_Op* op, TF_Status* status) { @@ -1693,18 +1897,30 @@ bool AddInputToOp(const FastPathOpExecInfo& op_exec_info, PyObject* input, // out of scope in this function. tensorflow::Safe_PyObjectPtr py_eager_tensor = nullptr; - if (!ConvertToTensor(op_exec_info, input, &py_eager_tensor, status)) { + if (!ConvertToTensor( + *op_exec_info, input, &py_eager_tensor, + [&]() { + if (input_arg.type() != tensorflow::DataType::DT_INVALID) { + return GetPythonObjectFromInt(input_arg.type()); + } + return MaybeGetDTypeForAttr(input_arg.type_attr(), op_exec_info); + }, + [&](const TF_DataType dtype) { + op_exec_info->cached_dtypes[input_arg.type_attr()] = + static_cast(dtype); + }, + status)) { return false; } TFE_TensorHandle* input_handle = EagerTensor_Handle(py_eager_tensor.get()); - if (input_arg != nullptr && !input_arg->type_attr().empty()) { + if (add_type_attr && !input_arg.type_attr().empty()) { auto dtype = TFE_TensorHandleDataType(input_handle); - TFE_OpSetAttrType(op, input_arg->type_attr().data(), dtype); + TFE_OpSetAttrType(op, input_arg.type_attr().data(), dtype); if (flattened_attrs != nullptr) { flattened_attrs->emplace_back( - GetPythonObjectFromString(input_arg->type_attr().data())); + GetPythonObjectFromString(input_arg.type_attr().data())); flattened_attrs->emplace_back(PyLong_FromLong(dtype)); } } @@ -1844,6 +2060,16 @@ PyObject* TFE_Py_FastPathExecute_C(PyObject*, PyObject* args) { op_exec_info.ctx = reinterpret_cast( PyCapsule_GetPointer(PyTuple_GET_ITEM(args, 0), nullptr)); + op_exec_info.args = args; + + if (op_exec_info.ctx == nullptr) { + // The context hasn't been initialized. It will be in the slow path. 
+ RaiseFallbackException( + "This function does not handle the case of the path where " + "all inputs are not already EagerTensors."); + return nullptr; + } + op_exec_info.device_name = GetDeviceName(PyTuple_GET_ITEM(args, 1)); op_exec_info.op_name = PyTuple_GET_ITEM(args, 2); op_exec_info.op_def = GetOpDef(op_exec_info.op_name); @@ -1883,6 +2109,8 @@ PyObject* TFE_Py_FastPathExecute_C(PyObject*, PyObject* args) { return nullptr; } + op_exec_info.attr_to_inputs_map = GetAttrToInputsMap(*op_def); + TF_Status* status = TF_NewStatus(); TFE_Op* op = TFE_NewOp(op_exec_info.ctx, op_def->name().c_str(), status); auto cleaner = tensorflow::gtl::MakeCleanup([status, op] { @@ -1977,17 +2205,16 @@ PyObject* TFE_Py_FastPathExecute_C(PyObject*, PyObject* args) { if (len > 0) { // First item adds the type attr. - if (!AddInputToOp(op_exec_info, PySequence_Fast_GET_ITEM(input, 0), - &input_arg, flattened_attrs.get(), + if (!AddInputToOp(&op_exec_info, PySequence_Fast_GET_ITEM(input, 0), + true, input_arg, flattened_attrs.get(), flattened_inputs.get(), op, status)) { return nullptr; } for (Py_ssize_t j = 1; j < len; j++) { // Since the list is homogeneous, we don't need to re-add the attr. 
- if (!AddInputToOp(op_exec_info, PySequence_Fast_GET_ITEM(input, j), - nullptr /* input_arg */, - nullptr /* flattened_attrs */, + if (!AddInputToOp(&op_exec_info, PySequence_Fast_GET_ITEM(input, j), + false, input_arg, nullptr /* flattened_attrs */, flattened_inputs.get(), op, status)) { return nullptr; } @@ -2009,7 +2236,8 @@ PyObject* TFE_Py_FastPathExecute_C(PyObject*, PyObject* args) { PyObject* py_input = PySequence_Fast_GET_ITEM(input, j); tensorflow::Safe_PyObjectPtr py_eager_tensor; if (!ConvertToTensor(op_exec_info, py_input, &py_eager_tensor, - status)) { + []() { Py_RETURN_NONE; }, + [](const TF_DataType& dtype) {}, status)) { return nullptr; } @@ -2039,8 +2267,9 @@ PyObject* TFE_Py_FastPathExecute_C(PyObject*, PyObject* args) { attr_list_sizes[attr_name] = len; } else { // The item is a single item. - if (!AddInputToOp(op_exec_info, input, &input_arg, flattened_attrs.get(), - flattened_inputs.get(), op, status)) { + if (!AddInputToOp(&op_exec_info, input, true, input_arg, + flattened_attrs.get(), flattened_inputs.get(), op, + status)) { return nullptr; } } diff --git a/tensorflow/python/eager/tensor_test.py b/tensorflow/python/eager/tensor_test.py index 0bd5a5dbafd5ea8da21d4fb8a7dcae9fe23dd3d2..b044b30231603b0265aa1ef0320e9f1cfb303724 100644 --- a/tensorflow/python/eager/tensor_test.py +++ b/tensorflow/python/eager/tensor_test.py @@ -278,14 +278,9 @@ class TFETensorUtilTest(test_util.TensorFlowTestCase): with self.assertRaisesRegexp( TypeError, - r"tensor_list argument must be a list. Got \"EagerTensor\""): + r"tensors argument must be a list or a tuple. Got \"EagerTensor\""): pywrap_tensorflow.TFE_Py_TensorShapeSlice(t1, -2) - with self.assertRaisesRegexp( - TypeError, - r"tensor_list argument must be a list. 
Got \"tuple\""): - pywrap_tensorflow.TFE_Py_TensorShapeSlice((t1,), -2) - def testNegativeSliceDim(self): t1 = _create_tensor([1, 2], dtype=dtypes.int32) diff --git a/tensorflow/python/estimator/BUILD b/tensorflow/python/estimator/BUILD index f93bc221cc5c68ed734cc7a4135a4c9c5627c0cc..c6bb9b9be7cb8049acb2d6c3fe0f50a720e71f2e 100644 --- a/tensorflow/python/estimator/BUILD +++ b/tensorflow/python/estimator/BUILD @@ -7,7 +7,6 @@ package( licenses(["notice"]) # Apache 2.0 load("//tensorflow:tensorflow.bzl", "py_test") -load("//tensorflow:tensorflow.bzl", "cuda_py_test") py_library( name = "estimator_py", @@ -25,7 +24,6 @@ py_library( ":linear", ":model_fn", ":parsing_utils", - ":replicate_model_fn", ":run_config", ":training", "//tensorflow/python:util", @@ -207,6 +205,7 @@ py_test( "no_pip", "noasan", # test flakily times out in asan mode. "notsan", # b/67510291 + "optonly", # flakily times out in fastbuild ], deps = [ ":baseline", @@ -251,6 +250,7 @@ py_library( "//tensorflow/python:array_ops", "//tensorflow/python:boosted_trees_ops", "//tensorflow/python:data_flow_ops", + "//tensorflow/python:distribute", "//tensorflow/python:dtypes", "//tensorflow/python:framework_ops", "//tensorflow/python:lookup_ops", @@ -327,6 +327,7 @@ py_library( "//tensorflow/python:client_testlib", "//tensorflow/python:constant_op", "//tensorflow/python:control_flow_ops", + "//tensorflow/python:distribute", "//tensorflow/python:dtypes", "//tensorflow/python:framework_ops", "//tensorflow/python:math_ops", @@ -383,6 +384,7 @@ py_library( ":model_fn", ":optimizers", "//tensorflow/python:control_flow_ops", + "//tensorflow/python:distribute", "//tensorflow/python:framework_ops", "//tensorflow/python:init_ops", "//tensorflow/python:layers", @@ -466,6 +468,7 @@ py_library( "//tensorflow/core:protos_all_py", "//tensorflow/python:client", "//tensorflow/python:control_flow_ops", + "//tensorflow/python:distribute", "//tensorflow/python:framework_for_generated_wrappers", "//tensorflow/python:metrics", 
"//tensorflow/python:platform", @@ -743,6 +746,7 @@ py_library( "//tensorflow/python:client_testlib", "//tensorflow/python:control_flow_ops", "//tensorflow/python:data_flow_ops", + "//tensorflow/python:distribute", "//tensorflow/python:dtypes", "//tensorflow/python:framework_ops", "//tensorflow/python:math_ops", @@ -904,67 +908,3 @@ py_test( "//tensorflow/python:training", ], ) - -py_library( - name = "replicate_model_fn", - srcs = [ - "replicate_model_fn.py", - ], - srcs_version = "PY2AND3", - deps = [ - ":export_output", - ":model_fn", - ":util", - "//tensorflow/core:protos_all_py", - "//tensorflow/python:array_ops", - "//tensorflow/python:control_flow_ops", - "//tensorflow/python:device", - "//tensorflow/python:device_lib", - "//tensorflow/python:framework_ops", - "//tensorflow/python:math_ops", - "//tensorflow/python:platform", - "//tensorflow/python:sparse_ops", - "//tensorflow/python:sparse_tensor", - "//tensorflow/python:state_ops", - "//tensorflow/python:training", - "//tensorflow/python:variable_scope", - "//tensorflow/python/ops/losses", - "@six_archive//:six", - ], -) - -cuda_py_test( - name = "replicate_model_fn_test", - size = "medium", - srcs = ["replicate_model_fn_test.py"], - additional_deps = [ - "//tensorflow/python/estimator", - ":dnn", - ":export_export", - ":export_output", - ":model_fn", - ":numpy_io", - ":optimizers", - ":prediction_keys", - "//tensorflow/python/feature_column", - "//tensorflow/python/ops/losses", - "//tensorflow/python/saved_model:signature_constants", - "//tensorflow/python:array_ops", - "//tensorflow/python:client_testlib", - "//tensorflow/python:control_flow_ops", - "//tensorflow/python:framework_for_generated_wrappers", - "//tensorflow/python:framework_test_lib", - "//tensorflow/python:math_ops", - "//tensorflow/python:metrics", - "//tensorflow/python:platform", - "//tensorflow/python:summary", - "//tensorflow/python:training", - "//tensorflow/python:variable_scope", - "//tensorflow/python:variables", - 
":replicate_model_fn", - ], - tags = [ - "multi_gpu", - "noasan", # flaky time outs - ], -) diff --git a/tensorflow/python/estimator/canned/boosted_trees.py b/tensorflow/python/estimator/canned/boosted_trees.py index 500ea03ea7fef9c60b9f36f1d04f1f4c337371e8..085dace1b3eb1b75b1b2d688e7cdffec10c2a878 100644 --- a/tensorflow/python/estimator/canned/boosted_trees.py +++ b/tensorflow/python/estimator/canned/boosted_trees.py @@ -32,6 +32,7 @@ from tensorflow.python.ops import data_flow_ops from tensorflow.python.ops import gradients_impl from tensorflow.python.ops import lookup_ops from tensorflow.python.ops import math_ops +from tensorflow.python.ops import state_ops from tensorflow.python.ops import variable_scope from tensorflow.python.ops.losses import losses from tensorflow.python.summary import summary @@ -40,14 +41,42 @@ from tensorflow.python.training import session_run_hook from tensorflow.python.training import training_util from tensorflow.python.util.tf_export import tf_export -_TreeHParams = collections.namedtuple( - 'TreeHParams', - ['n_trees', 'max_depth', 'learning_rate', 'l1', 'l2', 'tree_complexity']) +# TODO(nponomareva): Reveal pruning params here. +_TreeHParams = collections.namedtuple('TreeHParams', [ + 'n_trees', 'max_depth', 'learning_rate', 'l1', 'l2', 'tree_complexity', + 'min_node_weight' +]) _HOLD_FOR_MULTI_CLASS_SUPPORT = object() _HOLD_FOR_MULTI_DIM_SUPPORT = object() +def _get_max_buckets(feature_columns): + """Gets the maximum number of buckets from feature_columns. + + Args: + feature_columns: a list/set of tf.feature_column. + + Returns: + max_buckets: the maximum number of buckets among bucketized_columns. + + Raises: + ValueError: when unsupported feature_columns are given. 
+ """ + if not feature_columns: + raise ValueError('feature_columns must be a non-empty list/set of ' + 'tf.feature_column.') + max_buckets = 1 + for fc in feature_columns: + if isinstance(fc, feature_column_lib._BucketizedColumn): # pylint:disable=protected-access + # N boundaries creates (N+1) buckets. + max_buckets = max(max_buckets, len(fc.boundaries) + 1) + else: + raise ValueError('For now, only bucketized_column is supported but ' + 'got: {}'.format(fc)) + return max_buckets + + def _get_transformed_features(features, feature_columns): """Gets the transformed features from features/feature_columns pair. @@ -57,36 +86,31 @@ def _get_transformed_features(features, feature_columns): Returns: result_features: a list of the transformed features, sorted by the name. - num_buckets: the maximum number of buckets across bucketized_columns. Raises: ValueError: when unsupported features/columns are tried. """ - num_buckets = 1 # pylint:disable=protected-access for fc in feature_columns: - if isinstance(fc, feature_column_lib._BucketizedColumn): - # N boundaries creates (N+1) buckets. 
- num_buckets = max(num_buckets, len(fc.boundaries) + 1) - else: + if not isinstance(fc, feature_column_lib._BucketizedColumn): raise ValueError('For now, only bucketized_column is supported but ' 'got: {}'.format(fc)) - transformed = feature_column_lib._transform_features(features, - feature_columns) + transformed_features = feature_column_lib._transform_features( + features, feature_columns) # pylint:enable=protected-access result_features = [] - for column in sorted(transformed, key=lambda tc: tc.name): + for column in sorted(transformed_features, key=lambda tc: tc.name): source_name = column.source_column.name - squeezed_tensor = array_ops.squeeze(transformed[column], axis=1) + squeezed_tensor = array_ops.squeeze(transformed_features[column], axis=1) if len(squeezed_tensor.shape) > 1: raise ValueError('For now, only supports features equivalent to rank 1 ' 'but column `{}` got: {}'.format( source_name, features[source_name].shape)) result_features.append(squeezed_tensor) - return result_features, num_buckets + return result_features -def _keep_as_local_variable(tensor, name=None): +def _local_variable(tensor, name=None): """Stores a tensor as a local Variable for faster read.""" return variable_scope.variable( initial_value=tensor, @@ -96,6 +120,48 @@ def _keep_as_local_variable(tensor, name=None): name=name) +def _cache_transformed_features(features, feature_columns, batch_size): + """Transform features and cache, then returns (cached_features, cache_op).""" + num_features = len(feature_columns) + cached_features = [ + _local_variable( + array_ops.zeros([batch_size], dtype=dtypes.int32), + name='cached_feature_{}'.format(i)) + for i in range(num_features) + ] + are_features_cached = _local_variable(False, name='are_features_cached') + + def cache_features_and_return(): + """Caches transformed features.
+ + The intention is to hide get_transformed_features() from the graph by + caching the result except the first step, since bucketize operation + (inside get_transformed_features) is expensive. + + Returns: + input_feature_list: a list of input features. + cache_flip_op: op to add to graph to make sure cache update is included to + the graph. + """ + + transformed_features = _get_transformed_features(features, feature_columns) + cached = [ + state_ops.assign(cached_features[i], transformed_features[i]) + for i in range(num_features) + ] + # TODO(youngheek): Try other combination of dependencies so that the + # function returns a single result, not a tuple. + with ops.control_dependencies(cached): + cache_flip_op = are_features_cached.assign(True) + return cached, cache_flip_op + + input_feature_list, cache_flip_op = control_flow_ops.cond( + are_features_cached, + lambda: (cached_features, control_flow_ops.no_op()), + cache_features_and_return) + return input_feature_list, cache_flip_op + + class _CacheTrainingStatesUsingHashTable(object): """Caching logits, etc. using MutableHashTable.""" @@ -184,13 +250,13 @@ class _CacheTrainingStatesUsingVariables(object): logits_dimension: a constant (int) for the dimension of logits. 
""" self._logits_dimension = logits_dimension - self._tree_ids = _keep_as_local_variable( + self._tree_ids = _local_variable( array_ops.zeros([batch_size], dtype=dtypes.int32), name='tree_ids_cache') - self._node_ids = _keep_as_local_variable( + self._node_ids = _local_variable( array_ops.zeros([batch_size], dtype=dtypes.int32), name='node_ids_cache') - self._logits = _keep_as_local_variable( + self._logits = _local_variable( array_ops.zeros([batch_size, logits_dimension], dtype=dtypes.float32), name='logits_cache') @@ -209,8 +275,8 @@ class _CacheTrainingStatesUsingVariables(object): name='cache_insert') -class StopAtAttemptsHook(session_run_hook.SessionRunHook): - """Hook that requests stop at the number of trees.""" +class _StopAtAttemptsHook(session_run_hook.SessionRunHook): + """Hook that requests stop at the number of attempts.""" def __init__(self, num_finalized_trees_tensor, num_attempted_layers_tensor, max_trees, max_depth): @@ -224,25 +290,17 @@ class StopAtAttemptsHook(session_run_hook.SessionRunHook): [self._num_finalized_trees_tensor, self._num_attempted_layers_tensor]) def after_run(self, run_context, run_values): + # num_* tensors should be retrieved by a separate session than the training + # one, in order to read the values after growing. + # So, if it's approaching to the limit, get the actual value by additional + # session. 
num_finalized_trees, num_attempted_layers = run_values.results + if (num_finalized_trees >= self._max_trees - 1 or + num_attempted_layers > 2 * self._max_trees * self._max_depth - 1): + num_finalized_trees, num_attempted_layers = run_context.session.run( + [self._num_finalized_trees_tensor, self._num_attempted_layers_tensor]) if (num_finalized_trees >= self._max_trees or - 1.0 * num_attempted_layers / self._max_depth > 2 * self._max_trees): - run_context.request_stop() - - -class StopAtNumTreesHook(session_run_hook.SessionRunHook): - """Hook that requests stop at the number of trees.""" - - def __init__(self, num_trees_tensor, max_trees): - self._num_trees_tensor = num_trees_tensor - self._max_trees = max_trees - - def before_run(self, run_context): - return session_run_hook.SessionRunArgs(self._num_trees_tensor) - - def after_run(self, run_context, run_values): - num_trees = run_values.results - if num_trees > self._max_trees: + num_attempted_layers > 2 * self._max_trees * self._max_depth): run_context.request_stop() @@ -296,69 +354,75 @@ def _bt_model_fn( 'When train_in_memory is enabled, input_fn should return the entire ' 'dataset as a single batch, and n_batches_per_layer should be set as ' '1.') + if (not config.is_chief or config.num_worker_replicas > 1 or + config.num_ps_replicas > 0): + raise ValueError('train_in_memory is supported only for ' + 'non-distributed training.') worker_device = control_flow_ops.no_op().device # maximum number of splits possible in the whole tree =2^(D-1)-1 # TODO(youngheek): perhaps storage could be optimized by storing stats with # the dimension max_splits_per_layer, instead of max_splits (for the entire # tree). max_splits = (1 << tree_hparams.max_depth) - 1 + max_buckets = _get_max_buckets(feature_columns) + train_op = [] with ops.name_scope(name) as name: # Prepare. 
global_step = training_util.get_or_create_global_step() - input_feature_list, num_buckets = _get_transformed_features( - features, feature_columns) - if train_in_memory and mode == model_fn.ModeKeys.TRAIN: - input_feature_list = [ - _keep_as_local_variable(feature) for feature in input_feature_list - ] - num_features = len(input_feature_list) - - cache = None - if mode == model_fn.ModeKeys.TRAIN: - if train_in_memory and is_single_machine: # maybe just train_in_memory? - batch_size = array_ops.shape(input_feature_list[0])[0] - cache = _CacheTrainingStatesUsingVariables(batch_size, - head.logits_dimension) - elif example_id_column_name: + num_features = len(feature_columns) + # Extract input features and set up cache for training. + training_state_cache = None + if mode == model_fn.ModeKeys.TRAIN and train_in_memory: + # cache transformed features as well for in-memory training. + batch_size = array_ops.shape(labels)[0] + input_feature_list, input_cache_op = _cache_transformed_features( + features, feature_columns, batch_size) + train_op.append(input_cache_op) + training_state_cache = _CacheTrainingStatesUsingVariables( + batch_size, head.logits_dimension) + else: + input_feature_list = _get_transformed_features(features, feature_columns) + if mode == model_fn.ModeKeys.TRAIN and example_id_column_name: example_ids = features[example_id_column_name] - cache = _CacheTrainingStatesUsingHashTable(example_ids, - head.logits_dimension) + training_state_cache = _CacheTrainingStatesUsingHashTable( + example_ids, head.logits_dimension) # Create Ensemble resources. 
- if is_single_machine: - tree_ensemble = boosted_trees_ops.TreeEnsemble(name=name) - local_tree_ensemble = tree_ensemble - ensemble_reload = control_flow_ops.no_op() - else: - tree_ensemble = boosted_trees_ops.TreeEnsemble(name=name) - with ops.device(worker_device): - local_tree_ensemble = boosted_trees_ops.TreeEnsemble( - name=name + '_local', is_local=True) - # TODO(soroush): Do partial updates if this becomes a bottleneck. - ensemble_reload = local_tree_ensemble.deserialize( - *tree_ensemble.serialize()) - + tree_ensemble = boosted_trees_ops.TreeEnsemble(name=name) # Create logits. if mode != model_fn.ModeKeys.TRAIN: logits = boosted_trees_ops.predict( - tree_ensemble_handle=local_tree_ensemble.resource_handle, + # For non-TRAIN mode, ensemble doesn't change after initialization, + # so no local copy is needed; using tree_ensemble directly. + tree_ensemble_handle=tree_ensemble.resource_handle, bucketized_features=input_feature_list, - logits_dimension=head.logits_dimension, - max_depth=tree_hparams.max_depth) + logits_dimension=head.logits_dimension) else: - if cache: - cached_tree_ids, cached_node_ids, cached_logits = cache.lookup() + if is_single_machine: + local_tree_ensemble = tree_ensemble + ensemble_reload = control_flow_ops.no_op() + else: + # Have a local copy of ensemble for the distributed setting. + with ops.device(worker_device): + local_tree_ensemble = boosted_trees_ops.TreeEnsemble( + name=name + '_local', is_local=True) + # TODO(soroush): Do partial updates if this becomes a bottleneck. + ensemble_reload = local_tree_ensemble.deserialize( + *tree_ensemble.serialize()) + if training_state_cache: + cached_tree_ids, cached_node_ids, cached_logits = ( + training_state_cache.lookup()) else: # Always start from the beginning when no cache is set up. 
- batch_size = array_ops.shape(input_feature_list[0])[0] + batch_size = array_ops.shape(labels)[0] cached_tree_ids, cached_node_ids, cached_logits = ( array_ops.zeros([batch_size], dtype=dtypes.int32), array_ops.zeros([batch_size], dtype=dtypes.int32), array_ops.zeros( [batch_size, head.logits_dimension], dtype=dtypes.float32)) with ops.control_dependencies([ensemble_reload]): - (stamp_token, num_trees, num_finalized_trees, - num_attempted_layers) = local_tree_ensemble.get_states() + (stamp_token, num_trees, num_finalized_trees, num_attempted_layers, + last_layer_nodes_range) = local_tree_ensemble.get_states() summary.scalar('ensemble/num_trees', num_trees) summary.scalar('ensemble/num_finalized_trees', num_finalized_trees) summary.scalar('ensemble/num_attempted_layers', num_attempted_layers) @@ -368,16 +432,14 @@ def _bt_model_fn( cached_tree_ids=cached_tree_ids, cached_node_ids=cached_node_ids, bucketized_features=input_feature_list, - logits_dimension=head.logits_dimension, - max_depth=tree_hparams.max_depth) + logits_dimension=head.logits_dimension) logits = cached_logits + partial_logits # Create training graph. 
def _train_op_fn(loss): """Run one training iteration.""" - train_op = [] - if cache: - train_op.append(cache.insert(tree_ids, node_ids, logits)) + if training_state_cache: + train_op.append(training_state_cache.insert(tree_ids, node_ids, logits)) if closed_form_grad_and_hess_fn: gradients, hessians = closed_form_grad_and_hess_fn(logits, labels) else: @@ -392,7 +454,7 @@ def _bt_model_fn( hessians=hessians, bucketized_features_list=[input_feature_list[f]], max_splits=max_splits, - num_buckets=num_buckets), + num_buckets=max_buckets), axis=0) for f in range(num_features) ] @@ -401,14 +463,12 @@ def _bt_model_fn( (node_ids_per_feature, gains_list, thresholds_list, left_node_contribs_list, right_node_contribs_list) = ( boosted_trees_ops.calculate_best_gains_per_feature( - node_id_range=array_ops.stack([ - math_ops.reduce_min(node_ids), - math_ops.reduce_max(node_ids) - ]), + node_id_range=last_layer_nodes_range, stats_summary_list=stats_summary_list, l1=tree_hparams.l1, l2=tree_hparams.l2, tree_complexity=tree_hparams.tree_complexity, + min_node_weight=tree_hparams.min_node_weight, max_splits=max_splits)) grow_op = boosted_trees_ops.update_ensemble( # Confirm if local_tree_ensemble or tree_ensemble should be used. @@ -431,7 +491,7 @@ def _bt_model_fn( summary_accumulator = data_flow_ops.ConditionalAccumulator( dtype=dtypes.float32, # The stats consist of gradients and hessians (the last dimension). - shape=[num_features, max_splits, num_buckets, 2], + shape=[num_features, max_splits, max_buckets, 2], shared_name='stats_summary_accumulator') apply_grad = summary_accumulator.apply_grad( array_ops.stack(stats_summary_list, axis=0), stamp_token) @@ -468,7 +528,8 @@ def _bt_model_fn( # Add an early stop hook. 
estimator_spec = estimator_spec._replace( training_hooks=estimator_spec.training_hooks + - (StopAtNumTreesHook(num_trees, tree_hparams.n_trees),)) + (_StopAtAttemptsHook(num_finalized_trees, num_attempted_layers, + tree_hparams.n_trees, tree_hparams.max_depth),)) return estimator_spec @@ -526,21 +587,21 @@ def _create_regression_head(label_dimension, weight_column=None): class BoostedTreesClassifier(estimator.Estimator): """A Classifier for Tensorflow Boosted Trees models.""" - def __init__( - self, - feature_columns, - n_batches_per_layer, - model_dir=None, - n_classes=_HOLD_FOR_MULTI_CLASS_SUPPORT, - weight_column=None, - label_vocabulary=None, - n_trees=100, - max_depth=6, - learning_rate=0.1, - l1_regularization=0., - l2_regularization=0., - tree_complexity=0., - config=None): + def __init__(self, + feature_columns, + n_batches_per_layer, + model_dir=None, + n_classes=_HOLD_FOR_MULTI_CLASS_SUPPORT, + weight_column=None, + label_vocabulary=None, + n_trees=100, + max_depth=6, + learning_rate=0.1, + l1_regularization=0., + l2_regularization=0., + tree_complexity=0., + min_node_weight=0., + config=None): """Initializes a `BoostedTreesClassifier` instance. Example: @@ -604,6 +665,9 @@ class BoostedTreesClassifier(estimator.Estimator): l2_regularization: regularization multiplier applied to the square weights of the tree leafs. tree_complexity: regularization factor to penalize trees with more leaves. + min_node_weight: min_node_weight: minimum hessian a node must have for a + split to be considered. The value will be compared with + sum(leaf_hessian)/(batch_size * n_batches_per_layer). config: `RunConfig` object to configure the runtime settings. Raises: @@ -617,9 +681,9 @@ class BoostedTreesClassifier(estimator.Estimator): n_classes, weight_column, label_vocabulary=label_vocabulary) # HParams for the model. 
- tree_hparams = _TreeHParams( - n_trees, max_depth, learning_rate, l1_regularization, l2_regularization, - tree_complexity) + tree_hparams = _TreeHParams(n_trees, max_depth, learning_rate, + l1_regularization, l2_regularization, + tree_complexity, min_node_weight) def _model_fn(features, labels, mode, config): return _bt_model_fn( # pylint: disable=protected-access @@ -641,20 +705,20 @@ class BoostedTreesClassifier(estimator.Estimator): class BoostedTreesRegressor(estimator.Estimator): """A Regressor for Tensorflow Boosted Trees models.""" - def __init__( - self, - feature_columns, - n_batches_per_layer, - model_dir=None, - label_dimension=_HOLD_FOR_MULTI_DIM_SUPPORT, - weight_column=None, - n_trees=100, - max_depth=6, - learning_rate=0.1, - l1_regularization=0., - l2_regularization=0., - tree_complexity=0., - config=None): + def __init__(self, + feature_columns, + n_batches_per_layer, + model_dir=None, + label_dimension=_HOLD_FOR_MULTI_DIM_SUPPORT, + weight_column=None, + n_trees=100, + max_depth=6, + learning_rate=0.1, + l1_regularization=0., + l2_regularization=0., + tree_complexity=0., + min_node_weight=0., + config=None): """Initializes a `BoostedTreesRegressor` instance. Example: @@ -711,6 +775,9 @@ class BoostedTreesRegressor(estimator.Estimator): l2_regularization: regularization multiplier applied to the square weights of the tree leafs. tree_complexity: regularization factor to penalize trees with more leaves. + min_node_weight: min_node_weight: minimum hessian a node must have for a + split to be considered. The value will be compared with + sum(leaf_hessian)/(batch_size * n_batches_per_layer). config: `RunConfig` object to configure the runtime settings. Raises: @@ -723,9 +790,9 @@ class BoostedTreesRegressor(estimator.Estimator): head = _create_regression_head(label_dimension, weight_column) # HParams for the model. 
- tree_hparams = _TreeHParams( - n_trees, max_depth, learning_rate, l1_regularization, l2_regularization, - tree_complexity) + tree_hparams = _TreeHParams(n_trees, max_depth, learning_rate, + l1_regularization, l2_regularization, + tree_complexity, min_node_weight) def _model_fn(features, labels, mode, config): return _bt_model_fn( # pylint: disable=protected-access diff --git a/tensorflow/python/estimator/canned/boosted_trees_test.py b/tensorflow/python/estimator/canned/boosted_trees_test.py index 01e5cc7a5d6eb0a26fd47f7d7f9bfb566520b246..c8c52d3bc649c939b7f7531f52880580289a83d9 100644 --- a/tensorflow/python/estimator/canned/boosted_trees_test.py +++ b/tensorflow/python/estimator/canned/boosted_trees_test.py @@ -20,6 +20,7 @@ from __future__ import print_function import numpy as np from tensorflow.core.kernels.boosted_trees import boosted_trees_pb2 +from tensorflow.python.data.ops import dataset_ops from tensorflow.python.estimator import model_fn from tensorflow.python.estimator import run_config from tensorflow.python.estimator.canned import boosted_trees @@ -58,18 +59,37 @@ def _make_train_input_fn(is_classification): """Makes train input_fn for classification/regression.""" def _input_fn(): - features = dict(FEATURES_DICT) - features[EXAMPLE_ID_COLUMN] = constant_op.constant(EXAMPLE_IDS) - if is_classification: - labels = CLASSIFICATION_LABELS + features_dict = dict(FEATURES_DICT) + features_dict[EXAMPLE_ID_COLUMN] = constant_op.constant(EXAMPLE_IDS) + labels = CLASSIFICATION_LABELS if is_classification else REGRESSION_LABELS + return features_dict, labels + + return _input_fn + + +def _make_train_input_fn_dataset(is_classification, batch=None, repeat=None): + """Makes input_fn using Dataset.""" + + def _input_fn(): + features_dict = dict(FEATURES_DICT) + features_dict[EXAMPLE_ID_COLUMN] = constant_op.constant(EXAMPLE_IDS) + labels = CLASSIFICATION_LABELS if is_classification else REGRESSION_LABELS + if batch: + ds = dataset_ops.Dataset.zip( + 
(dataset_ops.Dataset.from_tensor_slices(features_dict), + dataset_ops.Dataset.from_tensor_slices(labels))).batch(batch) else: - labels = REGRESSION_LABELS - return features, labels + ds = dataset_ops.Dataset.zip( + (dataset_ops.Dataset.from_tensors(features_dict), + dataset_ops.Dataset.from_tensors(labels))) + # repeat indefinitely by default, or stop at the given step. + ds = ds.repeat(repeat) + return ds return _input_fn -class BoostedTreesClassifierTest(test_util.TensorFlowTestCase): +class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): def setUp(self): self._feature_columns = { @@ -79,10 +99,18 @@ class BoostedTreesClassifierTest(test_util.TensorFlowTestCase): for i in range(NUM_FEATURES) } - def _assert_checkpoint(self, model_dir, expected_global_step): - self.assertEqual(expected_global_step, - checkpoint_utils.load_variable(model_dir, - ops.GraphKeys.GLOBAL_STEP)) + def _assert_checkpoint(self, model_dir, global_step, finalized_trees, + attempted_layers): + reader = checkpoint_utils.load_checkpoint(model_dir) + self.assertEqual(global_step, reader.get_tensor(ops.GraphKeys.GLOBAL_STEP)) + serialized = reader.get_tensor('boosted_trees:0_serialized') + ensemble_proto = boosted_trees_pb2.TreeEnsemble() + ensemble_proto.ParseFromString(serialized) + self.assertEqual( + finalized_trees, + sum([1 for t in ensemble_proto.tree_metadata if t.is_finalized])) + self.assertEqual(attempted_layers, + ensemble_proto.growing_metadata.num_layers_attempted) def testTrainAndEvaluateBinaryClassifier(self): input_fn = _make_train_input_fn(is_classification=True) @@ -97,7 +125,8 @@ class BoostedTreesClassifierTest(test_util.TensorFlowTestCase): num_steps = 100 # Train for a few steps, and validate final checkpoint. 
est.train(input_fn, steps=num_steps) - self._assert_checkpoint(est.model_dir, 6) + self._assert_checkpoint( + est.model_dir, global_step=5, finalized_trees=1, attempted_layers=5) eval_res = est.evaluate(input_fn=input_fn, steps=1) self.assertAllClose(eval_res['accuracy'], 1.0) @@ -116,31 +145,30 @@ class BoostedTreesClassifierTest(test_util.TensorFlowTestCase): num_steps = 100 # Train for a few steps, and validate final checkpoint. est.train(train_input_fn, steps=num_steps) - + self._assert_checkpoint( + est.model_dir, global_step=5, finalized_trees=1, attempted_layers=5) predictions = list(est.predict(input_fn=predict_input_fn)) - self.assertEquals(5, len(predictions)) - # All labels are correct. - self.assertAllClose([0], predictions[0]['class_ids']) - self.assertAllClose([1], predictions[1]['class_ids']) - self.assertAllClose([1], predictions[2]['class_ids']) - self.assertAllClose([0], predictions[3]['class_ids']) - self.assertAllClose([0], predictions[4]['class_ids']) - - -class BoostedTreesRegressionTest(test_util.TensorFlowTestCase): + self.assertAllClose([[0], [1], [1], [0], [0]], + [pred['class_ids'] for pred in predictions]) - def setUp(self): - self._feature_columns = { - feature_column.bucketized_column( - feature_column.numeric_column('f_%d' % i, dtype=dtypes.float32), - BUCKET_BOUNDARIES) - for i in range(NUM_FEATURES) - } + def testTrainClassifierWithDataset(self): + train_input_fn = _make_train_input_fn_dataset(is_classification=True) + predict_input_fn = numpy_io.numpy_input_fn( + x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False) - def _assert_checkpoint(self, model_dir, expected_global_step): - self.assertEqual(expected_global_step, - checkpoint_utils.load_variable(model_dir, - ops.GraphKeys.GLOBAL_STEP)) + est = boosted_trees.BoostedTreesClassifier( + feature_columns=self._feature_columns, + n_batches_per_layer=1, + n_trees=1, + max_depth=5) + est.train(train_input_fn, steps=100) # will stop after 5 steps anyway. 
+ self._assert_checkpoint( + est.model_dir, global_step=5, finalized_trees=1, attempted_layers=5) + eval_res = est.evaluate(input_fn=train_input_fn, steps=1) + self.assertAllClose(eval_res['accuracy'], 1.0) + predictions = list(est.predict(input_fn=predict_input_fn)) + self.assertAllClose([[0], [1], [1], [0], [0]], + [pred['class_ids'] for pred in predictions]) def testTrainAndEvaluateRegressor(self): input_fn = _make_train_input_fn(is_classification=False) @@ -155,9 +183,10 @@ class BoostedTreesRegressionTest(test_util.TensorFlowTestCase): num_steps = 100 # Train for a few steps, and validate final checkpoint. est.train(input_fn, steps=num_steps) - self._assert_checkpoint(est.model_dir, 11) + self._assert_checkpoint( + est.model_dir, global_step=10, finalized_trees=2, attempted_layers=10) eval_res = est.evaluate(input_fn=input_fn, steps=1) - self.assertAllClose(eval_res['average_loss'], 0.913176) + self.assertAllClose(eval_res['average_loss'], 1.008551) def testInferRegressor(self): train_input_fn = _make_train_input_fn(is_classification=False) @@ -174,16 +203,127 @@ class BoostedTreesRegressionTest(test_util.TensorFlowTestCase): num_steps = 100 # Train for a few steps, and validate final checkpoint. 
est.train(train_input_fn, steps=num_steps) - self._assert_checkpoint(est.model_dir, 6) + self._assert_checkpoint( + est.model_dir, global_step=5, finalized_trees=1, attempted_layers=5) + predictions = list(est.predict(input_fn=predict_input_fn)) + self.assertAllClose( + [[0.571619], [0.262821], [0.124549], [0.956801], [1.769801]], + [pred['predictions'] for pred in predictions]) + def testTrainRegressorWithDataset(self): + train_input_fn = _make_train_input_fn_dataset(is_classification=False) + predict_input_fn = numpy_io.numpy_input_fn( + x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False) + + est = boosted_trees.BoostedTreesRegressor( + feature_columns=self._feature_columns, + n_batches_per_layer=1, + n_trees=1, + max_depth=5) + est.train(train_input_fn, steps=100) # will stop after 5 steps anyway. + self._assert_checkpoint( + est.model_dir, global_step=5, finalized_trees=1, attempted_layers=5) + eval_res = est.evaluate(input_fn=train_input_fn, steps=1) + self.assertAllClose(eval_res['average_loss'], 2.478283) predictions = list(est.predict(input_fn=predict_input_fn)) + self.assertAllClose( + [[0.571619], [0.262821], [0.124549], [0.956801], [1.769801]], + [pred['predictions'] for pred in predictions]) + + def testTrainRegressorWithDatasetBatch(self): + # The batch_size as the entire data size should yield the same result as + # dataset without batching. + train_input_fn = _make_train_input_fn_dataset( + is_classification=False, batch=5) + predict_input_fn = numpy_io.numpy_input_fn( + x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False) + + est = boosted_trees.BoostedTreesRegressor( + feature_columns=self._feature_columns, + n_batches_per_layer=1, + n_trees=1, + max_depth=5) + est.train(train_input_fn, steps=100) # will stop after 5 steps anyway. 
+ self._assert_checkpoint( + est.model_dir, global_step=5, finalized_trees=1, attempted_layers=5) + eval_res = est.evaluate(input_fn=train_input_fn, steps=1) + self.assertAllClose(eval_res['average_loss'], 2.478283) + predictions = list(est.predict(input_fn=predict_input_fn)) + self.assertAllClose( + [[0.571619], [0.262821], [0.124549], [0.956801], [1.769801]], + [pred['predictions'] for pred in predictions]) + + def testTrainRegressorWithDatasetLargerBatch(self): + # The batch_size as the multiple of the entire data size should still yield + # the same result. + train_input_fn = _make_train_input_fn_dataset( + is_classification=False, batch=15) + predict_input_fn = numpy_io.numpy_input_fn( + x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False) + + est = boosted_trees.BoostedTreesRegressor( + feature_columns=self._feature_columns, + n_batches_per_layer=1, + n_trees=1, + max_depth=5) + est.train(train_input_fn, steps=100) # will stop after 5 steps anyway. + self._assert_checkpoint( + est.model_dir, global_step=5, finalized_trees=1, attempted_layers=5) + eval_res = est.evaluate(input_fn=train_input_fn, steps=1) + self.assertAllClose(eval_res['average_loss'], 2.478283) + predictions = list(est.predict(input_fn=predict_input_fn)) + self.assertAllClose( + [[0.571619], [0.262821], [0.124549], [0.956801], [1.769801]], + [pred['predictions'] for pred in predictions]) + + def testTrainRegressorWithDatasetSmallerBatch(self): + # Even when using small batches, if (n_batches_per_layer * batch_size) makes + # the same entire data size, the result should be the same. 
+ train_input_fn = _make_train_input_fn_dataset( + is_classification=False, batch=1) + predict_input_fn = numpy_io.numpy_input_fn( + x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False) - self.assertEquals(5, len(predictions)) - self.assertAllClose([0.703549], predictions[0]['predictions']) - self.assertAllClose([0.266539], predictions[1]['predictions']) - self.assertAllClose([0.256479], predictions[2]['predictions']) - self.assertAllClose([1.088732], predictions[3]['predictions']) - self.assertAllClose([1.901732], predictions[4]['predictions']) + est = boosted_trees.BoostedTreesRegressor( + feature_columns=self._feature_columns, + n_batches_per_layer=5, + n_trees=1, + max_depth=5) + # Train stops after (n_batches_per_layer * n_trees * max_depth) steps. + est.train(train_input_fn, steps=100) + self._assert_checkpoint( + est.model_dir, global_step=25, finalized_trees=1, attempted_layers=5) + # 5 batches = one epoch. + eval_res = est.evaluate(input_fn=train_input_fn, steps=5) + self.assertAllClose(eval_res['average_loss'], 2.478283) + predictions = list(est.predict(input_fn=predict_input_fn)) + self.assertAllClose( + [[0.571619], [0.262821], [0.124549], [0.956801], [1.769801]], + [pred['predictions'] for pred in predictions]) + + def testTrainRegressorWithDatasetWhenInputIsOverEarlier(self): + train_input_fn = _make_train_input_fn_dataset( + is_classification=False, repeat=3) # to stop input after 3 steps. + predict_input_fn = numpy_io.numpy_input_fn( + x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False) + + est = boosted_trees.BoostedTreesRegressor( + feature_columns=self._feature_columns, + n_batches_per_layer=1, + n_trees=1, + max_depth=5) + # Note that training will stop when input exhausts. + # This might not be a typical pattern, but dataset.repeat(3) causes + # the input stream to cease after 3 steps. 
+ est.train(train_input_fn, steps=100) + self._assert_checkpoint( + est.model_dir, global_step=3, finalized_trees=0, attempted_layers=3) + eval_res = est.evaluate(input_fn=train_input_fn, steps=1) + self.assertAllClose(eval_res['average_loss'], 3.777295) + predictions = list(est.predict(input_fn=predict_input_fn)) + self.assertAllClose( + [[0.353850], [0.254100], [0.106850], [0.712100], [1.012100]], + [pred['predictions'] for pred in predictions]) class ModelFnTests(test_util.TensorFlowTestCase): @@ -201,7 +341,8 @@ class ModelFnTests(test_util.TensorFlowTestCase): learning_rate=0.1, l1=0., l2=0.01, - tree_complexity=0.) + tree_complexity=0., + min_node_weight=0.) def _get_expected_ensembles_for_classification(self): first_round = """ @@ -236,6 +377,8 @@ class ModelFnTests(test_util.TensorFlowTestCase): growing_metadata { num_trees_attempted: 1 num_layers_attempted: 1 + last_layer_node_start: 1 + last_layer_node_end: 3 } """ second_round = """ @@ -320,6 +463,8 @@ class ModelFnTests(test_util.TensorFlowTestCase): growing_metadata { num_trees_attempted: 1 num_layers_attempted: 2 + last_layer_node_start: 0 + last_layer_node_end: 1 } """ third_round = """ @@ -420,6 +565,8 @@ class ModelFnTests(test_util.TensorFlowTestCase): growing_metadata { num_trees_attempted: 2 num_layers_attempted: 3 + last_layer_node_start: 1 + last_layer_node_end: 3 } """ return (first_round, second_round, third_round) @@ -457,6 +604,8 @@ class ModelFnTests(test_util.TensorFlowTestCase): growing_metadata { num_trees_attempted: 1 num_layers_attempted: 1 + last_layer_node_start: 1 + last_layer_node_end: 3 } """ second_round = """ @@ -541,6 +690,8 @@ class ModelFnTests(test_util.TensorFlowTestCase): growing_metadata { num_trees_attempted: 1 num_layers_attempted: 2 + last_layer_node_start: 0 + last_layer_node_end: 1 } """ third_round = """ @@ -641,6 +792,8 @@ class ModelFnTests(test_util.TensorFlowTestCase): growing_metadata { num_trees_attempted: 2 num_layers_attempted: 3 + last_layer_node_start: 1 
+ last_layer_node_end: 3 } """ return (first_round, second_round, third_round) diff --git a/tensorflow/python/estimator/canned/head.py b/tensorflow/python/estimator/canned/head.py index bb033d349534e044b2b92d064051ee5fa07f4d62..efa4bdf5980a34001e10bb7a1125e4434215f0ee 100644 --- a/tensorflow/python/estimator/canned/head.py +++ b/tensorflow/python/estimator/canned/head.py @@ -57,8 +57,8 @@ _PREDICT_SERVING_KEY = 'predict' # A LossSpec contains # * a scalar `Tensor` representing reduced weighted training loss -# * a scalar `Tensor` representing the unreduced unweighted loss -# * a scalar `Tensor` representing the example weights +# * a `Tensor` representing the unreduced unweighted loss +# * a `Tensor` representing the example weights # * possibly processed labels (e.g. vocabulary lookup, shape manipulation, etc) LossSpec = collections.namedtuple( 'LossSpec', ['training_loss', 'unreduced_loss', 'weights', @@ -163,8 +163,8 @@ class _Head(object): Returns: A LossSpec that contains * the scalar `Tensor` representing reduced weighted training loss - * the scalar `Tensor` representing the unreduced unweighted loss - * the scalar `Tensor` representing the example weights + * the `Tensor` representing the unreduced unweighted loss + * the `Tensor` representing the example weights * possibly processed labels (e.g. vocabulary lookup, shape manipulation, etc.) @@ -263,9 +263,12 @@ def _check_dense_labels_match_logits_and_reshape( if (dim1 is not None) and (dim1 != expected_labels_dimension): raise ValueError( 'Mismatched label shape. ' - 'Classifier configured with n_classes=%s. Received %s. ' - 'Suggested Fix: check your n_classes argument to the estimator ' - 'and/or the shape of your label.' % + 'Expected labels dimension=%s. Received %s. ' + 'Suggested Fix:' + 'If your classifier expects one-hot encoding label,' + 'check your n_classes argument to the estimator' + 'and/or the shape of your label.' + 'Otherwise, check the shape of your label.' 
% (expected_labels_dimension, dim1)) expected_labels_shape = array_ops.concat( [logits_shape[:-1], [expected_labels_dimension]], axis=0) @@ -1039,7 +1042,7 @@ class _BinaryLogisticHeadWithSigmoidCrossEntropyLoss(_Head): vocabulary_list=tuple(self._label_vocabulary), name='class_id_lookup').lookup(labels) labels = math_ops.to_float(labels) - labels = _assert_range(labels, 2) + labels = _assert_range(labels, n_classes=2) if self._loss_fn: unweighted_loss = _call_loss_fn( loss_fn=self._loss_fn, labels=labels, logits=logits, @@ -1447,12 +1450,12 @@ class _RegressionHeadWithMeanSquaredErrorLoss(_Head): def _assert_range(labels, n_classes, message=None): with ops.name_scope(None, 'assert_range', (labels,)): - assert_less = check_ops.assert_less( + assert_less = check_ops.assert_less_equal( labels, - ops.convert_to_tensor(n_classes, dtype=labels.dtype), - message=message or 'Label IDs must < n_classes') + ops.convert_to_tensor(n_classes - 1, dtype=labels.dtype), + message=message or 'Labels must <= n_classes - 1') assert_greater = check_ops.assert_non_negative( - labels, message=message or 'Label IDs must >= 0') + labels, message=message or 'Labels must >= 0') with ops.control_dependencies((assert_less, assert_greater)): return array_ops.identity(labels) diff --git a/tensorflow/python/estimator/canned/head_test.py b/tensorflow/python/estimator/canned/head_test.py index fe6ee07529bc0314618a7cc85926dbb39660a352..7da3df01dc48d9ed3aef8a030f7a516db3a5abeb 100644 --- a/tensorflow/python/estimator/canned/head_test.py +++ b/tensorflow/python/estimator/canned/head_test.py @@ -255,14 +255,14 @@ class MultiClassHeadWithSoftmaxCrossEntropyLoss(test.TestCase): logits=logits_placeholder, labels=labels_placeholder)[0] with self.test_session(): - with self.assertRaisesOpError('Label IDs must < n_classes'): + with self.assertRaisesOpError('Labels must <= n_classes - 1'): training_loss.eval({ labels_placeholder: labels_2x1_with_large_id, logits_placeholder: logits_2x3 }) with 
self.test_session(): - with self.assertRaisesOpError('Label IDs must >= 0'): + with self.assertRaisesOpError('Labels must >= 0'): training_loss.eval({ labels_placeholder: labels_2x1_with_negative_id, logits_placeholder: logits_2x3 @@ -2090,6 +2090,24 @@ class BinaryLogisticHeadWithSigmoidCrossEntropyLossTest(test.TestCase): expected_regularization_loss), }, summary_str) + def test_float_labels_invalid_values(self): + head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss() + + logits = np.array([[0.5], [-0.3]], dtype=np.float32) + labels = np.array([[1.2], [0.4]], dtype=np.float32) + features = {'x': np.array([[42]], dtype=np.float32)} + training_loss = head.create_loss( + features=features, + mode=model_fn.ModeKeys.TRAIN, + logits=logits, + labels=labels)[0] + with self.assertRaisesRegexp( + errors.InvalidArgumentError, + r'Labels must <= n_classes - 1'): + with self.test_session(): + _initialize_variables(self, monitored_session.Scaffold()) + training_loss.eval() + def test_float_labels_train_create_loss(self): head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss() diff --git a/tensorflow/python/estimator/estimator.py b/tensorflow/python/estimator/estimator.py index ab69a093a2cf9ec6def4a35a7f57dd1f5683961d..887e9a474ab0471de64c53f24520c746e95195e9 100644 --- a/tensorflow/python/estimator/estimator.py +++ b/tensorflow/python/estimator/estimator.py @@ -188,7 +188,7 @@ class Estimator(object): self._config = config # The distribute field contains an instance of DistributionStrategy. - self._distribution = self._config.distribute + self._distribution = self._config.train_distribute # Model directory. 
model_dir = compat_internal.path_to_str(model_dir) @@ -216,7 +216,8 @@ class Estimator(object): else: self._session_config = self._config.session_config - self._device_fn = _get_replica_device_setter(self._config) + self._device_fn = self._config.device_fn or \ + _get_replica_device_setter(self._config) if model_fn is None: raise ValueError('model_fn must be provided to Estimator.') @@ -637,7 +638,7 @@ class Estimator(object): # pylint: disable=protected-access local_init_op = ( estimator_spec.scaffold.local_init_op or - monitored_session.Scaffold._default_local_init_op()) + monitored_session.Scaffold.default_local_init_op()) # pylint: enable=protected-access # Perform the export @@ -688,24 +689,16 @@ class Estimator(object): def _get_features_and_labels_from_input_fn(self, input_fn, mode): """Extracts the `features` and labels from return values of `input_fn`.""" - result = self._call_input_fn(input_fn, mode) - # TODO(anjalisridhar): What about the default DistributionStrategy? Perhaps - # using any input is alright in that case. There is also a - # has_dataset_or_queue_runner function that we may want to extend and use. - if (self._distribution is not None and - not isinstance(result, dataset_ops.Dataset) and - mode == model_fn_lib.ModeKeys.TRAIN): - raise ValueError('input_fn() must return a tf.data.Dataset when using a ' - 'DistributionStrategy.') input_hooks = [] - if isinstance(result, dataset_ops.Dataset): - if self._distribution is not None and mode == model_fn_lib.ModeKeys.TRAIN: - # TODO(josh11b): This is currently using a one-shot iterator, we - # will update this to an initializeable iterator once the - # necessory support for creating an initializable iterator is - # available. 
- result = self._distribution.distribute_dataset(result).get_next() - else: + if self._distribution is not None and mode == model_fn_lib.ModeKeys.TRAIN: + result = self._distribution.distribute_dataset( + lambda: self._call_input_fn(input_fn, mode)) + iterator = result.make_initializable_iterator() + input_hooks.append(_DatasetInitializerHook(iterator)) + result = iterator.get_next() + else: + result = self._call_input_fn(input_fn, mode) + if isinstance(result, dataset_ops.Dataset): iterator = result.make_initializable_iterator() input_hooks.append(_DatasetInitializerHook(iterator)) result = iterator.get_next() @@ -723,7 +716,7 @@ class Estimator(object): batch_length = batch_length or value.shape[0] if value.shape[0] != batch_length: raise ValueError('Batch length of predictions should be same. %s has ' - 'different batch length then others.' % key) + 'different batch length than others.' % key) return batch_length def _extract_keys(self, predictions, predict_keys): @@ -1264,7 +1257,8 @@ def _dict_to_str(dictionary): A `str` representing the `dictionary`. 
""" return ', '.join('%s = %s' % (k, v) - for k, v in sorted(six.iteritems(dictionary))) + for k, v in sorted(six.iteritems(dictionary)) + if not isinstance(v, six.binary_type)) def _write_dict_to_summary(output_dir, diff --git a/tensorflow/python/estimator/export/export.py b/tensorflow/python/estimator/export/export.py index 9206a4964b3b7a6e3cc1e0f9e965a197be78c4ba..41c1f5a2e25cd6ba1e93744b5b82ecc34c4375d0 100644 --- a/tensorflow/python/estimator/export/export.py +++ b/tensorflow/python/estimator/export/export.py @@ -74,8 +74,20 @@ class ServingInputReceiver(collections.namedtuple( raise ValueError('feature keys must be strings: {}.'.format(name)) if not (isinstance(tensor, ops.Tensor) or isinstance(tensor, sparse_tensor.SparseTensor)): - raise ValueError( + value_error = ValueError( 'feature {} must be a Tensor or SparseTensor.'.format(name)) + # NOTE(ericmc): This if-else block is a specific carve-out for + # LabeledTensor, which has a `.tensor` attribute and which is + # convertible to tf.Tensor via ops.convert_to_tensor. + # Allowing all types convertible to tf.Tensor is considered by soergel@ + # to be too permissive. 
+ if hasattr(tensor, 'tensor'): + try: + ops.convert_to_tensor(tensor) + except TypeError: + raise value_error + else: + raise value_error if receiver_tensors is None: raise ValueError('receiver_tensors must be defined.') diff --git a/tensorflow/python/estimator/export/export_test.py b/tensorflow/python/estimator/export/export_test.py index eb9688bc973666554b6057f5f546b9a2d18461d6..c203be7dacf80043079b35cd8e89fc5b69dcaaa0 100644 --- a/tensorflow/python/estimator/export/export_test.py +++ b/tensorflow/python/estimator/export/export_test.py @@ -39,6 +39,21 @@ from tensorflow.python.saved_model import signature_constants from tensorflow.python.saved_model import signature_def_utils +class LabeledTensorMock(object): + """Mock class emulating LabeledTensor.""" + + def __init__(self): + self.tensor = constant_op.constant([1]) + + +def _convert_labeled_tensor_mock_to_tensor(value, *args, **kwargs): + return ops.internal_convert_to_tensor(value.tensor, *args, **kwargs) + + +ops.register_tensor_conversion_function(LabeledTensorMock, + _convert_labeled_tensor_mock_to_tensor) + + class ExportTest(test_util.TensorFlowTestCase): def test_serving_input_receiver_constructor(self): @@ -135,6 +150,11 @@ class ExportTest(test_util.TensorFlowTestCase): with self.assertRaises(ValueError): _ = export.ServingInputReceiver(feature, receiver_tensor) + def test_feature_labeled_tensor(self): + feature = LabeledTensorMock() + receiver_tensor = array_ops.placeholder(dtypes.string) + _ = export.ServingInputReceiver(feature, receiver_tensor) + def test_receiver_wrong_type(self): feature = constant_op.constant(5) receiver_tensor = "not a tensor" diff --git a/tensorflow/python/estimator/replicate_model_fn.py b/tensorflow/python/estimator/replicate_model_fn.py deleted file mode 100644 index 144d89abf3444062927d9261301fe50f4a63b280..0000000000000000000000000000000000000000 --- a/tensorflow/python/estimator/replicate_model_fn.py +++ /dev/null @@ -1,824 +0,0 @@ -# Copyright 2017 The TensorFlow 
Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Utilities to replicate model_fn's over local GPUs. - -This file contains util that allow to replicate `Estimator.model_fn` over -GPUs. Replicated version of a `model_fn` is returned that can subsequently -be used with `Estimator`. -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from collections import defaultdict -from contextlib import contextmanager -import copy - -import six - -from tensorflow.core.framework import node_def_pb2 -from tensorflow.python.client import device_lib -from tensorflow.python.estimator import model_fn as model_fn_lib -from tensorflow.python.estimator import util -from tensorflow.python.estimator.export import export_output as export_output_lib -from tensorflow.python.framework import device as framework_device -from tensorflow.python.framework import ops as ops_lib -from tensorflow.python.framework import sparse_tensor -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import control_flow_ops -from tensorflow.python.ops import math_ops -from tensorflow.python.ops import sparse_ops -from tensorflow.python.ops import state_ops -from tensorflow.python.ops import variable_scope -from tensorflow.python.ops.losses import losses -from tensorflow.python.platform import tf_logging -from 
tensorflow.python.training import device_setter as device_setter_lib -from tensorflow.python.training import optimizer as optimizer_lib - - -def _replicate_model_fn(model_fn, - devices=None): - """Replicate `Estimator.model_fn` over GPUs. - - The given `model_fn` specifies a single forward pass of a model. To replicate - such a model over GPUs, each GPU gets its own instance of the forward pass - (a.k.a. a tower). The input features and labels get sharded into the chunks - that correspond to the number of GPUs. Each tower computes a loss based - on its input. For each such loss, gradients are computed. After that, the - available losses are aggregated to form aggregated loss. Available - gradients are summed. Then, they update weights using the specified - optimizer. - - If `devices` are `None`, then all available GPUs are going to be used for - replication. If no GPUs are available, then the model is going to be - placed on the CPU. - - Two modes of local replication over available GPUs are supported: - 1) If exactly 1 GPU is detected, then variables and operations are placed - onto the GPU. - 2) If more than 1 GPU is detected, then variables are going to be placed on - the CPU. Replicas of operations are placed on each individual GPU. - - Here is an example of how one might use their `model_fn` to run over GPUs: - ```python - ... - def model_fn(...): # See `model_fn` in `Estimator`. - loss = ... - optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001) - optimizer = tf.contrib.estimator._TowerOptimizer(optimizer) - if mode == tf.estimator.ModeKeys.TRAIN: - # See the section below on `EstimatorSpec.train_op`. - return EstimatorSpec(mode=mode, loss=loss, - train_op=optimizer.minimize(loss)) - - # No change for `ModeKeys.EVAL` or `ModeKeys.PREDICT`. - return EstimatorSpec(...) - ... 
- classifier = tf.estimator.Estimator( - model_fn=tf.contrib.estimator.replicate_model_fn(model_fn)) - ``` - - Please see `DNNClassifierIntegrationTest` for an example with a canned - Estimator. - - On `EstimatorSpec.train_op`: - `model_fn` returns `EstimatorSpec.train_op` for - `tf.estimator.GraphKeys.TRAIN`. It is typically derived using an optimizer. - Towers are expected to populate it in the same way. Gradients from all towers - are reduced and applied in the last tower. To achieve that in the case of - multiple towers, `_TowerOptimizer` needs to be used. See `_TowerOptimizer`. - - On sharding input features and labels: - Input features and labels are split for consumption by each tower. They are - split across the dimension 0. Features and labels need to be batch major. - - On reduction algorithms: - Certain algorithms were chosen for aggregating results of computations on - multiple towers: - - Losses from all towers are reduced according to `loss_reduction` argument - to TowerOptimizer.. - - Gradients from all towers are reduced according to the `loss_reduction` - for each trainable variable. - - `eval_metrics_ops` are reduced per metric using `reduce_mean`. - - `EstimatorSpec.predictions` and `EstimatorSpec.export_outputs` are - reduced using concatenation. - - For all other fields of `EstimatorSpec` the values of the first tower - are taken. - - On distribution of variables: - Variables are not duplicated between towers. Instead, they are placed on a - single device as defined above and shared across towers. - - On overhead: - If only one device is specified, then aggregation of loss and gradients - doesn't happen. Replication consists of placing `model_fn` onto the - specified device. - - On current limitations: - - `predictions` are not supported for `ModeKeys.EVAL`. They are required - for `tf.contrib.estimator.add_metrics`. - - Args: - model_fn: `model_fn` as defined in `Estimator`. 
See the section above about - the train_op argument of `EstimatorSpec`. - devices: Optional list of devices to replicate the model across. This - argument can be used to replice only on the subset of available GPUs. - If `None`, then all available GPUs are going to be used for replication. - If no GPUs are available, then the model is going to be placed on the CPU. - - Returns: - A replicated version of the supplied `model_fn`. Returned function that - conforms to the requirements of `Estimator`'s `model_fn` and can be used - instead of the supplied `model_fn`. - """ - return _replicate_model_fn_with_mode( - model_fn, - devices, - # TODO(isaprykin): Query the system configuration to choose modes other - # than `SHARED_LOCAL_PARAMETER_SERVER`, even though it is often - # appropriate. - mode=_VariableDistributionMode.SHARED_LOCAL_PARAMETER_SERVER) - - -class _VariableDistributionMode(object): - """Modes for variable distribution used for forcing a particular one. - - Forcing a mode is meant for performance experimentation purposes rather than - for general use cases. - """ - - SHARED_LOCAL_PARAMETER_SERVER = 1 - """Variables are placed on a single device and shared across all devices. - - Two ways to achieve this distribution over available GPUs are supported: - 1) If exactly 1 GPU is detected, then variables and operations are placed - onto GPU. - 2) If more than 1 GPU is detected, then variables are going to be placed on - the CPU. Replicas of operations are placed on each individual GPU. - """ - - SHARED_ROUND_ROBIN = 2 - """Variables are placed on all devices in a round-robin fashion. - - Every subsequent variable is placed on the next device. There is only one - copy of each variable that is shared across all devices. 
- """ - - -def _replicate_model_fn_with_mode( - model_fn, - devices=None, - mode=_VariableDistributionMode.SHARED_LOCAL_PARAMETER_SERVER): - """A version of `replicate_model_fn` that allows to specify a `mode`.""" - if not devices: - devices = _get_local_devices('GPU') or _get_local_devices('CPU') - - is_a_single_gpu_case = len(devices) == 1 and 'GPU' in devices[0].upper() - consolidation_device = devices[0] if is_a_single_gpu_case else '/CPU:0' - - ps_devices = [consolidation_device] - if mode == _VariableDistributionMode.SHARED_ROUND_ROBIN: - ps_devices = devices - - tf_logging.info('Replicating the `model_fn` across {}. Variables are going ' - 'to be placed on {}. Consolidation device is going to be {}.' - .format(devices, ps_devices, consolidation_device)) - - def single_device_model_fn(features, labels, mode, params=None, config=None): - """`model_fn` on a single device without reduction overhead.""" - return _get_loss_towers( - model_fn=model_fn, - mode=mode, - features=[features], - labels=[labels], - params=params, - config=config, - devices=devices, - local_ps_devices=ps_devices)[0] # One device, so one spec is out. 
- - def replicated_model_fn(features, labels, mode, params=None, config=None): - """Replicated version of `model_fn` to be used instead.""" - feature_shards, label_shards = _split_batch( - features, labels, len(devices), device=consolidation_device) - tower_specs = _get_loss_towers( - model_fn=model_fn, - mode=mode, - features=feature_shards, - labels=label_shards, - params=params, - config=config, - devices=devices, - local_ps_devices=ps_devices) - - if mode == model_fn_lib.ModeKeys.TRAIN: - train_op = _minimize_towers(tower_specs) - return _train_spec( - tower_specs, train_op, aggregation_device=consolidation_device) - elif mode == model_fn_lib.ModeKeys.EVAL: - return _eval_spec(tower_specs, aggregation_device=consolidation_device) - elif mode == model_fn_lib.ModeKeys.PREDICT: - return _predict_spec(tower_specs, aggregation_device=consolidation_device) - - if len(devices) == 1: - return single_device_model_fn - else: - return replicated_model_fn - - -class _TowerOptimizer(optimizer_lib.Optimizer): - """Gathers gradients from all towers and reduces them in the last one.""" - - COLLECTION_FOR_GRAPH_STATES = 'replicate_model_fn_graph_states' - - def __init__(self, optimizer_or_optimizer_fn, - loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE): - """Wrap an existing optimizer for gathering gradients across towers. - - Each invocation of model_fn has to call the same optimizers in the same - order. - - Multiple optimizers that use the same or different losses are supported. - - If _TowerOptimizer is used but `replicate_model_fn` isn't, then no - aggregation will happen. All calls will simply be forwarded to the - underlying optimizer. The behavior is similar if there is only one tower. - - If _TowerOptimizer is used together with SyncReplicasOptimizer that wraps - the user's optimizer, then it's the SyncReplicasOptimizer that needs to be - wrapped with _TowerOptimizer. - - Args: - optimizer_or_optimizer_fn: an instance of optimizer to wrap. 
That - instance is going to be used for optimizer-specific logic. This can - also be a no-argument function that returns such an optimizer instance. - loss_reduction: controls whether losses are summed or averaged. - """ - self._optimizer_or_optimizer_fn = optimizer_or_optimizer_fn - self._loss_reduction = loss_reduction - - @staticmethod - def has_been_used(): - return _TowerOptimizer._graph_state().has_tower_optimizer_been_used - - def get_slot(self, *args, **kwargs): - return self._get_optimizer().get_slot(*args, **kwargs) - - def get_slot_names(self, *args, **kwargs): - return self._get_optimizer().get_slot_names(*args, **kwargs) - - def get_name(self, *args, **kwargs): - return self._get_optimizer().get_name(*args, **kwargs) - - def variables(self, *args, **kwargs): - return self._get_optimizer().variables(*args, **kwargs) - - def compute_gradients(self, loss, *args, **kwargs): - """Compute gradients, but first, if needed, scale the loss.""" - _TowerOptimizer._graph_state().set_loss_reduction(self._loss_reduction) - loss = _scale_loss(loss, - self._loss_reduction, - self._graph_state().number_of_towers) - return self._get_optimizer().compute_gradients(loss, *args, **kwargs) - - def apply_gradients(self, grads_and_vars, global_step=None, **kwargs): - """Collect gradients updates to apply them with the last tower.""" - if self._graph_state().number_of_towers == 1: - # Avoid the overhead of reduction if there's only one tower. - # - # There assumed to be only one tower if aggregation-related methods were - # not called by `_get_loss_towers`, for example if the model_fn uses - # TowerEstimator, but `replicate_model_fn` isn't used. 
- return self._get_optimizer().apply_gradients(grads_and_vars, global_step, - **kwargs) - - self._graph_state().collect_gradients(grads_and_vars) - - if not self._graph_state().is_the_last_tower: - with ops_lib.control_dependencies(_extract_tensors(grads_and_vars)): - return self._construct_no_op_train_op() - else: - # Gradients need to be gathered and applied in the scope of the first - # tower, so that the tensors are accessible via names without prefixes. - var_scope, name_scope = self._graph_state().scopes_of_the_first_tower - with variable_scope.variable_scope(var_scope): - with ops_lib.name_scope(name_scope): - return self._apply_gathered_gradients(global_step, **kwargs) - - def _apply_gathered_gradients(self, global_step, **kwargs): - graph_state = self._graph_state() - optimizer = self._get_optimizer() - - grad_lists = {} - for grad, var in graph_state.get_latest_gradients_from_all_towers(): - if grad is not None: - grad_lists.setdefault(var, []).append(grad) - - aggregated_grads = [] - with ops_lib.name_scope('gradient_aggregating'): - for var, grads in six.iteritems(grad_lists): - grad = _compute_sum_on_device(grads, var.device) - aggregated_grads.append((grad, var)) - return optimizer.apply_gradients( - aggregated_grads, global_step=global_step, **kwargs) - - def _get_optimizer(self): - if callable(self._optimizer_or_optimizer_fn): - # If optimizer is given as a function then we need to wait till we are - # under the right graph context before constructing it. That's why the - # optimizer is constructed in _get_optimizer() rather than __init__(). 
- self._optimizer_or_optimizer_fn = self._optimizer_or_optimizer_fn() - self._graph_state().has_tower_optimizer_been_used = True - return self._optimizer_or_optimizer_fn - - def _construct_no_op_train_op(self): - return control_flow_ops.no_op(name='train_op_placeholder') - - @staticmethod - def _graph_state(): - graph_states = ops_lib.get_default_graph().get_collection_ref( - _TowerOptimizer.COLLECTION_FOR_GRAPH_STATES) - if not graph_states: - graph_states.append(_TowerOptimizer._PerGraphState()) - return graph_states[-1] - - @staticmethod - def _did_towers_have_same_optimizer_calls(): - graph_state = _TowerOptimizer._graph_state() - return graph_state.did_towers_have_same_optimizer_calls() - - @staticmethod - def _clear_graph_state(): - # Clearing the Graph collection will prevent _PerGraphState from being - # serialized. - ops_lib.get_default_graph().clear_collection( - _TowerOptimizer.COLLECTION_FOR_GRAPH_STATES) - - class _PerGraphState(object): - """Gradient reduction related state of a Tensorflow graph.""" - - def __init__(self): - self._collected_grads_and_vars = defaultdict(list) - self._current_tower_index = 0 - self._number_of_towers = 1 - self._loss_reduction = None - # Scopes of the first tower that don't have a prefix: - self._variable_scope = None - self._name_scope = None - # If needed, alert that _TowerOptimizer needs to be used with model_fn. 
- self._has_tower_optimizer_been_used = False - - def collect_gradients(self, grads_and_vars): - self._collected_grads_and_vars[self._current_tower_index].append( - grads_and_vars) - - def get_latest_gradients_from_all_towers(self): - """Get gradients across towers for the last called optimizer.""" - grads_and_vars = [] - index_of_last_gradients = len( - self._collected_grads_and_vars[self._current_tower_index]) - 1 - for tower_id in range(self._current_tower_index + 1): - grads_and_vars.extend( - self._collected_grads_and_vars[tower_id][index_of_last_gradients]) - return grads_and_vars - - def set_number_of_towers(self, number_of_towers): - self._number_of_towers = number_of_towers - - def set_loss_reduction(self, loss_reduction): - self._loss_reduction = loss_reduction - - @contextmanager - def tower(self, tower_id, var_scope, name_scope): - if tower_id == 0: - self._variable_scope = var_scope - self._name_scope = name_scope - self._current_tower_index = tower_id - yield - - @property - def scopes_of_the_first_tower(self): - return self._variable_scope, self._name_scope - - @property - def is_the_last_tower(self): - return self._current_tower_index == (self._number_of_towers - 1) - - @property - def number_of_towers(self): - return self._number_of_towers - - @property - def loss_reduction(self): - return self._loss_reduction - - @property - def has_tower_optimizer_been_used(self): - return self._has_tower_optimizer_been_used - - @has_tower_optimizer_been_used.setter - def has_tower_optimizer_been_used(self, value): - self._has_tower_optimizer_been_used = value - - def did_towers_have_same_optimizer_calls(self): - total_number_of_grads = sum([ - len(grads) - for _, grads in six.iteritems(self._collected_grads_and_vars) - ]) - return total_number_of_grads % self._number_of_towers == 0 - - -def _get_local_devices(device_type): - local_device_protos = device_lib.list_local_devices() - return [ - device.name - for device in local_device_protos - if device.device_type 
== device_type - ] - - -def _split_batch(features, labels, number_of_shards, device): - """Split input features and labes into batches.""" - - def ensure_divisible_by_shards(sequence): - batch_size = ops_lib.convert_to_tensor(sequence).get_shape()[0] - if batch_size % number_of_shards != 0: - raise ValueError( - 'Batch size {} needs to be divisible by the number of GPUs, which ' - 'is {}.'.format(batch_size, number_of_shards)) - - def split_dictionary(dictionary): - """Split a dictionary into shards.""" - shards = [{} for _ in range(number_of_shards)] - for name, tensor in six.iteritems(dictionary): - if isinstance(tensor, sparse_tensor.SparseTensor): - for i, shard in enumerate( - sparse_ops.sparse_split( - sp_input=tensor, num_split=number_of_shards, axis=0)): - shards[i][name] = shard - else: - ensure_divisible_by_shards(tensor) - for i, shard in enumerate(array_ops.split(tensor, number_of_shards)): - shards[i][name] = shard - return shards - - with ops_lib.name_scope('split_inputs'): - with ops_lib.device(device): - if isinstance(features, dict): - feature_shards = split_dictionary(features) - else: - ensure_divisible_by_shards(features) - feature_shards = array_ops.split(features, number_of_shards) - - if labels is None: - label_shards = None - elif isinstance(labels, dict): - label_shards = split_dictionary(labels) - else: - ensure_divisible_by_shards(labels) - label_shards = array_ops.split(labels, number_of_shards) - return feature_shards, label_shards - - -_DEFAULT_NAME_SCOPE_PATTERN = 'tower_{}' - - -def _get_loss_towers(model_fn, - mode, - features, - labels, - params, - config, - devices, - local_ps_devices, - name_scope_pattern=_DEFAULT_NAME_SCOPE_PATTERN): - """Replicate the loss computation across devices.""" - tower_specs = [] - - model_fn_args = util.fn_args(model_fn) - optional_params = {} - if 'params' in model_fn_args: - optional_params['params'] = copy.deepcopy(params) - if 'config' in model_fn_args: - optional_params['config'] = 
copy.deepcopy(config) - - # pylint: disable=protected-access - round_robin_strategy = device_setter_lib._RoundRobinStrategy( - num_tasks=len(local_ps_devices)) - _TowerOptimizer._graph_state().set_number_of_towers(len(devices)) - - for i, device in enumerate(devices): - is_the_first_tower = (i == 0) - - device_setter = _local_device_setter( - worker_device=device, - ps_devices=local_ps_devices, - ps_strategy=round_robin_strategy) - - # We would like to preserve the names of the variables and ops that the user - # might be relying on. Names without a prefix are going to resolve to - # variables and ops of the first tower. - name_scope = name_scope_pattern - if is_the_first_tower: - name_scope = '' - - with variable_scope.variable_scope( - '', reuse=not is_the_first_tower) as var_scope: - with ops_lib.name_scope(name_scope.format(i)) as name_scope: - with _TowerOptimizer._graph_state().tower( - tower_id=i, var_scope=var_scope, name_scope=name_scope): - with ops_lib.device(device_setter): - labels_shard = None - if labels: - labels_shard = labels[i] - - tower_spec = model_fn( - mode=mode, - features=features[i], - labels=labels_shard, - **optional_params) - - if (tower_spec.train_op is not None and len(devices) > 1 and - not _TowerOptimizer.has_been_used()): - raise ValueError('Please wrap optimizers with _TowerOptimizer' - ' in order to use replicate_model_fn with' - ' multiple `devices`.') - - # Scaling the loss here doesn't actually affect gradients. Another - # instance of scaling happens inside the _TowerOptimizer. 
- tower_spec = _scale_tower_loss( - tower_spec, - _TowerOptimizer._graph_state().loss_reduction, - number_of_towers=len(devices)) - tower_specs.append(tower_spec) - - if not _TowerOptimizer._did_towers_have_same_optimizer_calls(): - raise ValueError('Each invocation of model_fn was supposed to make the same' - ' optimizer calls.') - _TowerOptimizer._clear_graph_state() - # pylint: enable=protected-access - return tower_specs - - -def _local_device_setter(worker_device, ps_devices, ps_strategy): - """A device setter that puts distributes Var/Ops to PS/workers.""" - ps_ops = ['Variable', 'VariableV2', 'VarHandleOp'] - - def local_device_chooser(op): - current_device = framework_device.DeviceSpec.from_string(op.device or '') - - node_def = op if isinstance(op, node_def_pb2.NodeDef) else op.node_def - if node_def.op in ps_ops: - ps_device_spec = framework_device.DeviceSpec.from_string( - '{}'.format(ps_devices[ps_strategy(op)])) - - ps_device_spec.merge_from(current_device) - return ps_device_spec.to_string() - else: - worker_device_spec = framework_device.DeviceSpec.from_string( - worker_device or '') - worker_device_spec.merge_from(current_device) - return worker_device_spec.to_string() - - return local_device_chooser - - -def _scale_tower_loss(tower_spec, loss_reduction, number_of_towers): - """Produce an EstimatorSpec with approproriately scaled loss.""" - if tower_spec.loss is None: - return tower_spec - - estimator_spec = _asdict(tower_spec) - estimator_spec['loss'] = _scale_loss( - tower_spec.loss, - loss_reduction, - number_of_towers, - reduced_loss_name='averaged_loss') - return model_fn_lib.EstimatorSpec(**estimator_spec) - - -def _scale_loss(loss, loss_reduction, number_of_towers, reduced_loss_name=None): - """If needed, scale down the loss for averaging loss by summing.""" - if loss is None: - return None - if number_of_towers == 1: - return loss - - if loss_reduction == losses.Reduction.NONE: - raise ValueError('Tower losses need to be reduced in some way, 
yet {} ' - 'reduction is specified.'.format(loss_reduction)) - - if loss_reduction != losses.Reduction.SUM: - return math_ops.div(loss, 1.0 * number_of_towers, name=reduced_loss_name) - else: - return loss - - -def _minimize_towers(tower_specs): - """`train_op` of the last tower applies aggregated gradients.""" - return tower_specs[-1].train_op - - -def _compute_sum_on_device(values, device, name=None): - with ops_lib.device(device): - if isinstance(values[0], ops_lib.IndexedSlices): - if name: - raise ValueError('The name {} is not expected to be given to ' - 'IndexedSlices {}'.format(name, values)) - - values_concat = array_ops.concat([v.values for v in values], axis=0) - indices_concat = array_ops.concat([v.indices for v in values], axis=0) - return ops_lib.IndexedSlices(values_concat, indices_concat, - values[0].dense_shape) - else: - return math_ops.add_n(values, name=name) - - -def _train_spec(tower_specs, - train_op, - aggregation_device, - aggregated_loss_name='loss'): - """Populate replicated EstimatorSpec for `GraphKeys.TRAIN`.""" - # Spec of the last tower is used as the template for the final spec, because - # some `EstimatorSpec.training_hooks` rely on calls made in model_fn. For - # example, `SyncReplicasOptimizerHook` validates the - # `SyncReplicasOptimizer.apply_gradients` call. `TowerEstimator` makes that - # call only in the last tower. 
- estimator_spec = _asdict(tower_specs[-1]) - estimator_spec['mode'] = model_fn_lib.ModeKeys.TRAIN - estimator_spec['train_op'] = train_op - estimator_spec['loss'] = _compute_sum_on_device( - [spec.loss for spec in tower_specs], aggregation_device, - aggregated_loss_name) - return model_fn_lib.EstimatorSpec(**estimator_spec) - - -def _eval_spec(tower_specs, aggregation_device, aggregated_loss_name='loss'): - """Populate replicated EstimatorSpec for `GraphKeys.EVAL`.""" - estimator_spec = _asdict(tower_specs[0]) - estimator_spec['mode'] = model_fn_lib.ModeKeys.EVAL - estimator_spec['loss'] = _compute_sum_on_device( - [spec.loss for spec in tower_specs], aggregation_device, - aggregated_loss_name) - - update_ops = [] - for tower_spec in tower_specs: - for name, (_, update_op) in six.iteritems(tower_spec.eval_metric_ops): - update_ops.append(update_op) - - with ops_lib.control_dependencies(update_ops): - reduced_update_op = _reduce_metric_variables(len(tower_specs)) - - eval_metric_ops = {} - for name, (metric_tensor, _) in six.iteritems(tower_specs[0].eval_metric_ops): - eval_metric_ops[name] = (metric_tensor, reduced_update_op) - estimator_spec['eval_metric_ops'] = eval_metric_ops - return model_fn_lib.EstimatorSpec(**estimator_spec) - - -def _reduce_metric_variables(number_of_towers): - """Aggregate local variables used in metrics into the first tower.""" - if number_of_towers == 1: - return control_flow_ops.no_op(name='no_eval_metric_reduction') - - metric_variables = ops_lib.get_collection(ops_lib.GraphKeys.METRIC_VARIABLES) - variables_per_tower = len(metric_variables) // number_of_towers - - if len(metric_variables) % number_of_towers != 0: - raise ValueError( - 'Different `EstimatorSpec.eval_metric_ops` across `model_fn()` calls.' - ' Expected {} local variables, but got {} instead.'.format( - variables_per_tower * number_of_towers, len(metric_variables))) - - # `metric_variables` has the size of `variables_per_tower` x - # number_of_towers. 
Each tower is produced by calling the same model_fn. - # First `variables_per_tower` correspond to the first tower. Each such - # variable has an replica at the `(variables_per_tower * i)` position, where - # `i` is `[1.. number_of_towers]`. We are going to add values from replicas - # to each variable of the first tower. We then zero out replica values, so - # that `_reduce_metric_variables` operation is idempotent. If a metric - # is then computed based on local variables from the first tower, then the - # resulting metric is an estimate for all `number_of_towers` towers. - ops = [] - for i in range(0, variables_per_tower): - next_replica_id = i + variables_per_tower - replicas = [ - metric_variables[replica_id] - for replica_id in range(next_replica_id, len(metric_variables), - variables_per_tower) - ] # `replicas` doesn't contain the first-tower variable. - - reduce_op = state_ops.assign_add(metric_variables[i], - math_ops.add_n(replicas)) - - with ops_lib.control_dependencies([reduce_op]): - for replica in replicas: - zeros_for_replica = array_ops.zeros( - array_ops.shape(replica), dtype=replica.dtype) - zero_out_replica_op = state_ops.assign(replica, zeros_for_replica) - ops.append(zero_out_replica_op) - - return control_flow_ops.group(*ops) - - -def _predict_spec(tower_specs, aggregation_device): - """Populate replicated EstimatorSpec for `GraphKeys.PREDICT`.""" - estimator_spec = _asdict(tower_specs[0]) - estimator_spec['mode'] = model_fn_lib.ModeKeys.PREDICT - - with ops_lib.device(aggregation_device): - estimator_spec['predictions'] = _concat_tensor_dicts( - *[tower_spec.predictions for tower_spec in tower_specs]) - - export_outputs_dict = _dict_concat( - *[tower_spec.export_outputs for tower_spec in tower_specs]) - - export_outputs = {} - for name, export_output_list in six.iteritems(export_outputs_dict): - if isinstance(export_output_list[0], export_output_lib.PredictOutput): - export_outputs[name] = export_output_lib.PredictOutput( - 
outputs=_concat_tensor_dicts(*[ - export_output.outputs for export_output in export_output_list - ])) - elif isinstance(export_output_list[0], - export_output_lib.RegressionOutput): - export_outputs[name] = export_output_lib.RegressionOutput( - value=array_ops.concat( - [export_output.value for export_output in export_output_list], - axis=0)) - elif isinstance(export_output_list[0], - export_output_lib.ClassificationOutput): - scores = None - if export_output_list[0].scores is not None: - scores = array_ops.concat( - [export_output.scores for export_output in export_output_list], - axis=0) - - classes = None - if export_output_list[0].classes is not None: - classes = array_ops.stack( - [export_output.classes for export_output in export_output_list], - axis=0) - - export_outputs[name] = export_output_lib.ClassificationOutput( - scores=scores, classes=classes) - - estimator_spec['export_outputs'] = export_outputs - return model_fn_lib.EstimatorSpec(**estimator_spec) - - -def _concat_tensor_dicts(*tensor_dicts): - return { - name: array_ops.concat(tensors, axis=0, name=name) - for name, tensors in six.iteritems(_dict_concat(*tensor_dicts)) - } - - -def _extract_tensors(tensors_and_vars): - tensors = [] - for tensor_and_var in tensors_and_vars: - tensor, _ = tensor_and_var - if isinstance(tensor, ops_lib.IndexedSlices): - tensors.append(tensor.values) - elif tensor is not None: - tensors.append(tensor) - return tensors - - -def _dict_concat(*dicts): - list_dict = {} - for d in dicts: - if d is None: - continue - - for k, v in six.iteritems(d): - list_dict.setdefault(k, []).append(v) - return list_dict - - -def _asdict(namedtuple): - """Returns a namedtuple as a dictionary. - - This is required because `_asdict()` in Python 3.x.x is broken in classes - that inherit from `collections.namedtuple`. See - https://bugs.python.org/issue24931 for more details. - - Args: - namedtuple: An object that inherits from `collections.namedtuple`. 
- - Returns: - A dictionary version of the tuple. - """ - return {k: getattr(namedtuple, k) for k in namedtuple._fields} diff --git a/tensorflow/python/estimator/replicate_model_fn_test.py b/tensorflow/python/estimator/replicate_model_fn_test.py deleted file mode 100644 index ad1f9c02b92d7b1ce929494f4b6fbf636762a7fd..0000000000000000000000000000000000000000 --- a/tensorflow/python/estimator/replicate_model_fn_test.py +++ /dev/null @@ -1,1739 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Tests for utilities that replicate `Estimator.model_fn` over GPUs.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import re -import shutil -import tempfile -import numpy as np -import six - -from tensorflow.python.estimator import estimator as estimator_lib -from tensorflow.python.estimator import model_fn as model_fn_lib -from tensorflow.python.estimator import replicate_model_fn -from tensorflow.python.estimator.canned import dnn -from tensorflow.python.estimator.canned import optimizers -from tensorflow.python.estimator.canned import prediction_keys -from tensorflow.python.estimator.export import export -from tensorflow.python.estimator.export import export_output -from tensorflow.python.estimator.inputs import numpy_io -from tensorflow.python.feature_column import feature_column -from tensorflow.python.framework import constant_op -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops as ops_lib -from tensorflow.python.framework import sparse_tensor -from tensorflow.python.framework import test_util -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import control_flow_ops -from tensorflow.python.ops import losses -from tensorflow.python.ops import math_ops -from tensorflow.python.ops import metrics as metrics_lib -from tensorflow.python.ops import variable_scope -from tensorflow.python.ops import variables -from tensorflow.python.ops.losses import losses -from tensorflow.python.platform import gfile -from tensorflow.python.platform import test -from tensorflow.python.saved_model import signature_constants -from tensorflow.python.summary.writer import writer_cache -from tensorflow.python.training import adam -from tensorflow.python.training import device_setter -from tensorflow.python.training import gradient_descent -from 
tensorflow.python.training import training - - -# TODO(isaprykin): Parametrize all the tests on -# replicate_model_fn._VariableDistributionMode when it's supported. -class DNNClassifierIntegrationTest(test_util.TensorFlowTestCase): - - def setUp(self): - self._model_dir = tempfile.mkdtemp() - - def test_complete_flow_with_public_version(self): - return self._complete_flow_with_mode(mode=None) - - def test_complete_flow_with_mode_local_ps_server(self): - return self._complete_flow_with_mode( - replicate_model_fn._VariableDistributionMode. - SHARED_LOCAL_PARAMETER_SERVER) - - def test_complete_flow_with_mode_round_robin(self): - return self._complete_flow_with_mode( - replicate_model_fn._VariableDistributionMode.SHARED_ROUND_ROBIN) - - def _complete_flow_with_mode(self, mode): - n_classes = 3 - input_dimension = 2 - batch_size = 12 - - data = np.linspace( - 0., n_classes - 1., batch_size * input_dimension, dtype=np.float32) - x_data = data.reshape(batch_size, input_dimension) - categorical_data = np.random.random_integers( - 0, len(x_data), size=len(x_data)) - y_data = np.reshape(self._as_label(data[:batch_size]), (batch_size, 1)) - train_input_fn = numpy_io.numpy_input_fn( - x={'x': x_data, - 'categories': categorical_data}, - y=y_data, - batch_size=batch_size, - num_epochs=None, - shuffle=True) - eval_input_fn = numpy_io.numpy_input_fn( - x={'x': x_data, - 'categories': categorical_data}, - y=y_data, - batch_size=batch_size, - shuffle=False) - predict_input_fn = numpy_io.numpy_input_fn( - x={'x': x_data, - 'categories': categorical_data}, - batch_size=batch_size, - shuffle=False) - - feature_columns = [ - feature_column.numeric_column('x', shape=(input_dimension,)), - feature_column.embedding_column( - feature_column.categorical_column_with_vocabulary_list( - 'categories', - vocabulary_list=np.linspace( - 0., len(x_data), len(x_data), dtype=np.int64)), 1) - ] - - def optimizer_fn(): - return optimizers.get_optimizer_instance('Adagrad', learning_rate=0.05) - - 
estimator = dnn.DNNClassifier( - hidden_units=(2, 2), - # Adagrad is configured with `get_optimizer_instance`, so the function - # form of `TowerOptimizer.__init__` is used. - optimizer=replicate_model_fn._TowerOptimizer( - optimizer_fn, loss_reduction=losses.Reduction.SUM), - feature_columns=feature_columns, - n_classes=n_classes, - model_dir=self._model_dir) - - if not mode: # Use the public `replicate_model_fn`. - model_fn = replicate_model_fn._replicate_model_fn( - estimator.model_fn, devices=['/gpu:0', '/gpu:1', '/gpu:2']) - else: - model_fn = replicate_model_fn._replicate_model_fn_with_mode( - estimator.model_fn, - devices=['/gpu:0', '/gpu:1', '/gpu:2'], - mode=mode) - - estimator = estimator_lib.Estimator( - model_fn=model_fn, - model_dir=estimator.model_dir, - config=estimator.config, - params=estimator.params) - - num_steps = 10 - estimator.train(train_input_fn, steps=num_steps) - - scores = estimator.evaluate(eval_input_fn) - self.assertEqual(num_steps, scores[ops_lib.GraphKeys.GLOBAL_STEP]) - self.assertIn('loss', six.iterkeys(scores)) - - predicted_proba = np.array([ - x[prediction_keys.PredictionKeys.PROBABILITIES] - for x in estimator.predict(predict_input_fn) - ]) - self.assertAllEqual((batch_size, n_classes), predicted_proba.shape) - - feature_spec = feature_column.make_parse_example_spec(feature_columns) - serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn( - feature_spec) - export_dir = estimator.export_savedmodel(tempfile.mkdtemp(), - serving_input_receiver_fn) - self.assertTrue(gfile.Exists(export_dir)) - - # Nothing should be left in the graph so that it doesn't get serialized. 
- self.assertFalse(ops_lib.get_default_graph().get_collection_ref( - replicate_model_fn._TowerOptimizer.COLLECTION_FOR_GRAPH_STATES)) - - def _as_label(self, data_in_float): - return np.rint(data_in_float).astype(np.int64) - - def tearDown(self): - if self._model_dir: - writer_cache.FileWriterCache.clear() - shutil.rmtree(self._model_dir) - - -class ReplicateModelTest(test_util.TensorFlowTestCase): - - def create_model_fn_with_loss_reduction(self, loss_reduction): - - def model_fn(mode, features, labels, params): - c = variable_scope.get_variable( - 'c', - initializer=constant_op.constant(10, dtype=dtypes.float64), - dtype=dtypes.float64) - - predictions = math_ops.multiply(features, c) - - loss = losses.absolute_difference( - labels=labels, - predictions=predictions, - reduction=losses.Reduction.SUM) - loss = math_ops.reduce_sum(loss) - - metrics = { - 'accuracy': metrics_lib.accuracy(labels, predictions), - 'auc': metrics_lib.auc(labels, predictions) - } - - optimizer = replicate_model_fn._TowerOptimizer( - gradient_descent.GradientDescentOptimizer(params['learning_rate']), - loss_reduction=loss_reduction) - - return model_fn_lib.EstimatorSpec( - mode=mode, - loss=loss, - eval_metric_ops=metrics, - predictions={'probabilities': predictions}, - train_op=optimizer.minimize(loss)) - - return model_fn - - @property - def params(self): - params = {} - params['learning_rate'] = 1.0 - return params - - def test_train(self): - features = np.array([[1.0], [2.0]]) - labels = np.array([[1.0], [2.0]]) - - with self.test_session() as session: - replicated_model_fn = replicate_model_fn._replicate_model_fn( - self.create_model_fn_with_loss_reduction(losses.Reduction.SUM), - devices=['/gpu:0', '/gpu:1']) - estimator_spec = replicated_model_fn( - features, labels, model_fn_lib.ModeKeys.TRAIN, self.params) - session.run(variables.global_variables_initializer()) - - # loss = feature * c - label - total_loss = (1.0 * 10 - 1.0) + (2.0 * 10 - 2.0) - self.assertEqual(total_loss, 
session.run(estimator_spec.loss)) - - # derivative of loss = (1*c - 1) + (2*c - 2) is 3. - # new value of c = 10 - learning rate * 3 = 7.0. - session.run(estimator_spec.train_op) - with variable_scope.variable_scope('', reuse=True): - c = variable_scope.get_variable('c', dtype=dtypes.float64) - self.assertEqual(7.0, session.run(c)) - - def test_train_with_mean_reduction(self): - features = np.array([[1.0], [2.0]]) - labels = np.array([[1.0], [2.0]]) - - with self.test_session() as session: - # Add another trainable variable that doesn't produce a gradient to - # verify that None gradients are supported. - _ = variable_scope.get_variable( - 'another_variable', - initializer=constant_op.constant(1, dtype=dtypes.float64), - dtype=dtypes.float64) - - replicated_model_fn = replicate_model_fn._replicate_model_fn( - self.create_model_fn_with_loss_reduction(losses.Reduction.MEAN), - devices=['/gpu:0', '/gpu:1']) - estimator_spec = replicated_model_fn( - features, labels, model_fn_lib.ModeKeys.TRAIN, self.params) - session.run(variables.global_variables_initializer()) - - # loss = feature * c - label - total_loss = ((1.0 * 10 - 1.0) + (2.0 * 10 - 2.0)) / 2.0 - self.assertEqual(total_loss, session.run(estimator_spec.loss)) - - # derivative of loss = (1*c - 1)/2 + (2*c - 2)/2 is 1.5. - # It's the same computation as without mean reduction, but the - # loss from every tower is scaled by 1/. 
- # new value of c = 10 - learning rate * 1.5 = 8.5 - session.run(estimator_spec.train_op) - with variable_scope.variable_scope('', reuse=True): - c = variable_scope.get_variable('c', dtype=dtypes.float64) - self.assertEqual(8.5, session.run(c)) - - def test_train_two_steps_collected_gradients_are_reset_between_steps(self): - with ops_lib.Graph().as_default(): - features = array_ops.placeholder(dtypes.float64) - labels = array_ops.placeholder(dtypes.float64) - - feature_inputs = np.array([[1.0], [2.0]]), np.array([[1.5], [2.5]]) - label_inputs = np.array([[1.0], [2.0]]), np.array([[1.5], [2.5]]) - - # loss = feature * c - label - expected_losses = ((1.0 * 10 - 1.0) + (2.0 * 10 - 2.0), - (1.5 * 7.0 - 1.5) + (2.5 * 7.0 - 2.5)) - # Derivative of the loss is 1.0 + 2.0 for the first step and 1.5 + 2.5 - # for the second. - expected_c = 10.0 - 3.0, 7.0 - 4.0 - - with self.test_session() as session, variable_scope.variable_scope( - '', reuse=variable_scope.AUTO_REUSE): - replicated_model_fn = replicate_model_fn._replicate_model_fn( - self.create_model_fn_with_loss_reduction(losses.Reduction.SUM), - devices=['/gpu:0', '/gpu:1']) - estimator_spec = replicated_model_fn( - features, labels, model_fn_lib.ModeKeys.TRAIN, self.params) - session.run(variables.global_variables_initializer()) - - for feature_input, label_input, loss, weight in zip( - feature_inputs, label_inputs, expected_losses, expected_c): - feeds = {features: feature_input, labels: label_input} - - self.assertEqual(loss, session.run(estimator_spec.loss, feeds)) - - session.run(estimator_spec.train_op, feeds) - c = variable_scope.get_variable('c', dtype=dtypes.float64) - self.assertEqual(weight, session.run(c, feeds)) - - def test_eval(self): - features = np.array([[0.01], [0.002]]) - labels = np.array([[0.01], [0.02]]) - - with self.test_session() as session: - replicated_model_fn = replicate_model_fn._replicate_model_fn( - self.create_model_fn_with_loss_reduction(losses.Reduction.SUM), - devices=['/gpu:0', 
'/gpu:1']) - estimator_spec = replicated_model_fn( - features, labels, model_fn_lib.ModeKeys.EVAL, self.params) - session.run(variables.local_variables_initializer()) - session.run(variables.global_variables_initializer()) - - accuracy, a = estimator_spec.eval_metric_ops['accuracy'] - auc, b = estimator_spec.eval_metric_ops['auc'] - - session.run([a, b]) - accuracy = session.run(accuracy) - auc = session.run(auc) - - # loss[i] = features[i] * 10 - labels[i]. - # Accuracy is 0.0 (no match) in the first tower. - # Accuracy is 1.0 (match) in the second tower, since the feature - # times weight "c" happened to be equal to the label. - total_loss = ((0.01 * 10 - 0.01) + (0.002 * 10 - 0.02)) - - self.assertNear((0.0 + 1.0) / 2.0, accuracy, 0.01) - self.assertEqual(0, auc) - self.assertNear(total_loss, session.run(estimator_spec.loss), 0.01) - - def test_eval_with_mean_reduction(self): - features = np.array([[0.01], [0.002]]) - labels = np.array([[0.01], [0.02]]) - - with self.test_session() as session: - replicated_model_fn = replicate_model_fn._replicate_model_fn( - self.create_model_fn_with_loss_reduction(losses.Reduction.MEAN), - devices=['/gpu:0', '/gpu:1']) - estimator_spec = replicated_model_fn( - features, labels, model_fn_lib.ModeKeys.EVAL, self.params) - session.run(variables.local_variables_initializer()) - session.run(variables.global_variables_initializer()) - - accuracy, a = estimator_spec.eval_metric_ops['accuracy'] - auc, b = estimator_spec.eval_metric_ops['auc'] - - session.run([a, b]) - accuracy = session.run(accuracy) - auc = session.run(auc) - - # loss[i] = features[i] * 10 - labels[i]. - # Accuracy is 0.0 (no match) in the first tower. - # Accuracy is 1.0 (match) in the second tower, since the feature - # times weight "c" happened to be equal to the label. 
- total_loss = ((0.01 * 10 - 0.01) + (0.002 * 10 - 0.02)) / 2.0 - - self.assertNear((0.0 + 1.0) / 2.0, accuracy, 0.01) - self.assertEqual(0, auc) - self.assertNear(total_loss, session.run(estimator_spec.loss), 0.01) - - def test_predict(self): - features = np.array([[0.01], [0.002]]) - labels = np.array([[0.01], [0.02]]) - - with self.test_session() as session: - replicated_model_fn = replicate_model_fn._replicate_model_fn( - self.create_model_fn_with_loss_reduction(losses.Reduction.SUM), - devices=['/gpu:0', '/gpu:1']) - estimator_spec = replicated_model_fn( - features, labels, model_fn_lib.ModeKeys.PREDICT, self.params) - session.run(variables.global_variables_initializer()) - - self.assertAllClose({ - 'probabilities': np.array([[0.1], [0.02]]) - }, session.run(estimator_spec.predictions)) - - def test_train_single_tower(self): - features = np.array([[1.0], [2.0]]) - labels = np.array([[1.0], [2.0]]) - - with self.test_session() as session: - replicated_model_fn = replicate_model_fn._replicate_model_fn( - self.create_model_fn_with_loss_reduction(losses.Reduction.SUM), - devices=['/gpu:0']) - estimator_spec = replicated_model_fn( - features, labels, model_fn_lib.ModeKeys.TRAIN, self.params) - session.run(variables.global_variables_initializer()) - - # loss = feature * c - label - total_loss = (1.0 * 10 - 1.0) + (2.0 * 10 - 2.0) - self.assertEqual(total_loss, session.run(estimator_spec.loss)) - - # loss' of c is 3. - # new value of c = 10 - learning rate * 3 = 7.0. 
- session.run(estimator_spec.train_op) - with variable_scope.variable_scope('', reuse=True): - c = variable_scope.get_variable('c', dtype=dtypes.float64) - self.assertEqual(7.0, session.run(c)) - - def test_eval_single_tower(self): - features = np.array([[0.01], [0.002]]) - labels = np.array([[0.01], [0.02]]) - - with self.test_session() as session: - replicated_model_fn = replicate_model_fn._replicate_model_fn( - self.create_model_fn_with_loss_reduction(losses.Reduction.SUM), - devices=['/gpu:0']) - estimator_spec = replicated_model_fn( - features, labels, model_fn_lib.ModeKeys.EVAL, self.params) - session.run(variables.local_variables_initializer()) - session.run(variables.global_variables_initializer()) - - accuracy, a = estimator_spec.eval_metric_ops['accuracy'] - auc, b = estimator_spec.eval_metric_ops['auc'] - - session.run([a, b]) - accuracy = session.run(accuracy) - auc = session.run(auc) - - # Accuracy is 0.0 (no match) in the first tower. - # Accuracy is 1.0 (match) in the second tower, since the feature - # times weight "c" happened to be equal to the label. 
- total_loss = ((0.01 * 10 - 0.01) + (0.002 * 10 - 0.02)) - - self.assertNear((0.0 + 1.0) / 2.0, accuracy, 0.01) - self.assertEqual(0, auc) - self.assertNear(total_loss, session.run(estimator_spec.loss), 0.01) - - def test_predict_single_tower(self): - features = np.array([[0.01], [0.002]]) - labels = np.array([[0.01], [0.02]]) - - with self.test_session() as session: - replicated_model_fn = replicate_model_fn._replicate_model_fn( - self.create_model_fn_with_loss_reduction(losses.Reduction.SUM), - devices=['/gpu:0']) - estimator_spec = replicated_model_fn( - features, labels, model_fn_lib.ModeKeys.PREDICT, self.params) - session.run(variables.global_variables_initializer()) - - self.assertAllClose({ - 'probabilities': np.array([[0.1], [0.02]]) - }, session.run(estimator_spec.predictions)) - - def test_batch_size_that_is_not_divisible_by_the_number_of_gpus(self): - features = np.array([[1.0], [2.0], [3.0]]) - labels = np.array([[1.0], [2.0], [3.0]]) - - with self.assertRaisesRegexp( - ValueError, '.*Batch.+size.+needs.+to.+be.+divisible.+by.+GPUs.+'): - replicated_model_fn = replicate_model_fn._replicate_model_fn( - self.create_model_fn_with_loss_reduction(losses.Reduction.SUM), - devices=['/gpu:0', '/gpu:1']) - _ = replicated_model_fn( - features, labels, model_fn_lib.ModeKeys.TRAIN, self.params) - - def test_unsupported_loss_reduction(self): - features = np.array([[1.0], [2.0], [3.0]]) - labels = np.array([[1.0], [2.0], [3.0]]) - - with self.assertRaisesRegexp(ValueError, - '.+none.+reduction.+is.+specified.+'): - replicated_model_fn = replicate_model_fn._replicate_model_fn( - self.create_model_fn_with_loss_reduction(losses.Reduction.NONE), - devices=['/gpu:0', '/gpu:1', '/gpu:2']) - _ = replicated_model_fn( - features, labels, model_fn_lib.ModeKeys.TRAIN, self.params) - - def test_places_on_gpu_with_upper_case_spelling(self): - features = np.array([[0.01], [0.002]]) - labels = np.array([[0.01], [0.02]]) - - with self.test_session(): - replicated_model_fn = 
replicate_model_fn._replicate_model_fn( - self.create_model_fn_with_loss_reduction(losses.Reduction.SUM), - devices=['/GPU:0']) - _ = replicated_model_fn( - features, labels, model_fn_lib.ModeKeys.TRAIN, self.params) - - with variable_scope.variable_scope('', reuse=True): - c = variable_scope.get_variable('c', dtype=dtypes.float64) - self.assertEqual('/device:GPU:0', c.device) - - def test_places_on_gpu_with_lower_case_spelling(self): - features = np.array([[0.01], [0.002]]) - labels = np.array([[0.01], [0.02]]) - - with self.test_session(): - replicated_model_fn = replicate_model_fn._replicate_model_fn( - self.create_model_fn_with_loss_reduction(losses.Reduction.SUM), - devices=['/gpu:0']) - _ = replicated_model_fn( - features, labels, model_fn_lib.ModeKeys.TRAIN, self.params) - - with variable_scope.variable_scope('', reuse=True): - c = variable_scope.get_variable('c', dtype=dtypes.float64) - self.assertEqual('/device:GPU:0', c.device) - - -class ReplicateAcrossASingleDeviceWithoutTowerOptimizer( - test_util.TensorFlowTestCase): - - def model_fn(self, mode, features, labels, params): - c = variable_scope.get_variable( - 'c', - initializer=constant_op.constant(10, dtype=dtypes.float64), - dtype=dtypes.float64) - - predictions = math_ops.multiply(features, c) - - loss = losses.absolute_difference( - labels=labels, predictions=predictions, reduction=losses.Reduction.SUM) - loss = math_ops.reduce_sum(loss) - - metrics = { - 'accuracy': metrics_lib.accuracy(labels, predictions), - 'auc': metrics_lib.auc(labels, predictions) - } - - optimizer = gradient_descent.GradientDescentOptimizer( - params['learning_rate']) - - return model_fn_lib.EstimatorSpec( - mode=mode, - loss=loss, - eval_metric_ops=metrics, - predictions={'probabilities': predictions}, - train_op=optimizer.minimize(loss)) - - @property - def params(self): - params = {} - params['learning_rate'] = 1.0 - return params - - def test_train_single_tower(self): - features = np.array([[1.0], [2.0]]) - labels = 
np.array([[1.0], [2.0]]) - - with self.test_session() as session: - replicated_model_fn = replicate_model_fn._replicate_model_fn( - self.model_fn, devices=['/gpu:0']) - estimator_spec = replicated_model_fn( - features, labels, model_fn_lib.ModeKeys.TRAIN, self.params) - session.run(variables.global_variables_initializer()) - - # loss = feature * c - label - total_loss = (1.0 * 10 - 1.0) + (2.0 * 10 - 2.0) - self.assertEqual(total_loss, session.run(estimator_spec.loss)) - - # loss' of c is 3. - # new value of c = 10 - learning rate * 3 = 7.0. - session.run(estimator_spec.train_op) - with variable_scope.variable_scope('', reuse=True): - c = variable_scope.get_variable('c', dtype=dtypes.float64) - self.assertEqual(7.0, session.run(c)) - - -class UseTowerEstimatorWithoutReplication(test_util.TensorFlowTestCase): - - def model_fn(self, mode, features, labels, params): - c = variable_scope.get_variable( - 'c', - initializer=constant_op.constant(10, dtype=dtypes.float64), - dtype=dtypes.float64) - - features = features['features'] - predictions = math_ops.multiply(features, c) - - loss = losses.absolute_difference( - labels=labels, predictions=predictions, reduction=losses.Reduction.SUM) - loss = math_ops.reduce_sum(loss) - - metrics = { - 'accuracy': metrics_lib.accuracy(labels, predictions), - 'auc': metrics_lib.auc(labels, predictions) - } - - optimizer = replicate_model_fn._TowerOptimizer( - gradient_descent.GradientDescentOptimizer(params['learning_rate'])) - - return model_fn_lib.EstimatorSpec( - mode=mode, - loss=loss, - eval_metric_ops=metrics, - predictions={'probabilities': predictions}, - train_op=optimizer.minimize(loss)) - - @property - def params(self): - params = {} - params['learning_rate'] = 1.0 - return params - - def test_train_single_tower(self): - features = np.array([[1.0], [2.0]]) - labels = np.array([[1.0], [2.0]]) - - train_input_fn = numpy_io.numpy_input_fn( - x={'features': features}, y=labels, batch_size=2, shuffle=False) - - with 
self.test_session(): - estimator = estimator_lib.Estimator( - model_fn=self.model_fn, - model_dir=tempfile.mkdtemp(), - params=self.params) - estimator.train(train_input_fn, steps=1) - - self.assertEqual(7.0, estimator.get_variable_value('c')) - - -class MakeSureSyncReplicasOptimizerWorks(test_util.TensorFlowTestCase): - - def model_fn(self, mode, features, labels, params): - c = variable_scope.get_variable( - 'c', - initializer=constant_op.constant(10, dtype=dtypes.float64), - dtype=dtypes.float64) - - features = features['features'] - predictions = math_ops.multiply(features, c) - - loss = losses.absolute_difference( - labels=labels, predictions=predictions, reduction=losses.Reduction.SUM) - loss = math_ops.reduce_sum(loss) - - metrics = { - 'accuracy': metrics_lib.accuracy(labels, predictions), - 'auc': metrics_lib.auc(labels, predictions) - } - - optimizer = gradient_descent.GradientDescentOptimizer( - params['learning_rate']) - optimizer = training.SyncReplicasOptimizer( - optimizer, replicas_to_aggregate=1) - sync_hook = optimizer.make_session_run_hook(True) - optimizer = replicate_model_fn._TowerOptimizer( - optimizer, loss_reduction=losses.Reduction.SUM) - - return model_fn_lib.EstimatorSpec( - mode=mode, - loss=loss, - eval_metric_ops=metrics, - training_hooks=[sync_hook], - predictions={'probabilities': predictions}, - train_op=optimizer.minimize( - loss, global_step=training.get_global_step())) - - @property - def params(self): - params = {} - params['learning_rate'] = 1.0 - return params - - def test_train_multiple_towers(self): - features = np.array([[1.0], [2.0]]) - labels = np.array([[1.0], [2.0]]) - - train_input_fn = numpy_io.numpy_input_fn( - x={'features': features}, y=labels, batch_size=2, shuffle=False) - - model_fn = replicate_model_fn._replicate_model_fn( - self.model_fn, - devices=['/gpu:0', '/gpu:1']) - - estimator = estimator_lib.Estimator( - model_fn=model_fn, model_dir=tempfile.mkdtemp(), params=self.params) - 
estimator.train(train_input_fn, steps=1) - - self.assertEqual(7.0, estimator.get_variable_value('c')) - - -class ReplicateWithTwoOptimizersTest(test_util.TensorFlowTestCase): - - def model_fn(self, mode, features, labels, params): - c = variable_scope.get_variable( - 'c', - initializer=constant_op.constant(10, dtype=dtypes.float64), - dtype=dtypes.float64) - - side_effects = variable_scope.get_variable( - 'side_effects', - initializer=constant_op.constant(0, dtype=dtypes.float64), - dtype=dtypes.float64, - use_resource=True, - trainable=False) - - predictions = math_ops.multiply(features, c) - - loss = losses.absolute_difference( - labels=labels, predictions=predictions, reduction=losses.Reduction.SUM) - loss = math_ops.reduce_sum(loss) - - metrics = { - 'accuracy': metrics_lib.accuracy(labels, predictions), - 'auc': metrics_lib.auc(labels, predictions) - } - - first_optimizer = replicate_model_fn._TowerOptimizer( - gradient_descent.GradientDescentOptimizer(1.0), - loss_reduction=losses.Reduction.SUM) - second_optimizer = replicate_model_fn._TowerOptimizer( - adam.AdamOptimizer(1.0), loss_reduction=losses.Reduction.SUM) - - with ops_lib.control_dependencies([side_effects.assign_add(1.0)]): - first_grads_and_vars = first_optimizer.compute_gradients(loss) - - train_op = control_flow_ops.group( - [first_optimizer.apply_gradients(first_grads_and_vars), - second_optimizer.minimize(loss)]) - - return model_fn_lib.EstimatorSpec( - mode=mode, - loss=loss, - eval_metric_ops=metrics, - predictions={'probabilities': predictions}, - train_op=train_op) - - def test_train(self): - features = np.array([[1.0], [2.0]]) - labels = np.array([[1.0], [2.0]]) - - with self.test_session() as session: - replicated_model_fn = replicate_model_fn._replicate_model_fn( - self.model_fn, - devices=['/gpu:0', '/gpu:1']) - estimator_spec = replicated_model_fn(features, labels, - model_fn_lib.ModeKeys.TRAIN, {}) - session.run(variables.global_variables_initializer()) - - # loss = feature * c - 
label - total_loss = (1.0 * 10 - 1.0) + (2.0 * 10 - 2.0) - self.assertEqual(total_loss, session.run(estimator_spec.loss)) - - # loss' of c is 3. - # new value of c = 10 - learning rate * 3 = 7.0. - # Adam subtracts another ~1. - session.run(estimator_spec.train_op) - with variable_scope.variable_scope('', reuse=True): - c = variable_scope.get_variable('c', dtype=dtypes.float64) - self.assertNear(6.0, session.run(c), 0.000001) - - side_effects = variable_scope.get_variable( - 'side_effects', dtype=dtypes.float64) - self.assertNear(2.0, session.run(side_effects), 0.000001) - - -class ReplicateWithTwoLossesAndOneOptimizer(test_util.TensorFlowTestCase): - - def setUp(self): - self._should_skip_optimizer = False - self._towers_left_before_skipping_optimizer = -1 - - def incorrectly_skip_optimizer_for_tower(self, tower_number): - self._should_skip_optimizer = True - self._towers_left_before_skipping_optimizer = tower_number - - def should_skip_optimizer(self): - if not self._should_skip_optimizer: - return False - if self._towers_left_before_skipping_optimizer == 0: - return True - else: - self._towers_left_before_skipping_optimizer -= 1 - return False - - def model_fn(self, mode, features, labels, params): - c = variable_scope.get_variable( - 'c', - initializer=constant_op.constant(10, dtype=dtypes.float64), - dtype=dtypes.float64) - d = variable_scope.get_variable( - 'd', - initializer=constant_op.constant(2, dtype=dtypes.float64), - dtype=dtypes.float64) - - predictions = math_ops.multiply(features, c) - - loss = losses.absolute_difference( - labels=labels, predictions=predictions, reduction=losses.Reduction.SUM) - loss = math_ops.reduce_sum(loss) - - another_predictions = math_ops.multiply(features, d) - another_loss = losses.absolute_difference( - labels=labels, - predictions=another_predictions, - reduction=losses.Reduction.SUM) - another_loss = math_ops.reduce_sum(another_loss) - - total_loss = math_ops.add(loss, another_loss) - - metrics = { - 'accuracy': 
metrics_lib.accuracy(labels, predictions), - 'auc': metrics_lib.auc(labels, predictions) - } - - train_ops = [] - - optimizer = replicate_model_fn._TowerOptimizer( - gradient_descent.GradientDescentOptimizer(1.0), - loss_reduction=losses.Reduction.SUM) - train_ops.append(optimizer.minimize(loss, var_list=[c])) - if not self.should_skip_optimizer(): - another_optimizer = replicate_model_fn._TowerOptimizer( - gradient_descent.GradientDescentOptimizer(1.0), - loss_reduction=losses.Reduction.SUM) - train_ops.append(another_optimizer.minimize(another_loss, var_list=[d])) - - train_op = control_flow_ops.group(train_ops) - return model_fn_lib.EstimatorSpec( - mode=mode, - loss=total_loss, - eval_metric_ops=metrics, - predictions={'probabilities': predictions}, - train_op=train_op) - - def test_train(self): - features = np.array([[1.0], [2.0]]) - labels = np.array([[1.0], [2.0]]) - - with ops_lib.Graph().as_default(), self.test_session() as session: - replicated_model_fn = replicate_model_fn._replicate_model_fn( - self.model_fn, - devices=['/gpu:0', '/gpu:1']) - estimator_spec = replicated_model_fn(features, labels, - model_fn_lib.ModeKeys.TRAIN, {}) - session.run(variables.global_variables_initializer()) - - # For each tower, loss = (feature * c - label) + (feature * d - label). - total_loss = (1.0 * 10 - 1.0 + 1.0 * 2.0 - 1.0) + ( - 2.0 * 10 - 2.0 + 2.0 * 2.0 - 2.0) - self.assertEqual(total_loss, session.run(estimator_spec.loss)) - - session.run(estimator_spec.train_op) - - # loss' of c or loss' of d is 3. - # new value of c = 10 - learning rate * 3 = 7.0. - # new value of d = 2 - learning rate * 3 = -1.0. 
- with variable_scope.variable_scope('', reuse=True): - c = variable_scope.get_variable('c', dtype=dtypes.float64) - self.assertNear(7.0, session.run(c), 0.000001) - d = variable_scope.get_variable('d', dtype=dtypes.float64) - self.assertNear(-1.0, session.run(d), 0.000001) - - def test_different_optimizer_calls_within_towers(self): - self.incorrectly_skip_optimizer_for_tower(1) - - features = np.array([[1.0], [2.0]]) - labels = np.array([[1.0], [2.0]]) - - with self.test_session(), ops_lib.Graph().as_default(): - with self.assertRaisesRegexp( - ValueError, '.+was.+supposed.+to.+make.+same.+optimizer.+calls.+'): - replicated_model_fn = replicate_model_fn._replicate_model_fn( - self.model_fn, devices=['/gpu:0', '/gpu:1']) - _ = replicated_model_fn(features, labels, model_fn_lib.ModeKeys.TRAIN, - {}) - - -class FailToWrapOptimizerInTheModelFn(test_util.TensorFlowTestCase): - - def model_fn(self, mode, features, labels, params): - c = variable_scope.get_variable( - 'c', - initializer=constant_op.constant(10, dtype=dtypes.float64), - dtype=dtypes.float64) - - predictions = math_ops.multiply(features, c) - - loss = losses.absolute_difference( - labels=labels, predictions=predictions, reduction=losses.Reduction.SUM) - loss = math_ops.reduce_sum(loss) - - metrics = { - 'accuracy': metrics_lib.accuracy(labels, predictions), - 'auc': metrics_lib.auc(labels, predictions) - } - - optimizer = gradient_descent.GradientDescentOptimizer(1.0) - train_op = optimizer.minimize(loss) - - return model_fn_lib.EstimatorSpec( - mode=mode, - loss=loss, - eval_metric_ops=metrics, - predictions={'probabilities': predictions}, - train_op=train_op) - - def test_train(self): - features = np.array([[1.0], [2.0]]) - labels = np.array([[1.0], [2.0]]) - - with self.test_session(): - with self.assertRaisesRegexp(ValueError, - 'Please.+wrap.+with.+TowerOptimizer'): - replicated_model_fn = replicate_model_fn._replicate_model_fn( - self.model_fn, devices=['/gpu:0', '/gpu:1']) - _ = 
replicated_model_fn(features, labels, model_fn_lib.ModeKeys.TRAIN, - {}) - - -class GetLossTowersTest(test_util.TensorFlowTestCase): - - def create_model_fn_with_loss_reduction(self, loss_reduction): - - def model_fn(mode, features, labels, params): - del params - c = variable_scope.get_variable( - 'c', - initializer=constant_op.constant(0.25, dtype=dtypes.float64), - dtype=dtypes.float64) - - predictions = math_ops.add(np.array([0.1, 0.2, 0.3, features[0]]), c) - labels = np.array([0.1, 0.2, 0.3, labels[0]]) - - loss = losses.absolute_difference( - labels=labels, - predictions=predictions, - reduction=losses.Reduction.SUM) - - optimizer = replicate_model_fn._TowerOptimizer( - gradient_descent.GradientDescentOptimizer(1.0), - loss_reduction) - - return model_fn_lib.EstimatorSpec( - mode=mode, - loss=math_ops.reduce_sum(loss), - train_op=optimizer.minimize(loss)) - - return model_fn - - def test_gradients_are_computed(self): - with self.test_session() as session: - tower_specs = replicate_model_fn._get_loss_towers( - self.create_model_fn_with_loss_reduction(losses.Reduction.SUM), - mode=None, - features=[[0.6], [1.6]], - labels=[[0.6], [0.6]], - params=None, - config=None, - devices=['/gpu:0', '/gpu:1'], - local_ps_devices=['/gpu:0'], - name_scope_pattern='test_tower_{}') - session.run(variables.global_variables_initializer()) - - self.assertEqual(len(tower_specs), 2) - - self.assertEqual('/device:GPU:0', tower_specs[0].loss.device) - self.assertEqual('Sum:0', tower_specs[0].loss.name) - self.assertEqual(1.0, session.run(tower_specs[0].loss)) - - self.assertEqual('/device:GPU:1', tower_specs[1].loss.device) - self.assertEqual('test_tower_1/Sum:0', tower_specs[1].loss.name) - # The input batch for the second tower had a loss that is 1.0 - # bigger: 0.6 vs 1.6. 
- self.assertEqual(2.0, session.run(tower_specs[1].loss)) - - self.assertEqual(1, len(variables.global_variables())) - self.assertEqual(1, len(variables.trainable_variables())) - - with variable_scope.variable_scope('', reuse=True): - c = variable_scope.get_variable('c', dtype=dtypes.float64) - self.assertEqual(0.25, session.run(c)) - - def test_gradients_are_computed_with_mean_reduction(self): - with self.test_session() as session: - tower_specs = replicate_model_fn._get_loss_towers( - self.create_model_fn_with_loss_reduction(losses.Reduction.MEAN), - mode=model_fn_lib.ModeKeys.EVAL, - features=[[0.6], [1.6]], - labels=[[0.6], [0.6]], - params=None, - config=None, - devices=['/gpu:0', '/gpu:1'], - local_ps_devices=['/gpu:0'], - name_scope_pattern='test_tower_{}') - session.run(variables.global_variables_initializer()) - - self.assertEqual(len(tower_specs), 2) - - self.assertEqual('/device:GPU:0', tower_specs[0].loss.device) - self.assertEqual('averaged_loss:0', tower_specs[0].loss.name) - self.assertEqual(0.5, session.run(tower_specs[0].loss)) - - self.assertEqual('/device:GPU:1', tower_specs[1].loss.device) - self.assertEqual('test_tower_1/averaged_loss:0', tower_specs[1].loss.name) - # The input batch for the second tower had a loss that is 1.0 - # bigger: 0.6 vs 1.6. 
- self.assertEqual(1.0, session.run(tower_specs[1].loss)) - - self.assertEqual(1, len(variables.global_variables())) - self.assertEqual(1, len(variables.trainable_variables())) - - with variable_scope.variable_scope('', reuse=True): - c = variable_scope.get_variable('c', dtype=dtypes.float64) - self.assertEqual(0.25, session.run(c)) - - def test_variables_are_round_robined_correctly(self): - """Test that creates multiple variables and tests round-robin placement.""" - - def model_fn(mode, features, labels, params): - del params - for variable_name in ['a', 'b', 'c', 'd']: - c = variable_scope.get_variable( - variable_name, - initializer=constant_op.constant(0.25, dtype=dtypes.float64), - dtype=dtypes.float64) - - predictions = math_ops.add(np.array([0.1, 0.2, 0.3, features[0]]), c) - labels = np.array([0.1, 0.2, 0.3, labels[0]]) - loss = losses.absolute_difference( - labels=labels, - predictions=predictions, - reduction=losses.Reduction.SUM) - return model_fn_lib.EstimatorSpec( - mode=mode, loss=math_ops.reduce_sum(loss)) - - with self.test_session() as session: - tower_specs = replicate_model_fn._get_loss_towers( - model_fn, - mode=None, - features=[[0.6], [1.6], [2.6]], - labels=[[0.6], [0.6], [2.6]], - params=None, - config=None, - devices=['/gpu:0', '/gpu:1', '/gpu:3'], - local_ps_devices=['/gpu:0', '/gpu:1', '/gpu:3'], - name_scope_pattern='test_tower_{}') - session.run(variables.global_variables_initializer()) - - self.assertEqual(len(tower_specs), 3) - self.assertEqual('/device:GPU:0', tower_specs[0].loss.device) - self.assertEqual('/device:GPU:1', tower_specs[1].loss.device) - self.assertEqual('/device:GPU:3', tower_specs[2].loss.device) - - with variable_scope.variable_scope('', reuse=True): - a = variable_scope.get_variable('a', dtype=dtypes.float64) - self.assertEqual('/device:GPU:0', a.device) - b = variable_scope.get_variable('b', dtype=dtypes.float64) - self.assertEqual('/device:GPU:1', b.device) - c = variable_scope.get_variable('c', 
dtype=dtypes.float64) - self.assertEqual('/device:GPU:3', c.device) - d = variable_scope.get_variable('d', dtype=dtypes.float64) - self.assertEqual('/device:GPU:0', d.device) - - -class SplitBatchTest(test_util.TensorFlowTestCase): - - def evaluate_shards(self, first_list, second_list): - evaluate_items = lambda x: x.eval() - return list(map(evaluate_items, first_list)), list( - map(evaluate_items, second_list)) - - def assertSparseValuesEqual(self, a, b): - self.assertAllEqual(a.indices, b.indices) - self.assertAllEqual(a.values, b.values) - self.assertAllEqual(a.dense_shape, b.dense_shape) - - def test_simple_half_split(self): - with self.test_session(): - features = [0.0, 1.0, 2.0, 3.0] - labels = [10.0, 11.0, 12.0, 13.0] - feature_shards, label_shards = replicate_model_fn._split_batch( - features, labels, 2, device='/gpu:0') - - feature_shards, label_shards = self.evaluate_shards( - feature_shards, label_shards) - - self.assertAllEqual([[0.0, 1.0], [2.0, 3.0]], feature_shards) - self.assertAllEqual([[10.0, 11.0], [12.0, 13.0]], label_shards) - - def test_to_each_their_own(self): - with self.test_session(): - features = [0.0, 1.0, 2.0, 3.0] - labels = [10.0, 11.0, 12.0, 13.0] - feature_shards, label_shards = replicate_model_fn._split_batch( - features, labels, 4, device='/gpu:0') - - feature_shards, label_shards = self.evaluate_shards( - feature_shards, label_shards) - - self.assertAllEqual([[0.0], [1.0], [2.0], [3.0]], feature_shards) - self.assertAllEqual([[10.0], [11.0], [12.0], [13.0]], label_shards) - - def test_one_batch(self): - with self.test_session(): - features = [0.0, 1.0, 2.0, 3.0] - labels = [10.0, 11.0, 12.0, 13.0] - feature_shards, label_shards = replicate_model_fn._split_batch( - features, labels, 1, device='/gpu:0') - - feature_shards, label_shards = self.evaluate_shards( - feature_shards, label_shards) - - self.assertAllEqual([[0.0, 1.0, 2.0, 3.0]], feature_shards) - self.assertAllEqual([[10.0, 11.0, 12.0, 13.0]], label_shards) - - def 
test_half_split_in_dictionary(self): - with self.test_session(): - features = {'first': [0.0, 1.0, 2.0, 3.0], 'second': [4.0, 5.0, 6.0, 7.0]} - labels = [10.0, 11.0, 12.0, 13.0] - - feature_shards, label_shards = replicate_model_fn._split_batch( - features, labels, 2, device='/gpu:0') - - self.assertAllEqual([0.0, 1.0], feature_shards[0]['first'].eval()) - self.assertAllEqual([4.0, 5.0], feature_shards[0]['second'].eval()) - self.assertAllEqual([2.0, 3.0], feature_shards[1]['first'].eval()) - self.assertAllEqual([6.0, 7.0], feature_shards[1]['second'].eval()) - self.assertAllEqual([10.0, 11.0], label_shards[0].eval()) - self.assertAllEqual([12.0, 13.0], label_shards[1].eval()) - - def test_sparse_tensor_can_be_split_unevenly(self): - with self.test_session(): - features = { - 'x': - sparse_tensor.SparseTensor( - indices=[[0, 0], [1, 2], [2, 2]], - values=[1.0, 2.0, 3.0], - dense_shape=[3, 4]) - } - labels = np.array([[1.0], [2.0]]) - - feature_shards, label_shards = replicate_model_fn._split_batch( - features, labels, 2, device='/gpu:0') - - self.assertSparseValuesEqual( - sparse_tensor.SparseTensorValue( - indices=[[0, 0], [1, 2]], values=[1., 2.], dense_shape=[2, 4]), - feature_shards[0]['x'].eval()) - self.assertSparseValuesEqual( - sparse_tensor.SparseTensorValue( - indices=[[0, 2]], values=[3.], dense_shape=[1, 4]), - feature_shards[1]['x'].eval()) - self.assertAllEqual([[1.0]], label_shards[0].eval()) - self.assertAllEqual([[2.0]], label_shards[1].eval()) - - def test_sparse_tensor_can_be_split_unevenly_repeated_row(self): - with self.test_session(): - features = { - 'x': - sparse_tensor.SparseTensor( - indices=[[0, 0], [1, 0], [1, 1]], - values=[1.0, 2.0, 3.0], - dense_shape=[3, 4]) - } - labels = np.array([[1.0], [2.0]]) - - feature_shards, label_shards = replicate_model_fn._split_batch( - features, labels, 2, device='/gpu:0') - - self.assertSparseValuesEqual( - sparse_tensor.SparseTensorValue( - indices=[[0, 0], [1, 0], [1, 1]], - values=[1., 2., 3.], - 
dense_shape=[2, 4]), feature_shards[0]['x'].eval()) - - second_batch = feature_shards[1]['x'].eval() - self.assertFalse(len(second_batch.indices)) - self.assertFalse(len(second_batch.values)) - self.assertAllEqual([1, 4], second_batch.dense_shape) - self.assertAllEqual([[1.0]], label_shards[0].eval()) - self.assertAllEqual([[2.0]], label_shards[1].eval()) - - def test_one_batch_in_dictionary(self): - with self.test_session() as session: # pylint: disable=unused-variable - features = {'first': [0.0, 1.0, 2.0, 3.0], 'second': [4.0, 5.0, 6.0, 7.0]} - labels = [10.0, 11.0, 12.0, 13.0] - - feature_shards, label_shards = replicate_model_fn._split_batch( - features, labels, 1, device='/gpu:0') - - self.assertAllEqual([0.0, 1.0, 2.0, 3.0], - feature_shards[0]['first'].eval()) - self.assertAllEqual([4.0, 5.0, 6.0, 7.0], - feature_shards[0]['second'].eval()) - self.assertAllEqual([10.0, 11.0, 12.0, 13.0], label_shards[0].eval()) - - def test_feature_and_label_dictionaries(self): - with self.test_session() as session: # pylint: disable=unused-variable - features = {'first': [0.0, 1.0, 2.0, 3.0], 'second': [4.0, 5.0, 6.0, 7.0]} - labels = {'first': [10.0, 11.0], 'second': [12.0, 13.0]} - - feature_shards, label_shards = replicate_model_fn._split_batch( - features, labels, 2, device='/gpu:0') - - self.assertAllEqual([0.0, 1.0], feature_shards[0]['first'].eval()) - self.assertAllEqual([4.0, 5.0], feature_shards[0]['second'].eval()) - self.assertAllEqual([2.0, 3.0], feature_shards[1]['first'].eval()) - self.assertAllEqual([6.0, 7.0], feature_shards[1]['second'].eval()) - self.assertAllEqual([10.0], label_shards[0]['first'].eval()) - self.assertAllEqual([12.0], label_shards[0]['second'].eval()) - self.assertAllEqual([11], label_shards[1]['first'].eval()) - self.assertAllEqual([13.0], label_shards[1]['second'].eval()) - - -class TrainSpecTest(test_util.TensorFlowTestCase): - - expected_predictions = {} - - def create_estimator_spec(self, loss): - return model_fn_lib.EstimatorSpec( 
- mode=model_fn_lib.ModeKeys.TRAIN, - loss=loss, - train_op=loss, # Not used; currently required. - predictions=self.expected_predictions) - - def create_constant_loss(self, loss_value): - return constant_op.constant(loss_value, dtype=dtypes.float64) - - def test_example(self): - with self.test_session() as session: - tower_losses = list(map(self.create_constant_loss, [2, 4, 6])) - tower_specs = list(map(self.create_estimator_spec, tower_losses)) - - expected_train_op = tower_losses[1] - - estimator_spec = replicate_model_fn._train_spec( - tower_specs, expected_train_op, aggregation_device='/gpu:0') - - self.assertEqual(expected_train_op, estimator_spec.train_op) - self.assertEqual(2 + 4 + 6, session.run(estimator_spec.loss)) - self.assertEqual(self.expected_predictions, estimator_spec.predictions) - - -class EvalSpecTest(test_util.TensorFlowTestCase): - - def create_estimator_spec(self, loss, metrics): - return model_fn_lib.EstimatorSpec( - mode=model_fn_lib.ModeKeys.EVAL, loss=loss, eval_metric_ops=metrics) - - def create_constant_loss(self, loss_value): - return constant_op.constant(loss_value, dtype=dtypes.float64) - - def create_eval_metrics(self, noise): - predictions = np.array([0.1, 0.2, 0.3, 0.6 + noise]) - labels = np.array([0.1, 0.2, 0.3, 0.6]) - - metrics = { - 'accuracy': metrics_lib.accuracy(labels, predictions), - 'auc': metrics_lib.auc(labels, predictions) - } - return metrics - - def test_example(self): - with self.test_session() as session: - tower_losses = map(self.create_constant_loss, [2, 4, 6]) - tower_metrics = map(self.create_eval_metrics, [0, 0.2, 0.3]) - tower_specs = [ - self.create_estimator_spec(l, m) - for l, m in zip(tower_losses, tower_metrics) - ] - session.run(variables.local_variables_initializer()) - - estimator_spec = replicate_model_fn._eval_spec( - tower_specs, aggregation_device='/device:GPU:0') - - accuracy, a = estimator_spec.eval_metric_ops['accuracy'] - auc, b = estimator_spec.eval_metric_ops['auc'] - - 
self.assertEqual('/device:CPU:0', accuracy.device) - self.assertEqual('/device:CPU:0', auc.device) - - session.run([a, b]) - accuracy, auc = session.run([accuracy, auc]) - - self.assertNear((12 - 2) / 12, accuracy, 0.01) - self.assertEqual(0, auc) - self.assertEqual(2 + 4 + 6, session.run(estimator_spec.loss)) - - def test_handles_single_tower(self): - with self.test_session() as session: - tower_losses = map(self.create_constant_loss, [5]) - tower_metrics = map(self.create_eval_metrics, [0.2]) - tower_specs = [ - self.create_estimator_spec(l, m) - for l, m in zip(tower_losses, tower_metrics) - ] - session.run(variables.local_variables_initializer()) - - estimator_spec = replicate_model_fn._eval_spec( - tower_specs, aggregation_device='/device:GPU:0') - - accuracy, a = estimator_spec.eval_metric_ops['accuracy'] - auc, b = estimator_spec.eval_metric_ops['auc'] - - self.assertEqual('/device:CPU:0', accuracy.device) - self.assertEqual('/device:CPU:0', auc.device) - - session.run([a, b]) - accuracy = session.run(accuracy) - auc = session.run(auc) - - self.assertNear((4 - 1) / 4, accuracy, 0.01) - self.assertEqual(0, auc) - self.assertEqual(5, session.run(estimator_spec.loss)) - - -class PredictSpecTest(test_util.TensorFlowTestCase): - - def model_fn(self, mode, features, labels, params): - c = variable_scope.get_variable( - 'c', - initializer=constant_op.constant(0.25, dtype=dtypes.float64), - dtype=dtypes.float64) - - predictions = math_ops.add(np.array([features[0], features[0]]), c) - - return model_fn_lib.EstimatorSpec( - mode=model_fn_lib.ModeKeys.PREDICT, - predictions={ - 'probabilities': predictions - }) - - def test_example(self): - with self.test_session() as session: - tower_specs = replicate_model_fn._get_loss_towers( - self.model_fn, - mode=None, - features=[[0.1], [0.2]], - labels=[[], []], - params=None, - config=None, - devices=['/gpu:0', '/gpu:1'], - local_ps_devices=['/gpu:0'], - ) - session.run(variables.global_variables_initializer()) - - 
estimator_spec = replicate_model_fn._predict_spec( - tower_specs, aggregation_device='/gpu:0') - - self.assertEqual('/device:GPU:0', - estimator_spec.predictions['probabilities'].device) - self.assertAllClose({ - 'probabilities': np.array([0.35, 0.35, 0.45, 0.45]) - }, session.run(estimator_spec.predictions)) - - -class ReduceMetricVariablesTest(test_util.TensorFlowTestCase): - - def create_metric_variable(self, initial_value, name): - return variable_scope.variable( - initial_value, - trainable=False, - collections=[ops_lib.GraphKeys.METRIC_VARIABLES], - validate_shape=True, - name=name) - - def create_tower_metrics(self, tower_id): - with variable_scope.variable_scope('', reuse=(tower_id != 0)): - self.create_metric_variable(1.3 * (tower_id + 1), 'total') - self.create_metric_variable(2.3 * (tower_id + 1), 'count') - self.create_metric_variable( - np.array([3.3, 3.5, 3.7]) * (tower_id + 1), 'total') - - def test_example(self): - with self.test_session() as session: - for tower_id in range(3): - self.create_tower_metrics(tower_id) - - session.run( - variables.variables_initializer( - ops_lib.get_collection(ops_lib.GraphKeys.METRIC_VARIABLES))) - - session.run( - replicate_model_fn._reduce_metric_variables(number_of_towers=3)) - - # 1st tower = 1.3, 2.3, [3.3, 3.5, 3.7] - # 2nd tower = 2.6, 4.6, [6.6, 7.0, 7.4] - # 3rd tower = 3.9, 6.9, [9.9, 10.5, 11.1] - # Reduced = 7.8, 13.8, [19.8, 21.0, 22.2] - # Towers are accumulated in the first tower. 
- local_metrics = session.run( - ops_lib.get_collection(ops_lib.GraphKeys.METRIC_VARIABLES)) - - self.assertNear(7.8, local_metrics[0], 0.01) - self.assertNear(13.8, local_metrics[1], 0.01) - self.assertAllClose([19.8, 21., 22.1], local_metrics[2], 0.01) - self.assertNear(0.0, local_metrics[3], 0.01) - self.assertNear(0.0, local_metrics[4], 0.01) - self.assertAllClose([0.0, 0.0, 0.0], local_metrics[5], 0.01) - self.assertNear(0.0, local_metrics[6], 0.01) - self.assertNear(0.0, local_metrics[7], 0.01) - self.assertAllClose([0.0, 0.0, 0.0], local_metrics[8], 0.01) - - def test_reduce_is_idempotent(self): - with self.test_session() as session: - for tower_id in range(3): - self.create_tower_metrics(tower_id) - - session.run( - variables.variables_initializer( - ops_lib.get_collection(ops_lib.GraphKeys.METRIC_VARIABLES))) - - for _ in range(20): - session.run( - replicate_model_fn._reduce_metric_variables(number_of_towers=3)) - - local_metrics = session.run( - ops_lib.get_collection(ops_lib.GraphKeys.METRIC_VARIABLES)) - - self.assertNear(7.8, local_metrics[0], 0.01) - self.assertNear(13.8, local_metrics[1], 0.01) - self.assertAllClose([19.8, 21., 22.1], local_metrics[2], 0.01) - self.assertNear(0.0, local_metrics[3], 0.01) - self.assertNear(0.0, local_metrics[4], 0.01) - self.assertAllClose([0.0, 0.0, 0.0], local_metrics[5], 0.01) - self.assertNear(0.0, local_metrics[6], 0.01) - self.assertNear(0.0, local_metrics[7], 0.01) - self.assertAllClose([0.0, 0.0, 0.0], local_metrics[8], 0.01) - - def test_handles_single_tower(self): - with self.test_session() as session: - self.create_tower_metrics(0) - session.run( - variables.variables_initializer( - ops_lib.get_collection(ops_lib.GraphKeys.METRIC_VARIABLES))) - - session.run( - replicate_model_fn._reduce_metric_variables(number_of_towers=1)) - - local_metrics = session.run( - ops_lib.get_collection(ops_lib.GraphKeys.METRIC_VARIABLES)) - - self.assertNear(1.3, local_metrics[0], 0.01) - self.assertNear(2.3, local_metrics[1], 
0.01) - self.assertAllClose([3.3, 3.5, 3.7], local_metrics[2], 0.01) - - def test_doesnt_accept_uneven_number_of_variables(self): - with self.test_session() as session: - for tower_id in range(3): - self.create_tower_metrics(tower_id) - self.create_metric_variable(-1.0, 'oddball') - - session.run( - variables.variables_initializer( - ops_lib.get_collection(ops_lib.GraphKeys.METRIC_VARIABLES))) - - with self.assertRaisesRegexp( - ValueError, '.+Expected.+local.+variables.+but.+got.+instead.+'): - session.run( - replicate_model_fn._reduce_metric_variables(number_of_towers=3)) - - -class MergeExportOutputsTest(test_util.TensorFlowTestCase): - - def model_fn(self, mode, features, labels, params): - c = variable_scope.get_variable( - 'c', - initializer=constant_op.constant(10, dtype=dtypes.float64), - dtype=dtypes.float64) - - predictions = {'probabilities': math_ops.multiply(features, c)} - loss = losses.absolute_difference( - labels=labels, - predictions=predictions['probabilities'], - reduction=losses.Reduction.SUM) - - metrics = { - 'accuracy': metrics_lib.accuracy(labels, predictions['probabilities']), - 'auc': metrics_lib.auc(labels, predictions['probabilities']) - } - tensor_string_repr = str(features) - classes = constant_op.constant( - re.search('(split_inputs/split:[0-9])', tensor_string_repr).group(1), - dtype=dtypes.string) - - export_outputs = { - signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: - export_output.PredictOutput(predictions), - 'classification_output': - export_output.ClassificationOutput(predictions['probabilities'], - classes), - 'classification_scores': - export_output.ClassificationOutput( - scores=predictions['probabilities']), - 'classification_classes': - export_output.ClassificationOutput(classes=classes), - 'regression_output': - export_output.RegressionOutput(predictions['probabilities']), - } - - return model_fn_lib.EstimatorSpec( - mode=mode, - loss=math_ops.reduce_sum(loss), - eval_metric_ops=metrics, - 
predictions=predictions, - export_outputs=export_outputs) - - def replicate_estimator_spec(self, session): - features = np.array([0.01, 0.002]) - labels = np.array([0.01, 0.02]) - - replicated_model_fn = replicate_model_fn._replicate_model_fn( - self.model_fn, devices=['/gpu:0', '/gpu:1']) - estimator_spec = replicated_model_fn(features, labels, - model_fn_lib.ModeKeys.PREDICT, {}) - session.run(variables.global_variables_initializer()) - return estimator_spec - - def test_merge_predict_output(self): - with self.test_session() as session: - estimator_spec = self.replicate_estimator_spec(session) - self.assertAllClose( - { - 'probabilities': np.array([0.1, 0.02]) - }, - session.run(estimator_spec.export_outputs[ - signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY].outputs)) - - def test_merge_classification_output_scores_classes(self): - with self.test_session() as session: - estimator_spec = self.replicate_estimator_spec(session) - self.assertAllClose( - [0.1, 0.02], - session.run( - estimator_spec.export_outputs['classification_output'].scores)) - self.assertAllEqual( - [b'split_inputs/split:0', b'split_inputs/split:1'], - session.run( - estimator_spec.export_outputs['classification_output'].classes)) - - def test_merge_classification_output_scores(self): - with self.test_session() as session: - estimator_spec = self.replicate_estimator_spec(session) - self.assertAllClose( - [0.1, 0.02], - session.run( - estimator_spec.export_outputs['classification_scores'].scores)) - self.assertEqual( - None, estimator_spec.export_outputs['classification_scores'].classes) - - def test_merge_classification_output_classes(self): - with self.test_session() as session: - estimator_spec = self.replicate_estimator_spec(session) - self.assertAllEqual( - [b'split_inputs/split:0', b'split_inputs/split:1'], - session.run( - estimator_spec.export_outputs['classification_classes'].classes)) - self.assertEqual( - None, estimator_spec.export_outputs['classification_classes'].scores) - - 
def test_merge_regression_output(self): - with self.test_session() as session: - estimator_spec = self.replicate_estimator_spec(session) - self.assertAllClose( - [0.1, 0.02], - session.run(estimator_spec.export_outputs['regression_output'].value)) - - -class GetLocalDevicesTest(test_util.TensorFlowTestCase): - - def test_there_is_at_least_a_cpu(self): - self.assertTrue(replicate_model_fn._get_local_devices('CPU')) - - def test_there_is_no_xpu(self): - self.assertFalse( - replicate_model_fn._get_local_devices('XPU')) # XPU doesn't exist. - - def test_whether_there_is_a_gpu(self): - if test.is_gpu_available(): - self.assertTrue(len(replicate_model_fn._get_local_devices('GPU'))) - - -class LocalDeviceSetterTest(test_util.TensorFlowTestCase): - - def test_vars_are_on_ps_but_ops_are_on_workers(self): - ps_devices = ['/device:GPU:3'] - round_robin = device_setter._RoundRobinStrategy(num_tasks=len(ps_devices)) - - local_device_setter = replicate_model_fn._local_device_setter( - ps_devices=ps_devices, - ps_strategy=round_robin, - worker_device='/device:GPU:2') - - with ops_lib.device(local_device_setter): - a = variables.Variable(0.01) - self.assertEqual('/device:GPU:3', a.device) - - b = variables.Variable(0.02) - self.assertEqual('/device:GPU:3', b.device) - - c = variables.Variable(0.03) - self.assertEqual('/device:GPU:3', c.device) - - a_op = array_ops.concat(a, axis=0) - self.assertEqual('/device:GPU:2', a_op.device) - - b_op = array_ops.concat(b, axis=0) - self.assertEqual('/device:GPU:2', b_op.device) - - def test_round_robin_placement(self): - ps_devices = [ - '/device:GPU:0', '/device:GPU:1', '/device:GPU:3', '/device:GPU:4' - ] - round_robin = device_setter._RoundRobinStrategy(num_tasks=len(ps_devices)) - - local_device_setter = replicate_model_fn._local_device_setter( - ps_devices=ps_devices, - ps_strategy=round_robin, - worker_device='/device:GPU:2') - - with ops_lib.device(local_device_setter): - a = variables.Variable(0.01) - self.assertEqual('/device:GPU:0', 
a.device) - - b = variables.Variable(0.02) - self.assertEqual('/device:GPU:1', b.device) - - c = variables.Variable(0.03) - self.assertEqual('/device:GPU:3', c.device) - - a_op = array_ops.concat(a, axis=0) - self.assertEqual('/device:GPU:2', a_op.device) - - b_op = array_ops.concat(b, axis=0) - self.assertEqual('/device:GPU:2', b_op.device) - - c = variables.Variable(0.03) - self.assertEqual('/device:GPU:4', c.device) - - d = variables.Variable(0.03) - self.assertEqual('/device:GPU:0', d.device) - - c_op = array_ops.concat(c, axis=0) - self.assertEqual('/device:GPU:2', c_op.device) - - -class ComputeSumWithDevicePlacementTest(test_util.TensorFlowTestCase): - - def test_vectors(self): - with self.test_session() as session: - total = replicate_model_fn._compute_sum_on_device( - [1.0, 2.0, 3.0, 4.0], device='/device:GPU:0', name='test_sum') - - self.assertEqual('/device:GPU:0', total.device) - self.assertEqual('test_sum', total.op.name) - self.assertEqual(10.0, session.run(total)) - - def test_tensors(self): - with self.test_session() as session: - total = replicate_model_fn._compute_sum_on_device( - [[1.0, 2.0], [3.0, 4.0]], device='/device:GPU:0', name='test_sum') - - self.assertEqual('/device:GPU:0', total.device) - self.assertEqual('test_sum', total.op.name) - self.assertAllEqual([4.0, 6.0], session.run(total)) - - def test_indexedslices(self): - with self.test_session() as session: - a = ops_lib.IndexedSlices( - constant_op.constant([1.0, 2.0]), [0, 1], - dense_shape=constant_op.constant([2])) - b = ops_lib.IndexedSlices(constant_op.constant([3.0, 4.0]), [0, 1]) - - total = replicate_model_fn._compute_sum_on_device( - [a, b], device='/device:GPU:0') - - self.assertEqual('/device:GPU:0', total.device) - self.assertAllEqual([4.0, 6.0], - session.run(ops_lib.convert_to_tensor(total))) - - def test_indexedslices_higher_dimensions(self): - with self.test_session() as session: - a = ops_lib.IndexedSlices( - constant_op.constant([[1.0, 5.0], [2.0, 6.0]]), [0, 1], - 
dense_shape=constant_op.constant([2, 4])) - b = ops_lib.IndexedSlices( - constant_op.constant([[3.0, 7.0], [4.0, 8.0]]), [0, 1]) - - total = replicate_model_fn._compute_sum_on_device( - [a, b], device='/device:GPU:0') - - self.assertEqual('/device:GPU:0', total.device) - self.assertAllEqual([[4.0, 12.0], [6.0, 14.0]], - session.run(ops_lib.convert_to_tensor(total))) - - def test_indexedslices_some_dont_overlap(self): - with self.test_session() as session: - a = ops_lib.IndexedSlices( - constant_op.constant([1.0, 2.0]), [0, 3], - dense_shape=constant_op.constant([4])) - b = ops_lib.IndexedSlices(constant_op.constant([3.0, 4.0]), [0, 1]) - - total = replicate_model_fn._compute_sum_on_device( - [a, b], device='/device:GPU:0') - - self.assertEqual('/device:GPU:0', total.device) - self.assertAllEqual([4.0, 4.0, 0.0, 2.0], - session.run(ops_lib.convert_to_tensor(total))) - - def test_no_name_for_indexslices(self): - a = ops_lib.IndexedSlices( - constant_op.constant([1.0, 2.0]), [0, 1], - dense_shape=constant_op.constant([2])) - b = ops_lib.IndexedSlices(constant_op.constant([3.0, 4.0]), [0, 1]) - - with self.assertRaisesRegexp(ValueError, '.+name.+not.+expected.+'): - _ = replicate_model_fn._compute_sum_on_device( - [a, b], device='/device:GPU:0', name='cant_name_indexslices') - - -class ConcatTensorDictsTest(test_util.TensorFlowTestCase): - - def test_example(self): - tensor_dicts = [ - { - 'a': np.array([1.0, 2.0]), - 'b': np.array([11.0]), - 'c': np.array([21.0]), - }, - { - 'a': np.array([3.0]), - 'b': np.array([12.0, 13.0]), - }, - { - 'b': np.array([14.0]), - }, - ] - - with self.test_session() as session: - self.assertAllClose({ - 'a': np.array([1.0, 2.0, 3.0]), - 'b': np.array([11.0, 12.0, 13.0, 14.0]), - 'c': np.array([21.0]), - }, session.run(replicate_model_fn._concat_tensor_dicts(*tensor_dicts))) - - -if __name__ == '__main__': - test.main() diff --git a/tensorflow/python/estimator/run_config.py b/tensorflow/python/estimator/run_config.py index 
41415b89e9cd82bbc15d6b30975fd922d1c2d7eb..8162b249f1f0be6c901f09ff21f9a67f7c5e492f 100644 --- a/tensorflow/python/estimator/run_config.py +++ b/tensorflow/python/estimator/run_config.py @@ -27,11 +27,13 @@ import six from tensorflow.core.protobuf import config_pb2 from tensorflow.python.platform import tf_logging as logging from tensorflow.python.training import server_lib +from tensorflow.python.estimator import util from tensorflow.python.util import compat_internal from tensorflow.python.util.tf_export import tf_export _USE_DEFAULT = object() +_VALID_DEVICE_FN_ARGS = set(['op']) # A list of the property names in RunConfig that the user is allowed to change. _DEFAULT_REPLACEABLE_LIST = [ @@ -44,7 +46,8 @@ _DEFAULT_REPLACEABLE_LIST = [ 'keep_checkpoint_max', 'keep_checkpoint_every_n_hours', 'log_step_count_steps', - 'distribute' + 'train_distribute', + 'device_fn' ] _SAVE_CKPT_ERR = ( @@ -279,6 +282,11 @@ def _validate_properties(run_config): _validate('tf_random_seed', lambda seed: isinstance(seed, six.integer_types), message='tf_random_seed must be integer.') + _validate('device_fn', lambda device_fn: six.callable(device_fn) and + set(util.fn_args(device_fn)) == _VALID_DEVICE_FN_ARGS, + message='device_fn must be callable with exactly' + ' one argument "op".') + class TaskType(object): MASTER = 'master' @@ -302,7 +310,8 @@ class RunConfig(object): keep_checkpoint_max=5, keep_checkpoint_every_n_hours=10000, log_step_count_steps=100, - distribute=None): + train_distribute=None, + device_fn=None): """Constructs a RunConfig. All distributed training related properties `cluster_spec`, `is_chief`, @@ -316,7 +325,7 @@ class RunConfig(object): a list of task addresses. `task` has two attributes: `type` and `index`, where `type` can be any of - the task types in `cluster`. ` When `TF_CONFIG` contains said information, + the task types in `cluster`. 
When `TF_CONFIG` contains said information, the following properties are set on this class: * `cluster_spec` is parsed from `TF_CONFIG['cluster']`. Defaults to {}. If @@ -426,10 +435,14 @@ class RunConfig(object): the feature. log_step_count_steps: The frequency, in number of global steps, that the global step/sec and the loss will be logged during training. - distribute: an optional instance of + train_distribute: an optional instance of `tf.contrib.distribute.DistributionStrategy`. If specified, - then Estimator will distribute the user's model according to the policy - specified by that strategy. + then Estimator will distribute the user's model during training, + according to the policy specified by that strategy. + device_fn: A callable invoked for every `Operation` that takes the + `Operation` and returns the device string. If `None`, defaults to + the device function returned by `tf.train.replica_device_setter` + with round-robin strategy. Raises: ValueError: If both `save_checkpoints_steps` and `save_checkpoints_secs` @@ -466,7 +479,8 @@ class RunConfig(object): keep_checkpoint_max=keep_checkpoint_max, keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours, log_step_count_steps=log_step_count_steps, - distribute=distribute) + train_distribute=train_distribute, + device_fn=device_fn) self._init_distributed_setting_from_environment_var(tf_config) @@ -568,6 +582,16 @@ class RunConfig(object): def cluster_spec(self): return self._cluster_spec + @property + def device_fn(self): + """Returns the device_fn. + + If device_fn is not `None`, it overrides the default + device function used in `Estimator`. + Otherwise the default one is used. + """ + return self._device_fn + @property def evaluation_master(self): return self._evaluation_master @@ -678,10 +702,10 @@ class RunConfig(object): return self._service @property - def distribute(self): + def train_distribute(self): """Returns the optional `tf.contrib.distribute.DistributionStrategy` object. 
""" - return self._distribute + return self._train_distribute def replace(self, **kwargs): """Returns a new instance of `RunConfig` replacing specified properties. @@ -697,7 +721,8 @@ class RunConfig(object): - `keep_checkpoint_max`, - `keep_checkpoint_every_n_hours`, - `log_step_count_steps`, - - `distribute`. + - `train_distribute`, + - `device_fn`. In addition, either `save_checkpoints_steps` or `save_checkpoints_secs` can be set (should not be both). diff --git a/tensorflow/python/estimator/run_config_test.py b/tensorflow/python/estimator/run_config_test.py index a3eef4c53fd90a1ce69f3067d0b5c15909f43cec..c8b12605e1aaad11e114e4ace63697b93f3b2b92 100644 --- a/tensorflow/python/estimator/run_config_test.py +++ b/tensorflow/python/estimator/run_config_test.py @@ -42,6 +42,7 @@ _SESSION_CONFIG_ERR = 'session_config must be instance of ConfigProto' _KEEP_CKPT_MAX_ERR = 'keep_checkpoint_max should be >= 0' _KEEP_CKPT_HOURS_ERR = 'keep_checkpoint_every_n_hours should be > 0' _TF_RANDOM_SEED_ERR = 'tf_random_seed must be integer' +_DEVICE_FN_ERR = 'device_fn must be callable with exactly one argument "op".' _ONE_CHIEF_ERR = 'The "cluster" in TF_CONFIG must have only one "chief" node.' _ONE_MASTER_ERR = 'The "cluster" in TF_CONFIG must have only one "master" node.' 
_INVALID_TASK_TYPE_FOR_EVAL_MASTER = ( @@ -83,6 +84,7 @@ class RunConfigTest(test.TestCase): self.assertEqual(5, config.keep_checkpoint_max) self.assertEqual(10000, config.keep_checkpoint_every_n_hours) self.assertIsNone(config.service) + self.assertIsNone(config.device_fn) def test_model_dir(self): empty_config = run_config_lib.RunConfig() @@ -93,6 +95,7 @@ class RunConfigTest(test.TestCase): def test_replace_with_allowed_properties(self): session_config = config_pb2.ConfigProto(allow_soft_placement=True) + device_fn = lambda op: "/cpu:0" config = run_config_lib.RunConfig().replace( tf_random_seed=11, @@ -100,13 +103,15 @@ class RunConfigTest(test.TestCase): save_checkpoints_secs=14, session_config=session_config, keep_checkpoint_max=16, - keep_checkpoint_every_n_hours=17) + keep_checkpoint_every_n_hours=17, + device_fn=device_fn) self.assertEqual(11, config.tf_random_seed) self.assertEqual(12, config.save_summary_steps) self.assertEqual(14, config.save_checkpoints_secs) self.assertEqual(session_config, config.session_config) self.assertEqual(16, config.keep_checkpoint_max) self.assertEqual(17, config.keep_checkpoint_every_n_hours) + self.assertEqual(device_fn, config.device_fn) def test_replace_none_value(self): config = run_config_lib.RunConfig().replace( @@ -117,7 +122,8 @@ class RunConfigTest(test.TestCase): save_checkpoints_steps=None, session_config=None, keep_checkpoint_max=None, - keep_checkpoint_every_n_hours=None) + keep_checkpoint_every_n_hours=None, + device_fn=None) self.assertIsNone(config.tf_random_seed) self.assertIsNone(config.model_dir) self.assertIsNone(config.save_summary_steps) @@ -126,6 +132,7 @@ class RunConfigTest(test.TestCase): self.assertIsNone(config.session_config) self.assertIsNone(config.keep_checkpoint_max) self.assertIsNone(config.keep_checkpoint_every_n_hours) + self.assertIsNone(config.device_fn) def test_replace_with_disallowallowed_properties(self): config = run_config_lib.RunConfig() @@ -166,9 +173,12 @@ class 
RunConfigTest(test.TestCase): config.replace(keep_checkpoint_every_n_hours=0) with self.assertRaisesRegexp(ValueError, _TF_RANDOM_SEED_ERR): config.replace(tf_random_seed=1.0) + with self.assertRaisesRegexp(ValueError, _DEVICE_FN_ERR): + config.replace(device_fn=lambda x, y: 0) def test_init_with_allowed_properties(self): session_config = config_pb2.ConfigProto(allow_soft_placement=True) + device_fn = lambda op: "/cpu:0" config = run_config_lib.RunConfig( tf_random_seed=11, @@ -176,13 +186,15 @@ class RunConfigTest(test.TestCase): save_checkpoints_secs=14, session_config=session_config, keep_checkpoint_max=16, - keep_checkpoint_every_n_hours=17) + keep_checkpoint_every_n_hours=17, + device_fn=device_fn) self.assertEqual(11, config.tf_random_seed) self.assertEqual(12, config.save_summary_steps) self.assertEqual(14, config.save_checkpoints_secs) self.assertEqual(session_config, config.session_config) self.assertEqual(16, config.keep_checkpoint_max) self.assertEqual(17, config.keep_checkpoint_every_n_hours) + self.assertEqual(device_fn, config.device_fn) def test_init_none_value(self): config = run_config_lib.RunConfig( @@ -193,7 +205,8 @@ class RunConfigTest(test.TestCase): save_checkpoints_steps=None, session_config=None, keep_checkpoint_max=None, - keep_checkpoint_every_n_hours=None) + keep_checkpoint_every_n_hours=None, + device_fn=None) self.assertIsNone(config.tf_random_seed) self.assertIsNone(config.model_dir) self.assertIsNone(config.save_summary_steps) @@ -202,6 +215,7 @@ class RunConfigTest(test.TestCase): self.assertIsNone(config.session_config) self.assertIsNone(config.keep_checkpoint_max) self.assertIsNone(config.keep_checkpoint_every_n_hours) + self.assertIsNone(config.device_fn) def test_init_invalid_values(self): with self.assertRaisesRegexp(ValueError, _MODEL_DIR_ERR): @@ -220,6 +234,8 @@ class RunConfigTest(test.TestCase): run_config_lib.RunConfig(keep_checkpoint_every_n_hours=0) with self.assertRaisesRegexp(ValueError, _TF_RANDOM_SEED_ERR): 
run_config_lib.RunConfig(tf_random_seed=1.0) + with self.assertRaisesRegexp(ValueError, _DEVICE_FN_ERR): + run_config_lib.RunConfig(device_fn=lambda x: "/cpu:0") class RunConfigDistributedSettingTest(test.TestCase): diff --git a/tensorflow/python/estimator/training.py b/tensorflow/python/estimator/training.py index e38b765da52a7b6957a4fb8a02087c5d1fd5a781..9d271758f635869730c03bc2ac853b3493ec0cae 100644 --- a/tensorflow/python/estimator/training.py +++ b/tensorflow/python/estimator/training.py @@ -137,7 +137,7 @@ class TrainSpec( * A tuple (features, labels): Where features is a `Tensor` or a dictionary of string feature name to `Tensor` and labels is a `Tensor` or a dictionary of string label name to `Tensor`. - + max_steps: Int. Positive number of total steps for which to train model. If `None`, train forever. The training `input_fn` is not expected to generate `OutOfRangeError` or `StopIteration` exceptions. See the @@ -334,7 +334,8 @@ def train_and_evaluate(estimator, train_spec, eval_spec): can read and write). The only extra work to do is setting the environment variable `TF_CONFIG` properly for each worker correspondingly. - Also see: https://www.tensorflow.org/deploy/distributed + Also see + [Distributed TensorFlow](https://www.tensorflow.org/deploy/distributed). Setting environment variable depends on the platform. 
For example, on Linux, it can be done as follows (`$` is the shell prompt): diff --git a/tensorflow/python/feature_column/BUILD b/tensorflow/python/feature_column/BUILD index 219105d386aabc60c2ab24547f426826d6d6cdd3..295d4ca094cc8cb85c0f1f7fd47c20b910c270df 100644 --- a/tensorflow/python/feature_column/BUILD +++ b/tensorflow/python/feature_column/BUILD @@ -43,6 +43,7 @@ py_library( "//tensorflow/python:util", "//tensorflow/python:variable_scope", "//tensorflow/python:variables", + "//tensorflow/python/keras", "@six_archive//:six", ], ) diff --git a/tensorflow/python/feature_column/feature_column.py b/tensorflow/python/feature_column/feature_column.py index 92c6ff21c4130c9858652b24399612f1c902fc70..c16c3cda4892b8017571c2b37736b85c80f3d8a4 100644 --- a/tensorflow/python/feature_column/feature_column.py +++ b/tensorflow/python/feature_column/feature_column.py @@ -135,10 +135,13 @@ import numpy as np import six +from tensorflow.python.eager import context from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor as sparse_tensor_lib from tensorflow.python.framework import tensor_shape +from tensorflow.python.keras._impl.keras.engine import training +from tensorflow.python.layers import base from tensorflow.python.ops import array_ops from tensorflow.python.ops import check_ops from tensorflow.python.ops import control_flow_ops @@ -159,7 +162,6 @@ from tensorflow.python.platform import tf_logging as logging from tensorflow.python.training import checkpoint_utils from tensorflow.python.util import nest from tensorflow.python.util.tf_export import tf_export -from tensorflow.python.util.tf_export import tf_export def _internal_input_layer(features, @@ -406,59 +408,207 @@ def linear_model(features, ValueError: if an item in `feature_columns` is neither a `_DenseColumn` nor `_CategoricalColumn`. 
""" - feature_columns = _clean_feature_columns(feature_columns) - for column in feature_columns: - if not isinstance(column, (_DenseColumn, _CategoricalColumn)): - raise ValueError('Items of feature_columns must be either a _DenseColumn ' - 'or _CategoricalColumn. Given: {}'.format(column)) - weight_collections = list(weight_collections or []) - if ops.GraphKeys.GLOBAL_VARIABLES not in weight_collections: - weight_collections.append(ops.GraphKeys.GLOBAL_VARIABLES) - if ops.GraphKeys.MODEL_VARIABLES not in weight_collections: - weight_collections.append(ops.GraphKeys.MODEL_VARIABLES) - with variable_scope.variable_scope( - None, default_name='linear_model', values=features.values()): - weighted_sums = [] - ordered_columns = [] - builder = _LazyBuilder(features) - for column in sorted(feature_columns, key=lambda x: x.name): - with variable_scope.variable_scope( - None, default_name=column._var_scope_name): # pylint: disable=protected-access - ordered_columns.append(column) - weighted_sum = _create_weighted_sum( - column=column, - builder=builder, - units=units, - sparse_combiner=sparse_combiner, - weight_collections=weight_collections, - trainable=trainable) - weighted_sums.append(weighted_sum) - if cols_to_vars is not None: - # Retrieve the variables created. 
- cols_to_vars[column] = ops.get_collection( - ops.GraphKeys.GLOBAL_VARIABLES, - scope=variable_scope.get_variable_scope().name) - _verify_static_batch_size_equality(weighted_sums, ordered_columns) - predictions_no_bias = math_ops.add_n( - weighted_sums, name='weighted_sum_no_bias') - bias = variable_scope.get_variable( + linear_model_layer = _LinearModel( + feature_columns=feature_columns, + units=units, + sparse_combiner=sparse_combiner, + weight_collections=weight_collections, + trainable=trainable, + name='linear_model') + retval = linear_model_layer(features) # pylint: disable=not-callable + if cols_to_vars is not None: + cols_to_vars.update(linear_model_layer.cols_to_vars()) + return retval + + +def _add_to_collections(var, weight_collections): + # TODO(rohanj): Explore adding a _get_variable_list method on `Variable` + # so that we don't have to do this check. + if isinstance(var, variables.PartitionedVariable): + for constituent_var in list(var): + ops.add_to_collections(weight_collections, constituent_var) + else: + ops.add_to_collections(weight_collections, var) + + +class _FCLinearWrapper(base.Layer): + """Wraps a _FeatureColumn in a layer for use in a linear model. + + See `linear_model` above. 
+ """ + + def __init__(self, + feature_column, + units=1, + sparse_combiner='sum', + weight_collections=None, + trainable=True, + name=None, + **kwargs): + super(_FCLinearWrapper, self).__init__( + trainable=trainable, name=name, **kwargs) + self._feature_column = feature_column + self._units = units + self._sparse_combiner = sparse_combiner + self._weight_collections = weight_collections + + def build(self, _): + if isinstance(self._feature_column, _CategoricalColumn): + weight = self.add_variable( + name='weights', + shape=(self._feature_column._num_buckets, self._units), # pylint: disable=protected-access + initializer=init_ops.zeros_initializer(), + trainable=self.trainable) + else: + num_elements = self._feature_column._variable_shape.num_elements() # pylint: disable=protected-access + weight = self.add_variable( + name='weights', + shape=[num_elements, self._units], + initializer=init_ops.zeros_initializer(), + trainable=self.trainable) + _add_to_collections(weight, self._weight_collections) + self._weight_var = weight + self.built = True + + def call(self, builder): + weighted_sum = _create_weighted_sum( + column=self._feature_column, + builder=builder, + units=self._units, + sparse_combiner=self._sparse_combiner, + weight_collections=self._weight_collections, + trainable=self.trainable, + weight_var=self._weight_var) + return weighted_sum + + +class _BiasLayer(base.Layer): + """A layer for the bias term. 
+ """ + + def __init__(self, + units=1, + trainable=True, + weight_collections=None, + name=None, + **kwargs): + super(_BiasLayer, self).__init__(trainable=trainable, name=name, **kwargs) + self._units = units + self._weight_collections = weight_collections + + def build(self, _): + self._bias_variable = self.add_variable( 'bias_weights', - shape=[units], + shape=[self._units], initializer=init_ops.zeros_initializer(), + trainable=self.trainable) + _add_to_collections(self._bias_variable, self._weight_collections) + self.built = True + + def call(self, _): + return self._bias_variable + + +def _get_expanded_variable_list(variable): + if (isinstance(variable, variables.Variable) or + resource_variable_ops.is_resource_variable(variable)): + return [variable] # Single variable case. + else: # Must be a PartitionedVariable, so convert into a list. + return list(variable) + + +def _strip_leading_slashes(name): + return name.rsplit('/', 1)[-1] + + +class _LinearModel(training.Model): + """Creates a linear model using feature columns. + + See `linear_model` for details. + """ + + def __init__(self, + feature_columns, + units=1, + sparse_combiner='sum', + weight_collections=None, + trainable=True, + name=None, + **kwargs): + super(_LinearModel, self).__init__(name=name, **kwargs) + self._feature_columns = _clean_feature_columns(feature_columns) + self._weight_collections = list(weight_collections or []) + if ops.GraphKeys.MODEL_VARIABLES not in self._weight_collections: + self._weight_collections.append(ops.GraphKeys.MODEL_VARIABLES) + + column_layers = {} + for column in sorted(self._feature_columns, key=lambda x: x.name): + with variable_scope.variable_scope( + None, default_name=column._var_scope_name) as vs: # pylint: disable=protected-access + # Having the fully expressed variable scope name ends up doubly + # expressing the outer scope (scope with which this method was called) + # in the name of the variable that would get created. 
+ column_name = _strip_leading_slashes(vs.name) + column_layer = _FCLinearWrapper(column, units, sparse_combiner, + self._weight_collections, trainable, + column_name, **kwargs) + column_layers[column_name] = column_layer + self._column_layers = self._add_layers(column_layers) + self._bias_layer = _BiasLayer( + units=units, trainable=trainable, - collections=weight_collections) - predictions = nn_ops.bias_add( - predictions_no_bias, bias, name='weighted_sum') - if cols_to_vars is not None: - # Add the bias to cols_to_vars as well, converting the Variable or - # PartitionedVariable to a list of Variable's. - if (isinstance(bias, variables.Variable) or - resource_variable_ops.is_resource_variable(bias)): - cols_to_vars['bias'] = [bias] - else: # Must be a PartitionedVariable. - cols_to_vars['bias'] = list(bias) + weight_collections=self._weight_collections, + name='bias_layer', + **kwargs) + self._cols_to_vars = {} + + def cols_to_vars(self): + """Returns a dict mapping _FeatureColumns to variables. + + See `linear_model` for more information. + This is not populated till `call` is called i.e. layer is built. + """ + return self._cols_to_vars + + def call(self, features): + with variable_scope.variable_scope(self.name): + for column in self._feature_columns: + if not isinstance(column, (_DenseColumn, _CategoricalColumn)): + raise ValueError( + 'Items of feature_columns must be either a ' + '_DenseColumn or _CategoricalColumn. 
Given: {}'.format(column)) + weighted_sums = [] + ordered_columns = [] + builder = _LazyBuilder(features) + for layer in sorted(self._column_layers.values(), key=lambda x: x.name): + column = layer._feature_column # pylint: disable=protected-access + ordered_columns.append(column) + weighted_sum = layer(builder) + weighted_sums.append(weighted_sum) + self._cols_to_vars[column] = ops.get_collection( + ops.GraphKeys.GLOBAL_VARIABLES, scope=layer.scope_name) + + _verify_static_batch_size_equality(weighted_sums, ordered_columns) + predictions_no_bias = math_ops.add_n( + weighted_sums, name='weighted_sum_no_bias') + predictions = nn_ops.bias_add( + predictions_no_bias, + self._bias_layer( # pylint: disable=not-callable + builder, + scope=variable_scope.get_variable_scope()), # pylint: disable=not-callable + name='weighted_sum') + bias = self._bias_layer.variables[0] + self._cols_to_vars['bias'] = _get_expanded_variable_list(bias) return predictions + def _add_layers(self, layers): + # "Magic" required for keras.Model classes to track all the variables in + # a list of layers.Layer objects. + # TODO(ashankar): Figure out API so user code doesn't have to do this. + for name, layer in layers.items(): + setattr(self, 'layer-%s' % name, layer) + return layers + def _transform_features(features, feature_columns): """Returns transformed features based on features columns passed in. 
@@ -655,11 +805,22 @@ def embedding_column( initializer = init_ops.truncated_normal_initializer( mean=0.0, stddev=1 / math.sqrt(dimension)) + embedding_shape = categorical_column._num_buckets, dimension # pylint: disable=protected-access + + def _creator(weight_collections, scope): + embedding_column_layer = _EmbeddingColumnLayer( + embedding_shape=embedding_shape, + initializer=initializer, + weight_collections=weight_collections, + trainable=trainable, + name='embedding_column_layer') + return embedding_column_layer(None, scope=scope) # pylint: disable=not-callable + return _EmbeddingColumn( categorical_column=categorical_column, dimension=dimension, combiner=combiner, - initializer=initializer, + layer_creator=_creator, ckpt_to_load_from=ckpt_to_load_from, tensor_name_in_ckpt=tensor_name_in_ckpt, max_norm=max_norm, @@ -782,6 +943,7 @@ def shared_embedding_columns( sorted_columns = sorted(categorical_columns, key=lambda x: x.name) c0 = sorted_columns[0] + num_buckets = c0._num_buckets # pylint: disable=protected-access if not isinstance(c0, _CategoricalColumn): raise ValueError( 'All categorical_columns must be subclasses of _CategoricalColumn. ' @@ -797,23 +959,45 @@ def shared_embedding_columns( 'the same type, or be weighted_categorical_column of the same type. ' 'Given column: {} of type: {} does not match given column: {} of ' 'type: {}'.format(c0, type(c0), c, type(c))) + if num_buckets != c._num_buckets: # pylint: disable=protected-access + raise ValueError( + 'To use shared_embedding_column, all categorical_columns must have ' + 'the same number of buckets. Given column: {} with buckets: {} does ' + 'not match column: {} with buckets: {}'.format( + c0, num_buckets, c, c._num_buckets)) # pylint: disable=protected-access if not shared_embedding_collection_name: shared_embedding_collection_name = '_'.join(c.name for c in sorted_columns) shared_embedding_collection_name += '_shared_embedding' + # Create the state (_SharedEmbeddingColumnLayer) here. 
+ embedding_shape = num_buckets, dimension + + shared_embedding_column_layer = _EmbeddingColumnLayer( + embedding_shape=embedding_shape, + initializer=initializer, + weight_collections=[], + trainable=trainable, + name=shared_embedding_collection_name) + result = [] for column in categorical_columns: - result.append(_SharedEmbeddingColumn( - categorical_column=column, - dimension=dimension, - combiner=combiner, - initializer=initializer, - shared_embedding_collection_name=shared_embedding_collection_name, - ckpt_to_load_from=ckpt_to_load_from, - tensor_name_in_ckpt=tensor_name_in_ckpt, - max_norm=max_norm, - trainable=trainable)) + result.append( + _SharedEmbeddingColumn( + categorical_column=column, + initializer=initializer, + dimension=dimension, + combiner=combiner, + var_scope_name=shared_embedding_collection_name, + ckpt_to_load_from=ckpt_to_load_from, + tensor_name_in_ckpt=tensor_name_in_ckpt, + max_norm=max_norm, + trainable=trainable)) + + for single_result in result: + single_result._set_layer(shared_embedding_column_layer) # pylint: disable=protected-access + single_result._set_all_columns(result) # pylint: disable=protected-access + return result @@ -1570,6 +1754,57 @@ def crossed_column(keys, hash_bucket_size, hash_key=None): hash_key=hash_key) +# TODO(rohanj): Clearly define semantics of this layer. +class _EmbeddingColumnLayer(base.Layer): + """A layer that stores all the state required for a embedding column.""" + + def __init__(self, + embedding_shape, + initializer, + weight_collections=None, + trainable=True, + name=None, + **kwargs): + """Constructor. + + Args: + embedding_shape: Shape of the embedding variable used for lookup. + initializer: A variable initializer function to be used in embedding + variable initialization. If not specified, defaults to + `tf.truncated_normal_initializer` with mean `0.0` and standard deviation + `1/sqrt(dimension)`. + weight_collections: A list of collection names to which the Variable will + be added. 
Note that, variables will also be added to collections + `tf.GraphKeys.GLOBAL_VARIABLES` and `ops.GraphKeys.MODEL_VARIABLES`. + trainable: If `True` also add the variable to the graph collection + `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`). + name: Name of the layer + **kwargs: keyword named properties. + """ + super(_EmbeddingColumnLayer, self).__init__( + trainable=trainable, name=name, **kwargs) + self._embedding_shape = embedding_shape + self._initializer = initializer + self._weight_collections = weight_collections + + def build(self, _): + self._embedding_weight_var = self.add_variable( + name='embedding_weights', + shape=self._embedding_shape, + dtype=dtypes.float32, + initializer=self._initializer, + trainable=self.trainable) + # self.add_variable already appends to GLOBAL_VARIABLES collection. + if self._weight_collections and not context.executing_eagerly(): + for weight_collection in self._weight_collections: + if weight_collection != ops.GraphKeys.GLOBAL_VARIABLES: + _add_to_collections(self._embedding_weight_var, [weight_collection]) + self.built = True + + def call(self, _): + return self._embedding_weight_var + + class _FeatureColumn(object): """Represents a feature column abstraction. @@ -1643,6 +1878,14 @@ class _FeatureColumn(object): """ pass + def _reset_config(self): + """Resets the configuration in the column. + + Some feature columns e.g. embedding or shared embedding columns might + have some state that is needed to be reset sometimes. Use this method + in that scenario. + """ + class _DenseColumn(_FeatureColumn): """Represents a column which can be represented as `Tensor`. 
@@ -1687,13 +1930,13 @@ class _DenseColumn(_FeatureColumn): pass -def _create_weighted_sum( - column, - builder, - units, - sparse_combiner, - weight_collections, - trainable): +def _create_weighted_sum(column, + builder, + units, + sparse_combiner, + weight_collections, + trainable, + weight_var=None): """Creates a weighted sum for a dense or sparse column for linear_model.""" if isinstance(column, _CategoricalColumn): return _create_categorical_column_weighted_sum( @@ -1702,18 +1945,24 @@ def _create_weighted_sum( units=units, sparse_combiner=sparse_combiner, weight_collections=weight_collections, - trainable=trainable) + trainable=trainable, + weight_var=weight_var) else: return _create_dense_column_weighted_sum( column=column, builder=builder, units=units, weight_collections=weight_collections, - trainable=trainable) + trainable=trainable, + weight_var=weight_var) -def _create_dense_column_weighted_sum( - column, builder, units, weight_collections, trainable): +def _create_dense_column_weighted_sum(column, + builder, + units, + weight_collections, + trainable, + weight_var=None): """Create a weighted sum of a dense column for linear_model.""" tensor = column._get_dense_tensor( # pylint: disable=protected-access builder, @@ -1722,12 +1971,15 @@ def _create_dense_column_weighted_sum( num_elements = column._variable_shape.num_elements() # pylint: disable=protected-access batch_size = array_ops.shape(tensor)[0] tensor = array_ops.reshape(tensor, shape=(batch_size, num_elements)) - weight = variable_scope.get_variable( - name='weights', - shape=[num_elements, units], - initializer=init_ops.zeros_initializer(), - trainable=trainable, - collections=weight_collections) + if weight_var is not None: + weight = weight_var + else: + weight = variable_scope.get_variable( + name='weights', + shape=[num_elements, units], + initializer=init_ops.zeros_initializer(), + trainable=trainable, + collections=weight_collections) return math_ops.matmul(tensor, weight, 
name='weighted_sum') @@ -1777,8 +2029,13 @@ class _CategoricalColumn(_FeatureColumn): pass -def _create_categorical_column_weighted_sum( - column, builder, units, sparse_combiner, weight_collections, trainable): +def _create_categorical_column_weighted_sum(column, + builder, + units, + sparse_combiner, + weight_collections, + trainable, + weight_var=None): """Create a weighted sum of a categorical column for linear_model.""" sparse_tensors = column._get_sparse_tensors( # pylint: disable=protected-access builder, @@ -1792,12 +2049,15 @@ def _create_categorical_column_weighted_sum( weight_tensor = sparse_ops.sparse_reshape( weight_tensor, [array_ops.shape(weight_tensor)[0], -1]) - weight = variable_scope.get_variable( - name='weights', - shape=(column._num_buckets, units), # pylint: disable=protected-access - initializer=init_ops.zeros_initializer(), - trainable=trainable, - collections=weight_collections) + if weight_var is not None: + weight = weight_var + else: + weight = variable_scope.get_variable( + name='weights', + shape=(column._num_buckets, units), # pylint: disable=protected-access + initializer=init_ops.zeros_initializer(), + trainable=trainable, + collections=weight_collections) return _safe_embedding_lookup_sparse( weight, id_tensor, @@ -2170,10 +2430,10 @@ class _BucketizedColumn(_DenseColumn, _CategoricalColumn, class _EmbeddingColumn( _DenseColumn, _SequenceDenseColumn, - collections.namedtuple('_EmbeddingColumn', ( - 'categorical_column', 'dimension', 'combiner', 'initializer', - 'ckpt_to_load_from', 'tensor_name_in_ckpt', 'max_norm', 'trainable' - ))): + collections.namedtuple( + '_EmbeddingColumn', + ('categorical_column', 'dimension', 'combiner', 'layer_creator', + 'ckpt_to_load_from', 'tensor_name_in_ckpt', 'max_norm', 'trainable'))): """See `embedding_column`.""" @property @@ -2195,8 +2455,10 @@ class _EmbeddingColumn( self._shape = tensor_shape.vector(self.dimension) return self._shape - def _get_dense_tensor_internal( - self, inputs, 
weight_collections=None, trainable=None): + def _get_dense_tensor_internal(self, + inputs, + weight_collections=None, + trainable=None): """Private method that follows the signature of _get_dense_tensor.""" # Get sparse IDs and weights. sparse_tensors = self.categorical_column._get_sparse_tensors( # pylint: disable=protected-access @@ -2204,14 +2466,10 @@ class _EmbeddingColumn( sparse_ids = sparse_tensors.id_tensor sparse_weights = sparse_tensors.weight_tensor - embedding_shape = (self.categorical_column._num_buckets, self.dimension) # pylint: disable=protected-access - embedding_weights = variable_scope.get_variable( - name='embedding_weights', - shape=embedding_shape, - dtype=dtypes.float32, - initializer=self.initializer, - trainable=self.trainable and trainable, - collections=weight_collections) + embedding_weights = self.layer_creator( + weight_collections=weight_collections, + scope=variable_scope.get_variable_scope()) + if self.ckpt_to_load_from is not None: to_restore = embedding_weights if isinstance(to_restore, variables.PartitionedVariable): @@ -2242,7 +2500,8 @@ class _EmbeddingColumn( self.name, type(self.categorical_column), self.categorical_column)) return self._get_dense_tensor_internal( - inputs=inputs, weight_collections=weight_collections, + inputs=inputs, + weight_collections=weight_collections, trainable=trainable) def _get_sequence_dense_tensor( @@ -2267,13 +2526,20 @@ class _EmbeddingColumn( dense_tensor=dense_tensor, sequence_length=sequence_length) +def _get_graph_for_variable(var): + if isinstance(var, variables.PartitionedVariable): + return list(var)[0].graph + else: + return var.graph + + class _SharedEmbeddingColumn( _DenseColumn, - collections.namedtuple('_SharedEmbeddingColumn', ( - 'categorical_column', 'dimension', 'combiner', 'initializer', - 'shared_embedding_collection_name', 'ckpt_to_load_from', - 'tensor_name_in_ckpt', 'max_norm', 'trainable' - ))): + collections.namedtuple( + '_SharedEmbeddingColumn', + 
('categorical_column', 'dimension', 'combiner', 'initializer', + 'var_scope_name', 'ckpt_to_load_from', 'tensor_name_in_ckpt', + 'max_norm', 'trainable'))): """See `embedding_column`.""" @property @@ -2284,7 +2550,7 @@ class _SharedEmbeddingColumn( @property def _var_scope_name(self): - return self.shared_embedding_collection_name + return self.var_scope_name @property def _parse_example_spec(self): @@ -2293,6 +2559,22 @@ class _SharedEmbeddingColumn( def _transform_feature(self, inputs): return inputs.get(self.categorical_column) + def _set_layer(self, layer): + self._layer = layer + + def _set_all_columns(self, all_columns): + self._all_columns = all_columns + + def _reset_config(self): + config = self._layer.get_config() + config['embedding_shape'] = ( + self.categorical_column._num_buckets, # pylint: disable=protected-access + self.dimension) + config['initializer'] = self.initializer + self._layer = self._layer.__class__.from_config(config) + for column in self._all_columns: + column._set_layer(self._layer) # pylint: disable=protected-access + @property def _variable_shape(self): if not hasattr(self, '_shape'): @@ -2310,38 +2592,17 @@ class _SharedEmbeddingColumn( sparse_ids = sparse_tensors.id_tensor sparse_weights = sparse_tensors.weight_tensor - embedding_shape = (self.categorical_column._num_buckets, self.dimension) # pylint: disable=protected-access - shared_embedding_collection = ops.get_collection( - self.shared_embedding_collection_name) - if shared_embedding_collection: - if len(shared_embedding_collection) > 1: - raise ValueError( - 'Collection {} can only contain one variable. ' - 'Suggested fix A: Choose a unique name for this collection. ' - 'Suggested fix B: Do not add any variables to this collection. 
' - 'The feature_column library already adds a variable under the ' - 'hood.'.format(shared_embedding_collection)) - embedding_weights = shared_embedding_collection[0] - if embedding_weights.get_shape() != embedding_shape: - raise ValueError( - 'Shared embedding collection {} contains variable {} of ' - 'unexpected shape {}. Expected shape is {}. ' - 'Suggested fix A: Choose a unique name for this collection. ' - 'Suggested fix B: Do not add any variables to this collection. ' - 'The feature_column library already adds a variable under the ' - 'hood.'.format( - self.shared_embedding_collection_name, embedding_weights.name, - embedding_weights.get_shape(), embedding_shape)) - else: - embedding_weights = variable_scope.get_variable( - name='embedding_weights', - shape=embedding_shape, - dtype=dtypes.float32, - initializer=self.initializer, - trainable=self.trainable and trainable, - collections=weight_collections) - ops.add_to_collection( - self.shared_embedding_collection_name, embedding_weights) + embedding_weights = self._layer( + None, scope=variable_scope.get_variable_scope()) + # If we're in graph mode and this is called with a different graph, + # then we should reset. + if not context.executing_eagerly() and ( + ops.get_default_graph() != + _get_graph_for_variable(embedding_weights)): + self._reset_config() + embedding_weights = self._layer( + None, scope=variable_scope.get_variable_scope()) + if self.ckpt_to_load_from is not None: to_restore = embedding_weights if isinstance(to_restore, variables.PartitionedVariable): @@ -2891,6 +3152,9 @@ def _safe_embedding_lookup_sparse(embedding_weights, # Prune invalid ids and weights. sparse_ids, sparse_weights = _prune_invalid_ids(sparse_ids, sparse_weights) + if combiner != 'sum': + sparse_ids, sparse_weights = _prune_invalid_weights( + sparse_ids, sparse_weights) # Fill in dummy values for empty features, if necessary. 
sparse_ids, is_row_empty = sparse_ops.sparse_fill_empty_rows(sparse_ids, @@ -2939,13 +3203,23 @@ def _prune_invalid_ids(sparse_ids, sparse_weights): is_id_valid = math_ops.greater_equal(sparse_ids.values, 0) if sparse_weights is not None: is_id_valid = math_ops.logical_and( - is_id_valid, math_ops.greater(sparse_weights.values, 0)) + is_id_valid, + array_ops.ones_like(sparse_weights.values, dtype=dtypes.bool)) sparse_ids = sparse_ops.sparse_retain(sparse_ids, is_id_valid) if sparse_weights is not None: sparse_weights = sparse_ops.sparse_retain(sparse_weights, is_id_valid) return sparse_ids, sparse_weights +def _prune_invalid_weights(sparse_ids, sparse_weights): + """Prune invalid weights (< 0) from the input ids and weights.""" + if sparse_weights is not None: + is_weights_valid = math_ops.greater(sparse_weights.values, 0) + sparse_ids = sparse_ops.sparse_retain(sparse_ids, is_weights_valid) + sparse_weights = sparse_ops.sparse_retain(sparse_weights, is_weights_valid) + return sparse_ids, sparse_weights + + class _IndicatorColumn(_DenseColumn, _SequenceDenseColumn, collections.namedtuple('_IndicatorColumn', ['categorical_column'])): diff --git a/tensorflow/python/feature_column/feature_column_test.py b/tensorflow/python/feature_column/feature_column_test.py index 6f366e77229577b1a6a5363f882daa07203f525c..d963dd9b551c0ebefbbf2677af75114abc59c084 100644 --- a/tensorflow/python/feature_column/feature_column_test.py +++ b/tensorflow/python/feature_column/feature_column_test.py @@ -34,6 +34,7 @@ from tensorflow.python.feature_column.feature_column import _CategoricalColumn from tensorflow.python.feature_column.feature_column import _DenseColumn from tensorflow.python.feature_column.feature_column import _FeatureColumn from tensorflow.python.feature_column.feature_column import _LazyBuilder +from tensorflow.python.feature_column.feature_column import _LinearModel from tensorflow.python.feature_column.feature_column import _transform_features from 
tensorflow.python.feature_column.feature_column import InputLayer from tensorflow.python.framework import constant_op @@ -339,6 +340,20 @@ class NumericColumnTest(test.TestCase): sess.run(price_var.assign([[10.]])) self.assertAllClose([[10.], [50.]], predictions.eval()) + def test_keras_linear_model(self): + price = fc.numeric_column('price') + with ops.Graph().as_default(): + features = {'price': [[1.], [5.]]} + predictions = get_keras_linear_model_predictions(features, [price]) + bias = get_linear_model_bias() + price_var = get_linear_model_column_var(price) + with _initialized_session() as sess: + self.assertAllClose([0.], bias.eval()) + self.assertAllClose([[0.]], price_var.eval()) + self.assertAllClose([[0.], [0.]], predictions.eval()) + sess.run(price_var.assign([[10.]])) + self.assertAllClose([[10.], [50.]], predictions.eval()) + class BucketizedColumnTest(test.TestCase): @@ -561,6 +576,62 @@ class BucketizedColumnTest(test.TestCase): sess.run(bias.assign([1.])) self.assertAllClose([[81.], [141.]], predictions.eval()) + def test_keras_linear_model_one_input_value(self): + """Tests _LinearModel for input with shape=[1].""" + price = fc.numeric_column('price', shape=[1]) + bucketized_price = fc.bucketized_column(price, boundaries=[0, 2, 4, 6]) + with ops.Graph().as_default(): + features = {'price': [[-1.], [1.], [5.], [6.]]} + predictions = get_keras_linear_model_predictions(features, + [bucketized_price]) + bias = get_linear_model_bias() + bucketized_price_var = get_linear_model_column_var(bucketized_price) + with _initialized_session() as sess: + self.assertAllClose([0.], bias.eval()) + # One weight variable per bucket, all initialized to zero. + self.assertAllClose([[0.], [0.], [0.], [0.], [0.]], + bucketized_price_var.eval()) + self.assertAllClose([[0.], [0.], [0.], [0.]], predictions.eval()) + sess.run( + bucketized_price_var.assign([[10.], [20.], [30.], [40.], [50.]])) + # price -1. is in the 0th bucket, whose weight is 10. + # price 1. 
is in the 1st bucket, whose weight is 20. + # price 5. is in the 3rd bucket, whose weight is 40. + # price 6. is in the 4th bucket, whose weight is 50. + self.assertAllClose([[10.], [20.], [40.], [50.]], predictions.eval()) + sess.run(bias.assign([1.])) + self.assertAllClose([[11.], [21.], [41.], [51.]], predictions.eval()) + + def test_keras_linear_model_two_input_values(self): + """Tests _LinearModel for input with shape=[2].""" + price = fc.numeric_column('price', shape=[2]) + bucketized_price = fc.bucketized_column(price, boundaries=[0, 2, 4, 6]) + with ops.Graph().as_default(): + features = {'price': [[-1., 1.], [5., 6.]]} + predictions = get_keras_linear_model_predictions(features, + [bucketized_price]) + bias = get_linear_model_bias() + bucketized_price_var = get_linear_model_column_var(bucketized_price) + with _initialized_session() as sess: + self.assertAllClose([0.], bias.eval()) + # One weight per bucket per input column, all initialized to zero. + self.assertAllClose( + [[0.], [0.], [0.], [0.], [0.], [0.], [0.], [0.], [0.], [0.]], + bucketized_price_var.eval()) + self.assertAllClose([[0.], [0.]], predictions.eval()) + sess.run( + bucketized_price_var.assign([[10.], [20.], [30.], [40.], [50.], + [60.], [70.], [80.], [90.], [100.]])) + # 1st example: + # price -1. is in the 0th bucket, whose weight is 10. + # price 1. is in the 6th bucket, whose weight is 70. + # 2nd example: + # price 5. is in the 3rd bucket, whose weight is 40. + # price 6. is in the 9th bucket, whose weight is 100. 
+ self.assertAllClose([[80.], [140.]], predictions.eval()) + sess.run(bias.assign([1.])) + self.assertAllClose([[81.], [141.]], predictions.eval()) + class HashedCategoricalColumnTest(test.TestCase): @@ -767,6 +838,28 @@ class HashedCategoricalColumnTest(test.TestCase): # 'skywalker' -> 2, 'omar' -> 2: wire_var[2] + wire_var[2] = 3+3 = 6 self.assertAllClose(((4.,), (6.,)), predictions.eval()) + def test_keras_linear_model(self): + wire_column = fc.categorical_column_with_hash_bucket('wire', 4) + self.assertEqual(4, wire_column._num_buckets) + with ops.Graph().as_default(): + predictions = get_keras_linear_model_predictions({ + wire_column.name: + sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1)), + values=('marlo', 'skywalker', 'omar'), + dense_shape=(2, 2)) + }, (wire_column,)) + bias = get_linear_model_bias() + wire_var = get_linear_model_column_var(wire_column) + with _initialized_session(): + self.assertAllClose((0.,), bias.eval()) + self.assertAllClose(((0.,), (0.,), (0.,), (0.,)), wire_var.eval()) + self.assertAllClose(((0.,), (0.,)), predictions.eval()) + wire_var.assign(((1.,), (2.,), (3.,), (4.,))).eval() + # 'marlo' -> 3: wire_var[3] = 4 + # 'skywalker' -> 2, 'omar' -> 2: wire_var[2] + wire_var[2] = 3+3 = 6 + self.assertAllClose(((4.,), (6.,)), predictions.eval()) + class CrossedColumnTest(test.TestCase): @@ -1039,49 +1132,809 @@ class CrossedColumnTest(test.TestCase): return _CategoricalColumn.IdWeightPair( id_tensor=ids_and_weights[0], weight_tensor=ids_and_weights[1]) - t = _TestColumnWithWeights() - crossed = fc.crossed_column([t, 'c'], hash_bucket_size=5, hash_key=5) - with ops.Graph().as_default(): - with self.assertRaisesRegexp( - ValueError, - 'crossed_column does not support weight_tensor.*{}'.format(t.name)): - fc.linear_model({ - t.name: sparse_tensor.SparseTensor( - indices=((0, 0), (1, 0), (1, 1)), - values=[0, 1, 2], - dense_shape=(2, 2)), - '{}_weights'.format(t.name): sparse_tensor.SparseTensor( - indices=((0, 0), (1, 0), 
(1, 1)), - values=[1., 10., 2.], - dense_shape=(2, 2)), - 'c': sparse_tensor.SparseTensor( - indices=((0, 0), (1, 0), (1, 1)), - values=['cA', 'cB', 'cC'], - dense_shape=(2, 2)), - }, (crossed,)) + t = _TestColumnWithWeights() + crossed = fc.crossed_column([t, 'c'], hash_bucket_size=5, hash_key=5) + with ops.Graph().as_default(): + with self.assertRaisesRegexp( + ValueError, + 'crossed_column does not support weight_tensor.*{}'.format(t.name)): + fc.linear_model({ + t.name: sparse_tensor.SparseTensor( + indices=((0, 0), (1, 0), (1, 1)), + values=[0, 1, 2], + dense_shape=(2, 2)), + '{}_weights'.format(t.name): sparse_tensor.SparseTensor( + indices=((0, 0), (1, 0), (1, 1)), + values=[1., 10., 2.], + dense_shape=(2, 2)), + 'c': sparse_tensor.SparseTensor( + indices=((0, 0), (1, 0), (1, 1)), + values=['cA', 'cB', 'cC'], + dense_shape=(2, 2)), + }, (crossed,)) + + def test_keras_linear_model(self): + """Tests _LinearModel. + + Uses data from test_get_sparse_tesnsors_simple. + """ + a = fc.numeric_column('a', dtype=dtypes.int32, shape=(2,)) + b = fc.bucketized_column(a, boundaries=(0, 1)) + crossed = fc.crossed_column([b, 'c'], hash_bucket_size=5, hash_key=5) + with ops.Graph().as_default(): + predictions = get_keras_linear_model_predictions({ + 'a': + constant_op.constant(((-1., .5), (.5, 1.))), + 'c': + sparse_tensor.SparseTensor( + indices=((0, 0), (1, 0), (1, 1)), + values=['cA', 'cB', 'cC'], + dense_shape=(2, 2)), + }, (crossed,)) + bias = get_linear_model_bias() + crossed_var = get_linear_model_column_var(crossed) + with _initialized_session() as sess: + self.assertAllClose((0.,), bias.eval()) + self.assertAllClose(((0.,), (0.,), (0.,), (0.,), (0.,)), + crossed_var.eval()) + self.assertAllClose(((0.,), (0.,)), predictions.eval()) + sess.run(crossed_var.assign(((1.,), (2.,), (3.,), (4.,), (5.,)))) + # Expected ids after cross = (1, 0, 1, 3, 4, 2) + self.assertAllClose(((3.,), (14.,)), predictions.eval()) + sess.run(bias.assign((.1,))) + self.assertAllClose(((3.1,), 
(14.1,)), predictions.eval()) + + def test_keras_linear_model_with_weights(self): + + class _TestColumnWithWeights(_CategoricalColumn): + """Produces sparse IDs and sparse weights.""" + + @property + def name(self): + return 'test_column' + + @property + def _parse_example_spec(self): + return { + self.name: + parsing_ops.VarLenFeature(dtypes.int32), + '{}_weights'.format(self.name): + parsing_ops.VarLenFeature(dtypes.float32), + } + + @property + def _num_buckets(self): + return 5 + + def _transform_feature(self, inputs): + return (inputs.get(self.name), + inputs.get('{}_weights'.format(self.name))) + + def _get_sparse_tensors(self, + inputs, + weight_collections=None, + trainable=None): + """Populates both id_tensor and weight_tensor.""" + ids_and_weights = inputs.get(self) + return _CategoricalColumn.IdWeightPair( + id_tensor=ids_and_weights[0], weight_tensor=ids_and_weights[1]) + + t = _TestColumnWithWeights() + crossed = fc.crossed_column([t, 'c'], hash_bucket_size=5, hash_key=5) + with ops.Graph().as_default(): + with self.assertRaisesRegexp( + ValueError, + 'crossed_column does not support weight_tensor.*{}'.format(t.name)): + get_keras_linear_model_predictions({ + t.name: + sparse_tensor.SparseTensor( + indices=((0, 0), (1, 0), (1, 1)), + values=[0, 1, 2], + dense_shape=(2, 2)), + '{}_weights'.format(t.name): + sparse_tensor.SparseTensor( + indices=((0, 0), (1, 0), (1, 1)), + values=[1., 10., 2.], + dense_shape=(2, 2)), + 'c': + sparse_tensor.SparseTensor( + indices=((0, 0), (1, 0), (1, 1)), + values=['cA', 'cB', 'cC'], + dense_shape=(2, 2)), + }, (crossed,)) + + +def get_linear_model_bias(): + with variable_scope.variable_scope('linear_model', reuse=True): + return variable_scope.get_variable('bias_weights') + + +def get_linear_model_column_var(column): + return ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES, + 'linear_model/' + column.name)[0] + + +def get_keras_linear_model_predictions(features, + feature_columns, + units=1, + sparse_combiner='sum', + 
weight_collections=None, + trainable=True, + cols_to_vars=None): + keras_linear_model = _LinearModel( + feature_columns, + units, + sparse_combiner, + weight_collections, + trainable, + name='linear_model') + retval = keras_linear_model(features) # pylint: disable=not-callable + if cols_to_vars is not None: + cols_to_vars.update(keras_linear_model.cols_to_vars()) + return retval + + +@test_util.with_c_api +class LinearModelTest(test.TestCase): + + def test_raises_if_empty_feature_columns(self): + with self.assertRaisesRegexp(ValueError, + 'feature_columns must not be empty'): + fc.linear_model(features={}, feature_columns=[]) + + def test_should_be_feature_column(self): + with self.assertRaisesRegexp(ValueError, 'must be a _FeatureColumn'): + fc.linear_model(features={'a': [[0]]}, feature_columns='NotSupported') + + def test_should_be_dense_or_categorical_column(self): + + class NotSupportedColumn(_FeatureColumn): + + @property + def name(self): + return 'NotSupportedColumn' + + def _transform_feature(self, cache): + pass + + @property + def _parse_example_spec(self): + pass + + with self.assertRaisesRegexp( + ValueError, 'must be either a _DenseColumn or _CategoricalColumn'): + fc.linear_model( + features={'a': [[0]]}, feature_columns=[NotSupportedColumn()]) + + def test_does_not_support_dict_columns(self): + with self.assertRaisesRegexp( + ValueError, 'Expected feature_columns to be iterable, found dict.'): + fc.linear_model( + features={'a': [[0]]}, feature_columns={'a': fc.numeric_column('a')}) + + def test_raises_if_duplicate_name(self): + with self.assertRaisesRegexp( + ValueError, 'Duplicate feature column name found for columns'): + fc.linear_model( + features={'a': [[0]]}, + feature_columns=[fc.numeric_column('a'), + fc.numeric_column('a')]) + + def test_dense_bias(self): + price = fc.numeric_column('price') + with ops.Graph().as_default(): + features = {'price': [[1.], [5.]]} + predictions = fc.linear_model(features, [price]) + bias = 
get_linear_model_bias() + price_var = get_linear_model_column_var(price) + with _initialized_session() as sess: + self.assertAllClose([0.], bias.eval()) + sess.run(price_var.assign([[10.]])) + sess.run(bias.assign([5.])) + self.assertAllClose([[15.], [55.]], predictions.eval()) + + def test_sparse_bias(self): + wire_cast = fc.categorical_column_with_hash_bucket('wire_cast', 4) + with ops.Graph().as_default(): + wire_tensor = sparse_tensor.SparseTensor( + values=['omar', 'stringer', 'marlo'], # hashed to = [2, 0, 3] + indices=[[0, 0], [1, 0], [1, 1]], + dense_shape=[2, 2]) + features = {'wire_cast': wire_tensor} + predictions = fc.linear_model(features, [wire_cast]) + bias = get_linear_model_bias() + wire_cast_var = get_linear_model_column_var(wire_cast) + with _initialized_session() as sess: + self.assertAllClose([0.], bias.eval()) + self.assertAllClose([[0.], [0.], [0.], [0.]], wire_cast_var.eval()) + sess.run(wire_cast_var.assign([[10.], [100.], [1000.], [10000.]])) + sess.run(bias.assign([5.])) + self.assertAllClose([[1005.], [10015.]], predictions.eval()) + + def test_dense_and_sparse_bias(self): + wire_cast = fc.categorical_column_with_hash_bucket('wire_cast', 4) + price = fc.numeric_column('price') + with ops.Graph().as_default(): + wire_tensor = sparse_tensor.SparseTensor( + values=['omar', 'stringer', 'marlo'], # hashed to = [2, 0, 3] + indices=[[0, 0], [1, 0], [1, 1]], + dense_shape=[2, 2]) + features = {'wire_cast': wire_tensor, 'price': [[1.], [5.]]} + predictions = fc.linear_model(features, [wire_cast, price]) + bias = get_linear_model_bias() + wire_cast_var = get_linear_model_column_var(wire_cast) + price_var = get_linear_model_column_var(price) + with _initialized_session() as sess: + sess.run(wire_cast_var.assign([[10.], [100.], [1000.], [10000.]])) + sess.run(bias.assign([5.])) + sess.run(price_var.assign([[10.]])) + self.assertAllClose([[1015.], [10065.]], predictions.eval()) + + def test_dense_and_sparse_column(self): + """When the column is both 
dense and sparse, uses sparse tensors.""" + + class _DenseAndSparseColumn(_DenseColumn, _CategoricalColumn): + + @property + def name(self): + return 'dense_and_sparse_column' + + @property + def _parse_example_spec(self): + return {self.name: parsing_ops.VarLenFeature(self.dtype)} + + def _transform_feature(self, inputs): + return inputs.get(self.name) + + @property + def _variable_shape(self): + raise ValueError('Should not use this method.') + + def _get_dense_tensor(self, inputs, weight_collections=None, + trainable=None): + raise ValueError('Should not use this method.') + + @property + def _num_buckets(self): + return 4 + + def _get_sparse_tensors(self, inputs, weight_collections=None, + trainable=None): + sp_tensor = sparse_tensor.SparseTensor( + indices=[[0, 0], [1, 0], [1, 1]], + values=[2, 0, 3], + dense_shape=[2, 2]) + return _CategoricalColumn.IdWeightPair(sp_tensor, None) + + dense_and_sparse_column = _DenseAndSparseColumn() + with ops.Graph().as_default(): + sp_tensor = sparse_tensor.SparseTensor( + values=['omar', 'stringer', 'marlo'], + indices=[[0, 0], [1, 0], [1, 1]], + dense_shape=[2, 2]) + features = {dense_and_sparse_column.name: sp_tensor} + predictions = fc.linear_model(features, [dense_and_sparse_column]) + bias = get_linear_model_bias() + dense_and_sparse_column_var = get_linear_model_column_var( + dense_and_sparse_column) + with _initialized_session() as sess: + sess.run(dense_and_sparse_column_var.assign( + [[10.], [100.], [1000.], [10000.]])) + sess.run(bias.assign([5.])) + self.assertAllClose([[1005.], [10015.]], predictions.eval()) + + def test_dense_multi_output(self): + price = fc.numeric_column('price') + with ops.Graph().as_default(): + features = {'price': [[1.], [5.]]} + predictions = fc.linear_model(features, [price], units=3) + bias = get_linear_model_bias() + price_var = get_linear_model_column_var(price) + with _initialized_session() as sess: + self.assertAllClose(np.zeros((3,)), bias.eval()) + 
self.assertAllClose(np.zeros((1, 3)), price_var.eval()) + sess.run(price_var.assign([[10., 100., 1000.]])) + sess.run(bias.assign([5., 6., 7.])) + self.assertAllClose([[15., 106., 1007.], [55., 506., 5007.]], + predictions.eval()) + + def test_sparse_multi_output(self): + wire_cast = fc.categorical_column_with_hash_bucket('wire_cast', 4) + with ops.Graph().as_default(): + wire_tensor = sparse_tensor.SparseTensor( + values=['omar', 'stringer', 'marlo'], # hashed to = [2, 0, 3] + indices=[[0, 0], [1, 0], [1, 1]], + dense_shape=[2, 2]) + features = {'wire_cast': wire_tensor} + predictions = fc.linear_model(features, [wire_cast], units=3) + bias = get_linear_model_bias() + wire_cast_var = get_linear_model_column_var(wire_cast) + with _initialized_session() as sess: + self.assertAllClose(np.zeros((3,)), bias.eval()) + self.assertAllClose(np.zeros((4, 3)), wire_cast_var.eval()) + sess.run( + wire_cast_var.assign([[10., 11., 12.], [100., 110., 120.], [ + 1000., 1100., 1200. + ], [10000., 11000., 12000.]])) + sess.run(bias.assign([5., 6., 7.])) + self.assertAllClose([[1005., 1106., 1207.], [10015., 11017., 12019.]], + predictions.eval()) + + def test_dense_multi_dimension(self): + price = fc.numeric_column('price', shape=2) + with ops.Graph().as_default(): + features = {'price': [[1., 2.], [5., 6.]]} + predictions = fc.linear_model(features, [price]) + price_var = get_linear_model_column_var(price) + with _initialized_session() as sess: + self.assertAllClose([[0.], [0.]], price_var.eval()) + sess.run(price_var.assign([[10.], [100.]])) + self.assertAllClose([[210.], [650.]], predictions.eval()) + + def test_sparse_multi_rank(self): + wire_cast = fc.categorical_column_with_hash_bucket('wire_cast', 4) + with ops.Graph().as_default(): + wire_tensor = array_ops.sparse_placeholder(dtypes.string) + wire_value = sparse_tensor.SparseTensorValue( + values=['omar', 'stringer', 'marlo', 'omar'], # hashed = [2, 0, 3, 2] + indices=[[0, 0, 0], [0, 1, 0], [1, 0, 0], [1, 0, 1]], + 
dense_shape=[2, 2, 2]) + features = {'wire_cast': wire_tensor} + predictions = fc.linear_model(features, [wire_cast]) + wire_cast_var = get_linear_model_column_var(wire_cast) + with _initialized_session() as sess: + self.assertAllClose(np.zeros((4, 1)), wire_cast_var.eval()) + self.assertAllClose( + np.zeros((2, 1)), + predictions.eval(feed_dict={wire_tensor: wire_value})) + sess.run(wire_cast_var.assign([[10.], [100.], [1000.], [10000.]])) + self.assertAllClose( + [[1010.], [11000.]], + predictions.eval(feed_dict={wire_tensor: wire_value})) + + def test_sparse_combiner(self): + wire_cast = fc.categorical_column_with_hash_bucket('wire_cast', 4) + with ops.Graph().as_default(): + wire_tensor = sparse_tensor.SparseTensor( + values=['omar', 'stringer', 'marlo'], # hashed to = [2, 0, 3] + indices=[[0, 0], [1, 0], [1, 1]], + dense_shape=[2, 2]) + features = {'wire_cast': wire_tensor} + predictions = fc.linear_model( + features, [wire_cast], sparse_combiner='mean') + bias = get_linear_model_bias() + wire_cast_var = get_linear_model_column_var(wire_cast) + with _initialized_session() as sess: + sess.run(wire_cast_var.assign([[10.], [100.], [1000.], [10000.]])) + sess.run(bias.assign([5.])) + self.assertAllClose([[1005.], [5010.]], predictions.eval()) + + def test_sparse_combiner_with_negative_weights(self): + wire_cast = fc.categorical_column_with_hash_bucket('wire_cast', 4) + wire_cast_weights = fc.weighted_categorical_column(wire_cast, 'weights') + + with ops.Graph().as_default(): + wire_tensor = sparse_tensor.SparseTensor( + values=['omar', 'stringer', 'marlo'], # hashed to = [2, 0, 3] + indices=[[0, 0], [1, 0], [1, 1]], + dense_shape=[2, 2]) + features = { + 'wire_cast': wire_tensor, + 'weights': constant_op.constant([[1., 1., -1.0]]) + } + predictions = fc.linear_model( + features, [wire_cast_weights], sparse_combiner='sum') + bias = get_linear_model_bias() + wire_cast_var = get_linear_model_column_var(wire_cast) + with _initialized_session() as sess: + 
sess.run(wire_cast_var.assign([[10.], [100.], [1000.], [10000.]])) + sess.run(bias.assign([5.])) + self.assertAllClose([[1005.], [-9985.]], predictions.eval()) + + def test_dense_multi_dimension_multi_output(self): + price = fc.numeric_column('price', shape=2) + with ops.Graph().as_default(): + features = {'price': [[1., 2.], [5., 6.]]} + predictions = fc.linear_model(features, [price], units=3) + bias = get_linear_model_bias() + price_var = get_linear_model_column_var(price) + with _initialized_session() as sess: + self.assertAllClose(np.zeros((3,)), bias.eval()) + self.assertAllClose(np.zeros((2, 3)), price_var.eval()) + sess.run(price_var.assign([[1., 2., 3.], [10., 100., 1000.]])) + sess.run(bias.assign([2., 3., 4.])) + self.assertAllClose([[23., 205., 2007.], [67., 613., 6019.]], + predictions.eval()) + + def test_raises_if_shape_mismatch(self): + price = fc.numeric_column('price', shape=2) + with ops.Graph().as_default(): + features = {'price': [[1.], [5.]]} + if ops._USE_C_API: + with self.assertRaisesRegexp( + Exception, + r'Cannot reshape a tensor with 2 elements to shape \[2,2\]'): + predictions = fc.linear_model(features, [price]) + else: + predictions = fc.linear_model(features, [price]) + with _initialized_session(): + with self.assertRaisesRegexp(Exception, 'requested shape has 4'): + predictions.eval() + + def test_dense_reshaping(self): + price = fc.numeric_column('price', shape=[1, 2]) + with ops.Graph().as_default(): + features = {'price': [[[1., 2.]], [[5., 6.]]]} + predictions = fc.linear_model(features, [price]) + bias = get_linear_model_bias() + price_var = get_linear_model_column_var(price) + with _initialized_session() as sess: + self.assertAllClose([0.], bias.eval()) + self.assertAllClose([[0.], [0.]], price_var.eval()) + self.assertAllClose([[0.], [0.]], predictions.eval()) + sess.run(price_var.assign([[10.], [100.]])) + self.assertAllClose([[210.], [650.]], predictions.eval()) + + def test_dense_multi_column(self): + price1 = 
fc.numeric_column('price1', shape=2) + price2 = fc.numeric_column('price2') + with ops.Graph().as_default(): + features = { + 'price1': [[1., 2.], [5., 6.]], + 'price2': [[3.], [4.]] + } + predictions = fc.linear_model(features, [price1, price2]) + bias = get_linear_model_bias() + price1_var = get_linear_model_column_var(price1) + price2_var = get_linear_model_column_var(price2) + with _initialized_session() as sess: + self.assertAllClose([0.], bias.eval()) + self.assertAllClose([[0.], [0.]], price1_var.eval()) + self.assertAllClose([[0.]], price2_var.eval()) + self.assertAllClose([[0.], [0.]], predictions.eval()) + sess.run(price1_var.assign([[10.], [100.]])) + sess.run(price2_var.assign([[1000.]])) + sess.run(bias.assign([7.])) + self.assertAllClose([[3217.], [4657.]], predictions.eval()) + + def test_fills_cols_to_vars(self): + price1 = fc.numeric_column('price1', shape=2) + price2 = fc.numeric_column('price2') + with ops.Graph().as_default(): + features = {'price1': [[1., 2.], [5., 6.]], 'price2': [[3.], [4.]]} + cols_to_vars = {} + fc.linear_model(features, [price1, price2], cols_to_vars=cols_to_vars) + bias = get_linear_model_bias() + price1_var = get_linear_model_column_var(price1) + price2_var = get_linear_model_column_var(price2) + self.assertAllEqual(cols_to_vars['bias'], [bias]) + self.assertAllEqual(cols_to_vars[price1], [price1_var]) + self.assertAllEqual(cols_to_vars[price2], [price2_var]) + + def test_fills_cols_to_vars_partitioned_variables(self): + price1 = fc.numeric_column('price1', shape=2) + price2 = fc.numeric_column('price2', shape=3) + with ops.Graph().as_default(): + features = { + 'price1': [[1., 2.], [6., 7.]], + 'price2': [[3., 4., 5.], [8., 9., 10.]] + } + cols_to_vars = {} + with variable_scope.variable_scope( + 'linear', + partitioner=partitioned_variables.fixed_size_partitioner(2, axis=0)): + fc.linear_model(features, [price1, price2], cols_to_vars=cols_to_vars) + with _initialized_session(): + self.assertEqual([0.], 
cols_to_vars['bias'][0].eval()) + # Partitioning shards the [2, 1] price1 var into 2 [1, 1] Variables. + self.assertAllEqual([[0.]], cols_to_vars[price1][0].eval()) + self.assertAllEqual([[0.]], cols_to_vars[price1][1].eval()) + # Partitioning shards the [3, 1] price2 var into a [2, 1] Variable and + # a [1, 1] Variable. + self.assertAllEqual([[0.], [0.]], cols_to_vars[price2][0].eval()) + self.assertAllEqual([[0.]], cols_to_vars[price2][1].eval()) + + def test_dense_collection(self): + price = fc.numeric_column('price') + with ops.Graph().as_default() as g: + features = {'price': [[1.], [5.]]} + fc.linear_model(features, [price], weight_collections=['my-vars']) + my_vars = g.get_collection('my-vars') + bias = get_linear_model_bias() + price_var = get_linear_model_column_var(price) + self.assertIn(bias, my_vars) + self.assertIn(price_var, my_vars) + + def test_sparse_collection(self): + wire_cast = fc.categorical_column_with_hash_bucket('wire_cast', 4) + with ops.Graph().as_default() as g: + wire_tensor = sparse_tensor.SparseTensor( + values=['omar'], indices=[[0, 0]], dense_shape=[1, 1]) + features = {'wire_cast': wire_tensor} + fc.linear_model( + features, [wire_cast], weight_collections=['my-vars']) + my_vars = g.get_collection('my-vars') + bias = get_linear_model_bias() + wire_cast_var = get_linear_model_column_var(wire_cast) + self.assertIn(bias, my_vars) + self.assertIn(wire_cast_var, my_vars) + + def test_dense_trainable_default(self): + price = fc.numeric_column('price') + with ops.Graph().as_default() as g: + features = {'price': [[1.], [5.]]} + fc.linear_model(features, [price]) + bias = get_linear_model_bias() + price_var = get_linear_model_column_var(price) + trainable_vars = g.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES) + self.assertIn(bias, trainable_vars) + self.assertIn(price_var, trainable_vars) + + def test_sparse_trainable_default(self): + wire_cast = fc.categorical_column_with_hash_bucket('wire_cast', 4) + with ops.Graph().as_default() as 
g: + wire_tensor = sparse_tensor.SparseTensor( + values=['omar'], indices=[[0, 0]], dense_shape=[1, 1]) + features = {'wire_cast': wire_tensor} + fc.linear_model(features, [wire_cast]) + trainable_vars = g.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES) + bias = get_linear_model_bias() + wire_cast_var = get_linear_model_column_var(wire_cast) + self.assertIn(bias, trainable_vars) + self.assertIn(wire_cast_var, trainable_vars) + + def test_dense_trainable_false(self): + price = fc.numeric_column('price') + with ops.Graph().as_default() as g: + features = {'price': [[1.], [5.]]} + fc.linear_model(features, [price], trainable=False) + trainable_vars = g.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES) + self.assertEqual([], trainable_vars) + + def test_sparse_trainable_false(self): + wire_cast = fc.categorical_column_with_hash_bucket('wire_cast', 4) + with ops.Graph().as_default() as g: + wire_tensor = sparse_tensor.SparseTensor( + values=['omar'], indices=[[0, 0]], dense_shape=[1, 1]) + features = {'wire_cast': wire_tensor} + fc.linear_model(features, [wire_cast], trainable=False) + trainable_vars = g.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES) + self.assertEqual([], trainable_vars) + + def test_column_order(self): + price_a = fc.numeric_column('price_a') + price_b = fc.numeric_column('price_b') + wire_cast = fc.categorical_column_with_hash_bucket('wire_cast', 4) + with ops.Graph().as_default() as g: + features = { + 'price_a': [[1.]], + 'price_b': [[3.]], + 'wire_cast': + sparse_tensor.SparseTensor( + values=['omar'], indices=[[0, 0]], dense_shape=[1, 1]) + } + fc.linear_model( + features, [price_a, wire_cast, price_b], + weight_collections=['my-vars']) + my_vars = g.get_collection('my-vars') + self.assertIn('price_a', my_vars[0].name) + self.assertIn('price_b', my_vars[1].name) + self.assertIn('wire_cast', my_vars[2].name) + + with ops.Graph().as_default() as g: + features = { + 'price_a': [[1.]], + 'price_b': [[3.]], + 'wire_cast': + 
sparse_tensor.SparseTensor( + values=['omar'], indices=[[0, 0]], dense_shape=[1, 1]) + } + fc.linear_model( + features, [wire_cast, price_b, price_a], + weight_collections=['my-vars']) + my_vars = g.get_collection('my-vars') + self.assertIn('price_a', my_vars[0].name) + self.assertIn('price_b', my_vars[1].name) + self.assertIn('wire_cast', my_vars[2].name) + + def test_static_batch_size_mismatch(self): + price1 = fc.numeric_column('price1') + price2 = fc.numeric_column('price2') + with ops.Graph().as_default(): + features = { + 'price1': [[1.], [5.], [7.]], # batchsize = 3 + 'price2': [[3.], [4.]] # batchsize = 2 + } + with self.assertRaisesRegexp( + ValueError, + 'Batch size \(first dimension\) of each feature must be same.'): # pylint: disable=anomalous-backslash-in-string + fc.linear_model(features, [price1, price2]) + + def test_subset_of_static_batch_size_mismatch(self): + price1 = fc.numeric_column('price1') + price2 = fc.numeric_column('price2') + price3 = fc.numeric_column('price3') + with ops.Graph().as_default(): + features = { + 'price1': array_ops.placeholder(dtype=dtypes.int64), # batchsize = 3 + 'price2': [[3.], [4.]], # batchsize = 2 + 'price3': [[3.], [4.], [5.]] # batchsize = 3 + } + with self.assertRaisesRegexp( + ValueError, + 'Batch size \(first dimension\) of each feature must be same.'): # pylint: disable=anomalous-backslash-in-string + fc.linear_model(features, [price1, price2, price3]) + + def test_runtime_batch_size_mismatch(self): + price1 = fc.numeric_column('price1') + price2 = fc.numeric_column('price2') + with ops.Graph().as_default(): + features = { + 'price1': array_ops.placeholder(dtype=dtypes.int64), # batchsize = 3 + 'price2': [[3.], [4.]] # batchsize = 2 + } + predictions = fc.linear_model(features, [price1, price2]) + with _initialized_session() as sess: + with self.assertRaisesRegexp(errors.OpError, + 'must have the same size and shape'): + sess.run( + predictions, feed_dict={features['price1']: [[1.], [5.], [7.]]}) + + def 
test_runtime_batch_size_matches(self): + price1 = fc.numeric_column('price1') + price2 = fc.numeric_column('price2') + with ops.Graph().as_default(): + features = { + 'price1': array_ops.placeholder(dtype=dtypes.int64), # batchsize = 2 + 'price2': array_ops.placeholder(dtype=dtypes.int64), # batchsize = 2 + } + predictions = fc.linear_model(features, [price1, price2]) + with _initialized_session() as sess: + sess.run( + predictions, + feed_dict={ + features['price1']: [[1.], [5.]], + features['price2']: [[1.], [5.]], + }) + + def test_with_numpy_input_fn(self): + price = fc.numeric_column('price') + price_buckets = fc.bucketized_column(price, boundaries=[0., 10., 100.,]) + body_style = fc.categorical_column_with_vocabulary_list( + 'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan']) + + input_fn = numpy_io.numpy_input_fn( + x={ + 'price': np.array([-1., 2., 13., 104.]), + 'body-style': np.array(['sedan', 'hardtop', 'wagon', 'sedan']), + }, + batch_size=2, + shuffle=False) + features = input_fn() + net = fc.linear_model(features, [price_buckets, body_style]) + # self.assertEqual(1 + 3 + 5, net.shape[1]) + with _initialized_session() as sess: + coord = coordinator.Coordinator() + threads = queue_runner_impl.start_queue_runners(sess, coord=coord) + + bias = get_linear_model_bias() + price_buckets_var = get_linear_model_column_var(price_buckets) + body_style_var = get_linear_model_column_var(body_style) + + sess.run(price_buckets_var.assign([[10.], [100.], [1000.], [10000.]])) + sess.run(body_style_var.assign([[-10.], [-100.], [-1000.]])) + sess.run(bias.assign([5.])) + + self.assertAllClose([[10 - 1000 + 5.], [100 - 10 + 5.]], sess.run(net)) + + coord.request_stop() + coord.join(threads) + + def test_with_1d_sparse_tensor(self): + price = fc.numeric_column('price') + price_buckets = fc.bucketized_column(price, boundaries=[0., 10., 100.,]) + body_style = fc.categorical_column_with_vocabulary_list( + 'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan']) + 
+ # Provides 1-dim tensor and dense tensor. + features = { + 'price': constant_op.constant([-1., 12.,]), + 'body-style': sparse_tensor.SparseTensor( + indices=((0,), (1,)), + values=('sedan', 'hardtop'), + dense_shape=(2,)), + } + self.assertEqual(1, features['price'].shape.ndims) + self.assertEqual(1, features['body-style'].dense_shape.get_shape()[0]) + + net = fc.linear_model(features, [price_buckets, body_style]) + with _initialized_session() as sess: + bias = get_linear_model_bias() + price_buckets_var = get_linear_model_column_var(price_buckets) + body_style_var = get_linear_model_column_var(body_style) + + sess.run(price_buckets_var.assign([[10.], [100.], [1000.], [10000.]])) + sess.run(body_style_var.assign([[-10.], [-100.], [-1000.]])) + sess.run(bias.assign([5.])) + + self.assertAllClose([[10 - 1000 + 5.], [1000 - 10 + 5.]], sess.run(net)) + + def test_with_1d_unknown_shape_sparse_tensor(self): + price = fc.numeric_column('price') + price_buckets = fc.bucketized_column(price, boundaries=[0., 10., 100.,]) + body_style = fc.categorical_column_with_vocabulary_list( + 'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan']) + country = fc.categorical_column_with_vocabulary_list( + 'country', vocabulary_list=['US', 'JP', 'CA']) + + # Provides 1-dim tensor and dense tensor. 
+ features = { + 'price': array_ops.placeholder(dtypes.float32), + 'body-style': array_ops.sparse_placeholder(dtypes.string), + 'country': array_ops.placeholder(dtypes.string), + } + self.assertIsNone(features['price'].shape.ndims) + self.assertIsNone(features['body-style'].get_shape().ndims) + + price_data = np.array([-1., 12.]) + body_style_data = sparse_tensor.SparseTensorValue( + indices=((0,), (1,)), + values=('sedan', 'hardtop'), + dense_shape=(2,)) + country_data = np.array(['US', 'CA']) + + net = fc.linear_model(features, [price_buckets, body_style, country]) + bias = get_linear_model_bias() + price_buckets_var = get_linear_model_column_var(price_buckets) + body_style_var = get_linear_model_column_var(body_style) + with _initialized_session() as sess: + sess.run(price_buckets_var.assign([[10.], [100.], [1000.], [10000.]])) + sess.run(body_style_var.assign([[-10.], [-100.], [-1000.]])) + sess.run(bias.assign([5.])) + self.assertAllClose([[10 - 1000 + 5.], [1000 - 10 + 5.]], + sess.run( + net, + feed_dict={ + features['price']: price_data, + features['body-style']: body_style_data, + features['country']: country_data + })) -def get_linear_model_bias(): - with variable_scope.variable_scope('linear_model', reuse=True): - return variable_scope.get_variable('bias_weights') + def test_with_rank_0_feature(self): + price = fc.numeric_column('price') + features = { + 'price': constant_op.constant(0), + } + self.assertEqual(0, features['price'].shape.ndims) + # Static rank 0 should fail + with self.assertRaisesRegexp(ValueError, 'Feature .* cannot have rank 0'): + fc.linear_model(features, [price]) -def get_linear_model_column_var(column): - return ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES, - 'linear_model/' + column.name)[0] + # Dynamic rank 0 should fail + features = { + 'price': array_ops.placeholder(dtypes.float32), + } + net = fc.linear_model(features, [price]) + self.assertEqual(1, net.shape[1]) + with _initialized_session() as sess: + with 
self.assertRaisesOpError('Feature .* cannot have rank 0'): + sess.run(net, feed_dict={features['price']: np.array(1)}) @test_util.with_c_api -class LinearModelTest(test.TestCase): +class _LinearModelTest(test.TestCase): def test_raises_if_empty_feature_columns(self): with self.assertRaisesRegexp(ValueError, 'feature_columns must not be empty'): - fc.linear_model(features={}, feature_columns=[]) + get_keras_linear_model_predictions(features={}, feature_columns=[]) def test_should_be_feature_column(self): with self.assertRaisesRegexp(ValueError, 'must be a _FeatureColumn'): - fc.linear_model(features={'a': [[0]]}, feature_columns='NotSupported') + get_keras_linear_model_predictions( + features={'a': [[0]]}, feature_columns='NotSupported') def test_should_be_dense_or_categorical_column(self): @@ -1100,7 +1953,7 @@ class LinearModelTest(test.TestCase): with self.assertRaisesRegexp( ValueError, 'must be either a _DenseColumn or _CategoricalColumn'): - fc.linear_model( + get_keras_linear_model_predictions( features={'a': [[0]]}, feature_columns=[NotSupportedColumn()]) def test_does_not_support_dict_columns(self): @@ -1112,7 +1965,7 @@ class LinearModelTest(test.TestCase): def test_raises_if_duplicate_name(self): with self.assertRaisesRegexp( ValueError, 'Duplicate feature column name found for columns'): - fc.linear_model( + get_keras_linear_model_predictions( features={'a': [[0]]}, feature_columns=[fc.numeric_column('a'), fc.numeric_column('a')]) @@ -1121,7 +1974,7 @@ class LinearModelTest(test.TestCase): price = fc.numeric_column('price') with ops.Graph().as_default(): features = {'price': [[1.], [5.]]} - predictions = fc.linear_model(features, [price]) + predictions = get_keras_linear_model_predictions(features, [price]) bias = get_linear_model_bias() price_var = get_linear_model_column_var(price) with _initialized_session() as sess: @@ -1138,7 +1991,7 @@ class LinearModelTest(test.TestCase): indices=[[0, 0], [1, 0], [1, 1]], dense_shape=[2, 2]) features = 
{'wire_cast': wire_tensor} - predictions = fc.linear_model(features, [wire_cast]) + predictions = get_keras_linear_model_predictions(features, [wire_cast]) bias = get_linear_model_bias() wire_cast_var = get_linear_model_column_var(wire_cast) with _initialized_session() as sess: @@ -1157,7 +2010,8 @@ class LinearModelTest(test.TestCase): indices=[[0, 0], [1, 0], [1, 1]], dense_shape=[2, 2]) features = {'wire_cast': wire_tensor, 'price': [[1.], [5.]]} - predictions = fc.linear_model(features, [wire_cast, price]) + predictions = get_keras_linear_model_predictions(features, + [wire_cast, price]) bias = get_linear_model_bias() wire_cast_var = get_linear_model_column_var(wire_cast) price_var = get_linear_model_column_var(price) @@ -1187,7 +2041,9 @@ class LinearModelTest(test.TestCase): def _variable_shape(self): raise ValueError('Should not use this method.') - def _get_dense_tensor(self, inputs, weight_collections=None, + def _get_dense_tensor(self, + inputs, + weight_collections=None, trainable=None): raise ValueError('Should not use this method.') @@ -1195,7 +2051,9 @@ class LinearModelTest(test.TestCase): def _num_buckets(self): return 4 - def _get_sparse_tensors(self, inputs, weight_collections=None, + def _get_sparse_tensors(self, + inputs, + weight_collections=None, trainable=None): sp_tensor = sparse_tensor.SparseTensor( indices=[[0, 0], [1, 0], [1, 1]], @@ -1210,13 +2068,15 @@ class LinearModelTest(test.TestCase): indices=[[0, 0], [1, 0], [1, 1]], dense_shape=[2, 2]) features = {dense_and_sparse_column.name: sp_tensor} - predictions = fc.linear_model(features, [dense_and_sparse_column]) + predictions = get_keras_linear_model_predictions( + features, [dense_and_sparse_column]) bias = get_linear_model_bias() dense_and_sparse_column_var = get_linear_model_column_var( dense_and_sparse_column) with _initialized_session() as sess: - sess.run(dense_and_sparse_column_var.assign( - [[10.], [100.], [1000.], [10000.]])) + sess.run( + 
dense_and_sparse_column_var.assign([[10.], [100.], [1000.], + [10000.]])) sess.run(bias.assign([5.])) self.assertAllClose([[1005.], [10015.]], predictions.eval()) @@ -1224,7 +2084,8 @@ class LinearModelTest(test.TestCase): price = fc.numeric_column('price') with ops.Graph().as_default(): features = {'price': [[1.], [5.]]} - predictions = fc.linear_model(features, [price], units=3) + predictions = get_keras_linear_model_predictions( + features, [price], units=3) bias = get_linear_model_bias() price_var = get_linear_model_column_var(price) with _initialized_session() as sess: @@ -1243,16 +2104,17 @@ class LinearModelTest(test.TestCase): indices=[[0, 0], [1, 0], [1, 1]], dense_shape=[2, 2]) features = {'wire_cast': wire_tensor} - predictions = fc.linear_model(features, [wire_cast], units=3) + predictions = get_keras_linear_model_predictions( + features, [wire_cast], units=3) bias = get_linear_model_bias() wire_cast_var = get_linear_model_column_var(wire_cast) with _initialized_session() as sess: self.assertAllClose(np.zeros((3,)), bias.eval()) self.assertAllClose(np.zeros((4, 3)), wire_cast_var.eval()) sess.run( - wire_cast_var.assign([[10., 11., 12.], [100., 110., 120.], [ - 1000., 1100., 1200. 
- ], [10000., 11000., 12000.]])) + wire_cast_var.assign([[10., 11., 12.], [100., 110., 120.], + [1000., 1100., + 1200.], [10000., 11000., 12000.]])) sess.run(bias.assign([5., 6., 7.])) self.assertAllClose([[1005., 1106., 1207.], [10015., 11017., 12019.]], predictions.eval()) @@ -1261,7 +2123,7 @@ class LinearModelTest(test.TestCase): price = fc.numeric_column('price', shape=2) with ops.Graph().as_default(): features = {'price': [[1., 2.], [5., 6.]]} - predictions = fc.linear_model(features, [price]) + predictions = get_keras_linear_model_predictions(features, [price]) price_var = get_linear_model_column_var(price) with _initialized_session() as sess: self.assertAllClose([[0.], [0.]], price_var.eval()) @@ -1277,7 +2139,7 @@ class LinearModelTest(test.TestCase): indices=[[0, 0, 0], [0, 1, 0], [1, 0, 0], [1, 0, 1]], dense_shape=[2, 2, 2]) features = {'wire_cast': wire_tensor} - predictions = fc.linear_model(features, [wire_cast]) + predictions = get_keras_linear_model_predictions(features, [wire_cast]) wire_cast_var = get_linear_model_column_var(wire_cast) with _initialized_session() as sess: self.assertAllClose(np.zeros((4, 1)), wire_cast_var.eval()) @@ -1297,7 +2159,7 @@ class LinearModelTest(test.TestCase): indices=[[0, 0], [1, 0], [1, 1]], dense_shape=[2, 2]) features = {'wire_cast': wire_tensor} - predictions = fc.linear_model( + predictions = get_keras_linear_model_predictions( features, [wire_cast], sparse_combiner='mean') bias = get_linear_model_bias() wire_cast_var = get_linear_model_column_var(wire_cast) @@ -1310,7 +2172,8 @@ class LinearModelTest(test.TestCase): price = fc.numeric_column('price', shape=2) with ops.Graph().as_default(): features = {'price': [[1., 2.], [5., 6.]]} - predictions = fc.linear_model(features, [price], units=3) + predictions = get_keras_linear_model_predictions( + features, [price], units=3) bias = get_linear_model_bias() price_var = get_linear_model_column_var(price) with _initialized_session() as sess: @@ -1329,9 +2192,9 @@ class 
LinearModelTest(test.TestCase): with self.assertRaisesRegexp( Exception, r'Cannot reshape a tensor with 2 elements to shape \[2,2\]'): - predictions = fc.linear_model(features, [price]) + predictions = get_keras_linear_model_predictions(features, [price]) else: - predictions = fc.linear_model(features, [price]) + predictions = get_keras_linear_model_predictions(features, [price]) with _initialized_session(): with self.assertRaisesRegexp(Exception, 'requested shape has 4'): predictions.eval() @@ -1340,7 +2203,7 @@ class LinearModelTest(test.TestCase): price = fc.numeric_column('price', shape=[1, 2]) with ops.Graph().as_default(): features = {'price': [[[1., 2.]], [[5., 6.]]]} - predictions = fc.linear_model(features, [price]) + predictions = get_keras_linear_model_predictions(features, [price]) bias = get_linear_model_bias() price_var = get_linear_model_column_var(price) with _initialized_session() as sess: @@ -1354,11 +2217,9 @@ class LinearModelTest(test.TestCase): price1 = fc.numeric_column('price1', shape=2) price2 = fc.numeric_column('price2') with ops.Graph().as_default(): - features = { - 'price1': [[1., 2.], [5., 6.]], - 'price2': [[3.], [4.]] - } - predictions = fc.linear_model(features, [price1, price2]) + features = {'price1': [[1., 2.], [5., 6.]], 'price2': [[3.], [4.]]} + predictions = get_keras_linear_model_predictions(features, + [price1, price2]) bias = get_linear_model_bias() price1_var = get_linear_model_column_var(price1) price2_var = get_linear_model_column_var(price2) @@ -1378,7 +2239,8 @@ class LinearModelTest(test.TestCase): with ops.Graph().as_default(): features = {'price1': [[1., 2.], [5., 6.]], 'price2': [[3.], [4.]]} cols_to_vars = {} - fc.linear_model(features, [price1, price2], cols_to_vars=cols_to_vars) + get_keras_linear_model_predictions( + features, [price1, price2], cols_to_vars=cols_to_vars) bias = get_linear_model_bias() price1_var = get_linear_model_column_var(price1) price2_var = get_linear_model_column_var(price2) @@ -1398,7 
+2260,8 @@ class LinearModelTest(test.TestCase): with variable_scope.variable_scope( 'linear', partitioner=partitioned_variables.fixed_size_partitioner(2, axis=0)): - fc.linear_model(features, [price1, price2], cols_to_vars=cols_to_vars) + get_keras_linear_model_predictions( + features, [price1, price2], cols_to_vars=cols_to_vars) with _initialized_session(): self.assertEqual([0.], cols_to_vars['bias'][0].eval()) # Partitioning shards the [2, 1] price1 var into 2 [1, 1] Variables. @@ -1413,7 +2276,8 @@ class LinearModelTest(test.TestCase): price = fc.numeric_column('price') with ops.Graph().as_default() as g: features = {'price': [[1.], [5.]]} - fc.linear_model(features, [price], weight_collections=['my-vars']) + get_keras_linear_model_predictions( + features, [price], weight_collections=['my-vars']) my_vars = g.get_collection('my-vars') bias = get_linear_model_bias() price_var = get_linear_model_column_var(price) @@ -1426,7 +2290,7 @@ class LinearModelTest(test.TestCase): wire_tensor = sparse_tensor.SparseTensor( values=['omar'], indices=[[0, 0]], dense_shape=[1, 1]) features = {'wire_cast': wire_tensor} - fc.linear_model( + get_keras_linear_model_predictions( features, [wire_cast], weight_collections=['my-vars']) my_vars = g.get_collection('my-vars') bias = get_linear_model_bias() @@ -1438,7 +2302,7 @@ class LinearModelTest(test.TestCase): price = fc.numeric_column('price') with ops.Graph().as_default() as g: features = {'price': [[1.], [5.]]} - fc.linear_model(features, [price]) + get_keras_linear_model_predictions(features, [price]) bias = get_linear_model_bias() price_var = get_linear_model_column_var(price) trainable_vars = g.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES) @@ -1451,7 +2315,7 @@ class LinearModelTest(test.TestCase): wire_tensor = sparse_tensor.SparseTensor( values=['omar'], indices=[[0, 0]], dense_shape=[1, 1]) features = {'wire_cast': wire_tensor} - fc.linear_model(features, [wire_cast]) + get_keras_linear_model_predictions(features, 
[wire_cast]) trainable_vars = g.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES) bias = get_linear_model_bias() wire_cast_var = get_linear_model_column_var(wire_cast) @@ -1462,7 +2326,7 @@ class LinearModelTest(test.TestCase): price = fc.numeric_column('price') with ops.Graph().as_default() as g: features = {'price': [[1.], [5.]]} - fc.linear_model(features, [price], trainable=False) + get_keras_linear_model_predictions(features, [price], trainable=False) trainable_vars = g.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES) self.assertEqual([], trainable_vars) @@ -1472,7 +2336,7 @@ class LinearModelTest(test.TestCase): wire_tensor = sparse_tensor.SparseTensor( values=['omar'], indices=[[0, 0]], dense_shape=[1, 1]) features = {'wire_cast': wire_tensor} - fc.linear_model(features, [wire_cast], trainable=False) + get_keras_linear_model_predictions(features, [wire_cast], trainable=False) trainable_vars = g.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES) self.assertEqual([], trainable_vars) @@ -1488,7 +2352,7 @@ class LinearModelTest(test.TestCase): sparse_tensor.SparseTensor( values=['omar'], indices=[[0, 0]], dense_shape=[1, 1]) } - fc.linear_model( + get_keras_linear_model_predictions( features, [price_a, wire_cast, price_b], weight_collections=['my-vars']) my_vars = g.get_collection('my-vars') @@ -1504,7 +2368,7 @@ class LinearModelTest(test.TestCase): sparse_tensor.SparseTensor( values=['omar'], indices=[[0, 0]], dense_shape=[1, 1]) } - fc.linear_model( + get_keras_linear_model_predictions( features, [wire_cast, price_b, price_a], weight_collections=['my-vars']) my_vars = g.get_collection('my-vars') @@ -1523,7 +2387,7 @@ class LinearModelTest(test.TestCase): with self.assertRaisesRegexp( ValueError, 'Batch size \(first dimension\) of each feature must be same.'): # pylint: disable=anomalous-backslash-in-string - fc.linear_model(features, [price1, price2]) + get_keras_linear_model_predictions(features, [price1, price2]) def 
test_subset_of_static_batch_size_mismatch(self): price1 = fc.numeric_column('price1') @@ -1538,7 +2402,7 @@ class LinearModelTest(test.TestCase): with self.assertRaisesRegexp( ValueError, 'Batch size \(first dimension\) of each feature must be same.'): # pylint: disable=anomalous-backslash-in-string - fc.linear_model(features, [price1, price2, price3]) + get_keras_linear_model_predictions(features, [price1, price2, price3]) def test_runtime_batch_size_mismatch(self): price1 = fc.numeric_column('price1') @@ -1548,7 +2412,8 @@ class LinearModelTest(test.TestCase): 'price1': array_ops.placeholder(dtype=dtypes.int64), # batchsize = 3 'price2': [[3.], [4.]] # batchsize = 2 } - predictions = fc.linear_model(features, [price1, price2]) + predictions = get_keras_linear_model_predictions(features, + [price1, price2]) with _initialized_session() as sess: with self.assertRaisesRegexp(errors.OpError, 'must have the same size and shape'): @@ -1563,7 +2428,8 @@ class LinearModelTest(test.TestCase): 'price1': array_ops.placeholder(dtype=dtypes.int64), # batchsize = 2 'price2': array_ops.placeholder(dtype=dtypes.int64), # batchsize = 2 } - predictions = fc.linear_model(features, [price1, price2]) + predictions = get_keras_linear_model_predictions(features, + [price1, price2]) with _initialized_session() as sess: sess.run( predictions, @@ -1574,7 +2440,12 @@ class LinearModelTest(test.TestCase): def test_with_numpy_input_fn(self): price = fc.numeric_column('price') - price_buckets = fc.bucketized_column(price, boundaries=[0., 10., 100.,]) + price_buckets = fc.bucketized_column( + price, boundaries=[ + 0., + 10., + 100., + ]) body_style = fc.categorical_column_with_vocabulary_list( 'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan']) @@ -1586,7 +2457,8 @@ class LinearModelTest(test.TestCase): batch_size=2, shuffle=False) features = input_fn() - net = fc.linear_model(features, [price_buckets, body_style]) + net = get_keras_linear_model_predictions(features, + [price_buckets, 
body_style]) # self.assertEqual(1 + 3 + 5, net.shape[1]) with _initialized_session() as sess: coord = coordinator.Coordinator() @@ -1607,22 +2479,33 @@ class LinearModelTest(test.TestCase): def test_with_1d_sparse_tensor(self): price = fc.numeric_column('price') - price_buckets = fc.bucketized_column(price, boundaries=[0., 10., 100.,]) + price_buckets = fc.bucketized_column( + price, boundaries=[ + 0., + 10., + 100., + ]) body_style = fc.categorical_column_with_vocabulary_list( 'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan']) # Provides 1-dim tensor and dense tensor. features = { - 'price': constant_op.constant([-1., 12.,]), - 'body-style': sparse_tensor.SparseTensor( - indices=((0,), (1,)), - values=('sedan', 'hardtop'), - dense_shape=(2,)), + 'price': + constant_op.constant([ + -1., + 12., + ]), + 'body-style': + sparse_tensor.SparseTensor( + indices=((0,), (1,)), + values=('sedan', 'hardtop'), + dense_shape=(2,)), } self.assertEqual(1, features['price'].shape.ndims) self.assertEqual(1, features['body-style'].dense_shape.get_shape()[0]) - net = fc.linear_model(features, [price_buckets, body_style]) + net = get_keras_linear_model_predictions(features, + [price_buckets, body_style]) with _initialized_session() as sess: bias = get_linear_model_bias() price_buckets_var = get_linear_model_column_var(price_buckets) @@ -1636,7 +2519,12 @@ class LinearModelTest(test.TestCase): def test_with_1d_unknown_shape_sparse_tensor(self): price = fc.numeric_column('price') - price_buckets = fc.bucketized_column(price, boundaries=[0., 10., 100.,]) + price_buckets = fc.bucketized_column( + price, boundaries=[ + 0., + 10., + 100., + ]) body_style = fc.categorical_column_with_vocabulary_list( 'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan']) country = fc.categorical_column_with_vocabulary_list( @@ -1653,12 +2541,11 @@ class LinearModelTest(test.TestCase): price_data = np.array([-1., 12.]) body_style_data = sparse_tensor.SparseTensorValue( - indices=((0,), (1,)), 
- values=('sedan', 'hardtop'), - dense_shape=(2,)) + indices=((0,), (1,)), values=('sedan', 'hardtop'), dense_shape=(2,)) country_data = np.array(['US', 'CA']) - net = fc.linear_model(features, [price_buckets, body_style, country]) + net = get_keras_linear_model_predictions( + features, [price_buckets, body_style, country]) bias = get_linear_model_bias() price_buckets_var = get_linear_model_column_var(price_buckets) body_style_var = get_linear_model_column_var(body_style) @@ -1685,13 +2572,13 @@ class LinearModelTest(test.TestCase): # Static rank 0 should fail with self.assertRaisesRegexp(ValueError, 'Feature .* cannot have rank 0'): - fc.linear_model(features, [price]) + get_keras_linear_model_predictions(features, [price]) # Dynamic rank 0 should fail features = { 'price': array_ops.placeholder(dtypes.float32), } - net = fc.linear_model(features, [price]) + net = get_keras_linear_model_predictions(features, [price]) self.assertEqual(1, net.shape[1]) with _initialized_session() as sess: with self.assertRaisesOpError('Feature .* cannot have rank 0'): @@ -2035,6 +2922,114 @@ class FunctionalInputLayerTest(test.TestCase): features['price2']: [[1.], [5.]], }) + def test_multiple_layers_with_same_embedding_column(self): + some_sparse_column = fc.categorical_column_with_hash_bucket( + 'sparse_feature', hash_bucket_size=5) + some_embedding_column = fc.embedding_column( + some_sparse_column, dimension=10) + + with ops.Graph().as_default(): + features = { + 'sparse_feature': [['a'], ['x']], + } + all_cols = [some_embedding_column] + fc.input_layer(features, all_cols) + fc.input_layer(features, all_cols) + # Make sure that 2 variables get created in this case. 
+ self.assertEqual(2, len( + ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES))) + expected_var_names = [ + 'input_layer/sparse_feature_embedding/embedding_weights:0', + 'input_layer_1/sparse_feature_embedding/embedding_weights:0' + ] + self.assertItemsEqual( + expected_var_names, + [v.name for v in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)]) + + def test_multiple_layers_with_same_shared_embedding_column(self): + categorical_column_a = fc.categorical_column_with_identity( + key='aaa', num_buckets=3) + categorical_column_b = fc.categorical_column_with_identity( + key='bbb', num_buckets=3) + embedding_dimension = 2 + embedding_column_b, embedding_column_a = fc.shared_embedding_columns( + [categorical_column_b, categorical_column_a], + dimension=embedding_dimension) + + with ops.Graph().as_default(): + features = { + 'aaa': + sparse_tensor.SparseTensor( + indices=((0, 0), (1, 0), (1, 1)), + values=(0, 1, 0), + dense_shape=(2, 2)), + 'bbb': + sparse_tensor.SparseTensor( + indices=((0, 0), (1, 0), (1, 1)), + values=(1, 2, 1), + dense_shape=(2, 2)), + } + all_cols = [embedding_column_a, embedding_column_b] + fc.input_layer(features, all_cols) + fc.input_layer(features, all_cols) + # Make sure that only 1 variable gets created in this case. 
+ self.assertEqual(1, len( + ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES))) + self.assertItemsEqual( + ['input_layer/aaa_bbb_shared_embedding/embedding_weights:0'], + [v.name for v in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)]) + + def test_multiple_layers_with_same_shared_embedding_column_diff_graphs(self): + categorical_column_a = fc.categorical_column_with_identity( + key='aaa', num_buckets=3) + categorical_column_b = fc.categorical_column_with_identity( + key='bbb', num_buckets=3) + embedding_dimension = 2 + embedding_column_b, embedding_column_a = fc.shared_embedding_columns( + [categorical_column_b, categorical_column_a], + dimension=embedding_dimension) + all_cols = [embedding_column_a, embedding_column_b] + + with ops.Graph().as_default(): + features = { + 'aaa': + sparse_tensor.SparseTensor( + indices=((0, 0), (1, 0), (1, 1)), + values=(0, 1, 0), + dense_shape=(2, 2)), + 'bbb': + sparse_tensor.SparseTensor( + indices=((0, 0), (1, 0), (1, 1)), + values=(1, 2, 1), + dense_shape=(2, 2)), + } + fc.input_layer(features, all_cols) + # Make sure that only 1 variable gets created in this case. + self.assertEqual(1, len( + ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES))) + + with ops.Graph().as_default(): + features1 = { + 'aaa': + sparse_tensor.SparseTensor( + indices=((0, 0), (1, 0), (1, 1)), + values=(0, 1, 0), + dense_shape=(2, 2)), + 'bbb': + sparse_tensor.SparseTensor( + indices=((0, 0), (1, 0), (1, 1)), + values=(1, 2, 1), + dense_shape=(2, 2)), + } + + fc.input_layer(features1, all_cols) + # Make sure that only 1 variable gets created in this case. 
+ self.assertEqual(1, len( + ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES))) + self.assertItemsEqual( + ['input_layer/aaa_bbb_shared_embedding/embedding_weights:0'], + [v.name for v in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)]) + def test_with_numpy_input_fn(self): embedding_values = ( (1., 2., 3., 4., 5.), # id 0 @@ -2715,6 +3710,32 @@ class VocabularyFileCategoricalColumnTest(test.TestCase): # 'skywalker' -> 3, 'omar' -> 0: wire_var[3] + wire_var[0] = 4+1 = 5 self.assertAllClose(((3.,), (5.,)), predictions.eval()) + def test_keras_linear_model(self): + wire_column = fc.categorical_column_with_vocabulary_file( + key='wire', + vocabulary_file=self._wire_vocabulary_file_name, + vocabulary_size=self._wire_vocabulary_size, + num_oov_buckets=1) + self.assertEqual(4, wire_column._num_buckets) + with ops.Graph().as_default(): + predictions = get_keras_linear_model_predictions({ + wire_column.name: + sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1)), + values=('marlo', 'skywalker', 'omar'), + dense_shape=(2, 2)) + }, (wire_column,)) + bias = get_linear_model_bias() + wire_var = get_linear_model_column_var(wire_column) + with _initialized_session(): + self.assertAllClose((0.,), bias.eval()) + self.assertAllClose(((0.,), (0.,), (0.,), (0.,)), wire_var.eval()) + self.assertAllClose(((0.,), (0.,)), predictions.eval()) + wire_var.assign(((1.,), (2.,), (3.,), (4.,))).eval() + # 'marlo' -> 2: wire_var[2] = 3 + # 'skywalker' -> 3, 'omar' -> 0: wire_var[3] + wire_var[0] = 4+1 = 5 + self.assertAllClose(((3.,), (5.,)), predictions.eval()) + class VocabularyListCategoricalColumnTest(test.TestCase): @@ -3082,6 +4103,31 @@ class VocabularyListCategoricalColumnTest(test.TestCase): # 'skywalker' -> 3, 'omar' -> 0: wire_var[3] + wire_var[0] = 4+1 = 5 self.assertAllClose(((3.,), (5.,)), predictions.eval()) + def test_keras_linear_model(self): + wire_column = fc.categorical_column_with_vocabulary_list( + key='aaa', + vocabulary_list=('omar', 'stringer', 
'marlo'), + num_oov_buckets=1) + self.assertEqual(4, wire_column._num_buckets) + with ops.Graph().as_default(): + predictions = get_keras_linear_model_predictions({ + wire_column.name: + sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1)), + values=('marlo', 'skywalker', 'omar'), + dense_shape=(2, 2)) + }, (wire_column,)) + bias = get_linear_model_bias() + wire_var = get_linear_model_column_var(wire_column) + with _initialized_session(): + self.assertAllClose((0.,), bias.eval()) + self.assertAllClose(((0.,), (0.,), (0.,), (0.,)), wire_var.eval()) + self.assertAllClose(((0.,), (0.,)), predictions.eval()) + wire_var.assign(((1.,), (2.,), (3.,), (4.,))).eval() + # 'marlo' -> 2: wire_var[2] = 3 + # 'skywalker' -> 3, 'omar' -> 0: wire_var[3] + wire_var[0] = 4+1 = 5 + self.assertAllClose(((3.,), (5.,)), predictions.eval()) + class IdentityCategoricalColumnTest(test.TestCase): @@ -3285,15 +4331,37 @@ class IdentityCategoricalColumnTest(test.TestCase): input_shape: (2, 2), })) - def test_linear_model(self): + def test_linear_model(self): + column = fc.categorical_column_with_identity(key='aaa', num_buckets=3) + self.assertEqual(3, column._num_buckets) + with ops.Graph().as_default(): + predictions = fc.linear_model({ + column.name: sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1)), + values=(0, 2, 1), + dense_shape=(2, 2)) + }, (column,)) + bias = get_linear_model_bias() + weight_var = get_linear_model_column_var(column) + with _initialized_session(): + self.assertAllClose((0.,), bias.eval()) + self.assertAllClose(((0.,), (0.,), (0.,)), weight_var.eval()) + self.assertAllClose(((0.,), (0.,)), predictions.eval()) + weight_var.assign(((1.,), (2.,), (3.,))).eval() + # weight_var[0] = 1 + # weight_var[2] + weight_var[1] = 3+2 = 5 + self.assertAllClose(((1.,), (5.,)), predictions.eval()) + + def test_keras_linear_model(self): column = fc.categorical_column_with_identity(key='aaa', num_buckets=3) self.assertEqual(3, column._num_buckets) with 
ops.Graph().as_default(): - predictions = fc.linear_model({ - column.name: sparse_tensor.SparseTensorValue( - indices=((0, 0), (1, 0), (1, 1)), - values=(0, 2, 1), - dense_shape=(2, 2)) + predictions = get_keras_linear_model_predictions({ + column.name: + sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1)), + values=(0, 2, 1), + dense_shape=(2, 2)) }, (column,)) bias = get_linear_model_bias() weight_var = get_linear_model_column_var(column) @@ -3537,6 +4605,25 @@ class IndicatorColumnTest(test.TestCase): weight_var.assign([[1.], [2.], [3.], [4.]]).eval() self.assertAllClose([[2. + 3.]], predictions.eval()) + def test_keras_linear_model(self): + animal = fc.indicator_column( + fc.categorical_column_with_identity('animal', num_buckets=4)) + with ops.Graph().as_default(): + features = { + 'animal': + sparse_tensor.SparseTensor( + indices=[[0, 0], [0, 1]], values=[1, 2], dense_shape=[1, 2]) + } + + predictions = get_keras_linear_model_predictions(features, [animal]) + weight_var = get_linear_model_column_var(animal) + with _initialized_session(): + # All should be zero-initialized. + self.assertAllClose([[0.], [0.], [0.], [0.]], weight_var.eval()) + self.assertAllClose([[0.]], predictions.eval()) + weight_var.assign([[1.], [2.], [3.], [4.]]).eval() + self.assertAllClose([[2. 
+ 3.]], predictions.eval()) + def test_input_layer(self): animal = fc.indicator_column( fc.categorical_column_with_identity('animal', num_buckets=4)) @@ -3562,7 +4649,6 @@ class EmbeddingColumnTest(test.TestCase): self.assertIs(categorical_column, embedding_column.categorical_column) self.assertEqual(embedding_dimension, embedding_column.dimension) self.assertEqual('mean', embedding_column.combiner) - self.assertIsNotNone(embedding_column.initializer) self.assertIsNone(embedding_column.ckpt_to_load_from) self.assertIsNone(embedding_column.tensor_name_in_ckpt) self.assertIsNone(embedding_column.max_norm) @@ -3587,7 +4673,6 @@ class EmbeddingColumnTest(test.TestCase): self.assertIs(categorical_column, embedding_column.categorical_column) self.assertEqual(embedding_dimension, embedding_column.dimension) self.assertEqual('my_combiner', embedding_column.combiner) - self.assertEqual('my_initializer', embedding_column.initializer()) self.assertEqual('my_ckpt', embedding_column.ckpt_to_load_from) self.assertEqual('my_ckpt_tensor', embedding_column.tensor_name_in_ckpt) self.assertEqual(42., embedding_column.max_norm) @@ -3618,7 +4703,6 @@ class EmbeddingColumnTest(test.TestCase): self.assertEqual(embedding_dimension, embedding_column.dimension) self.assertEqual('my_combiner', embedding_column.combiner) - self.assertEqual('my_initializer', embedding_column.initializer()) self.assertEqual('my_ckpt', embedding_column.ckpt_to_load_from) self.assertEqual('my_ckpt_tensor', embedding_column.tensor_name_in_ckpt) self.assertEqual(42., embedding_column.max_norm) @@ -3727,8 +4811,8 @@ class EmbeddingColumnTest(test.TestCase): # Assert expected embedding variable and lookups. 
global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) - self.assertItemsEqual( - ('embedding_weights:0',), tuple([v.name for v in global_vars])) + self.assertItemsEqual(('embedding_weights:0',), + tuple([v.name for v in global_vars])) with _initialized_session(): self.assertAllEqual(embedding_values, global_vars[0].eval()) self.assertAllEqual(expected_lookups, embedding_lookup.eval()) @@ -3787,8 +4871,8 @@ class EmbeddingColumnTest(test.TestCase): # Assert expected embedding variable and lookups. global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) - self.assertItemsEqual( - ('embedding_weights:0',), tuple([v.name for v in global_vars])) + self.assertItemsEqual(('embedding_weights:0',), + tuple([v.name for v in global_vars])) with _initialized_session(): self.assertAllEqual(embedding_values, global_vars[0].eval()) self.assertAllEqual(expected_lookups, embedding_lookup.eval()) @@ -3815,8 +4899,9 @@ class EmbeddingColumnTest(test.TestCase): }), weight_collections=('my_vars',)) # Assert expected embedding variable and lookups. - self.assertItemsEqual( - [], ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)) + global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) + self.assertItemsEqual(('embedding_weights:0',), + tuple([v.name for v in global_vars])) my_vars = ops.get_collection('my_vars') self.assertItemsEqual( ('embedding_weights:0',), tuple([v.name for v in my_vars])) @@ -4023,6 +5108,82 @@ class EmbeddingColumnTest(test.TestCase): # = [4*7 + 6*11, 4*2 + 6*3.5, 4*0 + 6*0, 4*3 + 6*5] = [94, 29, 0, 42] self.assertAllClose(((94.,), (29.,), (0.,), (42.,)), predictions.eval()) + def test_keras_linear_model(self): + # Inputs. + batch_size = 4 + vocabulary_size = 3 + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, ids [2] + # example 1, ids [0, 1] + # example 2, ids [] + # example 3, ids [1] + indices=((0, 0), (1, 0), (1, 4), (3, 0)), + values=(2, 0, 1, 1), + dense_shape=(batch_size, 5)) + + # Embedding variable. 
+ embedding_dimension = 2 + embedding_shape = (vocabulary_size, embedding_dimension) + zeros_embedding_values = np.zeros(embedding_shape) + + def _initializer(shape, dtype, partition_info): + self.assertAllEqual(embedding_shape, shape) + self.assertEqual(dtypes.float32, dtype) + self.assertIsNone(partition_info) + return zeros_embedding_values + + # Build columns. + categorical_column = fc.categorical_column_with_identity( + key='aaa', num_buckets=vocabulary_size) + embedding_column = fc.embedding_column( + categorical_column, + dimension=embedding_dimension, + initializer=_initializer) + + with ops.Graph().as_default(): + predictions = get_keras_linear_model_predictions({ + categorical_column.name: sparse_input + }, (embedding_column,)) + expected_var_names = ( + 'linear_model/bias_weights:0', + 'linear_model/aaa_embedding/weights:0', + 'linear_model/aaa_embedding/embedding_weights:0', + ) + self.assertItemsEqual( + expected_var_names, + [v.name for v in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)]) + trainable_vars = { + v.name: v + for v in ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES) + } + self.assertItemsEqual(expected_var_names, trainable_vars.keys()) + bias = trainable_vars['linear_model/bias_weights:0'] + embedding_weights = trainable_vars[ + 'linear_model/aaa_embedding/embedding_weights:0'] + linear_weights = trainable_vars['linear_model/aaa_embedding/weights:0'] + with _initialized_session(): + # Predictions with all zero weights. + self.assertAllClose(np.zeros((1,)), bias.eval()) + self.assertAllClose(zeros_embedding_values, embedding_weights.eval()) + self.assertAllClose( + np.zeros((embedding_dimension, 1)), linear_weights.eval()) + self.assertAllClose(np.zeros((batch_size, 1)), predictions.eval()) + + # Predictions with all non-zero weights. + embedding_weights.assign(( + (1., 2.), # id 0 + (3., 5.), # id 1 + (7., 11.) 
# id 2 + )).eval() + linear_weights.assign(((4.,), (6.,))).eval() + # example 0, ids [2], embedding[0] = [7, 11] + # example 1, ids [0, 1], embedding[1] = mean([1, 2] + [3, 5]) = [2, 3.5] + # example 2, ids [], embedding[2] = [0, 0] + # example 3, ids [1], embedding[3] = [3, 5] + # sum(embeddings * linear_weights) + # = [4*7 + 6*11, 4*2 + 6*3.5, 4*0 + 6*0, 4*3 + 6*5] = [94, 29, 0, 42] + self.assertAllClose(((94.,), (29.,), (0.,), (42.,)), predictions.eval()) + def test_input_layer(self): # Inputs. vocabulary_size = 3 @@ -4159,14 +5320,12 @@ class SharedEmbeddingColumnTest(test.TestCase): self.assertEqual(embedding_dimension, embedding_column_b.dimension) self.assertEqual('mean', embedding_column_a.combiner) self.assertEqual('mean', embedding_column_b.combiner) - self.assertIsNotNone(embedding_column_a.initializer) - self.assertIsNotNone(embedding_column_b.initializer) self.assertIsNone(embedding_column_a.ckpt_to_load_from) self.assertIsNone(embedding_column_b.ckpt_to_load_from) self.assertEqual('aaa_bbb_shared_embedding', - embedding_column_a.shared_embedding_collection_name) + embedding_column_a.var_scope_name) self.assertEqual('aaa_bbb_shared_embedding', - embedding_column_b.shared_embedding_collection_name) + embedding_column_b.var_scope_name) self.assertIsNone(embedding_column_a.tensor_name_in_ckpt) self.assertIsNone(embedding_column_b.tensor_name_in_ckpt) self.assertIsNone(embedding_column_a.max_norm) @@ -4212,12 +5371,10 @@ class SharedEmbeddingColumnTest(test.TestCase): self.assertEqual(embedding_dimension, embedding_column_b.dimension) self.assertEqual('my_combiner', embedding_column_a.combiner) self.assertEqual('my_combiner', embedding_column_b.combiner) - self.assertEqual('my_initializer', embedding_column_a.initializer()) - self.assertEqual('my_initializer', embedding_column_b.initializer()) self.assertEqual('shared_embedding_collection_name', - embedding_column_a.shared_embedding_collection_name) + embedding_column_a.var_scope_name) 
self.assertEqual('shared_embedding_collection_name', - embedding_column_b.shared_embedding_collection_name) + embedding_column_b.var_scope_name) self.assertEqual('my_ckpt', embedding_column_a.ckpt_to_load_from) self.assertEqual('my_ckpt', embedding_column_b.ckpt_to_load_from) self.assertEqual('my_ckpt_tensor', embedding_column_a.tensor_name_in_ckpt) @@ -4267,9 +5424,8 @@ class SharedEmbeddingColumnTest(test.TestCase): self.assertEqual(embedding_dimension, embedding_column_a.dimension) self.assertEqual('my_combiner', embedding_column_a.combiner) - self.assertEqual('my_initializer', embedding_column_a.initializer()) self.assertEqual('shared_embedding_collection_name', - embedding_column_a.shared_embedding_collection_name) + embedding_column_a.var_scope_name) self.assertEqual('my_ckpt', embedding_column_a.ckpt_to_load_from) self.assertEqual('my_ckpt_tensor', embedding_column_a.tensor_name_in_ckpt) self.assertEqual(42., embedding_column_a.max_norm) @@ -4445,8 +5601,8 @@ class SharedEmbeddingColumnTest(test.TestCase): # Assert expected embedding variable and lookups. global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) - self.assertItemsEqual( - ('embedding_weights:0',), tuple([v.name for v in global_vars])) + self.assertItemsEqual(('embedding_weights:0',), + tuple([v.name for v in global_vars])) embedding_var = global_vars[0] with _initialized_session(): self.assertAllEqual(embedding_values, embedding_var.eval()) @@ -4595,6 +5751,97 @@ class SharedEmbeddingColumnTest(test.TestCase): # = [3*1 + 5*2, 3*0 +5*0] = [13, 0] self.assertAllClose([[94. + 13.], [29.]], predictions.eval()) + def test_keras_linear_model(self): + # Inputs. + batch_size = 2 + vocabulary_size = 3 + # -1 values are ignored. + input_a = np.array([ + [2, -1, -1], # example 0, ids [2] + [0, 1, -1] + ]) # example 1, ids [0, 1] + input_b = np.array([ + [0, -1, -1], # example 0, ids [0] + [-1, -1, -1] + ]) # example 1, ids [] + + # Embedding variable. 
+ embedding_dimension = 2 + embedding_shape = (vocabulary_size, embedding_dimension) + zeros_embedding_values = np.zeros(embedding_shape) + + def _initializer(shape, dtype, partition_info): + self.assertAllEqual(embedding_shape, shape) + self.assertEqual(dtypes.float32, dtype) + self.assertIsNone(partition_info) + return zeros_embedding_values + + # Build columns. + categorical_column_a = fc.categorical_column_with_identity( + key='aaa', num_buckets=vocabulary_size) + categorical_column_b = fc.categorical_column_with_identity( + key='bbb', num_buckets=vocabulary_size) + embedding_column_a, embedding_column_b = fc.shared_embedding_columns( + [categorical_column_a, categorical_column_b], + dimension=embedding_dimension, + initializer=_initializer) + + with ops.Graph().as_default(): + predictions = get_keras_linear_model_predictions({ + categorical_column_a.name: input_a, + categorical_column_b.name: input_b, + }, (embedding_column_a, embedding_column_b)) + # Linear weights do not follow the column name. But this is a rare use + # case, and fixing it would add too much complexity to the code. 
+ expected_var_names = ( + 'linear_model/bias_weights:0', + 'linear_model/aaa_bbb_shared_embedding/weights:0', + 'linear_model/aaa_bbb_shared_embedding/embedding_weights:0', + 'linear_model/aaa_bbb_shared_embedding_1/weights:0', + ) + self.assertItemsEqual( + expected_var_names, + [v.name for v in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)]) + trainable_vars = { + v.name: v + for v in ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES) + } + self.assertItemsEqual(expected_var_names, trainable_vars.keys()) + bias = trainable_vars['linear_model/bias_weights:0'] + embedding_weights = trainable_vars[ + 'linear_model/aaa_bbb_shared_embedding/embedding_weights:0'] + linear_weights_a = trainable_vars[ + 'linear_model/aaa_bbb_shared_embedding/weights:0'] + linear_weights_b = trainable_vars[ + 'linear_model/aaa_bbb_shared_embedding_1/weights:0'] + with _initialized_session(): + # Predictions with all zero weights. + self.assertAllClose(np.zeros((1,)), bias.eval()) + self.assertAllClose(zeros_embedding_values, embedding_weights.eval()) + self.assertAllClose( + np.zeros((embedding_dimension, 1)), linear_weights_a.eval()) + self.assertAllClose( + np.zeros((embedding_dimension, 1)), linear_weights_b.eval()) + self.assertAllClose(np.zeros((batch_size, 1)), predictions.eval()) + + # Predictions with all non-zero weights. + embedding_weights.assign(( + (1., 2.), # id 0 + (3., 5.), # id 1 + (7., 11.) # id 2 + )).eval() + linear_weights_a.assign(((4.,), (6.,))).eval() + # example 0, ids [2], embedding[0] = [7, 11] + # example 1, ids [0, 1], embedding[1] = mean([1, 2] + [3, 5]) = [2, 3.5] + # sum(embeddings * linear_weights) + # = [4*7 + 6*11, 4*2 + 6*3.5] = [94, 29] + linear_weights_b.assign(((3.,), (5.,))).eval() + # example 0, ids [0], embedding[0] = [1, 2] + # example 1, ids [], embedding[1] = 0, 0] + # sum(embeddings * linear_weights) + # = [3*1 + 5*2, 3*0 +5*0] = [13, 0] + self.assertAllClose([[94. 
+ 13.], [29.]], predictions.eval()) + def _test_input_layer(self, trainable=True): # Inputs. vocabulary_size = 3 @@ -4663,10 +5910,7 @@ class SharedEmbeddingColumnTest(test.TestCase): tuple([v.name for v in trainable_vars])) else: self.assertItemsEqual([], tuple([v.name for v in trainable_vars])) - shared_embedding_vars = ops.get_collection('aaa_bbb_shared_embedding') - self.assertItemsEqual( - ['input_layer/aaa_bbb_shared_embedding/embedding_weights:0'], - tuple([v.name for v in shared_embedding_vars])) + shared_embedding_vars = global_vars with _initialized_session(): self.assertAllEqual(embedding_values, shared_embedding_vars[0].eval()) self.assertAllEqual(expected_lookups, input_layer.eval()) @@ -4880,6 +6124,103 @@ class WeightedCategoricalColumnTest(test.TestCase): dense_shape=(2, 2)), weight_tensor.eval()) + def test_keras_linear_model(self): + column = fc.weighted_categorical_column( + categorical_column=fc.categorical_column_with_identity( + key='ids', num_buckets=3), + weight_feature_key='values') + with ops.Graph().as_default(): + predictions = get_keras_linear_model_predictions({ + 'ids': + sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1)), + values=(0, 2, 1), + dense_shape=(2, 2)), + 'values': + sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1)), + values=(.5, 1., .1), + dense_shape=(2, 2)) + }, (column,)) + bias = get_linear_model_bias() + weight_var = get_linear_model_column_var(column) + with _initialized_session(): + self.assertAllClose((0.,), bias.eval()) + self.assertAllClose(((0.,), (0.,), (0.,)), weight_var.eval()) + self.assertAllClose(((0.,), (0.,)), predictions.eval()) + weight_var.assign(((1.,), (2.,), (3.,))).eval() + # weight_var[0] * weights[0, 0] = 1 * .5 = .5 + # weight_var[2] * weights[1, 0] + weight_var[1] * weights[1, 1] + # = 3*1 + 2*.1 = 3+.2 = 3.2 + self.assertAllClose(((.5,), (3.2,)), predictions.eval()) + + def test_keras_linear_model_mismatched_shape(self): + column = 
fc.weighted_categorical_column( + categorical_column=fc.categorical_column_with_identity( + key='ids', num_buckets=3), + weight_feature_key='values') + with ops.Graph().as_default(): + with self.assertRaisesRegexp(ValueError, + r'Dimensions.*are not compatible'): + get_keras_linear_model_predictions({ + 'ids': + sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1)), + values=(0, 2, 1), + dense_shape=(2, 2)), + 'values': + sparse_tensor.SparseTensorValue( + indices=((0, 0), (0, 1), (1, 0), (1, 1)), + values=(.5, 11., 1., .1), + dense_shape=(2, 2)) + }, (column,)) + + def test_keras_linear_model_mismatched_dense_values(self): + column = fc.weighted_categorical_column( + categorical_column=fc.categorical_column_with_identity( + key='ids', num_buckets=3), + weight_feature_key='values') + with ops.Graph().as_default(): + predictions = get_keras_linear_model_predictions( + { + 'ids': + sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1)), + values=(0, 2, 1), + dense_shape=(2, 2)), + 'values': ((.5,), (1.,)) + }, (column,), + sparse_combiner='mean') + with _initialized_session(): + with self.assertRaisesRegexp(errors.OpError, 'Incompatible shapes'): + predictions.eval() + + def test_keras_linear_model_mismatched_dense_shape(self): + column = fc.weighted_categorical_column( + categorical_column=fc.categorical_column_with_identity( + key='ids', num_buckets=3), + weight_feature_key='values') + with ops.Graph().as_default(): + predictions = get_keras_linear_model_predictions({ + 'ids': + sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1)), + values=(0, 2, 1), + dense_shape=(2, 2)), + 'values': ((.5,), (1.,), (.1,)) + }, (column,)) + bias = get_linear_model_bias() + weight_var = get_linear_model_column_var(column) + with _initialized_session(): + self.assertAllClose((0.,), bias.eval()) + self.assertAllClose(((0.,), (0.,), (0.,)), weight_var.eval()) + self.assertAllClose(((0.,), (0.,)), predictions.eval()) + weight_var.assign(((1.,), 
(2.,), (3.,))).eval() + # weight_var[0] * weights[0, 0] = 1 * .5 = .5 + # weight_var[2] * weights[1, 0] + weight_var[1] * weights[1, 1] + # = 3*1 + 2*.1 = 3+.2 = 3.2 + self.assertAllClose(((.5,), (3.2,)), predictions.eval()) + def test_linear_model(self): column = fc.weighted_categorical_column( categorical_column=fc.categorical_column_with_identity( @@ -4933,13 +6274,16 @@ class WeightedCategoricalColumnTest(test.TestCase): key='ids', num_buckets=3), weight_feature_key='values') with ops.Graph().as_default(): - predictions = fc.linear_model({ - 'ids': sparse_tensor.SparseTensorValue( - indices=((0, 0), (1, 0), (1, 1)), - values=(0, 2, 1), - dense_shape=(2, 2)), - 'values': ((.5,), (1.,)) - }, (column,)) + predictions = fc.linear_model( + { + 'ids': + sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1)), + values=(0, 2, 1), + dense_shape=(2, 2)), + 'values': ((.5,), (1.,)) + }, (column,), + sparse_combiner='mean') with _initialized_session(): with self.assertRaisesRegexp(errors.OpError, 'Incompatible shapes'): predictions.eval() diff --git a/tensorflow/python/framework/c_api_util.py b/tensorflow/python/framework/c_api_util.py index 6c522de452b59ea9a200ccf89cfb428a26970db1..7bbe3183dfa376776dde8412a3270b1684a02391 100644 --- a/tensorflow/python/framework/c_api_util.py +++ b/tensorflow/python/framework/c_api_util.py @@ -33,7 +33,7 @@ class ScopedTFStatus(object): def __del__(self): # Note: when we're destructing the global context (i.e when the process is # terminating) we can have already deleted other modules. - if c_api.TF_DeleteStatus is not None: + if c_api is not None and c_api.TF_DeleteStatus is not None: c_api.TF_DeleteStatus(self.status) @@ -46,7 +46,7 @@ class ScopedTFGraph(object): def __del__(self): # Note: when we're destructing the global context (i.e when the process is # terminating) we can have already deleted other modules. 
- if c_api.TF_DeleteGraph is not None: + if c_api is not None and c_api.TF_DeleteGraph is not None: c_api.TF_DeleteGraph(self.graph) @@ -59,10 +59,36 @@ class ScopedTFImportGraphDefOptions(object): def __del__(self): # Note: when we're destructing the global context (i.e when the process is # terminating) we can have already deleted other modules. - if c_api.TF_DeleteImportGraphDefOptions is not None: + if c_api is not None and c_api.TF_DeleteImportGraphDefOptions is not None: c_api.TF_DeleteImportGraphDefOptions(self.options) +class ScopedTFImportGraphDefResults(object): + """Wrapper around TF_ImportGraphDefOptions that handles deletion.""" + + def __init__(self, results): + self.results = results + + def __del__(self): + # Note: when we're destructing the global context (i.e when the process is + # terminating) we can have already deleted other modules. + if c_api is not None and c_api.TF_DeleteImportGraphDefResults is not None: + c_api.TF_DeleteImportGraphDefResults(self.results) + + +class ScopedTFFunction(object): + """Wrapper around TF_Function that handles deletion.""" + + def __init__(self, func): + self.func = func + + def __del__(self): + # Note: when we're destructing the global context (i.e when the process is + # terminating) we can have already deleted other modules. + if c_api is not None and c_api.TF_DeleteFunction is not None: + c_api.TF_DeleteFunction(self.func) + + @tf_contextlib.contextmanager def tf_buffer(data=None): """Context manager that creates and deletes TF_Buffer. 
diff --git a/tensorflow/python/framework/dtypes.py b/tensorflow/python/framework/dtypes.py index 0edae92fd4a86e7d10a180ce64364d3ea552bf60..7f9ef53457ae060600067b946e686487f55adda1 100644 --- a/tensorflow/python/framework/dtypes.py +++ b/tensorflow/python/framework/dtypes.py @@ -297,6 +297,9 @@ class DType(object): def __hash__(self): return self._type_enum + def __reduce__(self): + return as_dtype, (self.name,) + @property def size(self): if (self._type_enum == types_pb2.DT_VARIANT or @@ -345,7 +348,7 @@ tf_export("uint16").export_constant(__name__, "uint16") uint32 = DType(types_pb2.DT_UINT32) tf_export("uint32").export_constant(__name__, "uint32") uint64 = DType(types_pb2.DT_UINT64) -tf_export("uint64").export_constant(__name__, "uint32") +tf_export("uint64").export_constant(__name__, "uint64") int16 = DType(types_pb2.DT_INT16) tf_export("int16").export_constant(__name__, "int16") int8 = DType(types_pb2.DT_INT8) @@ -648,6 +651,11 @@ QUANTIZED_DTYPES = frozenset([ ]) tf_export("QUANTIZED_DTYPES").export_constant(__name__, "QUANTIZED_DTYPES") +_PYTHON_TO_TF = { + float: float32, + bool: bool, +} + @tf_export("as_dtype") def as_dtype(type_value): @@ -679,6 +687,11 @@ def as_dtype(type_value): except KeyError: pass + try: + return _PYTHON_TO_TF[type_value] + except KeyError: + pass + if isinstance(type_value, np.dtype): # The numpy dtype for strings is variable length. We can not compare # dtype with a single constant (np.string does not exist) to decide @@ -687,11 +700,13 @@ def as_dtype(type_value): if type_value.type == np.string_ or type_value.type == np.unicode_: return string - for key, val in _NP_TO_TF: - try: - if key == type_value: - return val - except TypeError as e: - raise TypeError("Cannot convert {} to a dtype. {}".format(type_value, e)) + if isinstance(type_value, (type, np.dtype)): + for key, val in _NP_TO_TF: + try: + if key == type_value: + return val + except TypeError as e: + raise TypeError("Cannot convert {} to a dtype. 
{}".format( + type_value, e)) raise TypeError("Cannot convert value %r to a TensorFlow DType." % type_value) diff --git a/tensorflow/python/framework/dtypes_test.py b/tensorflow/python/framework/dtypes_test.py index e49e2fda5d84da4f8f87fae73874351afe0a20f2..a873670e0461884d06cde1db4db2cf2db98fde3c 100644 --- a/tensorflow/python/framework/dtypes_test.py +++ b/tensorflow/python/framework/dtypes_test.py @@ -295,6 +295,20 @@ class TypesTest(test_util.TensorFlowTestCase): self.assertNotEqual(dtypes.int32, int) self.assertNotEqual(dtypes.float64, 2.1) + def testPythonTypesConversion(self): + self.assertIs(dtypes.float32, dtypes.as_dtype(float)) + self.assertIs(dtypes.bool, dtypes.as_dtype(bool)) + + def testReduce(self): + for enum in dtypes._TYPE_TO_STRING: + dtype = dtypes.DType(enum) + ctor, args = dtype.__reduce__() + self.assertEquals(ctor, dtypes.as_dtype) + self.assertEquals(args, (dtype.name,)) + reconstructed = ctor(*args) + self.assertEquals(reconstructed, dtype) + if __name__ == "__main__": googletest.main() + diff --git a/tensorflow/python/framework/errors_impl.py b/tensorflow/python/framework/errors_impl.py index 2a40316d51c023df9c664d0dd79a0df3b2ac5041..84106c32c673e15832ff747a7fededdfbfb94ed8 100644 --- a/tensorflow/python/framework/errors_impl.py +++ b/tensorflow/python/framework/errors_impl.py @@ -473,6 +473,8 @@ _CODE_TO_EXCEPTION_CLASS = { DATA_LOSS: DataLossError, } +c_api.PyExceptionRegistry_Init(_CODE_TO_EXCEPTION_CLASS) + _EXCEPTION_CLASS_TO_CODE = dict(( (class_, code) for (code, class_) in _CODE_TO_EXCEPTION_CLASS.items())) @@ -499,6 +501,7 @@ def _make_specific_exception(node_def, op, message, error_code): # Named like a function for backwards compatibility with the # @tf_contextlib.contextmanager version, which was switched to a class to avoid # some object creation overhead. +# TODO(b/77295559): expand use of TF_Status* SWIG typemap and deprecate this. 
@tf_export("errors.raise_exception_on_not_ok_status") # pylint: disable=invalid-name class raise_exception_on_not_ok_status(object): """Context manager to check for C API status.""" diff --git a/tensorflow/python/framework/function.py b/tensorflow/python/framework/function.py index 82dd2a3356fca775ff17d3b1d90c8e5b6d9fee4b..9570f009a5c458cb904968cc7990270b30da91a1 100644 --- a/tensorflow/python/framework/function.py +++ b/tensorflow/python/framework/function.py @@ -30,7 +30,6 @@ from tensorflow.python import pywrap_tensorflow as c_api from tensorflow.python.eager import context from tensorflow.python.framework import c_api_util from tensorflow.python.framework import dtypes -from tensorflow.python.framework import errors from tensorflow.python.framework import graph_to_function_def from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops @@ -275,8 +274,7 @@ class _DefinedFunction(object): self._create_definition_if_needed() if self._c_func: with c_api_util.tf_buffer() as buf: - with errors.raise_exception_on_not_ok_status() as status: - c_api.TF_FunctionToFunctionDef(self._c_func, buf, status) + c_api.TF_FunctionToFunctionDef(self._c_func.func, buf) fdef = function_pb2.FunctionDef() proto_data = c_api.TF_GetBuffer(buf) fdef.ParseFromString(compat.as_bytes(proto_data)) @@ -399,18 +397,17 @@ class _DefinedFunction(object): if self._out_names else []) description = self._func.__doc__ or None # pylint: disable=protected-access - with errors.raise_exception_on_not_ok_status() as status: - self._c_func = c_api.TF_GraphToFunction_wrapper( - temp_graph._c_graph, - base_func_name, - self._func_name is None, # append_hash_to_fn_name - None, # opers - [t._as_tf_output() for t in inputs], - [t._as_tf_output() for t in outputs], - output_names, - None, # opts - description, - status) + c_func = c_api.TF_GraphToFunction_wrapper( + temp_graph._c_graph, + base_func_name, + self._func_name is None, # append_hash_to_fn_name + None, # opers + 
[t._as_tf_output() for t in inputs], + [t._as_tf_output() for t in outputs], + output_names, + None, # opts + description) + self._c_func = c_api_util.ScopedTFFunction(c_func) # pylint: enable=protected-access self._set_c_attrs(kwargs_attr) @@ -433,9 +430,8 @@ class _DefinedFunction(object): serialized = attr_value.SerializeToString() # TODO(skyewm): this creates and deletes a new TF_Status for every attr. # It might be worth creating a convenient way to re-use the same status. - with errors.raise_exception_on_not_ok_status() as status: - c_api.TF_FunctionSetAttrValueProto(self._c_func, compat.as_str(name), - serialized, status) + c_api.TF_FunctionSetAttrValueProto(self._c_func.func, compat.as_str(name), + serialized) def _create_hash_str(self, input_arg, output_arg, node_def): """Creates an 8-character string unique to this input. @@ -830,8 +826,8 @@ def _from_definition(fdef, grad_func=None): # pylint: disable=protected-access if ops._USE_C_API: serialized = fdef.SerializeToString() - with errors.raise_exception_on_not_ok_status() as status: - result._c_func = c_api.TF_FunctionImportFunctionDef(serialized, status) + c_func = c_api.TF_FunctionImportFunctionDef(serialized) + result._c_func = c_api_util.ScopedTFFunction(c_func) result._extra_inputs = [] else: result._definition = fdef diff --git a/tensorflow/python/framework/function_test.py b/tensorflow/python/framework/function_test.py index 83d256fab6ed6c2fb46319fdc2d5de079346809e..d6bc14fbc75199a97f50c4dc120b2704970d1879 100644 --- a/tensorflow/python/framework/function_test.py +++ b/tensorflow/python/framework/function_test.py @@ -37,7 +37,6 @@ from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops -from tensorflow.python.ops import clip_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import functional_ops from tensorflow.python.ops import 
gen_logging_ops @@ -58,12 +57,32 @@ def _OptimizerOptions(): for cse in [False, True]: for inline in [False, True]: for cfold in [False, True]: - yield config_pb2.ConfigProto(graph_options=config_pb2.GraphOptions( - optimizer_options=config_pb2.OptimizerOptions( - opt_level=config_pb2.OptimizerOptions.L0, - do_common_subexpression_elimination=cse, - do_function_inlining=inline, - do_constant_folding=cfold))) + cfg = config_pb2.ConfigProto( + graph_options=config_pb2.GraphOptions( + optimizer_options=config_pb2.OptimizerOptions( + opt_level=config_pb2.OptimizerOptions.L0, + do_common_subexpression_elimination=cse, + do_function_inlining=inline, + do_constant_folding=cfold))) + if cse: + cfg.graph_options.rewrite_options.arithmetic_optimization = ( + rewriter_config_pb2.RewriterConfig.ON) + else: + cfg.graph_options.rewrite_options.arithmetic_optimization = ( + rewriter_config_pb2.RewriterConfig.OFF) + if inline: + cfg.graph_options.rewrite_options.function_optimization = ( + rewriter_config_pb2.RewriterConfig.ON) + else: + cfg.graph_options.rewrite_options.function_optimization = ( + rewriter_config_pb2.RewriterConfig.OFF) + if cfold: + cfg.graph_options.rewrite_options.constant_folding = ( + rewriter_config_pb2.RewriterConfig.ON) + else: + cfg.graph_options.rewrite_options.constant_folding = ( + rewriter_config_pb2.RewriterConfig.OFF) + yield cfg @test_util.with_c_api @@ -1342,7 +1361,7 @@ class UnrollLSTMTest(test.TestCase): value=math_ops.matmul(xm, weights), num_or_size_splits=4, axis=1) new_c = math_ops.sigmoid(f_g) * cprev + math_ops.sigmoid( i_g) * math_ops.tanh(i_i) - new_c = clip_ops.clip_by_value(new_c, -50.0, 50.0) + new_c = math_ops.maximum(math_ops.minimum(new_c, 50.0), -50.0) new_m = math_ops.sigmoid(o_g) * math_ops.tanh(new_c) return new_m, new_c diff --git a/tensorflow/python/framework/graph_util_impl.py b/tensorflow/python/framework/graph_util_impl.py index 910364364c8be84b1a629dbdaae5e69443d07e75..394fac6c856197030f85aab5b11fa881eddf670d 100644 --- 
a/tensorflow/python/framework/graph_util_impl.py +++ b/tensorflow/python/framework/graph_util_impl.py @@ -285,7 +285,7 @@ def convert_variables_to_constants(sess, output_graph_def.node.extend([output_node]) output_graph_def.library.CopyFrom(inference_graph.library) - print("Converted %d variables to const ops." % how_many_converted) + logging.info("Converted %d variables to const ops.", how_many_converted) return output_graph_def diff --git a/tensorflow/python/framework/graph_util_test.py b/tensorflow/python/framework/graph_util_test.py index b618152b0256fd043dc7259960d867278ba55b0a..2dafb94ba7e4e779c8703096795f4e49139100ab 100644 --- a/tensorflow/python/framework/graph_util_test.py +++ b/tensorflow/python/framework/graph_util_test.py @@ -209,7 +209,7 @@ class DeviceFunctionsTest(test.TestCase): defun_node, 2.0, name="output_node") with session.Session() as sess: - init = variables.initialize_variables([variable_node]) + init = variables.variables_initializer([variable_node]) sess.run(init) output = sess.run(output_node) self.assertNear(4.0, output, 0.00001) diff --git a/tensorflow/python/framework/importer.py b/tensorflow/python/framework/importer.py index 4ea34d7bb2831845aec1f40fcdb7f64a8f8c438a..3f8a8c4befb63abf14a70b48833d7e6e400e5c51 100644 --- a/tensorflow/python/framework/importer.py +++ b/tensorflow/python/framework/importer.py @@ -485,9 +485,9 @@ def import_graph_def(graph_def, with graph._lock: # pylint: disable=protected-access with c_api_util.tf_buffer(graph_def.SerializeToString()) as serialized: try: - with errors.raise_exception_on_not_ok_status() as status: - results = c_api.TF_GraphImportGraphDefWithResults( - graph._c_graph, serialized, options, status) # pylint: disable=protected-access + results = c_api.TF_GraphImportGraphDefWithResults( + graph._c_graph, serialized, options) # pylint: disable=protected-access + results = c_api_util.ScopedTFImportGraphDefResults(results) except errors.InvalidArgumentError as e: # Convert to ValueError for 
backwards compatibility. raise ValueError(str(e)) @@ -516,7 +516,7 @@ def import_graph_def(graph_def, # they are likely to be due to a typo. missing_unused_input_keys = ( c_api.TF_ImportGraphDefResultsMissingUnusedInputMappings_wrapper( - results)) + results.results)) if missing_unused_input_keys: missing_unused_input_keys = [ compat.as_str(s) for s in missing_unused_input_keys @@ -528,7 +528,7 @@ def import_graph_def(graph_def, if return_elements is None: return None else: - return _GatherReturnElements(return_elements, graph, results) + return _GatherReturnElements(return_elements, graph, results.results) else: g = graph @@ -685,11 +685,10 @@ def import_graph_def(graph_def, ', '.join(x.name for x in op._input_types)))) # pylint: enable=protected-access - if not g._is_function(op.type): # pylint: disable=protected-access - # Execute shape inference for this op. - # NOTE(mrry): If the graph contains a cycle, the full shape - # information may not be available for this op's inputs. - ops.set_shapes_for_outputs(op) + # Execute shape inference for this op. + # NOTE(mrry): If the graph contains a cycle, the full shape + # information may not be available for this op's inputs. + ops.set_shape_and_handle_data_for_outputs(op) # For nodes with _output_shapes set, set the output shapes. 
if '_output_shapes' in op.node_def.attr: for i, output in enumerate(op.outputs): diff --git a/tensorflow/python/framework/importer_test.py b/tensorflow/python/framework/importer_test.py index 369669c2e67771bb737c04714d67e7bbb99cef24..2c913d1e028e15e293158fe180e263a78c514ee4 100644 --- a/tensorflow/python/framework/importer_test.py +++ b/tensorflow/python/framework/importer_test.py @@ -219,6 +219,23 @@ class ImportGraphDefTest(test.TestCase): self.assertEqual(outer_inner.name, "outer/inner_1") self.assertEqual(outer_inner_c.name, "outer/inner/c_1") + def testEmptyNameScope(self): + with ops.Graph().as_default(): + # Create name scope but don't create any ops with it + with ops.name_scope("foo"): + pass + + # Import graph def that uses name scope name + op, = importer.import_graph_def( + self._MakeGraphDef("node { name: 'foo' op: 'IntOutput' }"), + return_elements=["foo"], + name="") + + if ops._USE_C_API: + self.assertEqual(op.name, "foo") + else: + self.assertEqual(op.name, "foo_1") + def testInputMap(self): with ops.Graph().as_default(): feed_a_0 = constant_op.constant(0, dtype=dtypes.int32) diff --git a/tensorflow/python/framework/load_library.py b/tensorflow/python/framework/load_library.py index 1f2aa264c110930b318f30e3a24010a96ebce47e..9a8477debb05fdf11d7d8c39adf9e5e55cc2caeb 100644 --- a/tensorflow/python/framework/load_library.py +++ b/tensorflow/python/framework/load_library.py @@ -26,7 +26,6 @@ import threading # pylint: disable=unused-import from tensorflow.core.framework import op_def_pb2 from tensorflow.core.lib.core import error_codes_pb2 # pylint: disable=unused-import from tensorflow.python import pywrap_tensorflow as py_tf -from tensorflow.python.framework import errors_impl from tensorflow.python.util import compat from tensorflow.python.util.tf_export import tf_export @@ -54,13 +53,12 @@ def load_op_library(library_filename): Raises: RuntimeError: when unable to load the library or get the python wrappers. 
""" - with errors_impl.raise_exception_on_not_ok_status() as status: - lib_handle = py_tf.TF_LoadLibrary(library_filename, status) + lib_handle = py_tf.TF_LoadLibrary(library_filename) op_list_str = py_tf.TF_GetOpList(lib_handle) op_list = op_def_pb2.OpList() op_list.ParseFromString(compat.as_bytes(op_list_str)) - wrappers = py_tf.GetPythonWrappers(op_list_str) + wrappers = py_tf.GetEagerPythonWrappers(op_list_str) # Delete the library handle to release any memory held in C # that are no longer needed. @@ -99,5 +97,4 @@ def load_file_system_library(library_filename): Raises: RuntimeError: when unable to load the library. """ - with errors_impl.raise_exception_on_not_ok_status() as status: - lib_handle = py_tf.TF_LoadLibrary(library_filename, status) + py_tf.TF_LoadLibrary(library_filename) diff --git a/tensorflow/python/framework/meta_graph_test.py b/tensorflow/python/framework/meta_graph_test.py index 5d5fb037fc217849ea32102bf60796c47d565f3b..e5b157648e0830d400b11d5fe310f0fc1422e004 100644 --- a/tensorflow/python/framework/meta_graph_test.py +++ b/tensorflow/python/framework/meta_graph_test.py @@ -522,6 +522,31 @@ class ScopedMetaGraphTest(test.TestCase): actual_grad_value = sess.run(grad) self.assertEqual(expected_grad_value, actual_grad_value) + def testImportWhileLoopInWhileLoop(self): + # Create a simple while loop. + with ops.Graph().as_default(): + var = variables.Variable(0.0) + _, output = control_flow_ops.while_loop(lambda i, x: i < 5, + lambda i, x: (i + 1, x * 2.0), + [0, var]) + output_name = output.name + + # Generate a MetaGraphDef containing the while loop with an export scope. + meta_graph_def, _ = meta_graph.export_scoped_meta_graph() + + # Restore the MetaGraphDef in a while loop in a new graph. 
+ with ops.Graph().as_default(): + + def body(i, _): + meta_graph.import_scoped_meta_graph(meta_graph_def) + return i + 1, ops.get_default_graph().get_tensor_by_name(output_name) + + _, x = control_flow_ops.while_loop(lambda i, x: i < 2, body, [0, 0.0], + name="") + with session.Session() as sess: + sess.run(variables.global_variables_initializer()) + sess.run(x) + def testScopedImportUnderNameScope(self): graph = ops.Graph() with graph.as_default(): diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index 6930737a0c3a02d1f7bac51f795f00791f0ec27a..8cd6820f6a5adce7f82ed2eb2e3eba2bca1e3f80 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -20,6 +20,7 @@ from __future__ import print_function import collections import copy +import functools import linecache import os import re @@ -63,7 +64,7 @@ from tensorflow.python.util.tf_export import tf_export # calls to the C API. Currently disabled by default but can be manually enabled # in code or via the environment variable. This will be removed once all # functionality is supported and there's no performance penalty with it enabled. -_USE_C_API = os.getenv("TF_C_API_GRAPH_CONSTRUCTION", "0") is not "0" +_USE_C_API = os.getenv("TF_C_API_GRAPH_CONSTRUCTION", "1") is not "0" _USE_C_SHAPES = os.getenv("TF_C_API_GRAPH_CONSTRUCTION_SHAPES", "0") is not "0" @@ -289,15 +290,26 @@ class Tensor(_TensorLike): self._op = op self._value_index = value_index self._dtype = dtypes.as_dtype(dtype) - self._shape_val = tensor_shape.unknown_shape() + + if _USE_C_API: + # This will be set by set_shape_and_handle_data_for_outputs. + self._shape_val = None + else: + # The Python code requires all tensors start with a shape to support shape + # inference on imported while loops. This isn't necessary with the C API + # enabled because the C API provides the shapes for imported nodes. + # TODO(skyewm): remove when _USE_C_API is removed. 
+ self._shape_val = tensor_shape.unknown_shape() + # List of operations that use this Tensor as input. We maintain this list # to easily navigate a computation graph. self._consumers = [] - # Attributes used for C++ shape inference. Not inspected, only forwarded. - # If set, will be a HandleData object from cpp_shape_inference.proto. - # TODO(b/74620627): remove when _USE_C_SHAPES is removed - self._handle_data = None + if not _USE_C_SHAPES: + # Attributes used for C++ shape inference. Not inspected, only forwarded. + # If set, will be a HandleData object from cpp_shape_inference.proto. + self._handle_data = None + self._id = uid() @property @@ -371,21 +383,45 @@ class Tensor(_TensorLike): A `TensorShape` representing the shape of this tensor. """ - graph = self._op._graph._c_graph # pylint: disable=protected-access - if graph and _USE_C_SHAPES: - with errors.raise_exception_on_not_ok_status() as status: - num_dims = c_api.TF_GraphGetTensorNumDims(graph, self._as_tf_output(), - status) - if num_dims == -1: - dim_list = None + if self._shape_val is None: + if _USE_C_SHAPES: + self._shape_val = self._c_api_shape() else: - with errors.raise_exception_on_not_ok_status() as status: - dim_list = c_api.TF_GraphGetTensorShape_wrapper( - graph, self._as_tf_output(), num_dims, status) - dim_list = [None if i == -1 else i for i in dim_list] - return tensor_shape.TensorShape(dim_list) + assert _USE_C_API + # Call set_shape_and_handle_data_for_outputs in topological order on all + # ops that are needed to compute self.op's shape. We do this instead of + # having set_shape_and_handle_data_for_outputs recursively call + # Operation.shape on self.op.inputs to overflowing the call stack. 
+ need_shapes = self._get_input_ops_without_shapes(self.op) + need_shapes.sort(key=lambda op: op._id) + for op in need_shapes: + set_shape_and_handle_data_for_outputs(op) return self._shape_val + def _get_input_ops_without_shapes(self, target_op): + """Returns ops needing shape inference to compute target_op's shape.""" + result = [] + stack = [self._op] + visited = set() + while stack: + op = stack.pop() + if op in visited: continue + result.append(op) + stack.extend(t.op for t in op.inputs if t._shape_val is None) + visited.add(op) + return result + + def _c_api_shape(self): + """Returns the TensorShape of this tensor according to the C API.""" + c_graph = self._op._graph._c_graph # pylint: disable=protected-access + shape_vector, unknown_shape = c_api.TF_GraphGetTensorShapeHelper( + c_graph, self._as_tf_output()) + if unknown_shape: + return tensor_shape.unknown_shape() + else: + shape_vector = [None if d == -1 else d for d in shape_vector] + return tensor_shape.TensorShape(shape_vector) + @property def _shape(self): logging.warning("Tensor._shape is private, use Tensor.shape " @@ -469,8 +505,11 @@ class Tensor(_TensorLike): ValueError: If `shape` is not compatible with the current shape of this tensor. """ - if not _USE_C_SHAPES: # pylint: disable=protected-access - self._shape_val = self._shape_val.merge_with(shape) + if _USE_C_SHAPES: # pylint: disable=protected-access + # Reset cached shape. 
+ self._shape_val = None + else: + self._shape_val = self.shape.merge_with(shape) if not self._op._graph._c_graph: return @@ -489,13 +528,11 @@ class Tensor(_TensorLike): else: dim_list.append(dim.value) try: - with errors.raise_exception_on_not_ok_status() as status: - c_api.TF_GraphSetTensorShape_wrapper( - self._op._graph._c_graph, # pylint: disable=protected-access - self._as_tf_output(), - dim_list, - unknown_shape, - status) + c_api.TF_GraphSetTensorShape_wrapper( + self._op._graph._c_graph, # pylint: disable=protected-access + self._as_tf_output(), + dim_list, + unknown_shape) except errors.InvalidArgumentError as e: # Convert to ValueError for backwards compatibility. raise ValueError(str(e)) @@ -584,6 +621,16 @@ class Tensor(_TensorLike): # Necessary to support Python's collection membership operators return id(self) == id(other) + def __copy__(self): + # Make sure _shape_val is computed before we copy. + # TODO(b/77597810): get rid of Tensor copies. + if self._shape_val is None: + set_shape_and_handle_data_for_outputs(self.op) + cls = self.__class__ + result = cls.__new__(cls) + result.__dict__.update(self.__dict__) + return result + # NOTE(mrry): This enables the Tensor's overloaded "right" binary # operators to run when the left operand is an ndarray, because it # accords the Tensor class higher priority than an ndarray, or a @@ -1338,6 +1385,22 @@ def register_tensor_conversion_function(base_type, if not callable(conversion_func): raise TypeError("conversion_func must be callable.") + # context._context is checked so that we don't inadvertently create it. + # This is because enable_eager_execution will fail when called from the main + # function if the context._context is already created, and the + # register_tensor_conversion_function calls happen when the module is + # imported. 
+ if context._context is not None and context.executing_eagerly( + ) and isinstance(base_type, six.integer_types + ( + float, + np.ndarray, + )): + # TODO(nareshmodi): consider setting a context variable which disables the + # fastpath instead. + raise TypeError( + "Cannot register conversions for numpy arrays, python number types " + "when executing eagerly.") + try: funcs_at_priority = _tensor_conversion_func_registry[priority] except KeyError: @@ -1514,13 +1577,10 @@ def _create_c_op(graph, node_def, inputs, control_inputs): serialized = attr_value.SerializeToString() # TODO(skyewm): this creates and deletes a new TF_Status for every attr. # It might be worth creating a convenient way to re-use the same status. - with errors.raise_exception_on_not_ok_status() as status: - c_api.TF_SetAttrValueProto(op_desc, - compat.as_str(name), serialized, status) + c_api.TF_SetAttrValueProto(op_desc, compat.as_str(name), serialized) try: - with errors.raise_exception_on_not_ok_status() as status: - c_op = c_api.TF_FinishOperation(op_desc, status) + c_op = c_api.TF_FinishOperation(op_desc) except errors.InvalidArgumentError as e: # Convert to ValueError for backwards compatibility. raise ValueError(str(e)) @@ -1940,15 +2000,20 @@ class Operation(object): if not isinstance(tensor, Tensor): raise TypeError("tensor must be a Tensor: %s" % tensor) _assert_same_graph(self, tensor) + + # Make sure output shapes are already computed for this op in case we create + # a cycle (we cannot compute shapes for cycles). Usually shapes are computed + # lazily upon request. + if not _USE_C_SHAPES: + set_shape_and_handle_data_for_outputs(self) + if self._c_op: # Reset cached inputs. 
self._inputs_val = None - with errors.raise_exception_on_not_ok_status() as status: - c_api.UpdateEdge( - self._graph._c_graph, # pylint: disable=protected-access - tensor._as_tf_output(), # pylint: disable=protected-access - self._tf_input(index), - status) + c_api.UpdateEdge( + self._graph._c_graph, # pylint: disable=protected-access + tensor._as_tf_output(), # pylint: disable=protected-access + self._tf_input(index)) else: self._inputs_val[index].consumers().remove(self) self._inputs_val[index] = tensor @@ -2123,6 +2188,30 @@ class Operation(object): else: return self._control_inputs_val + @property + def _control_outputs(self): + """The `Operation` objects which have a control dependency on this op. + + Before any of the ops in self._control_outputs can execute tensorflow will + ensure self has finished executing. + + Returns: + A list of `Operation` objects. + + """ + if self._c_op: + control_c_ops = c_api.TF_OperationGetControlOutputs_wrapper(self._c_op) + # pylint: disable=protected-access + return [ + self.graph._get_operation_by_name_unsafe( + c_api.TF_OperationName(c_op)) for c_op in control_c_ops + ] + # pylint: enable=protected-access + else: + # TODO(apassos) this should be less inefficient. 
+ return [o for o in self._graph.get_operations() + if self in o.control_inputs] + @property def _control_inputs(self): logging.warning("Operation._control_inputs is private, use " @@ -2169,8 +2258,7 @@ class Operation(object): # pylint: enable=line-too-long if self._c_op: with c_api_util.tf_buffer() as buf: - with errors.raise_exception_on_not_ok_status() as status: - c_api.TF_OperationToNodeDef(self._c_op, buf, status) + c_api.TF_OperationToNodeDef(self._c_op, buf) data = c_api.TF_GetBuffer(buf) node_def = node_def_pb2.NodeDef() node_def.ParseFromString(compat.as_bytes(data)) @@ -2228,11 +2316,9 @@ class Operation(object): buf = c_api.TF_NewBufferFromString( compat.as_bytes(attr_value.SerializeToString())) try: - with errors.raise_exception_on_not_ok_status() as status: - # pylint: disable=protected-access - c_api.SetAttr(self._graph._c_graph, self._c_op, attr_name, buf, - status) - # pylint: enable=protected-access + # pylint: disable=protected-access + c_api.SetAttr(self._graph._c_graph, self._c_op, attr_name, buf) + # pylint: enable=protected-access finally: c_api.TF_DeleteBuffer(buf) else: @@ -2254,8 +2340,7 @@ class Operation(object): if self._c_op: try: with c_api_util.tf_buffer() as buf: - with errors.raise_exception_on_not_ok_status() as status: - c_api.TF_OperationGetAttrValueProto(self._c_op, name, buf, status) + c_api.TF_OperationGetAttrValueProto(self._c_op, name, buf) data = c_api.TF_GetBuffer(buf) except errors.InvalidArgumentError as e: # Convert to ValueError for backwards compatibility. @@ -2464,36 +2549,41 @@ class RegisterShape(object): return f -def _set_shapes_for_outputs_c_api(op): - """set_shapes_for_outputs implementation when C API is enabled.""" - # The C API computes the shapes when the TF_Operation is created. Fetch the - # output shapes from the C object. 
+# TODO(b/74620627): remove when _USE_C_SHAPES is removed +def _set_shape_and_handle_data_for_outputs_c_api(op): + """Set shapes and resource handle data using info from the C API.""" + assert not _USE_C_SHAPES for output in op.outputs: - with errors.raise_exception_on_not_ok_status() as status: - # pylint: disable=protected-access - shape_vector, unknown_shape = c_api.TF_GraphGetTensorShapeHelper( - op._graph._c_graph, output._as_tf_output(), status) - # pylint: enable=protected-access - if unknown_shape: - output.set_shape(tensor_shape.unknown_shape()) - elif not shape_vector: - output.set_shape(tensor_shape.scalar()) - else: - shape_vector = [None if d == -1 else d for d in shape_vector] - output.set_shape(tensor_shape.TensorShape(shape_vector)) - - serialized = c_api.ResourceHandleShapeAndType(op._graph._c_graph, - output._as_tf_output()) + output._shape_val = output._c_api_shape() + # Set the resource handle data for compatibility with the Python shape + # inference code. + serialized = c_api.ResourceHandleShapeAndType( + op._graph._c_graph, output._as_tf_output()) if serialized: output._handle_data = ( - cpp_shape_inference_pb2.CppShapeInferenceResult.HandleData.FromString( - compat.as_bytes(serialized))) + cpp_shape_inference_pb2.CppShapeInferenceResult.HandleData + .FromString(compat.as_bytes(serialized))) else: output._handle_data = None -# TODO(skyewm): remove this when _USE_C_API flag is removed. -def _set_shapes_for_outputs(op): - """set_shapes_for_outputs implementation when C API is disabled.""" + +# TODO(b/74620627): remove when _USE_C_SHAPES is removed +def set_shape_and_handle_data_for_outputs(op): + """Set the shapes and resource handle data for op's outputs. + + When _USE_C_API = True, this is lazily called when a tensor's shape is first + requested. Usually this should work automatically, but some edge cases may + require manaully calling this first to make sure Tensor._shape_val and + Tensor._handle_data are set (e.g. 
manually overriding _handle_data, copying a + Tensor). + """ + if _USE_C_SHAPES: return + + if op.graph._is_function(op.type): + for output in op.outputs: + output._shape_val = tensor_shape.unknown_shape() + return + try: shape_func = _shape_registry.lookup(op.type) except LookupError: @@ -2512,8 +2602,10 @@ def _set_shapes_for_outputs(op): shapes = shapes_dict["shapes"] handle_datas = shapes_dict["handle_data"] for output, handle_data in zip(op.outputs, handle_datas): + # Don't override any existing handle data that may have been manually set. # pylint: disable=protected-access - output._handle_data = handle_data + if output._handle_data is None: + output._handle_data = handle_data # pylint: enable=protected-access if len(op.outputs) != len(shapes): @@ -2521,15 +2613,8 @@ def _set_shapes_for_outputs(op): "Shape function for op %s returned %d shapes but expected %d %s %s" % (op, len(shapes), len(op.outputs), shape_func.__name__, str(shapes))) for output, s in zip(op.outputs, shapes): - output.set_shape(s) - - -def set_shapes_for_outputs(op): - """Set the shapes for op's outputs.""" - if op._c_op and _USE_C_SHAPES: # pylint: disable=protected-access - return _set_shapes_for_outputs_c_api(op) - else: - return _set_shapes_for_outputs(op) + output._shape_val = tensor_shape.unknown_shape() + output._shape_val = output._shape_val.merge_with(s) class OpStats(object): @@ -2994,8 +3079,7 @@ class Graph(object): # pylint: enable=line-too-long if self._c_graph: with c_api_util.tf_buffer() as buf: - with errors.raise_exception_on_not_ok_status() as status: - c_api.TF_GraphVersions(self._c_graph, buf, status) + c_api.TF_GraphVersions(self._c_graph, buf) data = c_api.TF_GetBuffer(buf) version_def = versions_pb2.VersionDef() version_def.ParseFromString(compat.as_bytes(data)) @@ -3098,8 +3182,7 @@ class Graph(object): if self._c_graph: with self._lock: with c_api_util.tf_buffer() as buf: - with errors.raise_exception_on_not_ok_status() as status: - 
c_api.TF_GraphToGraphDef(self._c_graph, buf, status) + c_api.TF_GraphToGraphDef(self._c_graph, buf) data = c_api.TF_GetBuffer(buf) graph = graph_pb2.GraphDef() graph.ParseFromString(compat.as_bytes(data)) @@ -3208,14 +3291,12 @@ class Graph(object): # remove this when all functions are generated using the C API by default # as this will be unnecessary. if not function._c_func: - with errors.raise_exception_on_not_ok_status() as status: - serialized = function.definition.SerializeToString() - function._c_func = c_api.TF_FunctionImportFunctionDef( - serialized, status) - with errors.raise_exception_on_not_ok_status() as status: - gradient = function._grad_func._c_func if function._grad_func else None - c_api.TF_GraphCopyFunction(self._c_graph, function._c_func, gradient, - status) + serialized = function.definition.SerializeToString() + c_func = c_api.TF_FunctionImportFunctionDef(serialized) + function._c_func = c_api_util.ScopedTFFunction(c_func) + gradient = (function._grad_func._c_func.func if function._grad_func + else None) + c_api.TF_GraphCopyFunction(self._c_graph, function._c_func.func, gradient) else: # If there is already a function with the same name, raise an error # if bodies are different. Else, do nothing. The C API version above @@ -3326,18 +3407,14 @@ class Graph(object): original_op=self._default_original_op, op_def=op_def) - # TODO(vrv): Instead of eagerly filling in shape property for every op, - # only populate the shape when requested. + # Note: shapes are lazily computed with the C API enabled. # # TODO(skyewm): unlike in the original Python implementation, the C API # always computes shape information (even for function calls, which the # original Python shape inference code doesn't handle). Deprecate the # compute_shapes argument. 
- # - # TODO(b/74620627): move this back to _create_op_helper once _USE_C_SHAPES - # is removed - if (ret._c_op and _USE_C_SHAPES) or compute_shapes: # pylint: disable=protected-access - set_shapes_for_outputs(ret) + if not _USE_C_API and compute_shapes: + set_shape_and_handle_data_for_outputs(ret) self._create_op_helper(ret, compute_shapes=compute_shapes, compute_device=compute_device) @@ -3365,8 +3442,12 @@ class Graph(object): """ self._check_not_finalized() ret = Operation(c_op, self) - assert ret.name not in self._names_in_use - self._names_in_use[ret.name] = 1 + # If a name_scope was created with ret.name but no nodes were created in it, + # the name will still appear in _names_in_use even though the name hasn't + # been used. This is ok, just leave _names_in_use as-is in this case. + # TODO(skyewm): make the C API guarantee no name conflicts. + if ret.name not in self._names_in_use: + self._names_in_use[ret.name] = 1 self._create_op_helper(ret, compute_device=compute_device) return ret @@ -3475,18 +3556,17 @@ class Graph(object): for c_op in c_api_util.new_tf_operations(self) ] + # pylint: disable=protected-access for op in new_ops: # Operations created by the C API always retrieve shapes from the C API so # we preserve the shapes of ops created in import_graph_def (from the # "_output_shapes" attr of the imported NodeDef). - # TODO(b/74620627): move this back to _create_op_helper once _USE_C_SHAPES - # is removed. - _set_shapes_for_outputs_c_api(op) + if not _USE_C_SHAPES: + _set_shape_and_handle_data_for_outputs_c_api(op) new_control_inputs = self._control_dependencies_for_inputs(op.inputs) - # pylint: disable=protected-access op._add_control_inputs(new_control_inputs) op._control_flow_post_processing() - # pylint: enable=protected-access + # pylint: enable=protected-access return new_ops @@ -3732,11 +3812,9 @@ class Graph(object): """Returns the `OpDef` proto for `type`. 
`type` is a string.""" if self._c_graph: with c_api_util.tf_buffer() as buf: - with errors.raise_exception_on_not_ok_status() as status: - # pylint: disable=protected-access - c_api.TF_GraphGetOpDef(self._c_graph, - compat.as_bytes(type), buf, status) - # pylint: enable=protected-access + # pylint: disable=protected-access + c_api.TF_GraphGetOpDef(self._c_graph, compat.as_bytes(type), buf) + # pylint: enable=protected-access data = c_api.TF_GetBuffer(buf) op_def = op_def_pb2.OpDef() op_def.ParseFromString(compat.as_bytes(data)) @@ -4118,6 +4196,19 @@ class Graph(object): """ return self._name_stack + @tf_contextlib.contextmanager + def _colocate_with_for_gradient(self, op, gradient_uid, + ignore_existing=False): + with self.colocate_with(op, ignore_existing): + if gradient_uid is not None and self._control_flow_context is not None: + try: + self._control_flow_context.EnterGradientColocation(op, gradient_uid) + yield + finally: + self._control_flow_context.ExitGradientColocation(op, gradient_uid) + else: + yield + @tf_contextlib.contextmanager def colocate_with(self, op, ignore_existing=False): """Returns a context manager that specifies an op to colocate with. @@ -4512,6 +4603,22 @@ class Graph(object): return tf.matmul(tensor, tensor) ``` + Also note that though execution of ops created under this scope will trigger + execution of the dependencies, the ops created under this scope might still + be pruned from a normal tensorflow graph. For example, in the following + snippet of code the dependencies are never executed: + + ```python + loss = model.loss() + with tf.control_dependencies(dependencies): + loss = loss + tf.constant(1) # note: dependencies ignored in the + # backward pass + return tf.gradients(loss, model.variables) + ``` + + This is because evaluating the gradient graph does not require evaluating + the constant(1) op created in the forward pass. 
+ Args: control_inputs: A list of `Operation` or `Tensor` objects which must be executed or computed before running the operations @@ -4881,8 +4988,7 @@ def container(container_name): return get_default_graph().container(container_name) -@tf_export("colocate_with") -def colocate_with(op, ignore_existing=False): +def _colocate_with_for_gradient(op, gradient_uid, ignore_existing=False): if context.executing_eagerly(): if op is not None: return device(op.device) @@ -4896,7 +5002,13 @@ def colocate_with(op, ignore_existing=False): else: raise ValueError("Encountered an Eager-defined Tensor during graph " "construction, but a function was not being built.") - return default_graph.colocate_with(op, ignore_existing) + return default_graph._colocate_with_for_gradient( + op, gradient_uid=gradient_uid, ignore_existing=ignore_existing) + + +@tf_export("colocate_with") +def colocate_with(op, ignore_existing=False): + return _colocate_with_for_gradient(op, None, ignore_existing=ignore_existing) @tf_export("control_dependencies") @@ -5149,14 +5261,35 @@ class _DefaultGraphStack(_DefaultStack): # pylint: disable=protected-access @tf_contextlib.contextmanager def get_controller(self, default): try: - context.context().context_switches.push(default.building_function, - default.as_default) + if context.executing_eagerly(): + # A Graph alone on the context stack would keep init_scope-wrapped + # operations graph building when entered (assuming init_scope is called + # in a graph building context). Instead, we push a context which first + # enables eager execution and then re-enters the Graph. + context.context().context_switches.push( + default.building_function, + functools.partial( + _enter_context_and_graph, + context.eager_mode, + default.as_default)) + else: + # This Graph is being used from a graph building context. A lack of + # context switch implies that the context is graph building. 
+ context.context().context_switches.push(default.building_function, + default.as_default) with super(_DefaultGraphStack, self).get_controller(default) as g: yield g finally: context.context().context_switches.pop() +@tf_contextlib.contextmanager +def _enter_context_and_graph(context_fn, graph_fn): + """Combines two context managers.""" + with context_fn(), graph_fn(): + yield + + _default_graph_stack = _DefaultGraphStack() @@ -5350,6 +5483,10 @@ def enable_eager_execution(config=None, device_policy=None, raise ValueError( "tf.enable_eager_execution must be called at program startup.") + # Monkey patch to get rid of an unnecessary conditional since the context is + # now initialized. + context.context = context.context_safe + def eager_run(main=None, argv=None): """Runs the program with an optional main function and argv list. diff --git a/tensorflow/python/framework/ops_test.py b/tensorflow/python/framework/ops_test.py index aa51391871f4c12d34b86311cc5b8ea9aabd5434..c9c1a3d66be1051859b3dc4eef67803881efcd55 100644 --- a/tensorflow/python/framework/ops_test.py +++ b/tensorflow/python/framework/ops_test.py @@ -473,6 +473,7 @@ class OperationTest(test_util.TensorFlowTestCase): self.assertEqual(z.control_inputs, [x, x]) z._add_control_inputs([x, y, y]) # pylint: disable=protected-access self.assertEqual(z.control_inputs, [x, x, x, y, y]) + self.assertEqual(x._control_outputs, [z]) def testAddControlInputC(self): # The C API dedups redundant control edges, pure Python does not @@ -487,6 +488,7 @@ class OperationTest(test_util.TensorFlowTestCase): self.assertEqual(z.control_inputs, [x]) z._add_control_inputs([x, y, y]) # pylint: disable=protected-access self.assertEqual(z.control_inputs, [x, y]) + self.assertEqual(x._control_outputs, [z]) def testRemoveAllControlInputs(self): a = constant_op.constant(1) @@ -2303,6 +2305,13 @@ class InitScopeTest(test_util.TensorFlowTestCase): self.assertEqual(ops.get_name_scope(), "inner") self.assertEqual(ops.get_name_scope(), "") + def 
testEagerGraphContextsExecuteEagerly(self): + with context.eager_mode(): + with ops.Graph().as_default(): + with context.graph_mode(): + with ops.init_scope(): + self.assertTrue(context.executing_eagerly()) + def testPreservesNameScopeInEagerExecution(self): with context.eager_mode(): def foo(): diff --git a/tensorflow/python/framework/python_op_gen.cc b/tensorflow/python/framework/python_op_gen.cc index e5e3b821998718e7b87a95439a442df98ad7c997..ad6c36b4b1773ecbbb5be56f7eaf2d78af3d010c 100644 --- a/tensorflow/python/framework/python_op_gen.cc +++ b/tensorflow/python/framework/python_op_gen.cc @@ -98,7 +98,7 @@ bool IsOpWithUnderscorePrefix(const string& s) { // TODO(annarev): reduce usage of '*' imports and remove these from the // list. "fused_batch_norm", "histogram_fixed_width", "stack", - "batch_norm_with_global_normalization"}); + "batch_norm_with_global_normalization", "clip_by_value"}); return kUnderscoreOps->count(s) > 0; } diff --git a/tensorflow/python/framework/python_op_gen.i b/tensorflow/python/framework/python_op_gen.i index 26ec4e8e66b5d4e3be433c9e59f9b6034109d153..efcce2f20941790571199d358dfcfcc3c0283d08 100644 --- a/tensorflow/python/framework/python_op_gen.i +++ b/tensorflow/python/framework/python_op_gen.i @@ -16,10 +16,10 @@ limitations under the License. %include "tensorflow/python/platform/base.i" %{ -#include "tensorflow/python/framework/python_op_gen.h" +#include "tensorflow/python/eager/python_eager_op_gen.h" %} -// Input typemap for GetPythonWrappers. +// Input typemap for GetEagerPythonWrappers. // Accepts a python object of 'bytes' type, and converts it to // a const char* pointer and size_t length. The default typemap // going from python bytes to const char* tries to decode the @@ -37,5 +37,5 @@ limitations under the License. 
%ignoreall; -%unignore tensorflow::GetPythonWrappers; -%include "tensorflow/python/framework/python_op_gen.h" +%unignore tensorflow::GetEagerPythonWrappers; +%include "tensorflow/python/eager/python_eager_op_gen.h" diff --git a/tensorflow/python/framework/smart_cond.py b/tensorflow/python/framework/smart_cond.py index c7ff23e4ff809ed7bc57259fa3ec9feb921b5a71..48a834392b47b4cdcc82381153852584052a5aad 100644 --- a/tensorflow/python/framework/smart_cond.py +++ b/tensorflow/python/framework/smart_cond.py @@ -19,7 +19,6 @@ from __future__ import division from __future__ import print_function from tensorflow.python import pywrap_tensorflow as c_api -from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_util from tensorflow.python.ops import control_flow_ops @@ -83,9 +82,8 @@ def smart_constant_value(pred): # wanted to limit the change hidden behind _USE_C_API). # pylint: disable=protected-access if pred_value is None and ops._USE_C_API: - with errors.raise_exception_on_not_ok_status() as status: - pred_value = c_api.TF_TryEvaluateConstant_wrapper( - pred.graph._c_graph, pred._as_tf_output(), status) + pred_value = c_api.TF_TryEvaluateConstant_wrapper(pred.graph._c_graph, + pred._as_tf_output()) # pylint: enable=protected-access else: diff --git a/tensorflow/python/framework/tensor_shape.py b/tensorflow/python/framework/tensor_shape.py index af2a5b1a7ef9a70c0baf5d02257951803a7a76fa..0dd29460ed93aadf61ef1f1b2dbf1d7802ca4877 100644 --- a/tensorflow/python/framework/tensor_shape.py +++ b/tensorflow/python/framework/tensor_shape.py @@ -18,6 +18,7 @@ from __future__ import division from __future__ import print_function from tensorflow.core.framework import tensor_shape_pb2 +from tensorflow.python.framework import dtypes from tensorflow.python.util import compat from tensorflow.python.util.tf_export import tf_export @@ -30,6 +31,8 @@ class Dimension(object): """Creates a new Dimension with the 
given value.""" if value is None: self._value = None + elif isinstance(value, dtypes.DType): + raise TypeError("Cannot convert %s to Dimension" % value) else: self._value = int(value) if (not isinstance(value, compat.bytes_or_text_types) and @@ -456,6 +459,9 @@ class Dimension(object): else: return self._value >= other.value + def __reduce__(self): + return Dimension, (self._value,) + def as_dimension(value): """Converts the given value to a Dimension. @@ -928,6 +934,9 @@ class TensorShape(object): return True return self._dims != other.dims + def __reduce__(self): + return TensorShape, (self._dims,) + def as_shape(shape): """Converts the given object to a TensorShape.""" diff --git a/tensorflow/python/framework/tensor_shape_test.py b/tensorflow/python/framework/tensor_shape_test.py index 4e8ce4d889c4ef0c6e56806587a64e8f9be7e10a..9232d99a1f932b9e48cd7ddc125e353390064873 100644 --- a/tensorflow/python/framework/tensor_shape_test.py +++ b/tensorflow/python/framework/tensor_shape_test.py @@ -19,6 +19,7 @@ from __future__ import division from __future__ import print_function from tensorflow.core.framework import tensor_shape_pb2 +from tensorflow.python.framework import dtypes from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import test_util from tensorflow.python.platform import googletest @@ -184,6 +185,10 @@ class DimensionTest(test_util.TensorFlowTestCase): self.assertEqual(str(tensor_shape.Dimension(7)), "7") self.assertEqual(str(tensor_shape.Dimension(None)), "?") + def testUnsupportedType(self): + with self.assertRaises(TypeError): + tensor_shape.Dimension(dtypes.string) + def testMod(self): four = tensor_shape.Dimension(4) nine = tensor_shape.Dimension(9) @@ -192,6 +197,14 @@ class DimensionTest(test_util.TensorFlowTestCase): self.assertEqual(nine % 4, 1) self.assertEqual(4 % nine, 4) + def testReduce(self): + dim = tensor_shape.Dimension(5) + ctor, args = dim.__reduce__() + self.assertEquals(ctor, tensor_shape.Dimension) + 
self.assertEquals(args, (5,)) + reconstructed = ctor(*args) + self.assertEquals(reconstructed, dim) + class ShapeTest(test_util.TensorFlowTestCase): @@ -417,5 +430,15 @@ class ShapeTest(test_util.TensorFlowTestCase): self.assertAllEqual([2, None, 4], tensor_shape.TensorShape( (2, None, 4)).as_list()) + def testReduce(self): + shape = tensor_shape.TensorShape([2, 3]) + ctor, args = shape.__reduce__() + self.assertEquals(ctor, tensor_shape.TensorShape) + self.assertEquals(args, ([tensor_shape.Dimension(2), + tensor_shape.Dimension(3)],)) + reconstructed = ctor(*args) + self.assertEquals(reconstructed, shape) + + if __name__ == "__main__": googletest.main() diff --git a/tensorflow/python/framework/tensor_util.py b/tensorflow/python/framework/tensor_util.py index 984bcecdfe05efd79bdf218197c410b14abe3516..8cf24206edab8be807eca1d067662a57585e2bda 100644 --- a/tensorflow/python/framework/tensor_util.py +++ b/tensorflow/python/framework/tensor_util.py @@ -22,7 +22,6 @@ import six from tensorflow.core.framework import tensor_pb2 from tensorflow.core.framework import tensor_shape_pb2 -from tensorflow.python.eager import context from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.util import compat @@ -823,17 +822,32 @@ def constant_value_as_shape(tensor): # pylint: disable=invalid-name all-or-nothing. Args: - tensor: The rank-1 Tensor to be evaluated. + tensor: The rank-0 or rank-1 Tensor to be evaluated. Returns: A `TensorShape` based on the constant value of the given `tensor`. + + Raises: + ValueError: If the shape is rank-0 and is not statically known to be -1. 
""" - if context.executing_eagerly(): + if isinstance(tensor, ops.EagerTensor): return tensor_shape.as_shape( [dim if dim != -1 else None for dim in tensor.numpy()]) + if tensor.get_shape().ndims == 0: + value = constant_value(tensor) + if value is None: + raise ValueError( + "Received a scalar with unknown value as shape; require a statically " + "known scalar with value '-1' to describe an unknown shape.") + if value != -1: + raise ValueError( + "Received a scalar value '%s' as shape; require a statically known " + "scalar with value '-1' to describe an unknown shape." % value) + return tensor_shape.unknown_shape() + shape = tensor.get_shape().with_rank(1) - if tensor.get_shape() == [0]: + if shape == [0]: return tensor_shape.scalar() elif tensor.op.type == "Shape": return tensor.op.inputs[0].get_shape() diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py index bf00fa6439b82234e951598131d2d7ab579fb6c4..5a8bc437273471ccbbf2e11d0f7ef89f1354fc73 100644 --- a/tensorflow/python/framework/test_util.py +++ b/tensorflow/python/framework/test_util.py @@ -464,6 +464,30 @@ def with_c_api(cls): return cls +def with_c_shapes(cls): + """Adds methods that call original methods but with C API shapes enabled. + + Note this enables C shapes in new methods after running the test class's + setup method. + + Args: + cls: class to decorate + + Returns: + cls with new test methods added + """ + # If C shapes are already enabled, don't do anything. Some tests break if the + # same test is run twice, so this allows us to turn on the C shapes by default + # without breaking these tests. + if ops._USE_C_SHAPES: + return cls + + for name, value in cls.__dict__.copy().items(): + if callable(value) and name.startswith("test"): + setattr(cls, name + "WithCShapes", enable_c_shapes(value)) + return cls + + def assert_no_new_pyobjects_executing_eagerly(f): """Decorator for asserting that no new Python objects persist after a test. 
@@ -615,45 +639,68 @@ def assert_no_garbage_created(f): def run_in_graph_and_eager_modes(__unused__=None, - graph=None, config=None, - use_gpu=False, - force_gpu=False, + use_gpu=True, reset_test=True, assert_no_eager_garbage=False): - """Runs the test in both graph and eager modes. + """Execute the decorated test with and without enabling eager execution. + + This function returns a decorator intended to be applied to test methods in + a @{tf.test.TestCase} class. Doing so will cause the contents of the test + method to be executed twice - once normally, and once with eager execution + enabled. This allows unittests to confirm the equivalence between eager + and graph execution (see @{tf.enable_eager_execution}). + + For example, consider the following unittest: + + ```python + class MyTests(tf.test.TestCase): + + @run_in_graph_and_eager_modes() + def test_foo(self): + x = tf.constant([1, 2]) + y = tf.constant([3, 4]) + z = tf.add(x, y) + self.assertAllEqual([4, 6], self.evaluate(z)) + + if __name__ == "__main__": + tf.test.main() + ``` + + This test validates that `tf.add()` has the same behavior when computed with + eager execution enabled as it does when constructing a TensorFlow graph and + executing the `z` tensor in a session. + Args: __unused__: Prevents sliently skipping tests. - graph: Optional graph to use during the returned session. config: An optional config_pb2.ConfigProto to use to configure the - session. - use_gpu: If True, attempt to run as many ops as possible on GPU. - force_gpu: If True, pin all ops to `/device:GPU:0`. - reset_test: If True, tearDown and SetUp the test case again. + session when executing graphs. + use_gpu: If True, attempt to run as many operations as possible on GPU. + reset_test: If True, tearDown and SetUp the test case between the two + executions of the test (once with and once without eager execution). 
assert_no_eager_garbage: If True, sets DEBUG_SAVEALL on the garbage collector and asserts that no extra garbage has been created when running - the test in eager mode. This will fail if there are reference cycles - (e.g. a = []; a.append(a)). Off by default because some tests may create - garbage for legitimate reasons (e.g. they define a class which inherits - from `object`), and because DEBUG_SAVEALL is sticky in some Python - interpreters (meaning that tests which rely on objects being collected - elsewhere in the unit test file will not work). Additionally, checks that - nothing still has a reference to Tensors that the test allocated. + the test with eager execution enabled. This will fail if there are + reference cycles (e.g. a = []; a.append(a)). Off by default because some + tests may create garbage for legitimate reasons (e.g. they define a class + which inherits from `object`), and because DEBUG_SAVEALL is sticky in some + Python interpreters (meaning that tests which rely on objects being + collected elsewhere in the unit test file will not work). Additionally, + checks that nothing still has a reference to Tensors that the test + allocated. Returns: - Returns a decorator that will run the decorated test function - using both a graph and using eager execution. + Returns a decorator that will run the decorated test method twice: + once by constructing and executing a graph in a session and once with + eager execution enabled. """ assert not __unused__, "Add () after run_in_graph_and_eager_modes." 
def decorator(f): - """Test method decorator.""" - def decorated(self, **kwargs): - """Decorated the test method.""" with context.graph_mode(): - with self.test_session(graph, config, use_gpu, force_gpu): + with self.test_session(use_gpu=use_gpu): f(self, **kwargs) if reset_test: @@ -663,27 +710,20 @@ def run_in_graph_and_eager_modes(__unused__=None, self._tempdir = None self.setUp() - def run_eager_mode(self, **kwargs): - if force_gpu: - gpu_name = gpu_device_name() - if not gpu_name: - gpu_name = "/device:GPU:0" - with context.device(gpu_name): - f(self) - elif use_gpu: - # TODO(xpan): Support softplacement and gpu by default when available. - f(self, **kwargs) - else: - with context.device("/device:CPU:0"): + def run_eagerly(self, **kwargs): + if not use_gpu: + with ops.device("/cpu:0"): f(self, **kwargs) + else: + f(self, **kwargs) if assert_no_eager_garbage: - run_eager_mode = assert_no_new_tensors( - assert_no_garbage_created(run_eager_mode)) + run_eagerly = assert_no_new_tensors( + assert_no_garbage_created(run_eagerly)) with context.eager_mode(): with ops.Graph().as_default(): - run_eager_mode(self, **kwargs) + run_eagerly(self, **kwargs) return decorated @@ -974,6 +1014,8 @@ class TensorFlowTestCase(googletest.TestCase): config.graph_options.optimizer_options.opt_level = -1 config.graph_options.rewrite_options.constant_folding = ( rewriter_config_pb2.RewriterConfig.OFF) + config.graph_options.rewrite_options.arithmetic_optimization = ( + rewriter_config_pb2.RewriterConfig.OFF) return config if graph is None: @@ -1364,7 +1406,9 @@ class TensorFlowTestCase(googletest.TestCase): " %s" % (a.shape, b.shape, msg)) same = (a == b) - if a.dtype == np.float32 or a.dtype == np.float64: + if (a.dtype in [ + np.float16, np.float32, np.float64, dtypes.bfloat16.as_numpy_dtype + ]): same = np.logical_or(same, np.logical_and(np.isnan(a), np.isnan(b))) if not np.all(same): # Prints more details than np.testing.assert_array_equal. 
diff --git a/tensorflow/python/framework/versions.py b/tensorflow/python/framework/versions.py index 06955b885852a641bc814f88c99838effe03bfd4..472ccbcac7a447926989cfbef27ec1ea9d71e91c 100644 --- a/tensorflow/python/framework/versions.py +++ b/tensorflow/python/framework/versions.py @@ -29,15 +29,19 @@ __cxx11_abi_flag__ = pywrap_tensorflow.__cxx11_abi_flag__ __monolithic_build__ = pywrap_tensorflow.__monolithic_build__ VERSION = __version__ -tf_export("VERSION").export_constant(__name__, "VERSION") +tf_export("VERSION", "__version__").export_constant(__name__, "VERSION") GIT_VERSION = __git_version__ -tf_export("GIT_VERSION").export_constant(__name__, "GIT_VERSION") +tf_export("GIT_VERSION", "__git_version__").export_constant( + __name__, "GIT_VERSION") COMPILER_VERSION = __compiler_version__ -tf_export("COMPILER_VERSION").export_constant(__name__, "COMPILER_VERSION") +tf_export("COMPILER_VERSION", "__compiler_version__").export_constant( + __name__, "COMPILER_VERSION") CXX11_ABI_FLAG = __cxx11_abi_flag__ -tf_export("CXX11_ABI_FLAG").export_constant(__name__, "CXX11_ABI_FLAG") +tf_export("CXX11_ABI_FLAG", "__cxx11_abi_flag__").export_constant( + __name__, "CXX11_ABI_FLAG") MONOLITHIC_BUILD = __monolithic_build__ -tf_export("MONOLITHIC_BUILD").export_constant(__name__, "MONOLITHIC_BUILD") +tf_export("MONOLITHIC_BUILD", "__monolithic_build__").export_constant( + __name__, "MONOLITHIC_BUILD") GRAPH_DEF_VERSION = pywrap_tensorflow.GRAPH_DEF_VERSION tf_export("GRAPH_DEF_VERSION").export_constant(__name__, "GRAPH_DEF_VERSION") diff --git a/tensorflow/python/grappler/cluster.i b/tensorflow/python/grappler/cluster.i index 067c8213d4741936e4c28aaedf4f30639b8cdc41..6816e204075bc37c6958efa5b028417078c36b2b 100644 --- a/tensorflow/python/grappler/cluster.i +++ b/tensorflow/python/grappler/cluster.i @@ -320,7 +320,8 @@ static PyObject* TF_MeasureCosts( tensorflow::OpPerformanceList op_performance_data; tensorflow::StepStats step_stats; - 
tensorflow::grappler::MeasuringCostEstimator cost_measure(cluster.get(), 10, 0); + const int num_measurements = cluster->type() == "virtual" ? 1 : 10; + tensorflow::grappler::MeasuringCostEstimator cost_measure(cluster.get(), num_measurements, 0); tensorflow::grappler::Costs costs; tensorflow::Status status = _GetOpPerformanceDataAndRunTime( diff --git a/tensorflow/python/grappler/constant_folding_test.py b/tensorflow/python/grappler/constant_folding_test.py new file mode 100644 index 0000000000000000000000000000000000000000..ab1d0ed25b9130fabcffbb8da2265c046206da46 --- /dev/null +++ b/tensorflow/python/grappler/constant_folding_test.py @@ -0,0 +1,69 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for Grappler Constant Folding.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.python.client import session +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import functional_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.platform import test + + +class ConstantFoldingTest(test.TestCase): + + # See b/76008022. 
+ def testScanInsideWhile(self): + + def loop_cond(idx_step, *unused_args): + return idx_step < 1 + + def loop_body(idx_step, y): + x = array_ops.zeros([10, 20, 30], dtype=dtypes.float32) + x = functional_ops.scan( + math_ops.add, + x, + initializer=array_ops.zeros([20, 30], dtype=dtypes.float32), + back_prop=False, + parallel_iterations=1) + + with ops.device('/cpu:0'): + y = array_ops.identity(x) + + return idx_step + 1, y + + if test.is_gpu_available(cuda_only=True): + init_y = array_ops.zeros([10, 20, 30], dtype=dtypes.float32) + _, y = control_flow_ops.while_loop( + loop_cond, + loop_body, + loop_vars=[0, init_y], + back_prop=False, + parallel_iterations=1) + with session.Session() as sess: + y_v = sess.run(y) + self.assertAllEqual(np.zeros([10, 20, 30]), y_v) + + +if __name__ == '__main__': + test.main() diff --git a/tensorflow/python/grappler/item.py b/tensorflow/python/grappler/item.py index 4a083849bd39f606877069419396d8c42ef077eb..1748efdd130268f2668cd8cb1b5c2da18bafd549 100644 --- a/tensorflow/python/grappler/item.py +++ b/tensorflow/python/grappler/item.py @@ -51,9 +51,7 @@ class Item(object): self._BuildTFItem() def IdentifyImportantOps(self, sort_topologically=False): - with errors.raise_exception_on_not_ok_status() as status: - return tf_item.TF_IdentifyImportantOps(self.tf_item, sort_topologically, - status) + return tf_item.TF_IdentifyImportantOps(self.tf_item, sort_topologically) def GetOpProperties(self): ret_from_swig = tf_item.TF_GetOpProperties(self.tf_item) diff --git a/tensorflow/python/grappler/layout_optimizer_test.py b/tensorflow/python/grappler/layout_optimizer_test.py index 5a84b16a23f567fba6d08aaefd3b816a76907735..e3dd4b0bdfbb28480c78b0add5947c79852fb44d 100644 --- a/tensorflow/python/grappler/layout_optimizer_test.py +++ b/tensorflow/python/grappler/layout_optimizer_test.py @@ -476,7 +476,7 @@ class LayoutOptimizerTest(test.TestCase): random_seed.set_random_seed(0) x = random_ops.truncated_normal([1, 784], seed=0) conv = 
_two_layer_model(x) - reduce_sum = math_ops.reduce_sum(conv, axis=[1, 2], keep_dims=True) + reduce_sum = math_ops.reduce_sum(conv, axis=[1, 2], keepdims=True) squeeze = array_ops.squeeze(reduce_sum, axis=[1, 2]) output = array_ops.identity(squeeze) @@ -506,7 +506,7 @@ class LayoutOptimizerTest(test.TestCase): random_seed.set_random_seed(0) x = random_ops.truncated_normal([1, 784], seed=0) conv = _two_layer_model(x) - reduce_sum = math_ops.reduce_sum(conv, axis=[0, 1, 2], keep_dims=True) + reduce_sum = math_ops.reduce_sum(conv, axis=[0, 1, 2], keepdims=True) squeeze = array_ops.squeeze(reduce_sum, axis=[0, 1, 2]) output = array_ops.identity(squeeze) @@ -623,7 +623,7 @@ class LayoutOptimizerTest(test.TestCase): random_seed.set_random_seed(0) x = random_ops.truncated_normal([1, 784], seed=0) conv = _two_layer_model(x) - reduce_sum = math_ops.reduce_sum(conv, axis=[3], keep_dims=True) + reduce_sum = math_ops.reduce_sum(conv, axis=[3], keepdims=True) output = array_ops.identity(reduce_sum) with session.Session(config=_get_config(False)) as sess: @@ -653,7 +653,7 @@ class LayoutOptimizerTest(test.TestCase): random_seed.set_random_seed(0) x = random_ops.truncated_normal([1, 784], seed=0) conv = _two_layer_model(x) - reduce_sum = math_ops.reduce_sum(conv, axis=[2], keep_dims=True) + reduce_sum = math_ops.reduce_sum(conv, axis=[2], keepdims=True) output = array_ops.identity(reduce_sum) with session.Session(config=_get_config(False)) as sess: @@ -682,7 +682,7 @@ class LayoutOptimizerTest(test.TestCase): random_seed.set_random_seed(0) x = random_ops.truncated_normal([1, 784], seed=0) conv = _two_layer_model(x) - reduce_sum = math_ops.reduce_sum(conv, axis=[2, 3], keep_dims=True) + reduce_sum = math_ops.reduce_sum(conv, axis=[2, 3], keepdims=True) output = array_ops.identity(reduce_sum) with session.Session(config=_get_config(False)) as sess: diff --git a/tensorflow/python/grappler/tf_optimizer_test.py b/tensorflow/python/grappler/tf_optimizer_test.py index 
3ee4d7807ea5677a742514eb56267b94c6b92bba..1c0f072dd32d38f048cfa48d38b45264951d095e 100644 --- a/tensorflow/python/grappler/tf_optimizer_test.py +++ b/tensorflow/python/grappler/tf_optimizer_test.py @@ -17,12 +17,16 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function - from tensorflow.core.protobuf import rewriter_config_pb2 from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes from tensorflow.python.framework import meta_graph from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_shape +from tensorflow.python.grappler import item as gitem from tensorflow.python.grappler import tf_optimizer +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import variables from tensorflow.python.platform import test @@ -74,6 +78,47 @@ class PyWrapOptimizeGraphTest(test.TestCase): self.assertEqual(a2.op.name, optimized_graph.node[3].name) self.assertEqual('Variable/Assign', optimized_graph.node[4].name) + def testLoops(self): + g = ops.Graph() + with g.as_default(): + + def _Cond(_, counter): + return counter < end + + def _Body(buf, counter): + buf = array_ops.concat([buf, [counter]], 0) + counter += 1 + return [buf, counter] + + start = array_ops.placeholder(shape=[], dtype=dtypes.int32) + end = array_ops.placeholder(shape=[], dtype=dtypes.int32) + init_buf = array_ops.zeros(shape=[0], dtype=dtypes.int32) + loop_vars = [init_buf, start] + shape_inv = [ + tensor_shape.TensorShape([None]), + tensor_shape.TensorShape([]) + ] + buf, _ = control_flow_ops.while_loop(_Cond, _Body, loop_vars, shape_inv) + + f = -array_ops.ones_like(buf, optimize=False) + buf_shape = array_ops.shape(buf) + f_shape = array_ops.shape(f) + ops.add_to_collection('train_op', buf_shape) + ops.add_to_collection('train_op', f_shape) + + # Optimize the graph. 
+ mg = meta_graph.create_meta_graph_def(graph=g) + rewriter_config = rewriter_config_pb2.RewriterConfig() + optimized_graph = tf_optimizer.OptimizeGraph(rewriter_config, mg) + mg.graph_def.CopyFrom(optimized_graph) + + # Check that the nodes referenced in various collections have been preserved + item = gitem.Item(mg) + props = item.GetOpProperties() + buf_prop = props[buf.op.name] + f_prop = props[f.op.name] + self.assertEqual(buf_prop, f_prop) + if __name__ == '__main__': test.main() diff --git a/tensorflow/python/keras/BUILD b/tensorflow/python/keras/BUILD index 2a06907f49da86af354b74099d22f0182c63a9d5..1c58553156e1fa42c1420afcb87d98a92630a65c 100755 --- a/tensorflow/python/keras/BUILD +++ b/tensorflow/python/keras/BUILD @@ -20,7 +20,6 @@ py_library( srcs = [ "__init__.py", "_impl/keras/__init__.py", - "_impl/keras/activations.py", "_impl/keras/applications/__init__.py", "_impl/keras/applications/densenet.py", "_impl/keras/applications/imagenet_utils.py", @@ -32,9 +31,6 @@ py_library( "_impl/keras/applications/vgg16.py", "_impl/keras/applications/vgg19.py", "_impl/keras/applications/xception.py", - "_impl/keras/backend.py", - "_impl/keras/callbacks.py", - "_impl/keras/constraints.py", "_impl/keras/datasets/__init__.py", "_impl/keras/datasets/boston_housing.py", "_impl/keras/datasets/cifar.py", @@ -44,49 +40,13 @@ py_library( "_impl/keras/datasets/imdb.py", "_impl/keras/datasets/mnist.py", "_impl/keras/datasets/reuters.py", - "_impl/keras/engine/__init__.py", - "_impl/keras/engine/base_layer.py", - "_impl/keras/engine/input_layer.py", - "_impl/keras/engine/network.py", - "_impl/keras/engine/saving.py", - "_impl/keras/engine/sequential.py", - "_impl/keras/engine/training.py", - "_impl/keras/engine/training_arrays.py", - "_impl/keras/engine/training_eager.py", - "_impl/keras/engine/training_generator.py", - "_impl/keras/engine/training_utils.py", "_impl/keras/estimator.py", - "_impl/keras/initializers.py", - "_impl/keras/layers/__init__.py", - 
"_impl/keras/layers/advanced_activations.py", - "_impl/keras/layers/convolutional.py", - "_impl/keras/layers/convolutional_recurrent.py", - "_impl/keras/layers/core.py", - "_impl/keras/layers/embeddings.py", - "_impl/keras/layers/local.py", - "_impl/keras/layers/merge.py", - "_impl/keras/layers/noise.py", - "_impl/keras/layers/normalization.py", - "_impl/keras/layers/pooling.py", - "_impl/keras/layers/recurrent.py", - "_impl/keras/layers/serialization.py", - "_impl/keras/layers/wrappers.py", - "_impl/keras/losses.py", - "_impl/keras/metrics.py", - "_impl/keras/models.py", - "_impl/keras/optimizers.py", "_impl/keras/preprocessing/__init__.py", "_impl/keras/preprocessing/image.py", "_impl/keras/preprocessing/sequence.py", "_impl/keras/preprocessing/text.py", - "_impl/keras/regularizers.py", "_impl/keras/testing_utils.py", "_impl/keras/utils/__init__.py", - "_impl/keras/utils/conv_utils.py", - "_impl/keras/utils/data_utils.py", - "_impl/keras/utils/generic_utils.py", - "_impl/keras/utils/io_utils.py", - "_impl/keras/utils/layer_utils.py", "_impl/keras/utils/multi_gpu_utils.py", "_impl/keras/utils/np_utils.py", "_impl/keras/utils/vis_utils.py", @@ -136,7 +96,21 @@ py_library( ":empty_condition": [], "//conditions:default": [], }) + [ - "@six_archive//:six", + ":backend", + ":engine", + ":layers", + "//tensorflow/python/estimator", + "//tensorflow/python/estimator:model_fn", + "//tensorflow/python/saved_model", + "//tensorflow/python:training", + ], +) + +py_library( + name = "backend", + srcs = ["_impl/keras/backend.py"], + srcs_version = "PY2AND3", + deps = [ "//tensorflow/core:protos_all_py", "//tensorflow/python:array_ops", "//tensorflow/python:check_ops", @@ -152,8 +126,6 @@ py_library( "//tensorflow/python:gradients", "//tensorflow/python:image_ops", "//tensorflow/python:init_ops", - "//tensorflow/python:layers", - "//tensorflow/python:layers_base", "//tensorflow/python:logging_ops", "//tensorflow/python:math_ops", "//tensorflow/python:metrics", @@ -168,13 +140,86 
@@ py_library( "//tensorflow/python:tensor_array_grad", "//tensorflow/python:tensor_array_ops", "//tensorflow/python:tensor_shape", - "//tensorflow/python:training", "//tensorflow/python:util", "//tensorflow/python:variable_scope", "//tensorflow/python:variables", - "//tensorflow/python/estimator", - "//tensorflow/python/estimator:model_fn", - "//tensorflow/python/saved_model", + ], +) + +py_library( + name = "engine", + srcs = [ + "_impl/keras/activations.py", + "_impl/keras/callbacks.py", + "_impl/keras/constraints.py", + "_impl/keras/engine/__init__.py", + "_impl/keras/engine/base_layer.py", + "_impl/keras/engine/input_layer.py", + "_impl/keras/engine/network.py", + "_impl/keras/engine/saving.py", + "_impl/keras/engine/sequential.py", + "_impl/keras/engine/training.py", + "_impl/keras/engine/training_arrays.py", + "_impl/keras/engine/training_eager.py", + "_impl/keras/engine/training_generator.py", + "_impl/keras/engine/training_utils.py", + "_impl/keras/initializers.py", + "_impl/keras/losses.py", + "_impl/keras/metrics.py", + "_impl/keras/models.py", + "_impl/keras/optimizers.py", + "_impl/keras/regularizers.py", + "_impl/keras/utils/data_utils.py", + "_impl/keras/utils/io_utils.py", + ], + srcs_version = "PY2AND3", + deps = [ + ":backend", + "//tensorflow/python/data", + "@six_archive//:six", + ], +) + +py_library( + name = "layers", + srcs = [ + "_impl/keras/layers/__init__.py", + "_impl/keras/layers/advanced_activations.py", + "_impl/keras/layers/convolutional.py", + "_impl/keras/layers/convolutional_recurrent.py", + "_impl/keras/layers/core.py", + "_impl/keras/layers/embeddings.py", + "_impl/keras/layers/local.py", + "_impl/keras/layers/merge.py", + "_impl/keras/layers/noise.py", + "_impl/keras/layers/normalization.py", + "_impl/keras/layers/pooling.py", + "_impl/keras/layers/recurrent.py", + "_impl/keras/layers/serialization.py", + "_impl/keras/layers/wrappers.py", + "_impl/keras/utils/conv_utils.py", + "_impl/keras/utils/generic_utils.py", + 
"_impl/keras/utils/layer_utils.py", + "_impl/keras/utils/tf_utils.py", + ], + srcs_version = "PY2AND3", + deps = [ + ":engine", + "//tensorflow/python:array_ops", + "//tensorflow/python:distribute", + "//tensorflow/python:dtypes", + "//tensorflow/python:embedding_ops", + "//tensorflow/python:framework_ops", + "//tensorflow/python:logging_ops", + "//tensorflow/python:math_ops", + "//tensorflow/python:nn", + "//tensorflow/python:random_ops", + "//tensorflow/python:sparse_ops", + "//tensorflow/python:sparse_tensor", + "//tensorflow/python:standard_ops", + "//tensorflow/python:tensor_array_ops", + "//tensorflow/python:tensor_shape", + "//tensorflow/python:util", ], ) @@ -605,10 +650,11 @@ py_test( py_test( name = "data_utils_test", - size = "medium", + size = "large", srcs = ["_impl/keras/utils/data_utils_test.py"], srcs_version = "PY2AND3", tags = [ + "no_oss", "no_windows", "noasan", # times out "notsan", @@ -637,7 +683,10 @@ py_test( size = "small", srcs = ["_impl/keras/utils/io_utils_test.py"], srcs_version = "PY2AND3", - tags = ["notsan"], + tags = [ + "no_windows", # TODO: needs investigation on Windows + "notsan", + ], deps = [ ":keras", "//tensorflow/python:client_testlib", @@ -788,7 +837,7 @@ py_test( py_test( name = "saving_test", - size = "small", + size = "medium", srcs = ["_impl/keras/engine/saving_test.py"], srcs_version = "PY2AND3", deps = [ diff --git a/tensorflow/python/keras/_impl/keras/activations.py b/tensorflow/python/keras/_impl/keras/activations.py index 236e17653e1b762e1e6962f453b714d1bf7bcbf7..8def7ec49375c7ce23e8f2a24a4c3615d05ca9bb 100644 --- a/tensorflow/python/keras/_impl/keras/activations.py +++ b/tensorflow/python/keras/_impl/keras/activations.py @@ -22,8 +22,8 @@ import six from tensorflow.python.keras._impl.keras import backend as K from tensorflow.python.keras._impl.keras.utils.generic_utils import deserialize_keras_object -from tensorflow.python.layers.base import Layer -from tensorflow.python.platform import tf_logging as logging 
+from tensorflow.python.ops import math_ops +from tensorflow.python.ops import nn from tensorflow.python.util.tf_export import tf_export @@ -43,10 +43,10 @@ def softmax(x, axis=-1): """ ndim = K.ndim(x) if ndim == 2: - return K.softmax(x) + return nn.softmax(x) elif ndim > 2: - e = K.exp(x - K.max(x, axis=axis, keepdims=True)) - s = K.sum(e, axis=axis, keepdims=True) + e = math_ops.exp(x - math_ops.reduce_max(x, axis=axis, keepdims=True)) + s = math_ops.reduce_sum(e, axis=axis, keepdims=True) return e / s else: raise ValueError('Cannot apply softmax to a tensor that is 1D') @@ -79,12 +79,12 @@ def selu(x): @tf_export('keras.activations.softplus') def softplus(x): - return K.softplus(x) + return nn.softplus(x) @tf_export('keras.activations.softsign') def softsign(x): - return K.softsign(x) + return nn.softsign(x) @tf_export('keras.activations.relu') @@ -94,12 +94,12 @@ def relu(x, alpha=0., max_value=None): @tf_export('keras.activations.tanh') def tanh(x): - return K.tanh(x) + return nn.tanh(x) @tf_export('keras.activations.sigmoid') def sigmoid(x): - return K.sigmoid(x) + return nn.sigmoid(x) @tf_export('keras.activations.hard_sigmoid') @@ -134,12 +134,6 @@ def get(identifier): identifier = str(identifier) return deserialize(identifier) elif callable(identifier): - if isinstance(identifier, Layer): - logging.warning( - 'Do not pass a layer instance (such as {identifier}) as the ' - 'activation argument of another layer. 
Instead, advanced ' - 'activation layers should be used just like any other ' - 'layer in a model.'.format(identifier=identifier.__class__.__name__)) return identifier else: raise ValueError('Could not interpret ' diff --git a/tensorflow/python/keras/_impl/keras/applications/imagenet_utils.py b/tensorflow/python/keras/_impl/keras/applications/imagenet_utils.py index c26a28ed4087e30968585ec8ac0b64b51513bcae..d928a7afdc639485d443be382420cac09ba9abd6 100644 --- a/tensorflow/python/keras/_impl/keras/applications/imagenet_utils.py +++ b/tensorflow/python/keras/_impl/keras/applications/imagenet_utils.py @@ -22,8 +22,10 @@ import json import numpy as np +from tensorflow.python.framework import constant_op from tensorflow.python.keras._impl.keras import backend as K from tensorflow.python.keras._impl.keras.utils.data_utils import get_file +from tensorflow.python.ops import math_ops from tensorflow.python.platform import tf_logging as logging from tensorflow.python.util.tf_export import tf_export @@ -151,11 +153,11 @@ def _preprocess_symbolic_input(x, data_format, mode): std = None if _IMAGENET_MEAN is None: - _IMAGENET_MEAN = K.constant(-np.array(mean)) + _IMAGENET_MEAN = constant_op.constant(-np.array(mean), dtype=K.floatx()) # Zero-center by mean pixel if K.dtype(x) != K.dtype(_IMAGENET_MEAN): - x = K.bias_add(x, K.cast(_IMAGENET_MEAN, K.dtype(x)), data_format) + x = K.bias_add(x, math_ops.cast(_IMAGENET_MEAN, K.dtype(x)), data_format) else: x = K.bias_add(x, _IMAGENET_MEAN, data_format) if std is not None: diff --git a/tensorflow/python/keras/_impl/keras/applications/mobilenet.py b/tensorflow/python/keras/_impl/keras/applications/mobilenet.py index ad96b53a4528d99a014a0214b52a78d6a60076f8..12775fccecddd96028a9f2b793be155da5b8d838 100644 --- a/tensorflow/python/keras/_impl/keras/applications/mobilenet.py +++ b/tensorflow/python/keras/_impl/keras/applications/mobilenet.py @@ -84,11 +84,13 @@ from tensorflow.python.keras._impl.keras.engine.network import get_source_inputs 
from tensorflow.python.keras._impl.keras.layers import Activation from tensorflow.python.keras._impl.keras.layers import BatchNormalization from tensorflow.python.keras._impl.keras.layers import Conv2D +from tensorflow.python.keras._impl.keras.layers import DepthwiseConv2D from tensorflow.python.keras._impl.keras.layers import Dropout from tensorflow.python.keras._impl.keras.layers import GlobalAveragePooling2D from tensorflow.python.keras._impl.keras.layers import GlobalMaxPooling2D from tensorflow.python.keras._impl.keras.layers import Input from tensorflow.python.keras._impl.keras.layers import Reshape +from tensorflow.python.keras._impl.keras.layers import ZeroPadding2D from tensorflow.python.keras._impl.keras.models import Model from tensorflow.python.keras._impl.keras.utils import conv_utils from tensorflow.python.keras._impl.keras.utils.data_utils import get_file @@ -116,195 +118,6 @@ def preprocess_input(x): return imagenet_utils.preprocess_input(x, mode='tf') -class DepthwiseConv2D(Conv2D): - """Depthwise separable 2D convolution. - - Depthwise Separable convolutions consists in performing - just the first step in a depthwise spatial convolution - (which acts on each input channel separately). - The `depth_multiplier` argument controls how many - output channels are generated per input channel in the depthwise step. - - Arguments: - kernel_size: An integer or tuple/list of 2 integers, specifying the - width and height of the 2D convolution window. - Can be a single integer to specify the same value for - all spatial dimensions. - strides: An integer or tuple/list of 2 integers, - specifying the strides of the convolution along the width and height. - Can be a single integer to specify the same value for - all spatial dimensions. - Specifying any stride value != 1 is incompatible with specifying - any `dilation_rate` value != 1. - padding: one of `'valid'` or `'same'` (case-insensitive). 
- depth_multiplier: The number of depthwise convolution output channels - for each input channel. - The total number of depthwise convolution output - channels will be equal to `filters_in * depth_multiplier`. - data_format: A string, - one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape - `(batch, height, width, channels)` while `channels_first` - corresponds to inputs with shape - `(batch, channels, height, width)`. - It defaults to the `image_data_format` value found in your - Keras config file at `~/.keras/keras.json`. - If you never set it, then it will be 'channels_last'. - activation: Activation function to use. - If you don't specify anything, no activation is applied - (ie. 'linear' activation: `a(x) = x`). - use_bias: Boolean, whether the layer uses a bias vector. - depthwise_initializer: Initializer for the depthwise kernel matrix. - bias_initializer: Initializer for the bias vector. - depthwise_regularizer: Regularizer function applied to - the depthwise kernel matrix. - bias_regularizer: Regularizer function applied to the bias vector. - activity_regularizer: Regularizer function applied to - the output of the layer (its 'activation').. - depthwise_constraint: Constraint function applied to - the depthwise kernel matrix. - bias_constraint: Constraint function applied to the bias vector. - - Input shape: - 4D tensor with shape: - `[batch, channels, rows, cols]` if data_format='channels_first' - or 4D tensor with shape: - `[batch, rows, cols, channels]` if data_format='channels_last'. - - Output shape: - 4D tensor with shape: - `[batch, filters, new_rows, new_cols]` if data_format='channels_first' - or 4D tensor with shape: - `[batch, new_rows, new_cols, filters]` if data_format='channels_last'. - `rows` and `cols` values might have changed due to padding. 
- """ - - def __init__(self, - kernel_size, - strides=(1, 1), - padding='valid', - depth_multiplier=1, - data_format=None, - activation=None, - use_bias=True, - depthwise_initializer='glorot_uniform', - bias_initializer='zeros', - depthwise_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - depthwise_constraint=None, - bias_constraint=None, - **kwargs): - super(DepthwiseConv2D, self).__init__( - filters=None, - kernel_size=kernel_size, - strides=strides, - padding=padding, - data_format=data_format, - activation=activation, - use_bias=use_bias, - bias_regularizer=bias_regularizer, - activity_regularizer=activity_regularizer, - bias_constraint=bias_constraint, - **kwargs) - self.depth_multiplier = depth_multiplier - self.depthwise_initializer = initializers.get(depthwise_initializer) - self.depthwise_regularizer = regularizers.get(depthwise_regularizer) - self.depthwise_constraint = constraints.get(depthwise_constraint) - self.bias_initializer = initializers.get(bias_initializer) - - @shape_type_conversion - def build(self, input_shape): - if len(input_shape) < 4: - raise ValueError('Inputs to `DepthwiseConv2D` should have rank 4. ' - 'Received input shape:', str(input_shape)) - if self.data_format == 'channels_first': - channel_axis = 1 - else: - channel_axis = 3 - if input_shape[channel_axis] is None: - raise ValueError('The channel dimension of the inputs to ' - '`DepthwiseConv2D` ' - 'should be defined. 
Found `None`.') - input_dim = int(input_shape[channel_axis]) - depthwise_kernel_shape = (self.kernel_size[0], self.kernel_size[1], - input_dim, self.depth_multiplier) - - self.depthwise_kernel = self.add_weight( - shape=depthwise_kernel_shape, - initializer=self.depthwise_initializer, - name='depthwise_kernel', - regularizer=self.depthwise_regularizer, - constraint=self.depthwise_constraint) - - if self.use_bias: - self.bias = self.add_weight( - shape=(input_dim * self.depth_multiplier,), - initializer=self.bias_initializer, - name='bias', - regularizer=self.bias_regularizer, - constraint=self.bias_constraint) - else: - self.bias = None - # Set input spec. - self.input_spec = InputSpec(ndim=4, axes={channel_axis: input_dim}) - self.built = True - - def call(self, inputs, training=None): - outputs = K.depthwise_conv2d( - inputs, - self.depthwise_kernel, - strides=self.strides, - padding=self.padding, - dilation_rate=self.dilation_rate, - data_format=self.data_format) - - if self.bias: - outputs = K.bias_add(outputs, self.bias, data_format=self.data_format) - - if self.activation is not None: - return self.activation(outputs) - - return outputs - - @shape_type_conversion - def compute_output_shape(self, input_shape): - if self.data_format == 'channels_first': - rows = input_shape[2] - cols = input_shape[3] - out_filters = input_shape[1] * self.depth_multiplier - elif self.data_format == 'channels_last': - rows = input_shape[1] - cols = input_shape[2] - out_filters = input_shape[3] * self.depth_multiplier - - rows = conv_utils.conv_output_length(rows, self.kernel_size[0], - self.padding, self.strides[0]) - cols = conv_utils.conv_output_length(cols, self.kernel_size[1], - self.padding, self.strides[1]) - - if self.data_format == 'channels_first': - return (input_shape[0], out_filters, rows, cols) - elif self.data_format == 'channels_last': - return (input_shape[0], rows, cols, out_filters) - - def get_config(self): - config = super(DepthwiseConv2D, self).get_config() - 
config.pop('filters') - config.pop('kernel_initializer') - config.pop('kernel_regularizer') - config.pop('kernel_constraint') - config['depth_multiplier'] = self.depth_multiplier - config['depthwise_initializer'] = initializers.serialize( - self.depthwise_initializer) - config['depthwise_regularizer'] = regularizers.serialize( - self.depthwise_regularizer) - config['depthwise_constraint'] = constraints.serialize( - self.depthwise_constraint) - return config - - @tf_export('keras.applications.MobileNet', 'keras.applications.mobilenet.MobileNet') def MobileNet(input_shape=None, @@ -318,18 +131,11 @@ def MobileNet(input_shape=None, classes=1000): """Instantiates the MobileNet architecture. - Note that only TensorFlow is supported for now, - therefore it only works with the data format - `image_data_format='channels_last'` in your Keras config - at `~/.keras/keras.json`. - To load a MobileNet model via `load_model`, import the custom - objects `relu6` and `DepthwiseConv2D` and pass them to the - `custom_objects` parameter. + objects `relu6` and pass them to the `custom_objects` parameter. E.g. model = load_model('mobilenet.h5', custom_objects={ - 'relu6': mobilenet.relu6, - 'DepthwiseConv2D': mobilenet.DepthwiseConv2D}) + 'relu6': mobilenet.relu6}) Arguments: input_shape: optional shape tuple, only to be specified @@ -383,11 +189,6 @@ def MobileNet(input_shape=None, backend that does not support separable convolutions. 
""" - if K.backend() != 'tensorflow': - raise RuntimeError('Only TensorFlow backend is currently supported, ' - 'as other backends do not support ' - 'depthwise convolution.') - if not (weights in {'imagenet', None} or os.path.exists(weights)): raise ValueError('The `weights` argument should be either ' '`None` (random initialization), `imagenet` ' @@ -522,7 +323,7 @@ def MobileNet(input_shape=None, # load weights if weights == 'imagenet': if K.image_data_format() == 'channels_first': - raise ValueError('Weights for "channels_last" format ' + raise ValueError('Weights for "channels_first" format ' 'are not available.') if alpha == 1.0: alpha_text = '1_0' @@ -598,14 +399,14 @@ def _conv_block(inputs, filters, alpha, kernel=(3, 3), strides=(1, 1)): """ channel_axis = 1 if K.image_data_format() == 'channels_first' else -1 filters = int(filters * alpha) + x = ZeroPadding2D(padding=(1, 1), name='conv1_pad')(inputs) x = Conv2D( filters, kernel, - padding='same', + padding='valid', use_bias=False, strides=strides, - name='conv1')( - inputs) + name='conv1')(x) x = BatchNormalization(axis=channel_axis, name='conv1_bn')(x) return Activation(relu6, name='conv1_relu')(x) @@ -665,15 +466,14 @@ def _depthwise_conv_block(inputs, """ channel_axis = 1 if K.image_data_format() == 'channels_first' else -1 pointwise_conv_filters = int(pointwise_conv_filters * alpha) - + x = ZeroPadding2D(padding=(1, 1), name='conv_pad_%d' % block_id)(inputs) x = DepthwiseConv2D( # pylint: disable=not-callable (3, 3), - padding='same', + padding='valid', depth_multiplier=depth_multiplier, strides=strides, use_bias=False, - name='conv_dw_%d' % block_id)( - inputs) + name='conv_dw_%d' % block_id)(x) x = BatchNormalization(axis=channel_axis, name='conv_dw_%d_bn' % block_id)(x) x = Activation(relu6, name='conv_dw_%d_relu' % block_id)(x) diff --git a/tensorflow/python/keras/_impl/keras/applications/resnet50.py b/tensorflow/python/keras/_impl/keras/applications/resnet50.py index 
46c0e635578c7f4707b027247943d75b16d703ad..c3a92bea8920cad3297fee3efc50158813e72361 100644 --- a/tensorflow/python/keras/_impl/keras/applications/resnet50.py +++ b/tensorflow/python/keras/_impl/keras/applications/resnet50.py @@ -45,6 +45,7 @@ from tensorflow.python.keras._impl.keras.layers import GlobalAveragePooling2D from tensorflow.python.keras._impl.keras.layers import GlobalMaxPooling2D from tensorflow.python.keras._impl.keras.layers import Input from tensorflow.python.keras._impl.keras.layers import MaxPooling2D +from tensorflow.python.keras._impl.keras.layers import ZeroPadding2D from tensorflow.python.keras._impl.keras.models import Model from tensorflow.python.keras._impl.keras.utils import layer_utils from tensorflow.python.keras._impl.keras.utils.data_utils import get_file @@ -237,8 +238,7 @@ def ResNet50(include_top=True, bn_axis = 1 x = Conv2D( - 64, (7, 7), strides=(2, 2), padding='same', name='conv1')( - img_input) + 64, (7, 7), strides=(2, 2), padding='same', name='conv1')(img_input) x = BatchNormalization(axis=bn_axis, name='bn_conv1')(x) x = Activation('relu')(x) x = MaxPooling2D((3, 3), strides=(2, 2))(x) diff --git a/tensorflow/python/keras/_impl/keras/backend.py b/tensorflow/python/keras/_impl/keras/backend.py index 7baf27642a475eb3a09687a1d19a6ed05de046e9..449410fe082421193d178b768db2ad1eda183b36 100644 --- a/tensorflow/python/keras/_impl/keras/backend.py +++ b/tensorflow/python/keras/_impl/keras/backend.py @@ -24,6 +24,7 @@ from __future__ import print_function import collections import json import os +import weakref import numpy as np @@ -34,7 +35,7 @@ from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes as dtypes_module from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor -from tensorflow.python.layers import base as tf_base_layers +from tensorflow.python.framework import tensor_util from tensorflow.python.ops import array_ops from 
tensorflow.python.ops import clip_ops from tensorflow.python.ops import control_flow_ops @@ -54,7 +55,7 @@ from tensorflow.python.ops import state_ops from tensorflow.python.ops import tensor_array_grad # pylint: disable=unused-import from tensorflow.python.ops import tensor_array_ops from tensorflow.python.ops import variables as variables_module -from tensorflow.python.training import moving_averages + from tensorflow.python.util import tf_contextlib from tensorflow.python.util import tf_inspect from tensorflow.python.util.tf_export import tf_export @@ -262,6 +263,12 @@ def set_image_data_format(data_format): _IMAGE_DATA_FORMAT = str(data_format) +# A global dictionary mapping graph objects to an index of counters used +# for various layer names in each graph. +# Allows to give unique autogenerated names to layers, in a graph-specific way. +PER_GRAPH_LAYER_NAME_UIDS = weakref.WeakKeyDictionary() + + @tf_export('keras.backend.get_uid') def get_uid(prefix=''): """Associates a string prefix with an integer counter in a TensorFlow graph. @@ -282,17 +289,16 @@ def get_uid(prefix=''): ``` """ graph = ops.get_default_graph() - if graph not in tf_base_layers.PER_GRAPH_LAYER_NAME_UIDS: - tf_base_layers.PER_GRAPH_LAYER_NAME_UIDS[graph] = collections.defaultdict( - int) - layer_name_uids = tf_base_layers.PER_GRAPH_LAYER_NAME_UIDS[graph] + if graph not in PER_GRAPH_LAYER_NAME_UIDS: + PER_GRAPH_LAYER_NAME_UIDS[graph] = collections.defaultdict(int) + layer_name_uids = PER_GRAPH_LAYER_NAME_UIDS[graph] layer_name_uids[prefix] += 1 return layer_name_uids[prefix] @tf_export('keras.backend.reset_uids') def reset_uids(): - per_graph_layer_name_uids = tf_base_layers.PER_GRAPH_LAYER_NAME_UIDS + per_graph_layer_name_uids = PER_GRAPH_LAYER_NAME_UIDS keys = list(per_graph_layer_name_uids.keys()) for key in keys: del per_graph_layer_name_uids[key] @@ -1275,6 +1281,11 @@ def moving_average_update(x, value, momentum): Returns: An Operation to update the variable. 
""" + # `training` is higher-up than the Keras backend in the abstraction hierarchy. + # In particular, `training` depends on layers, and thus on Keras. + # moving_averages, being low-level ops, should not be part of the training + # module. + from tensorflow.python.training import moving_averages # pylint: disable=g-import-not-at-top return moving_averages.assign_moving_average( x, value, momentum, zero_debias=True) @@ -2749,8 +2760,7 @@ class Function(object): outputs: Output tensors to fetch. updates: Additional update ops to be run at function call. name: A name to help users identify what this function does. - session_kwargs: Arguments to `tf.Session.run()`: `fetches`, `feed_dict`, - `options`, `run_metadata` + session_kwargs: Arguments to `tf.Session.run()`: `fetches`, `feed_dict`. """ def __init__(self, inputs, outputs, updates=None, name=None, @@ -2784,17 +2794,76 @@ class Function(object): self.fetches = session_kwargs.pop('fetches', []) if not isinstance(self.fetches, list): self.fetches = [self.fetches] + # The main use case of `fetches` being passed to a model is the ability + # to run custom updates (since the outputs of fetches are never returned). + # This requires us to wrap fetches in `identity` ops. + self.fetches = [array_ops.identity(x) for x in self.fetches] self.session_kwargs = session_kwargs + if session_kwargs: + raise ValueError('Some keys in session_kwargs are not supported at this ' + 'time: %s', session_kwargs.keys()) + + self._callable_fn = None + self._feed_arrays = None + self._feed_symbols = None + self._symbol_vals = None + self._session = None + + def _make_callable(self, feed_arrays, feed_symbols, symbol_vals, session): + """Generates a callable that runs the graph. + + Arguments: + feed_arrays: List of input tensors to be fed Numpy arrays at runtime. + feed_symbols: List of input tensors to be fed symbolic tensors at runtime. + symbol_vals: List of symbolic tensors to be fed to `feed_symbols`. 
+ session: Session to use to generate the callable. + + Returns: + Function that runs the graph according to the above options. + """ + # Prepare callable options. + callable_opts = config_pb2.CallableOptions() + # Handle external-data feed. + for x in feed_arrays: + callable_opts.feed.append(x.name) + if self.feed_dict: + for key in sorted(self.feed_dict.keys()): + callable_opts.feed.append(key.name) + # Handle symbolic feed. + for x, y in zip(feed_symbols, symbol_vals): + connection = callable_opts.tensor_connection.add() + if x.dtype != y.dtype: + y = math_ops.cast(y, dtype=x.dtype) + from_tensor = ops._as_graph_element(y) + if from_tensor is None: + from_tensor = y + connection.from_tensor = from_tensor.name # Data tensor + connection.to_tensor = x.name # Placeholder + # Handle fetches. + for x in self.outputs + self.fetches: + callable_opts.fetch.append(x.name) + # Handle updates. + callable_opts.target.append(self.updates_op.name) + # Create callable. + callable_fn = session._make_callable_from_options(callable_opts) + # Cache parameters corresponding to the generated callable, so that + # we can detect future mismatches and refresh the callable. 
+ self._callable_fn = callable_fn + self._feed_arrays = feed_arrays + self._feed_symbols = feed_symbols + self._symbol_vals = symbol_vals + self._session = session + def __call__(self, inputs): if not isinstance(inputs, (list, tuple)): raise TypeError('`inputs` should be a list or tuple.') - if self.feed_dict: - feed_dict = self.feed_dict.copy() - else: - feed_dict = {} - + session = get_session() + feed_arrays = [] + array_vals = [] + feed_symbols = [] + symbol_vals = [] for tensor, value in zip(self.inputs, inputs): if value is None: continue @@ -2803,12 +2872,31 @@ class Function(object): indices = np.concatenate((np.expand_dims(sparse_coo.row, 1), np.expand_dims(sparse_coo.col, 1)), 1) value = (indices, sparse_coo.data, sparse_coo.shape) - feed_dict[tensor] = value - fetches = self.outputs + [self.updates_op] + self.fetches - session = get_session() - updated = session.run( - fetches=fetches, feed_dict=feed_dict, **self.session_kwargs) - return updated[:len(self.outputs)] + if tensor_util.is_tensor(value): + # Case: feeding symbolic tensor. + feed_symbols.append(tensor) + symbol_vals.append(value) + else: + # Case: feeding Numpy array. + feed_arrays.append(tensor) + # We need to do array conversion and type casting at this level, since + # `callable_fn` only supports exact matches. + array_vals.append(np.asarray(value, dtype=tensor.dtype.base_dtype.name)) + if self.feed_dict: + for key in sorted(self.feed_dict.keys()): + array_vals.append( + np.asarray(self.feed_dict[key], dtype=key.dtype.base_dtype.name)) + + # Refresh callable if anything has changed. 
+ if (self._callable_fn is None or + feed_arrays != self._feed_arrays or + symbol_vals != self._symbol_vals or + feed_symbols != self._feed_symbols or + session != self._session): + self._make_callable(feed_arrays, feed_symbols, symbol_vals, session) + + fetched = self._callable_fn(*array_vals) + return fetched[:len(self.outputs)] @tf_export('keras.backend.function') @@ -3360,7 +3448,7 @@ def categorical_crossentropy(target, output, from_logits=False): Returns: Output tensor. """ - # Note: nn.softmax_cross_entropy_with_logits + # Note: nn.softmax_cross_entropy_with_logits_v2 # expects logits, Keras expects probabilities. if not from_logits: # scale preds so that the class probas of each sample sum to 1 @@ -3424,7 +3512,7 @@ def binary_crossentropy(target, output, from_logits=False): Returns: A tensor. """ - # Note: nn.softmax_cross_entropy_with_logits + # Note: nn.sigmoid_cross_entropy_with_logits # expects logits, Keras expects probabilities. if not from_logits: # transform back to logits diff --git a/tensorflow/python/keras/_impl/keras/backend_test.py b/tensorflow/python/keras/_impl/keras/backend_test.py index fb4b2a0e1dc06c904d4b93038840dbf688d42ed4..de1ed467a2764a2b08269181bb8bcd615868d3b1 100644 --- a/tensorflow/python/keras/_impl/keras/backend_test.py +++ b/tensorflow/python/keras/_impl/keras/backend_test.py @@ -189,6 +189,39 @@ class BackendUtilsTest(test.TestCase): for y in ys: self.assertEqual(y.op.name[:12], 'StopGradient') + def test_function_tf_feed_symbols(self): + with self.test_session(): + # Test feeding a resource variable to `function`. + x1 = keras.backend.placeholder(shape=()) + x2 = keras.backend.placeholder(shape=()) + lr = keras.backend.learning_phase() # Include a placeholder_with_default. + + y1 = keras.backend.variable(10.) + y2 = 3 + + f = keras.backend.function( + inputs=[x1, x2, lr], + outputs=[x1 + 1, + keras.backend.in_train_phase(x2 + 2, x2 - 1)]) + outs = f([y1, y2, None]) # Use default learning_phase value. 
+ self.assertEqual(outs, [11., 2.]) + outs = f([y1, y2, 1]) # Set learning phase value. + self.assertEqual(outs, [11., 5.]) + + # Test triggering a callable refresh by changing the input. + y3 = keras.backend.constant(20.) # Test with tensor + outs = f([y3, y2, None]) + self.assertEqual(outs, [21., 2.]) + + y4 = 4 # Test with non-symbol + outs = f([y4, y2, None]) + self.assertEqual(outs, [5., 2.]) + + # Test with a different dtype + y5 = keras.backend.constant(10., dtype='float64') + outs = f([y5, y2, None]) + self.assertEqual(outs, [11., 2.]) + def test_function_tf_fetches(self): # Additional operations can be passed to tf.Session().run() via its # `fetches` arguments. In contrast to `updates` argument of @@ -206,8 +239,9 @@ class BackendUtilsTest(test.TestCase): updates=[(x, x_placeholder + 1.)], fetches=[keras.backend.update(y, 5.)]) output = f([10., 20.]) - assert output == [30.] - assert keras.backend.get_session().run(fetches=[x, y]) == [11., 5.] + self.assertEqual(output, [30.]) + self.assertEqual( + keras.backend.get_session().run(fetches=[x, y]), [11., 5.]) def test_function_tf_feed_dict(self): # Additional substitutions can be passed to `tf.Session().run()` via its @@ -229,14 +263,16 @@ class BackendUtilsTest(test.TestCase): feed_dict=feed_dict, fetches=fetches) output = f([10.]) - assert output == [11.] - assert keras.backend.get_session().run(fetches=[x, y]) == [20., 30.] + self.assertEqual(output, [11.]) + self.assertEqual( + keras.backend.get_session().run(fetches=[x, y]), [20., 30.]) # updated value in feed_dict will be modified within the K.function() feed_dict[y_placeholder] = 4. output = f([20.]) - assert output == [21.] - assert keras.backend.get_session().run(fetches=[x, y]) == [30., 40.] 
+ self.assertEqual(output, [21.]) + self.assertEqual( + keras.backend.get_session().run(fetches=[x, y]), [30., 40.]) class BackendVariableTest(test.TestCase): diff --git a/tensorflow/python/keras/_impl/keras/constraints.py b/tensorflow/python/keras/_impl/keras/constraints.py index 271fbbb63d3dfd50507837e190860d48315a14f2..abe95d8e0ca68b2e62f9574fba9ae912a9179fff 100644 --- a/tensorflow/python/keras/_impl/keras/constraints.py +++ b/tensorflow/python/keras/_impl/keras/constraints.py @@ -24,6 +24,7 @@ import six from tensorflow.python.keras._impl.keras import backend as K from tensorflow.python.keras._impl.keras.utils.generic_utils import deserialize_keras_object from tensorflow.python.keras._impl.keras.utils.generic_utils import serialize_keras_object +from tensorflow.python.ops import math_ops from tensorflow.python.util.tf_export import tf_export @@ -65,7 +66,8 @@ class MaxNorm(Constraint): self.axis = axis def __call__(self, w): - norms = K.sqrt(K.sum(K.square(w), axis=self.axis, keepdims=True)) + norms = K.sqrt( + math_ops.reduce_sum(math_ops.square(w), axis=self.axis, keepdims=True)) desired = K.clip(norms, 0, self.max_value) return w * (desired / (K.epsilon() + norms)) @@ -79,7 +81,7 @@ class NonNeg(Constraint): """ def __call__(self, w): - return w * K.cast(K.greater_equal(w, 0.), K.floatx()) + return w * math_ops.cast(math_ops.greater_equal(w, 0.), K.floatx()) @tf_export('keras.constraints.UnitNorm', 'keras.constraints.unit_norm') @@ -105,7 +107,9 @@ class UnitNorm(Constraint): def __call__(self, w): return w / ( - K.epsilon() + K.sqrt(K.sum(K.square(w), axis=self.axis, keepdims=True))) + K.epsilon() + K.sqrt( + math_ops.reduce_sum( + math_ops.square(w), axis=self.axis, keepdims=True))) def get_config(self): return {'axis': self.axis} @@ -148,7 +152,8 @@ class MinMaxNorm(Constraint): self.axis = axis def __call__(self, w): - norms = K.sqrt(K.sum(K.square(w), axis=self.axis, keepdims=True)) + norms = K.sqrt( + math_ops.reduce_sum(math_ops.square(w), 
axis=self.axis, keepdims=True)) desired = ( self.rate * K.clip(norms, self.min_value, self.max_value) + (1 - self.rate) * norms) diff --git a/tensorflow/python/keras/_impl/keras/engine/base_layer.py b/tensorflow/python/keras/_impl/keras/engine/base_layer.py index 755607aafb9343f0c9f10c5f9394bedc2f8afd76..6c68d251275cad4a6f2d6a5bf959a42c041537f3 100644 --- a/tensorflow/python/keras/_impl/keras/engine/base_layer.py +++ b/tensorflow/python/keras/_impl/keras/engine/base_layer.py @@ -13,143 +13,145 @@ # limitations under the License. # ============================================================================== # pylint: disable=protected-access -"""Base layer code (`Layer`). -""" +"""Contains the base Layer class, from which all layers inherit.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function +import collections import inspect # Necessary supplement to tf_inspect to deal with variadic args. +import re +import numpy as np from six.moves import zip # pylint: disable=redefined-builtin from tensorflow.python.eager import context +from tensorflow.python.estimator import util as estimator_util +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape -from tensorflow.python.keras._impl.keras import backend as K +from tensorflow.python.keras._impl.keras import backend from tensorflow.python.keras._impl.keras import constraints from tensorflow.python.keras._impl.keras import initializers from tensorflow.python.keras._impl.keras import regularizers from tensorflow.python.keras._impl.keras.utils import generic_utils -from tensorflow.python.layers import base as tf_base_layers -from tensorflow.python.platform import tf_logging as logging +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import init_ops +from tensorflow.python.ops import variable_scope as vs +from tensorflow.python.ops import variables as 
tf_variables +from tensorflow.python.training import checkpointable +from tensorflow.python.util import nest from tensorflow.python.util import tf_decorator from tensorflow.python.util import tf_inspect from tensorflow.python.util.tf_export import tf_export -# pylint: disable=invalid-name -InputSpec = tf_base_layers.InputSpec -Node = tf_base_layers.Node -TFBaseLayer = tf_base_layers.Layer -# pylint: enable=invalid-name +@tf_export('keras.layers.Layer') +class Layer(checkpointable.CheckpointableBase): + """Base layer class. + This is the class from which all layers inherit. -@tf_export('keras.layers.Layer') -class Layer(tf_base_layers.Layer): - """Abstract base layer class. - - # Properties - name: String, must be unique within a model. - input_spec: List of InputSpec class instances - each entry describes one required input: - - ndim - - dtype - A layer with `n` input tensors must have - an `input_spec` of length `n`. - trainable: Boolean, whether the layer weights - will be updated during training. - uses_learning_phase: Whether any operation - of the layer uses `K.in_training_phase()` - or `K.in_test_phase()`. - input_shape: Shape tuple. Provided for convenience, - but note that there may be cases in which this - attribute is ill-defined (e.g. a shared layer - with multiple input shapes), in which case - requesting `input_shape` will raise an Exception. - Prefer using `layer.get_input_shape_for(input_shape)`, - or `layer.get_input_shape_at(node_index)`. - output_shape: Shape tuple. See above. - inbound_nodes: List of nodes. - outbound_nodes: List of nodes. - input, output: Input/output tensor(s). Note that if the layer is used - more than once (shared layer), this is ill-defined - and will raise an exception. In such cases, use - `layer.get_input_at(node_index)`. - input_mask, output_mask: Same as above, for masks. - trainable_weights: List of variables. - non_trainable_weights: List of variables. 
- weights: The concatenation of the lists trainable_weights and - non_trainable_weights (in this order). - - # Methods - call(x, mask=None): Where the layer's logic lives. - __call__(x, mask=None): Wrapper around the layer logic (`call`). - If x is a Keras tensor: - - Connect current layer with last layer from tensor: - `self._add_inbound_node(last_layer)` - - Add layer to tensor history - If layer is not built: - - Build from inputs shape - get_weights() - set_weights(weights) - get_config() - count_params() - compute_output_shape(input_shape) - compute_mask(x, mask) - get_input_at(node_index) - get_output_at(node_index) - get_input_shape_at(node_index) - get_output_shape_at(node_index) - get_input_mask_at(node_index) - get_output_mask_at(node_index) - - # Class Methods - from_config(config) - - # Internal methods: - build(input_shape) - _add_inbound_node(layer, index=0) + A layer is a class implementing common neural networks operations, such + as convolution, batch norm, etc. These operations require managing weights, + losses, updates, and inter-layer connectivity. + + Users will just instantiate a layer and then treat it as a callable. + + We recommend that descendants of `Layer` implement the following methods: + * `__init__()`: Save configuration in member variables + * `build()`: Called once from `__call__`, when we know the shapes of inputs + and `dtype`. Should have the calls to `add_weight()`, and then + call the super's `build()` (which sets `self.built = True`, which is + nice in case the user wants to call `build()` manually before the + first `__call__`). + * `call()`: Called in `__call__` after making sure `build()` has been called + once. Should actually perform the logic of applying the layer to the + input tensors (which should be passed in as the first argument). + + Arguments: + trainable: Boolean, whether the layer's variables should be trainable. + name: String name of the layer. 
+ dtype: Default dtype of the layer's weights (default of `None` means use the + type of the first input). + + Read-only properties: + name: The name of the layer (string). + dtype: Default dtype of the layer's weights (default of `None` means use the + type of the first input). + trainable_variables: List of trainable variables. + non_trainable_variables: List of non-trainable variables. + variables: List of all variables of this layer, trainable and + non-trainable. + updates: List of update ops of this layer. + losses: List of losses added by this layer. + trainable_weights: List of variables to be included in backprop. + non_trainable_weights: List of variables that should not be + included in backprop. + weights: The concatenation of the lists trainable_weights and + non_trainable_weights (in this order). + + Mutable properties: + trainable: Whether the layer should be trained (boolean). + input_spec: Optional (list of) `InputSpec` object(s) specifying the + constraints on inputs that can be accepted by the layer. """ - def __init__(self, **kwargs): + def __init__(self, trainable=True, name=None, dtype=None, **kwargs): # These properties should be set by the user via keyword arguments. # note that 'dtype', 'input_shape' and 'batch_input_shape' # are only applicable to input layers: do not pass these keywords # to non-input layers. allowed_kwargs = { - 'activity_regularizer', 'input_shape', 'batch_input_shape', 'batch_size', - 'dtype', - 'name', - 'trainable', 'weights', + 'activity_regularizer', } # Validate optional keyword arguments. for kwarg in kwargs: if kwarg not in allowed_kwargs: raise TypeError('Keyword argument not understood:', kwarg) - # Get layer name. - name = kwargs.get('name') - - # Get `trainable` status. - trainable = kwargs.get('trainable', True) - - # Get `dtype`. - dtype = kwargs.get('dtype') - if dtype is None: - dtype = K.floatx() - - # Call super, which will set all properties common to Keras layers - # and core TF layers. 
- super(Layer, self).__init__( - name=name, dtype=dtype, trainable=trainable, - activity_regularizer=kwargs.get('activity_regularizer')) + # Mutable properties + # Indicates whether the layer's weights are updated during training + # and whether the layer's updates are run during training + self.trainable = trainable + # A stateful layer is a layer whose updates are run during inference too, + # for instance stateful RNNs. + self.stateful = False + # Indicates whether `build` needs to be called upon layer call, to create + # the layer's weights. + self.built = False + # Provides information about which inputs are compatible with the layer. + self.input_spec = None + + self._init_set_name(name) + + activity_regularizer = kwargs.pop('activity_regularizer', None) + if activity_regularizer and context.executing_eagerly(): + raise ValueError( + ('Activity regularization is not supported when executing eagerly. ' + 'Got activity_regularizer=%s') % (activity_regularizer,)) + self._activity_regularizer = activity_regularizer + self._trainable_weights = [] + self._non_trainable_weights = [] + self._updates = [] + # When executing eagerly, _losses is a list of zero-argument lambdas which + # return tensors. When using graph execution, _losses is a list of ops. + self._losses = [] + self._dtype = None if dtype is None else dtypes.as_dtype(dtype).name + self._call_fn_args = estimator_util.fn_args(self.call) + self._compute_previous_mask = ('mask' in self._call_fn_args or + hasattr(self, 'compute_mask')) self._uses_inputs_arg = True - # Add properties that are Keras-only for now. + # These lists will be filled via successive calls + # to self._add_inbound_node(). + self._inbound_nodes = [] + self._outbound_nodes = [] + self.supports_masking = False # Manage input shape information if passed. 
@@ -172,39 +174,418 @@ class Layer(tf_base_layers.Layer): else: self._initial_weights = None - def add_weight(self, - name, - shape, + def _init_set_name(self, name, zero_based=True): + if not name: + self._name = unique_layer_name( + to_snake_case(self.__class__.__name__), zero_based=zero_based) + else: + self._name = name + + @property + def dtype(self): + return self._dtype + + @property + def name(self): + return self._name + + @property + def activity_regularizer(self): + """Optional regularizer function for the output of this layer.""" + return self._activity_regularizer + + @activity_regularizer.setter + def activity_regularizer(self, regularizer): + """Optional regularizer function for the output of this layer.""" + self._activity_regularizer = regularizer + + @property + def trainable_weights(self): + return self._trainable_weights if self.trainable else [] + + @property + def non_trainable_weights(self): + if self.trainable: + return self._non_trainable_weights + else: + return self._trainable_weights + self._non_trainable_weights + + @property + def trainable_variables(self): + return self.trainable_weights + + @property + def non_trainable_variables(self): + return self.non_trainable_weights + + @property + def weights(self): + """Returns the list of all layer variables/weights. + + Returns: + A list of variables. + """ + return self.trainable_weights + self.non_trainable_weights + + @property + def variables(self): + """Returns the list of all layer variables/weights. + + Returns: + A list of variables. + """ + return self.weights + + @property + def updates(self): + if context.executing_eagerly(): + raise RuntimeError('Layer.updates not supported in Eager mode.') + if not self.trainable and not self.stateful: + return [] + return self._updates + + def add_update(self, updates, inputs=None): + """Add update op(s), potentially dependent on layer inputs. 
+ + Weight updates (for instance, the updates of the moving mean and variance + in a BatchNormalization layer) may be dependent on the inputs passed + when calling a layer. Hence, when reusing the same layer on + different inputs `a` and `b`, some entries in `layer.updates` may be + dependent on `a` and some on `b`. This method automatically keeps track + of dependencies. + + The `get_updates_for` method allows to retrieve the updates relevant to a + specific set of inputs. + + This call is ignored when eager execution is enabled (in that case, variable + updates are run on the fly and thus do not need to be tracked for later + execution). + + Arguments: + updates: Update op, or list/tuple of update ops. + inputs: If anything other than None is passed, it signals the updates + are conditional on some of the layer's inputs, + and thus they should only be run where these inputs are available. + This is the case for BatchNormalization updates, for instance. + If None, the updates will be taken into account unconditionally, + and you are responsible for making sure that any dependency they might + have is available at runtime. + A step counter might fall into this category. + """ + if context.executing_eagerly(): + return # Updates already applied when in eager mode. + + def process_update(x): + if isinstance(x, ops.Operation): + return x + elif hasattr(x, 'op'): + return x.op + else: + return ops.convert_to_tensor(x) + + updates = generic_utils.to_list(updates) + updates = [process_update(x) for x in updates] + self._updates += updates + if inputs is None: + for u in updates: + u._unconditional_update = True # pylint: disable=protected-access + else: + for u in updates: + u._unconditional_update = False # pylint: disable=protected-access + + def get_updates_for(self, inputs): + """Retrieves updates relevant to a specific set of inputs. + + Arguments: + inputs: Input tensor or list/tuple of input tensors. 
+ + Returns: + List of update ops of the layer that depend on `inputs`. + + Raises: + RuntimeError: If called in Eager mode. + """ + if context.executing_eagerly(): + raise RuntimeError('`get_updates_for()` not supported in Eager mode.') + + # Updates disabled if layer is not trainable and not explicitly stateful. + if not self.trainable and not self.stateful: + return [] + + if inputs is None: + # Requesting unconditional updates. + return [x for x in self.updates if x._unconditional_update] # pylint: disable=protected-access + + # Requesting input-conditional updates. + inputs = nest.flatten(inputs) + reachable = get_reachable_from_inputs(inputs, self.updates) + updates = [] + for update in self.updates: + if update in reachable: + updates.append(update) + return updates + + @property + def losses(self): + """Losses which are associated with this `Layer`. + + Note that when executing eagerly, getting this property evaluates + regularizers. When using graph execution, variable regularization ops have + already been created and are simply returned here. + + Returns: + A list of tensors. + """ + if context.executing_eagerly(): + # _losses may only contain variable regularization losses when executing + # eagerly, and they have been saved as lambdas to be executed when + # requested. + return [regularizer() for regularizer in self._losses] + else: + return self._losses + + def add_loss(self, losses, inputs=None): + """Add loss tensor(s), potentially dependent on layer inputs. + + Some losses (for instance, activity regularization losses) may be dependent + on the inputs passed when calling a layer. Hence, when reusing the same + layer on different inputs `a` and `b`, some entries in `layer.losses` may + be dependent on `a` and some on `b`. This method automatically keeps track + of dependencies. + + The `get_losses_for` method allows to retrieve the losses relevant to a + specific set of inputs. + + Note that `add_loss` is not supported when executing eagerly. 
Instead, + variable regularizers may be added through `add_variable`. Activity + regularization is not supported directly (but such losses may be returned + from `Layer.call()`). + + Arguments: + losses: Loss tensor, or list/tuple of tensors. + inputs: If anything other than None is passed, it signals the losses + are conditional on some of the layer's inputs, + and thus they should only be run where these inputs are available. + This is the case for activity regularization losses, for instance. + If `None` is passed, the losses are assumed + to be unconditional, and will apply across all dataflows of the layer + (e.g. weight regularization losses). + + Raises: + RuntimeError: If called in Eager mode. + """ + if context.executing_eagerly(): + # TODO(fchollet): it should be possible (and highly desirable) to support + # `add_loss` in eager mode. This allows great convenience and flexibility + # in defining custom losses on the fly (e.g. in VAEs). + # Simply appending the loss value to `self._losses` + # is the correct behavior. + # The only caveat is that we need to force the user to only call + # `add_loss` from inside a model or Layer's `call` method + # (otherwise the loss computation cannot be backproped through). + raise RuntimeError('Layer.add_loss not supported in Eager mode.') + + losses = generic_utils.to_list(losses) + self._losses += losses + if inputs is None: + for loss in losses: + loss._unconditional_loss = True # pylint: disable=protected-access + else: + for loss in losses: + loss._unconditional_loss = False # pylint: disable=protected-access + + def get_losses_for(self, inputs): + """Retrieves losses relevant to a specific set of inputs. + + Arguments: + inputs: Input tensor or list/tuple of input tensors. + + Returns: + List of loss tensors of the layer that depend on `inputs`. + + Raises: + RuntimeError: If called in Eager mode. 
+ """ + if context.executing_eagerly(): + raise RuntimeError('Layer.get_losses_for not supported in Eager mode.') + + if inputs is None: + # Requesting unconditional losses. + return [x for x in self.losses if x._unconditional_loss] # pylint: disable=protected-access + + # Requesting input-conditional losses. + inputs = nest.flatten(inputs) + # Retrieve the set of tensors in the TF graph that depend on `inputs`. + # The losses we want to return will be part of this set. + # To avoid unnecessary work, we stop the search in case all of + # `self.losses` have been retrieved. + reachable = get_reachable_from_inputs(inputs, self.losses) + losses = [] + for loss in self.losses: + if loss in reachable: + losses.append(loss) + return losses + + def _name_scope(self): + return self.name + + def build(self, _): + """Creates the variables of the layer.""" + self.built = True + + def add_variable(self, *args, **kwargs): + """Alias for `add_weight`.""" + return self.add_weight(*args, **kwargs) + + def add_weight(self, name, shape, dtype=None, initializer=None, regularizer=None, trainable=True, - constraint=None): - """Adds a weight variable to the layer. + constraint=None, + partitioner=None, + use_resource=None, + getter=None): + """Adds a new variable to the layer, or gets an existing one; returns it. Arguments: - name: String, the name for the weight variable. - shape: The shape tuple of the weight. - dtype: The dtype of the weight. - initializer: An Initializer instance (callable). - regularizer: An optional Regularizer instance. - trainable: A boolean, whether the weight should - be trained via backprop or not (assuming - that the layer itself is also trainable). - constraint: An optional Constraint instance. + name: variable name. + shape: variable shape. + dtype: The type of the variable. Defaults to `self.dtype` or `float32`. + initializer: initializer instance (callable). + regularizer: regularizer instance (callable). 
+ trainable: whether the variable should be part of the layer's + "trainable_variables" (e.g. variables, biases) + or "non_trainable_variables" (e.g. BatchNorm mean, stddev). + Note, if the current variable scope is marked as non-trainable + then this parameter is ignored and any added variables are also + marked as non-trainable. + constraint: constraint instance (callable). + partitioner: Partitioner to be passed to the `Checkpointable` API. + use_resource: Whether to use `ResourceVariable`. + getter: Variable getter argument to be passed to the `Checkpointable` API. Returns: - The created weight variable. + The created variable. Usually either a `Variable` or `ResourceVariable` + instance. If `partitioner` is not `None`, a `PartitionedVariable` + instance is returned. + + Raises: + RuntimeError: If called with partioned variable regularization and + eager execution is enabled. + ValueError: When giving unsupported dtype and no initializer. """ if dtype is None: - dtype = K.floatx() - weight = self.add_variable(name, shape, - dtype=dtype, - initializer=initializers.get(initializer), - regularizer=regularizers.get(regularizer), - constraint=constraints.get(constraint), - trainable=trainable) - return weight + dtype = self.dtype or backend.floatx() + else: + dtype = dtypes.as_dtype(dtype) + initializer = initializers.get(initializer) + regularizer = regularizers.get(regularizer) + constraint = constraints.get(constraint) + + # Initialize variable when no initializer provided + if initializer is None: + # If dtype is DT_FLOAT, provide a uniform unit scaling initializer + if dtype.is_floating: + initializer = initializers.glorot_uniform() + # If dtype is DT_INT/DT_UINT, provide a default value `zero` + # If dtype is DT_BOOL, provide a default value `FALSE` + elif dtype.is_integer or dtype.is_unsigned or dtype.is_bool: + initializer = initializers.zeros() + # NOTES:Do we need to support for handling DT_STRING and DT_COMPLEX here? 
+ else: + raise ValueError('An initializer for variable %s of type %s is required' + ' for layer %s' % (name, dtype.base_dtype, self.name)) + + variable = self._add_variable_with_custom_getter( + name=name, + shape=shape, + # TODO(allenl): a `make_variable` equivalent should be added as a + # `Checkpointable` method. + getter=getter or make_variable, + # Manage errors in Layer rather than Checkpointable. + overwrite=True, + initializer=initializer, + dtype=dtypes.as_dtype(dtype), + constraint=constraint, + trainable=trainable and self.trainable, + partitioner=partitioner, + use_resource=use_resource) + + if regularizer is not None: + # TODO(fchollet): in the future, this should be handled at the + # level of variable creation, and weight regularization losses + # should be variable attributes. + self._handle_weight_regularization(name, variable, regularizer) + + if trainable: + self._trainable_weights.append(variable) + else: + self._non_trainable_weights.append(variable) + return variable + + def _handle_weight_regularization(self, name, variable, regularizer): + # `init_graph` should point to the graph in which variable initialization + # will occur; it should be None if and only if initialization will take + # place in the eager context. + init_graph = None + if not context.executing_eagerly(): + default_graph = ops.get_default_graph() + if default_graph.building_function: + with ops.init_scope(): + # Retrieve the variables from the graph into which variables + # will be lifted; if initialization ops will be lifted into + # the eager context, then there is nothing to retrieve, since variable + # collections are not supported when eager execution is enabled. + if not context.executing_eagerly(): + init_graph = ops.get_default_graph() + else: + # Initialization ops will not be lifted out of the default graph. + init_graph = default_graph + + if init_graph is not None: # pylint: disable=protected-access + # The variable was created and initialized in a graph. 
+ if regularizer: + if isinstance(variable, tf_variables.PartitionedVariable): + for v in variable: + with ops.colocate_with(v.op): + with ops.name_scope(name + '/Regularizer'): + regularization = regularizer(v) + if regularization is not None: + self.add_loss(regularization) + else: + with ops.colocate_with(variable.op): + with ops.name_scope(name + '/Regularizer'): + regularization = regularizer(variable) + if regularization is not None: + self.add_loss(regularization) + elif regularizer: # initialization took place in an eager context + if isinstance(variable, tf_variables.PartitionedVariable): + raise RuntimeError( + 'Partitioned variable regularization is not yet ' + 'supported when executing eagerly. File a feature request' + 'if this is important to you.') + # Save a zero-argument lambda which runs the regularizer on the + # variable, to be executed when `Layer.losses` is requested. + # This makes losses responsive to variable updates when executing + # eagerly. + # + # TODO(akshayka): Do the same for graphs as well, so that losses + # collected in a while_loop can be run outside its control flow + # context and so that losses won't be swallowed up by graph functions + # (i.e., `.losses()` should always create regularizers). + self._losses.append(lambda: regularizer(variable)) + + def _handle_activity_regularization(self, inputs, outputs): + # Apply activity regularization. + # Note that it should be applied every time the layer creates a new + # output, since it is output-specific. + if self._activity_regularizer: + output_list = nest.flatten(outputs) + for output in output_list: + with ops.name_scope('ActivityRegularizer'): + activity_regularization = self._activity_regularizer(output) + self.add_loss(activity_regularization, inputs=inputs) def call(self, inputs, **kwargs): # pylint: disable=unused-argument """This is where the layer's logic lives. 
@@ -218,6 +599,215 @@ class Layer(tf_base_layers.Layer): """ return inputs + def __call__(self, inputs, *args, **kwargs): + """Wraps `call`, applying pre- and post-processing steps. + + Arguments: + inputs: input tensor(s). + *args: additional positional arguments to be passed to `self.call`. + **kwargs: additional keyword arguments to be passed to `self.call`. + + Returns: + Output tensor(s). + + Note: + - The following optional keyword arguments are reserved for specific uses: + * `training`: Boolean scalar tensor of Python boolean indicating + whether the `call` is meant for training or inference. + * `mask`: Boolean input mask. + - If the layer's `call` method takes a `mask` argument (as some Keras + layers do), its default value will be set to the mask generated + for `inputs` by the previous layer (if `input` did come from + a layer that generated a corresponding mask, i.e. if it came from + a Keras layer with masking support. + + Raises: + ValueError: if the layer's `call` method returns None (an invalid value). + """ + input_list = nest.flatten(inputs) + + build_graph = not context.executing_eagerly() + # TODO(fchollet, allenl): Make deferred mode work with subclassed Models + # which don't use an "inputs" argument. + in_deferred_mode = isinstance(input_list[0], DeferredTensor) + + # Handle Keras mask propagation from previous layer to current layer. + previous_mask = None + if (not hasattr(self, '_compute_previous_mask') or + self._compute_previous_mask): + previous_mask = collect_previous_mask(inputs) + if not hasattr(self, '_call_fn_args'): + self._call_fn_args = estimator_util.fn_args(self.call) + if ('mask' in self._call_fn_args and 'mask' not in kwargs and + not is_all_none(previous_mask)): + # The previous layer generated a mask, and mask was not explicitly pass + # to __call__, hence we set previous_mask as the default value. 
+ kwargs['mask'] = previous_mask + + input_shapes = None + + with ops.name_scope(self._name_scope()): + if not self.built: + if not build_graph: + # Activity regularization is currently unsupported in Eager mode. + if self._activity_regularizer: + raise ValueError( + 'activity_regularizer currently unsupported with ' + 'eager execution enabled. Found an activity_regularizer in ' + '%s(%s).' % (self.__class__.__name__, self)) + if not build_graph and not in_deferred_mode: + for x in input_list: + if hasattr(x, '_keras_history'): + raise ValueError('_keras_history currently unsupported in ' + 'Eager mode. Found _keras_history in %s while ' + 'executing __call__ for %s(%s)' % + (x, self.__class_.__name__, self)) + + # Check input assumptions set before layer building, e.g. input rank. + self._assert_input_compatibility(inputs) + if input_list and self._dtype is None: + try: + self._dtype = input_list[0].dtype.base_dtype.name + except AttributeError: + pass + if all(hasattr(x, 'get_shape') for x in input_list): + input_shapes = nest.map_structure(lambda x: x.get_shape(), inputs) + self.build(input_shapes) + + # Check input assumptions set after layer building, e.g. input shape. + if build_graph or in_deferred_mode: + self._assert_input_compatibility(inputs) + + if not in_deferred_mode: + outputs = self.call(inputs, *args, **kwargs) + if outputs is None: + raise ValueError('A layer\'s `call` method should return a Tensor ' + 'or a list of Tensors, not None (layer: ' + + self.name + ').') + else: + # Deferred mode behavior: use `compute_output_shape` to + # infer the number of outputs of the layer and their shapes. + if input_shapes is None: + input_shapes = nest.map_structure(lambda x: x.get_shape(), inputs) + + output_shapes = self.compute_output_shape(input_shapes) + output_shapes = nest.flatten(output_shapes) + outputs = [ + # TODO(fchollet): name the deferred tensors? 
+ DeferredTensor(shape=shape, dtype=self._dtype) + for shape in output_shapes + ] + if len(outputs) == 1: + outputs = outputs[0] + + if build_graph: + self._handle_activity_regularization(inputs, outputs) + # TODO(fchollet): consider enabling masking for Eager mode. + self._set_mask_metadata(inputs, outputs, previous_mask) + + if in_deferred_mode or build_graph and have_all_keras_metadata(inputs): + inputs, outputs = self._set_connectivity_metadata_( + inputs, outputs, args, kwargs) + + self.built = True + if context.executing_eagerly(): + return outputs + + if hasattr(self, '_symbolic_set_inputs') and not self.inputs: + # Subclassed network: explicitly set metadata normally set by a call to + # self._set_inputs(). This is not relevant in eager execution. + self._symbolic_set_inputs(inputs, outputs) + + if in_deferred_mode or build_graph: + self._set_learning_phase_metadata(inputs, outputs) + + # Optionally load weight values that were specified at layer instantiation. + # TODO(fchollet): consider enabling this with eager execution too. + if hasattr(self, '_initial_weights') and self._initial_weights is not None: + self.set_weights(self._initial_weights) + del self._initial_weights + return outputs + + def apply(self, inputs, *args, **kwargs): + """Apply the layer on a input. + + This simply wraps `self.__call__`. + + Arguments: + inputs: Input tensor(s). + *args: additional positional arguments to be passed to `self.call`. + **kwargs: additional keyword arguments to be passed to `self.call`. + + Returns: + Output tensor(s). + """ + return self.__call__(inputs, *args, **kwargs) + + def _set_learning_phase_metadata(self, inputs, outputs): + # Update learning phase info. To work with subclassed models, + # this should be done even if Keras metadata is absent. 
+ output_tensors = generic_utils.to_list(outputs) + uses_lp = any( + [getattr(x, '_uses_learning_phase', False) + for x in generic_utils.to_list(inputs)]) + uses_lp = getattr(self, 'uses_learning_phase', False) or uses_lp + for i in range(len(output_tensors)): + try: + output_tensors[i]._uses_learning_phase = getattr( + output_tensors[i], '_uses_learning_phase', False) or uses_lp + except AttributeError: + # An output element happens to be a C type (such as tuple or dict). + # We don't track learning phase info in such edge cases. + pass + + def _set_mask_metadata(self, inputs, outputs, previous_mask): + if hasattr(self, 'compute_mask'): + output_mask = self.compute_mask(inputs, previous_mask) + if isinstance(outputs, (list, tuple)): + if output_mask is None: + output_mask = [None for _ in range(len(outputs))] + for x, m in zip(outputs, output_mask): + try: + x._keras_mask = m # pylint: disable=protected-access + except AttributeError: + pass # C type such as dict. Masking not supported in this case. + else: + try: + outputs._keras_mask = output_mask # pylint: disable=protected-access + except AttributeError: + pass # C type such as dict. Masking not supported in this case. + + def _set_connectivity_metadata_(self, inputs, outputs, args, kwargs): + if args and getattr(self, '_uses_inputs_arg', True): + raise TypeError( + 'This Layer takes an `inputs` argument to call(), and only the ' + '`inputs` argument may be specified as a positional argument. ' + 'Pass everything else as a keyword argument (those arguments will' + ' not be tracked as inputs to the Layer).') + + # If the layer returns tensors from its inputs, unmodified, + # we copy them to avoid loss of tensor metadata. 
+ output_ls = nest.flatten(outputs) + output_ls_copy = [] + for x in output_ls: + if x in nest.flatten(inputs): + with ops.name_scope(self.name): + x = array_ops.identity(x) + output_ls_copy.append(x) + if len(output_ls_copy) == 1: + outputs = output_ls_copy[0] + else: + outputs = output_ls_copy + + inputs, kwargs = self._inputs_from_call_args( + call_args=(inputs,) + args, call_kwargs=kwargs) + # Add an inbound node to the layer, so it can keep track of this call. + # This updates the layer history of the output tensor(s). + kwargs.pop('mask', None) # `mask` should not be serialized. + self._add_inbound_node( + input_tensors=inputs, output_tensors=outputs, arguments=kwargs) + return inputs, outputs + def _inputs_from_call_args(self, call_args, call_kwargs): """Get Layer inputs from __call__ *args and **kwargs. @@ -282,71 +872,6 @@ class Layer(tf_base_layers.Layer): input_arg_values.extend(bound_args[call_arg_spec.varargs]) return input_arg_values, non_input_arg_values - def __call__(self, inputs, *args, **kwargs): - """Wrapper around self.call(), for handling internal references. - - If a Keras tensor is passed: - - We call self._add_inbound_node(). - - If necessary, we `build` the layer to match - the shape of the input(s). - - We update the _keras_history of the output tensor(s) - with the current layer. - This is done as part of _add_inbound_node(). - - Arguments: - inputs: Can be a tensor or list/tuple of tensors. - *args: Additional positional arguments to be passed to `call()`. Only - allowed in subclassed Models with custom call() signatures. In other - cases, `Layer` inputs must be passed using the `inputs` argument and - non-inputs must be keyword arguments. - **kwargs: Additional keyword arguments to be passed to `call()`. - - Returns: - Output of the layer's `call` method. - - Raises: - ValueError: in case the layer is missing shape information - for its `build` call. 
- TypeError: If positional arguments are passed and this `Layer` is not a - subclassed `Model`. - """ - # Actually call the layer (optionally building it). - output = super(Layer, self).__call__(inputs, *args, **kwargs) - - if args and getattr(self, '_uses_inputs_arg', True): - raise TypeError( - 'This Layer takes an `inputs` argument to call(), and only the ' - '`inputs` argument may be specified as a positional argument. Pass ' - 'everything else as a keyword argument (those arguments will not be ' - 'tracked as inputs to the Layer).') - - if context.executing_eagerly(): - return output - - inputs, kwargs = self._inputs_from_call_args( - call_args=(inputs,) + args, call_kwargs=kwargs) - - if hasattr(self, '_symbolic_set_inputs') and not self.inputs: - # Subclassed network: explicitly set metadata normally set by a call to - # self._set_inputs(). - self._symbolic_set_inputs(inputs, output) - - # Update learning phase info. - output_tensors = generic_utils.to_list(output) - uses_lp = any( - [getattr(x, '_uses_learning_phase', False) - for x in generic_utils.to_list(inputs)]) - uses_lp = getattr(self, 'uses_learning_phase', False) or uses_lp - for i in range(len(output_tensors)): - output_tensors[i]._uses_learning_phase = getattr( - output_tensors[i], '_uses_learning_phase', False) or uses_lp - - # Optionally load weight values that were specified at layer instantiation. - if hasattr(self, '_initial_weights') and self._initial_weights is not None: - self.set_weights(self._initial_weights) - del self._initial_weights - return output - def compute_output_shape(self, input_shape): """Computes the output shape of the layer. @@ -362,13 +887,7 @@ class Layer(tf_base_layers.Layer): Returns: An input shape tuple. """ - logging.warning( - 'All custom layers should implement the ' - '`compute_output_shape` method. 
This layer (' + self.name + ') ' - 'is relying on the base `Layer.compute_output_shape` implementation, ' - 'which will start raising a `NotImplementedError` ' - 'as of July 1st, 2018.') - return input_shape + raise NotImplementedError def compute_mask(self, inputs, mask=None): # pylint: disable=unused-argument """Computes an output mask tensor. @@ -396,6 +915,87 @@ class Layer(tf_base_layers.Layer): # carry over the input mask return mask + def _add_inbound_node(self, + input_tensors, + output_tensors, + arguments=None): + """Internal method to create an inbound node for the layer. + + Arguments: + input_tensors: list of input tensors. + output_tensors: list of output tensors. + arguments: dictionary of keyword arguments that were passed to the + `call` method of the layer at the call that created the node. + """ + input_tensors = nest.flatten(input_tensors) + output_tensors = nest.flatten(output_tensors) + + # Collect input tensor(s) coordinates. + inbound_layers = [] + node_indices = [] + tensor_indices = [] + for x in input_tensors: + assert hasattr(x, '_keras_history') + inbound_layer, node_index, tensor_index = x._keras_history # pylint: disable=protected-access + inbound_layers.append(inbound_layer) + node_indices.append(node_index) + tensor_indices.append(tensor_index) + + # Create node, add it to inbound nodes. + Node( + self, + inbound_layers=inbound_layers, + node_indices=node_indices, + tensor_indices=tensor_indices, + input_tensors=input_tensors, + output_tensors=output_tensors, + arguments=arguments) + + # Update tensor history metadata. + for i in range(len(output_tensors)): + # The metadata attribute consists of 1) a layer instance + # 2) a node index for the layer, 3) a tensor index for the node. + # The allows layer reuse (multiple nodes per layer) and multi-output + # or multi-input layers (e.g. a layer can return multiple tensors, + # and each can be sent to a different layer). 
+ output_tensors[i]._keras_history = (self, len(self._inbound_nodes) - 1, i) # pylint: disable=protected-access + + def _get_node_attribute_at_index(self, node_index, attr, attr_name): + """Private utility to retrieves an attribute (e.g. inputs) from a node. + + This is used to implement the methods: + - get_input_shape_at + - get_output_shape_at + - get_input_at + etc... + + Arguments: + node_index: Integer index of the node from which + to retrieve the attribute. + attr: Exact node attribute name. + attr_name: Human-readable attribute name, for error messages. + + Returns: + The layer's attribute `attr` at the node of index `node_index`. + + Raises: + RuntimeError: If the layer has no inbound nodes, or if called in Eager + mode. + ValueError: If the index provided does not match any node. + """ + if not self._inbound_nodes: + raise RuntimeError('The layer has never been called ' + 'and thus has no defined ' + attr_name + '.') + if not len(self._inbound_nodes) > node_index: + raise ValueError('Asked to get ' + attr_name + ' at node ' + + str(node_index) + ', but the layer has only ' + + str(len(self._inbound_nodes)) + ' inbound nodes.') + values = getattr(self._inbound_nodes[node_index], attr) + if len(values) == 1: + return values[0] + else: + return values + def get_input_mask_at(self, node_index): """Retrieves the input mask tensor(s) of a layer at a given node. @@ -476,6 +1076,325 @@ class Layer(tf_base_layers.Layer): else: return getattr(output, '_keras_mask', None) + def get_input_shape_at(self, node_index): + """Retrieves the input shape(s) of a layer at a given node. + + Arguments: + node_index: Integer, index of the node + from which to retrieve the attribute. + E.g. `node_index=0` will correspond to the + first time the layer was called. + + Returns: + A shape tuple + (or list of shape tuples if the layer has multiple inputs). + + Raises: + RuntimeError: If called in Eager mode. 
+ """ + return self._get_node_attribute_at_index(node_index, 'input_shapes', + 'input shape') + + def get_output_shape_at(self, node_index): + """Retrieves the output shape(s) of a layer at a given node. + + Arguments: + node_index: Integer, index of the node + from which to retrieve the attribute. + E.g. `node_index=0` will correspond to the + first time the layer was called. + + Returns: + A shape tuple + (or list of shape tuples if the layer has multiple outputs). + + Raises: + RuntimeError: If called in Eager mode. + """ + return self._get_node_attribute_at_index(node_index, 'output_shapes', + 'output shape') + + def get_input_at(self, node_index): + """Retrieves the input tensor(s) of a layer at a given node. + + Arguments: + node_index: Integer, index of the node + from which to retrieve the attribute. + E.g. `node_index=0` will correspond to the + first time the layer was called. + + Returns: + A tensor (or list of tensors if the layer has multiple inputs). + + Raises: + RuntimeError: If called in Eager mode. + """ + return self._get_node_attribute_at_index(node_index, 'input_tensors', + 'input') + + def get_output_at(self, node_index): + """Retrieves the output tensor(s) of a layer at a given node. + + Arguments: + node_index: Integer, index of the node + from which to retrieve the attribute. + E.g. `node_index=0` will correspond to the + first time the layer was called. + + Returns: + A tensor (or list of tensors if the layer has multiple outputs). + + Raises: + RuntimeError: If called in Eager mode. + """ + return self._get_node_attribute_at_index(node_index, 'output_tensors', + 'output') + + @property + def input(self): + """Retrieves the input tensor(s) of a layer. + + Only applicable if the layer has exactly one input, + i.e. if it is connected to one incoming layer. + + Returns: + Input tensor or list of input tensors. + + Raises: + AttributeError: if the layer is connected to + more than one incoming layers. 
+ + Raises: + RuntimeError: If called in Eager mode. + AttributeError: If no inbound nodes are found. + """ + if not self._inbound_nodes: + raise AttributeError('Layer ' + self.name + + ' is not connected, no input to return.') + return self._get_node_attribute_at_index(0, 'input_tensors', 'input') + + @property + def output(self): + """Retrieves the output tensor(s) of a layer. + + Only applicable if the layer has exactly one output, + i.e. if it is connected to one incoming layer. + + Returns: + Output tensor or list of output tensors. + + Raises: + AttributeError: if the layer is connected to more than one incoming + layers. + RuntimeError: if called in Eager mode. + """ + if not self._inbound_nodes: + raise AttributeError('Layer ' + self.name + ' has no inbound nodes.') + return self._get_node_attribute_at_index(0, 'output_tensors', 'output') + + @property + def input_shape(self): + """Retrieves the input shape(s) of a layer. + + Only applicable if the layer has exactly one input, + i.e. if it is connected to one incoming layer, or if all inputs + have the same shape. + + Returns: + Input shape, as an integer shape tuple + (or list of shape tuples, one tuple per input tensor). + + Raises: + AttributeError: if the layer has no defined input_shape. + RuntimeError: if called in Eager mode. + """ + if not self._inbound_nodes: + raise AttributeError('The layer has never been called ' + 'and thus has no defined input shape.') + all_input_shapes = set( + [str(node.input_shapes) for node in self._inbound_nodes]) + if len(all_input_shapes) == 1: + input_shapes = self._inbound_nodes[0].input_shapes + if len(input_shapes) == 1: + return tuple(tensor_shape.TensorShape(input_shapes[0]).as_list()) + else: + return [ + tuple(tensor_shape.TensorShape(shape).as_list()) + for shape in input_shapes + ] + else: + raise AttributeError('The layer "' + str(self.name) + + ' has multiple inbound nodes, ' + 'with different input shapes. 
Hence ' + 'the notion of "input shape" is ' + 'ill-defined for the layer. ' + 'Use `get_input_shape_at(node_index)` ' + 'instead.') + + def count_params(self): + """Count the total number of scalars composing the weights. + + Returns: + An integer count. + + Raises: + ValueError: if the layer isn't yet built + (in which case its weights aren't yet defined). + """ + if not self.built: + if self.__class__.__name__ == 'Sequential': + self.build() # pylint: disable=no-value-for-parameter + else: + raise ValueError('You tried to call `count_params` on ' + self.name + + ', but the layer isn\'t built. ' + 'You can build it manually via: `' + self.name + + '.build(batch_input_shape)`.') + weight_shapes = [w.get_shape().as_list() for w in self.weights] + return int(sum([np.prod(w) for w in weight_shapes])) + + @property + def output_shape(self): + """Retrieves the output shape(s) of a layer. + + Only applicable if the layer has one output, + or if all outputs have the same shape. + + Returns: + Output shape, as an integer shape tuple + (or list of shape tuples, one tuple per output tensor). + + Raises: + AttributeError: if the layer has no defined output shape. + RuntimeError: if called in Eager mode. + """ + if not self._inbound_nodes: + raise AttributeError('The layer has never been called ' + 'and thus has no defined output shape.') + all_output_shapes = set( + [str(node.output_shapes) for node in self._inbound_nodes]) + if len(all_output_shapes) == 1: + output_shapes = self._inbound_nodes[0].output_shapes + if len(output_shapes) == 1: + return tuple(tensor_shape.TensorShape(output_shapes[0]).as_list()) + else: + return [ + tuple(tensor_shape.TensorShape(shape).as_list()) + for shape in output_shapes + ] + else: + raise AttributeError('The layer "%s"' + ' has multiple inbound nodes, ' + 'with different output shapes. Hence ' + 'the notion of "output shape" is ' + 'ill-defined for the layer. ' + 'Use `get_output_shape_at(node_index)` ' + 'instead.' 
% self.name) + + @property + def inbound_nodes(self): + """Deprecated, do NOT use! Only for compatibility with external Keras.""" + return self._inbound_nodes + + @property + def outbound_nodes(self): + """Deprecated, do NOT use! Only for compatibility with external Keras.""" + return self._outbound_nodes + + def _assert_input_compatibility(self, inputs): + """Checks compatibility between the layer and provided inputs. + + This checks that the tensor(s) `inputs` verify the input assumptions + of the layer (if any). If not, a clear and actional exception gets raised. + + Arguments: + inputs: input tensor or list of input tensors. + + Raises: + ValueError: in case of mismatch between + the provided inputs and the expectations of the layer. + """ + if not self.input_spec: + return + if not isinstance(self.input_spec, (list, tuple)): + input_spec = nest.flatten(self.input_spec) + else: + input_spec = self.input_spec + inputs = nest.flatten(inputs) + if len(inputs) != len(input_spec): + raise ValueError('Layer ' + self.name + ' expects ' + + str(len(input_spec)) + ' inputs, ' + 'but it received ' + str(len(inputs)) + + ' input tensors. Inputs received: ' + str(inputs)) + for input_index, (x, spec) in enumerate(zip(inputs, input_spec)): + if spec is None: + continue + + if (spec.ndim is not None or + spec.min_ndim is not None or + spec.max_ndim is not None): + if x.get_shape().ndims is None: + raise ValueError('Input ' + str(input_index) + ' of layer ' + + self.name + ' is incompatible with the layer: ' + 'its rank is undefined, but the layer requires a ' + 'defined rank.') + + # Check ndim. + if spec.ndim is not None: + ndim = x.get_shape().ndims + if ndim != spec.ndim: + raise ValueError('Input ' + str(input_index) + ' of layer ' + + self.name + ' is incompatible with the layer: ' + 'expected ndim=' + str(spec.ndim) + ', found ndim=' + + str(ndim) + '. 
Full shape received: ' + + str(x.get_shape().as_list())) + if spec.max_ndim is not None: + ndim = x.get_shape().ndims + if ndim is not None and ndim > spec.max_ndim: + raise ValueError('Input ' + str(input_index) + ' of layer ' + + self.name + ' is incompatible with the layer: ' + 'expected max_ndim=' + str(spec.max_ndim) + + ', found ndim=' + str(ndim)) + if spec.min_ndim is not None: + ndim = x.get_shape().ndims + if ndim is not None and ndim < spec.min_ndim: + raise ValueError('Input ' + str(input_index) + ' of layer ' + + self.name + ' is incompatible with the layer: ' + ': expected min_ndim=' + str(spec.min_ndim) + + ', found ndim=' + str(ndim) + + '. Full shape received: ' + + str(x.get_shape().as_list())) + # Check dtype. + if spec.dtype is not None: + if x.dtype != spec.dtype: + raise ValueError('Input ' + str(input_index) + ' of layer ' + + self.name + ' is incompatible with the layer: ' + 'expected dtype=' + str(spec.dtype) + + ', found dtype=' + str(x.dtype)) + # Check specific shape axes. + if spec.axes: + shape = x.get_shape().as_list() + if shape is not None: + for axis, value in spec.axes.items(): + if hasattr(value, 'value'): + value = value.value + if value is not None and shape[int(axis)] not in {value, None}: + raise ValueError( + 'Input ' + str(input_index) + ' of layer ' + self.name + ' is' + ' incompatible with the layer: expected axis ' + str(axis) + + ' of input shape to have value ' + str(value) + + ' but received input with shape ' + str(shape)) + # Check shape. + if spec.shape is not None: + shape = x.get_shape().as_list() + if shape is not None: + for spec_dim, dim in zip(spec.shape, shape): + if spec_dim is not None and dim is not None: + if spec_dim != dim: + raise ValueError('Input ' + str(input_index) + + ' is incompatible with layer ' + self.name + + ': expected shape=' + str(spec.shape) + + ', found shape=' + str(shape)) + def set_weights(self, weights): """Sets the weights of the layer, from Numpy arrays. 
@@ -500,14 +1419,14 @@ class Layer(tf_base_layers.Layer): if not params: return weight_value_tuples = [] - param_values = K.batch_get_value(params) + param_values = backend.batch_get_value(params) for pv, p, w in zip(param_values, params, weights): if pv.shape != w.shape: raise ValueError('Layer weight shape ' + str(pv.shape) + ' not compatible with ' 'provided weight shape ' + str(w.shape)) weight_value_tuples.append((p, w)) - K.batch_set_value(weight_value_tuples) + backend.batch_set_value(weight_value_tuples) def get_weights(self): """Returns the current weights of the layer. @@ -516,7 +1435,7 @@ class Layer(tf_base_layers.Layer): Weights values as a list of numpy arrays. """ params = self.weights - return K.batch_get_value(params) + return backend.batch_get_value(params) def get_config(self): """Returns the config of the layer. @@ -558,9 +1477,196 @@ class Layer(tf_base_layers.Layer): """ return cls(**config) - @tf_base_layers.Layer.activity_regularizer.setter - def activity_regularizer(self, activity_regularizer): - self._activity_regularizer = activity_regularizer + +@tf_export('keras.layers.InputSpec', 'layers.InputSpec') +class InputSpec(object): + """Specifies the ndim, dtype and shape of every input to a layer. + + Every layer should expose (if appropriate) an `input_spec` attribute: + a list of instances of InputSpec (one per input tensor). + + A None entry in a shape is compatible with any dimension, + a None shape is compatible with any shape. + + Arguments: + dtype: Expected DataType of the input. + shape: Shape tuple, expected shape of the input + (may include None for unchecked axes). + ndim: Integer, expected rank of the input. + max_ndim: Integer, maximum rank of the input. + min_ndim: Integer, minimum rank of the input. + axes: Dictionary mapping integer axes to + a specific dimension value. 
+ """ + + def __init__(self, + dtype=None, + shape=None, + ndim=None, + max_ndim=None, + min_ndim=None, + axes=None): + self.dtype = dtype + self.shape = shape + if shape is not None: + self.ndim = len(shape) + else: + self.ndim = ndim + self.max_ndim = max_ndim + self.min_ndim = min_ndim + self.axes = axes or {} + + def __repr__(self): + spec = [('dtype=' + str(self.dtype)) if self.dtype else '', + ('shape=' + str(self.shape)) if self.shape else '', + ('ndim=' + str(self.ndim)) if self.ndim else '', + ('max_ndim=' + str(self.max_ndim)) if self.max_ndim else '', + ('min_ndim=' + str(self.min_ndim)) if self.min_ndim else '', + ('axes=' + str(self.axes)) if self.axes else ''] + return 'InputSpec(%s)' % ', '.join(x for x in spec if x) + + +class Node(object): + """A `Node` describes the connectivity between two layers. + + Each time a layer is connected to some new input, + a node is added to `layer._inbound_nodes`. + Each time the output of a layer is used by another layer, + a node is added to `layer._outbound_nodes`. + + Arguments: + outbound_layer: the layer that takes + `input_tensors` and turns them into `output_tensors` + (the node gets created when the `call` + method of the layer was called). + inbound_layers: a list of layers, the same length as `input_tensors`, + the layers from where `input_tensors` originate. + node_indices: a list of integers, the same length as `inbound_layers`. + `node_indices[i]` is the origin node of `input_tensors[i]` + (necessary since each inbound layer might have several nodes, + e.g. if the layer is being shared with a different data stream). + tensor_indices: a list of integers, + the same length as `inbound_layers`. + `tensor_indices[i]` is the index of `input_tensors[i]` within the + output of the inbound layer + (necessary since each inbound layer might + have multiple tensor outputs, with each one being + independently manipulable). + input_tensors: list of input tensors. + output_tensors: list of output tensors. 
+ arguments: dictionary of keyword arguments that were passed to the + `call` method of the layer at the call that created the node. + + `node_indices` and `tensor_indices` are basically fine-grained coordinates + describing the origin of the `input_tensors`. + + A node from layer A to layer B is added to: + - A._outbound_nodes + - B._inbound_nodes + """ + + def __init__(self, + outbound_layer, + inbound_layers, + node_indices, + tensor_indices, + input_tensors, + output_tensors, + arguments=None): + # Layer instance (NOT a list). + if isinstance(outbound_layer, list): + raise ValueError( + '`outbound_layer` should be a layer instance, not a list.') + # this is the layer that takes a list of input tensors + # and turns them into a list of output tensors. + # the current node will be added to + # the inbound_nodes of outbound_layer. + self.outbound_layer = outbound_layer + + # The following 3 properties describe where + # the input tensors come from: which layers, + # and for each layer, which node and which + # tensor output of each node. + + # List of layer instances. + self.inbound_layers = inbound_layers + # List of integers, 1:1 mapping with inbound_layers. + self.node_indices = node_indices + # List of integers, 1:1 mapping with inbound_layers. + self.tensor_indices = tensor_indices + + # Following 2 properties: + # tensor inputs and outputs of outbound_layer. + + # List of tensors. 1:1 mapping with inbound_layers. + self.input_tensors = input_tensors + # List of tensors, created by outbound_layer.call(). + self.output_tensors = output_tensors + + # Following 2 properties: input and output shapes. + + # List of shape tuples, shapes of input_tensors. + self.input_shapes = [static_shape(x) for x in input_tensors] + # List of shape tuples, shapes of output_tensors. + self.output_shapes = [static_shape(x) for x in output_tensors] + + # Optional keyword arguments to layer's `call`. + self.arguments = arguments + + # Add nodes to all layers involved. 
+ for layer in inbound_layers: + if layer is not None: + # For compatibility with external Keras, we use the deprecated + # accessor here. + layer.outbound_nodes.append(self) + # For compatibility with external Keras, we use the deprecated + # accessor here. + outbound_layer.inbound_nodes.append(self) + + def get_config(self): + inbound_names = [] + for layer in self.inbound_layers: + if layer: + inbound_names.append(layer.name) + else: + inbound_names.append(None) + return { + 'outbound_layer': self.outbound_layer.name, + 'inbound_layers': inbound_names, + 'node_indices': self.node_indices, + 'tensor_indices': self.tensor_indices + } + + +class DeferredTensor(object): + """Tensor-like object used to build graphs of layers in Eager mode. + + When calling a layer on a DeferredTensor, the layer will not perform any + computation and will simply perfom shape inference to return new + DeferredTensors with appropriate shape information. Thus DeferredTensor + behaves like a graph-mode Tensor when manipulated by layers. 
+ """ + + def __init__(self, shape, dtype, name=None): + self.shape = tensor_shape.TensorShape(shape) + if dtype is None: + self.dtype = dtypes.as_dtype(np.float32) + else: + self.dtype = dtypes.as_dtype(dtype) + self.name = name + + def get_shape(self): + return self.shape + + def __str__(self): + return "DeferredTensor('%s', shape=%s, dtype=%s)" % (self.name, + self.get_shape(), + self.dtype.name) + + def __repr__(self): + return "" % (self.name, + self.get_shape(), + self.dtype.name) def shape_type_conversion(fn): @@ -589,3 +1695,251 @@ def shape_type_conversion(fn): return tensor_shape.TensorShape(output_shape) return wrapper + + +def object_list_uid(object_list): + """Creates a single string from object ids.""" + object_list = nest.flatten(object_list) + return ', '.join([str(abs(id(x))) for x in object_list]) + + +def static_shape(x): + """Get the static shape of a Tensor, or None if it is unavailable.""" + if x is None: + return None + try: + return tuple(x.get_shape().as_list()) + except ValueError: + return None + + +def get_reachable_from_inputs(inputs, targets=None): + """Returns the set of tensors/ops reachable from `inputs`. + + Stops if all targets have been found (target is optional). + + Only valid in Symbolic mode, not Eager mode. + + Args: + inputs: List of tensors. + targets: List of tensors. + + Returns: + A set of tensors reachable from the inputs (includes the inputs themselves). 
+ """ + reachable = set(inputs) + if targets: + targets = set(targets) + queue = inputs[:] + + while queue: + x = queue.pop() + if isinstance(x, ops.Operation): + outputs = x.outputs[:] or [] + outputs += x._control_outputs + elif isinstance(x, ops.Tensor): + outputs = x.consumers() + elif isinstance(x, tf_variables.Variable): + outputs = [x.op] + else: + raise TypeError('Expected Operation, Variable, or Tensor, got ' + str(x)) + + for y in outputs: + if y not in reachable: + reachable.add(y) + queue.insert(0, y) + + if targets and targets.issubset(reachable): + return reachable + return reachable + + +def unique_layer_name(name, name_uid_map=None, avoid_names=None, namespace='', + zero_based=False): + """Makes a layer name (or arbitrary string) unique within a TensorFlow graph. + + Arguments: + name: String name to make unique. + name_uid_map: An optional defaultdict(int) to use when creating unique + names. If None (default), uses a per-Graph dictionary. + avoid_names: An optional set or dict with names which should not be used. If + None (default) does not avoid any names. + namespace: Gets a name which is unique within the (graph, namespace). Layers + which are not Networks use a blank namespace and so get graph-global + names. + zero_based: If True, name sequences start with no suffix (e.g. "dense", + "dense_1"). If False, naming is one-based ("dense_1", "dense_2"). + + Returns: + Unique string name. 
+ + Example: + + ```python + _unique_layer_name('dense') # dense_1 + _unique_layer_name('dense') # dense_2 + ``` + """ + if name_uid_map is None: + name_uid_map = get_default_graph_uid_map() + if avoid_names is None: + avoid_names = set() + proposed_name = None + while proposed_name is None or proposed_name in avoid_names: + name_key = (namespace, name) + if zero_based: + number = name_uid_map[name_key] + if number: + proposed_name = name + '_' + str(number) + else: + proposed_name = name + name_uid_map[name_key] += 1 + else: + name_uid_map[name_key] += 1 + proposed_name = name + '_' + str(name_uid_map[name_key]) + return proposed_name + + +def to_snake_case(name): + intermediate = re.sub('(.)([A-Z][a-z0-9]+)', r'\1_\2', name) + insecure = re.sub('([a-z])([A-Z])', r'\1_\2', intermediate).lower() + # If the class is private the name starts with "_" which is not secure + # for creating scopes. We prefix the name with "private" in this case. + if insecure[0] != '_': + return insecure + return 'private' + insecure + + +def is_all_none(iterable_or_element): + if not isinstance(iterable_or_element, (list, tuple)): + iterable = [iterable_or_element] + else: + iterable = iterable_or_element + # We cannot use Python's `any` because the iterable may return Tensors. + for element in iterable: + if element is not None: + return False + return True + + +def have_all_keras_metadata(iterable_or_element): + if not isinstance(iterable_or_element, (list, tuple)): + iterable = [iterable_or_element] + else: + iterable = iterable_or_element + return all([hasattr(x, '_keras_history') for x in iterable]) + + +def collect_previous_mask(input_tensors): + """Retrieves the output mask(s) of the previous node. + + Arguments: + input_tensors: A tensor or list of tensors. + + Returns: + A mask tensor or list of mask tensors. 
+ """ + input_tensors = nest.flatten(input_tensors) + masks = [] + for x in input_tensors: + if hasattr(x, '_keras_mask'): + mask = x._keras_mask # pylint: disable=protected-access + masks.append(mask) + else: + masks.append(None) + if len(masks) == 1: + return masks[0] + return masks + + +def is_tensor_or_tensor_list(v): + v = nest.flatten(v) + if v and isinstance(v[0], ops.Tensor): + return True + else: + return False + + +def get_default_graph_uid_map(): + # TODO(fchollet): refactor this into backend. + graph = ops.get_default_graph() + name_uid_map = backend.PER_GRAPH_LAYER_NAME_UIDS.get(graph, None) + if name_uid_map is None: + name_uid_map = collections.defaultdict(int) + backend.PER_GRAPH_LAYER_NAME_UIDS[graph] = name_uid_map + return name_uid_map + + +def make_variable(name, + shape=None, + dtype=dtypes.float32, + initializer=None, + partition_info=None, + trainable=True, + caching_device=None, + validate_shape=True, + constraint=None, + use_resource=None, + partitioner=None): # pylint: disable=unused-argument + """Temporary util to create a variable (relies on `variable_scope.variable`). + + Some reuse-related technicalities prevent us from using + `variable_scope.get_variable()` directly, so we use a subcomponent + that has fewer constraints (`variable_scope.variable()`). + + In the longer term, it seems like a similar "default variable creator" method + should exist in `CheckpointableBase` instead. When this happens, we can get + rid of this temporary solution. + + TODO(fchollet): remove this method when no longer needed. + TODO(fchollet): handle `partitioner` argument. + + Arguments: + name: Variable name. + shape: Variable shape. + dtype: The type of the variable. Defaults to `self.dtype` or `float32`. + initializer: Initializer instance (callable). + partition_info: Not handled at this time. + trainable: Whether the variable should be part of the layer's + "trainable_variables" (e.g. variables, biases) + or "non_trainable_variables" (e.g. 
BatchNorm mean, stddev). + Note, if the current variable scope is marked as non-trainable + then this parameter is ignored and any added variables are also + marked as non-trainable. + caching_device: Passed to `vs.variable`. + validate_shape: Passed to `vs.variable`. + constraint: Constraint instance (callable). + use_resource: Whether to use a `ResourceVariable`. + partitioner: Not handled at this time. + + Returns: + Variable instance. + """ + initializing_from_value = False + if initializer is not None and not callable(initializer): + initializing_from_value = True + + with ops.init_scope(): + if initializing_from_value: + init_val = initializer + variable_dtype = None + else: + # Instantiate initializer if provided initializer is a type object. + if isinstance(initializer, type(init_ops.Initializer)): + initializer = initializer(dtype=dtype) + init_val = lambda: initializer( # pylint: disable=g-long-lambda + shape, dtype=dtype, partition_info=partition_info) + variable_dtype = dtype.base_dtype + if use_resource is None: + use_resource = True + + v = vs.variable( + initial_value=init_val, + name=name, + trainable=trainable, + caching_device=caching_device, + dtype=variable_dtype, + validate_shape=validate_shape, + constraint=constraint, + use_resource=use_resource) + return v diff --git a/tensorflow/python/keras/_impl/keras/engine/input_layer.py b/tensorflow/python/keras/_impl/keras/engine/input_layer.py index b51dd8a2189d0c8542c84dfeac9be0d72b96ff1b..bd9dcbe3c576851123dfcabe3e36379019627ac5 100644 --- a/tensorflow/python/keras/_impl/keras/engine/input_layer.py +++ b/tensorflow/python/keras/_impl/keras/engine/input_layer.py @@ -23,7 +23,6 @@ from tensorflow.python.eager import context from tensorflow.python.framework import tensor_shape from tensorflow.python.keras._impl.keras import backend as K from tensorflow.python.keras._impl.keras.engine import base_layer -from tensorflow.python.layers import base as tf_base_layers from tensorflow.python.ops import 
array_ops from tensorflow.python.util.tf_export import tf_export @@ -95,7 +94,7 @@ class InputLayer(base_layer.Layer): if context.executing_eagerly(): # In eager mode, create a temporary placeholder to call the layer on. - input_tensor = tf_base_layers._DeferredTensor( # pylint: disable=protected-access + input_tensor = base_layer.DeferredTensor( # pylint: disable=protected-access shape=batch_input_shape, dtype=dtype, name=self.name) @@ -123,7 +122,7 @@ class InputLayer(base_layer.Layer): # Create an input node to add to self.outbound_node # and set output_tensors' _keras_history. input_tensor._keras_history = (self, 0, 0) # pylint: disable=protected-access - tf_base_layers.Node( + base_layer.Node( self, inbound_layers=[], node_indices=[], diff --git a/tensorflow/python/keras/_impl/keras/engine/network.py b/tensorflow/python/keras/_impl/keras/engine/network.py index 9f1c7de1157a3659ccb27e4850e99e09016d0067..cc177c14a894040df37f75bbdc6b2651336fe869 100644 --- a/tensorflow/python/keras/_impl/keras/engine/network.py +++ b/tensorflow/python/keras/_impl/keras/engine/network.py @@ -35,8 +35,6 @@ from tensorflow.python.keras._impl.keras.engine import saving from tensorflow.python.keras._impl.keras.utils import generic_utils from tensorflow.python.keras._impl.keras.utils.io_utils import ask_to_proceed_with_overwrite from tensorflow.python.keras._impl.keras.utils.layer_utils import print_summary as print_layer_summary -from tensorflow.python.layers import base as tf_base_layers -from tensorflow.python.layers import utils as tf_layers_util from tensorflow.python.platform import tf_logging as logging from tensorflow.python.training import checkpointable from tensorflow.python.util import nest @@ -82,7 +80,7 @@ class Network(base_layer.Layer): # self.losses # self.updates - self._init_set_name(name) + self._init_set_name(name, zero_based=True) self._activity_regularizer = None # This acts just like the `trainable` attribute of any layer instance. 
# It does not affect users of the underlying layers, only users of the @@ -132,14 +130,14 @@ class Network(base_layer.Layer): if context.executing_eagerly(): # Check that all inputs/outputs are DeferredTensors. for tensor in self.inputs: - if not isinstance(tensor, tf_base_layers._DeferredTensor): # pylint: disable=protected-access + if not isinstance(tensor, base_layer.DeferredTensor): # pylint: disable=protected-access raise TypeError('When eager execution is enabled, ' 'inputs must come from a call to ' '`tf.keras.Input` (called after ' 'tfe.enable_eager_execution()). ' 'Received invalid input: ' + str(tensor)) for tensor in self.outputs: - if not isinstance(tensor, tf_base_layers._DeferredTensor): # pylint: disable=protected-access + if not isinstance(tensor, base_layer.DeferredTensor): # pylint: disable=protected-access raise TypeError('When eager execution is enabled, ' 'outputs must come from a call to ' 'a layer (called after ' @@ -230,7 +228,7 @@ class Network(base_layer.Layer): self._layers_by_depth = layers_by_depth # Create the node linking internal inputs to internal outputs. 
- tf_base_layers.Node( + base_layer.Node( outbound_layer=self, inbound_layers=[], node_indices=[], @@ -243,8 +241,8 @@ class Network(base_layer.Layer): for x in self.inputs: mask = x._keras_mask if hasattr(x, '_keras_mask') else None # pylint: disable=protected-access masks.append(mask) - mask_cache_key = (tf_layers_util.object_list_uid(self.inputs) + '_' + - tf_layers_util.object_list_uid(masks)) + mask_cache_key = (base_layer.object_list_uid(self.inputs) + '_' + + base_layer.object_list_uid(masks)) masks = [] for x in self.outputs: mask = x._keras_mask if hasattr(x, '_keras_mask') else None # pylint: disable=protected-access @@ -289,7 +287,7 @@ class Network(base_layer.Layer): self.built = False def __setattr__(self, name, value): - if isinstance(value, (tf_base_layers.Layer, Network)): + if isinstance(value, (base_layer.Layer, Network)): try: is_graph_network = self._is_graph_network except AttributeError: @@ -299,6 +297,10 @@ class Network(base_layer.Layer): if not is_graph_network: if value not in self._layers: self._layers.append(value) + if hasattr(value, '_use_resource_variables'): + # In subclassed models, legacy layers (tf.layers) must always use + # resource variables. 
+ value._use_resource_variables = True if isinstance(value, checkpointable.CheckpointableBase): # Layer (and therefore Network/Model) inherit from CheckpointableBase # rather than Checkpointable, which means there is no Checkpointable @@ -387,8 +389,8 @@ class Network(base_layer.Layer): masks = [None for _ in range(len(inputs))] else: masks = generic_utils.to_list(mask) - cache_key = (tf_layers_util.object_list_uid(inputs) - + '_' + tf_layers_util.object_list_uid(masks)) + cache_key = (base_layer.object_list_uid(inputs) + + '_' + base_layer.object_list_uid(masks)) if cache_key in self._output_mask_cache: return self._output_mask_cache[cache_key] else: @@ -502,8 +504,7 @@ class Network(base_layer.Layer): relevant_inputs += inputs else: relevant_inputs.append(inputs) - reachable = tf_layers_util.get_reachable_from_inputs(relevant_inputs, - updates) + reachable = base_layer.get_reachable_from_inputs(relevant_inputs, updates) relevant_conditional_updates = [x for x in updates if x in reachable] unconditional_updates = [ x for x in updates if x._unconditional_update] # pylint: disable=protected-access @@ -540,8 +541,7 @@ class Network(base_layer.Layer): relevant_inputs += inputs else: relevant_inputs.append(inputs) - reachable = tf_layers_util.get_reachable_from_inputs(relevant_inputs, - losses) + reachable = base_layer.get_reachable_from_inputs(relevant_inputs, losses) relevant_conditional_losses = [x for x in losses if x in reachable] unconditional_losses = [ x for x in losses if x._unconditional_loss] # pylint: disable=protected-access @@ -623,8 +623,8 @@ class Network(base_layer.Layer): if not context.executing_eagerly(): # Try to retrieve cached outputs if the layer has already been called # on these exact inputs. - cache_key = (tf_layers_util.object_list_uid(inputs) - + '_' + tf_layers_util.object_list_uid(masks)) + cache_key = (base_layer.object_list_uid(inputs) + + '_' + base_layer.object_list_uid(masks)) if cache_key in self._output_tensor_cache: # Cache hit. 
return self._output_tensor_cache[cache_key] @@ -656,7 +656,7 @@ class Network(base_layer.Layer): ': model has ' + str(len(self._input_layers)) + ' tensor inputs.') - cache_key = tf_layers_util.object_list_uid(input_shapes) + cache_key = base_layer.object_list_uid(input_shapes) if cache_key not in self._output_shape_cache: # Cache miss. We have to run the network graph manually (recursive calls # to `compute_output_shape`). @@ -845,7 +845,7 @@ class Network(base_layer.Layer): for x in self.outputs: assert str(id(x)) in tensor_map, 'Could not compute output ' + str(x) tensor, mask = tensor_map[str(id(x))] - output_shapes.append(tf_layers_util.static_shape(x)) + output_shapes.append(base_layer.static_shape(x)) output_tensors.append(tensor) output_masks.append(mask) @@ -859,14 +859,14 @@ class Network(base_layer.Layer): if not context.executing_eagerly(): # Update cache; # keys are based on ids on input tensors and inputs masks. - cache_key = (tf_layers_util.object_list_uid(inputs) - + '_' + tf_layers_util.object_list_uid(masks)) + cache_key = (base_layer.object_list_uid(inputs) + + '_' + base_layer.object_list_uid(masks)) self._output_tensor_cache[cache_key] = output_tensors self._output_mask_cache[cache_key] = output_masks if output_shapes is not None: - input_shapes = [tf_layers_util.static_shape(x) for x in inputs] - cache_key = tf_layers_util.object_list_uid(input_shapes) + input_shapes = [base_layer.static_shape(x) for x in inputs] + cache_key = base_layer.object_list_uid(input_shapes) self._output_shape_cache[cache_key] = output_shapes return output_tensors, output_masks diff --git a/tensorflow/python/keras/_impl/keras/engine/saving_test.py b/tensorflow/python/keras/_impl/keras/engine/saving_test.py index dde090120456f968267e1c572f22eda1bd6ed7c4..3b1578cddfd97b31cae8619cdd2d8e1997585f51 100644 --- a/tensorflow/python/keras/_impl/keras/engine/saving_test.py +++ b/tensorflow/python/keras/_impl/keras/engine/saving_test.py @@ -422,7 +422,7 @@ class 
TestWholeModelSaving(test.TestCase): f = keras.layers.Dense(2, name='nested_model_dense_%d' % (i,))(f) # This layer name will make the `weights_name` # HDF5 attribute blow out of proportion. - f = keras.layers.Dense(2, name='nested_model_output' + ('x' * (2**15)))(f) + f = keras.layers.Dense(2, name='nested_model_output' + ('x' * (2**14)))(f) nested_model = keras.Model(inputs=[x], outputs=[f], name='nested_model') x = keras.Input(shape=(2,), name='outer_model_input') diff --git a/tensorflow/python/keras/_impl/keras/engine/sequential.py b/tensorflow/python/keras/_impl/keras/engine/sequential.py index 2ef99d5ab3f432058fdf685b99b01aa0b5eeffdc..bd13ca671340551c3e96895951be360b15e55cfe 100644 --- a/tensorflow/python/keras/_impl/keras/engine/sequential.py +++ b/tensorflow/python/keras/_impl/keras/engine/sequential.py @@ -123,7 +123,7 @@ class Sequential(Model): multiple output tensors, or is already connected somewhere else (forbidden in `Sequential` models). """ - if not isinstance(layer, (base_layer.Layer, base_layer.TFBaseLayer)): + if not isinstance(layer, base_layer.Layer): raise TypeError('The added layer must be ' 'an instance of class Layer. 
' 'Found: ' + str(layer)) diff --git a/tensorflow/python/keras/_impl/keras/engine/sequential_test.py b/tensorflow/python/keras/_impl/keras/engine/sequential_test.py index c9a47581df03e0fc1ad38552ba8634862435cd80..8aba16aef3e187e9e33bdb65c7d44b0e622730ef 100644 --- a/tensorflow/python/keras/_impl/keras/engine/sequential_test.py +++ b/tensorflow/python/keras/_impl/keras/engine/sequential_test.py @@ -151,6 +151,7 @@ class TestSequential(test.TestCase): with self.test_session(): model = keras.models.Sequential() model.add(keras.layers.BatchNormalization(input_shape=(4,))) + assert model.updates model.trainable = False assert not model.updates diff --git a/tensorflow/python/keras/_impl/keras/engine/topology_test.py b/tensorflow/python/keras/_impl/keras/engine/topology_test.py index b50277c8fff917d77694903c989fd02ea98b1711..49cc1cd3b38325b4f42d5b26bac9442d7cc09b05 100644 --- a/tensorflow/python/keras/_impl/keras/engine/topology_test.py +++ b/tensorflow/python/keras/_impl/keras/engine/topology_test.py @@ -25,7 +25,7 @@ from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import test_util from tensorflow.python.keras._impl import keras -from tensorflow.python.layers import base as tf_base_layers +from tensorflow.python.keras._impl.keras.engine import base_layer from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import state_ops @@ -52,11 +52,13 @@ class TopologyConstructionTest(test.TestCase): (1, 1), 'float32', trainable=False) - self.add_update(state_ops.assign_add(self.a, [[1.]])) + self.add_update(state_ops.assign_add(self.a, [[1.]], + name='unconditional_update')) self.built = True def call(self, inputs): - self.add_update(state_ops.assign_add(self.a, inputs), + self.add_update(state_ops.assign_add(self.b, inputs, + name='conditional_update'), inputs=True) return inputs + 1 @@ -97,10 +99,20 @@ class 
TopologyConstructionTest(test.TestCase): self.assertEqual(len(network.updates), 4) self.assertEqual(len(network.get_updates_for(None)), 2) - network.add_update(state_ops.assign_add(layer.a, x4), inputs=True) + network.add_update(state_ops.assign_add(layer.b, x4), inputs=True) self.assertEqual(len(network.updates), 5) self.assertEqual(len(network.get_updates_for(x4)), 2) + def test_get_updates_bn(self): + x1 = keras.Input(shape=(1,)) + layer = keras.layers.BatchNormalization() + _ = layer.apply(x1) + + print('BN updates', layer._updates) + self.assertEqual(len(layer.updates), 2) + self.assertEqual(len(layer.get_updates_for(x1)), 2) + self.assertEqual(len(layer.get_updates_for(None)), 0) + def test_get_losses(self): class MyLayer(keras.layers.Layer): @@ -783,7 +795,7 @@ class TopologyConstructionTest(test.TestCase): def test_activity_regularization_with_model_composition(self): def reg(x): - return keras.backend.sum(x) + return math_ops.reduce_sum(x) net_a_input = keras.Input((2,)) net_a = net_a_input @@ -875,25 +887,25 @@ class TopologyConstructionTest(test.TestCase): class DeferredModeTest(test.TestCase): def testDeferredTensorAttributes(self): - x = tf_base_layers._DeferredTensor(shape=(None, 2), - dtype='float32', - name='x') + x = base_layer.DeferredTensor(shape=(None, 2), + dtype='float32', + name='x') self.assertEqual(str(x), 'DeferredTensor(\'x\', shape=(?, 2), dtype=float32)') self.assertEqual(repr(x), - '<_DeferredTensor \'x\' shape=(?, 2) dtype=float32>') + '<DeferredTensor \'x\' shape=(?, 2) dtype=float32>') @test_util.run_in_graph_and_eager_modes() def testSimpleNetworkBuilding(self): inputs = keras.engine.Input(shape=(32,)) if context.executing_eagerly(): - self.assertIsInstance(inputs, tf_base_layers._DeferredTensor) + self.assertIsInstance(inputs, base_layer.DeferredTensor) self.assertEqual(inputs.dtype.name, 'float32') self.assertEqual(inputs.shape.as_list(), [None, 32]) x = keras.layers.Dense(2)(inputs) if context.executing_eagerly(): - self.assertIsInstance(x, tf_base_layers._DeferredTensor) +
self.assertIsInstance(x, base_layer.DeferredTensor) self.assertEqual(x.dtype.name, 'float32') self.assertEqual(x.shape.as_list(), [None, 2]) @@ -936,5 +948,34 @@ class DeferredModeTest(test.TestCase): self.assertEqual(outputs[0].shape.as_list(), [10, 16]) self.assertEqual(outputs[1].shape.as_list(), [10, 2]) + +class GraphUtilsTest(test.TestCase): + + def testGetReachableFromInputs(self): + + with self.test_session(): + pl_1 = array_ops.placeholder(shape=None, dtype='float32') + pl_2 = array_ops.placeholder(shape=None, dtype='float32') + pl_3 = array_ops.placeholder(shape=None, dtype='float32') + x_1 = pl_1 + pl_2 + x_2 = pl_2 * 2 + x_3 = pl_3 + 1 + x_4 = x_1 + x_2 + x_5 = x_3 * pl_1 + + self.assertEqual( + keras.engine.base_layer.get_reachable_from_inputs([pl_1]), + {pl_1, x_1, x_4, x_5, x_1.op, x_4.op, x_5.op}) + self.assertEqual( + keras.engine.base_layer.get_reachable_from_inputs([pl_1, pl_2]), + {pl_1, pl_2, x_1, x_2, x_4, x_5, x_1.op, x_2.op, x_4.op, x_5.op}) + self.assertEqual( + keras.engine.base_layer.get_reachable_from_inputs([pl_3]), + {pl_3, x_3, x_5, x_3.op, x_5.op}) + self.assertEqual( + keras.engine.base_layer.get_reachable_from_inputs([x_3]), + {x_3, x_5, x_5.op}) + + if __name__ == '__main__': test.main() diff --git a/tensorflow/python/keras/_impl/keras/engine/training.py b/tensorflow/python/keras/_impl/keras/engine/training.py index 971245c162a6cf872fdf86f2a09f34908de67b7c..146e8fdac9a2dd51ad928958b5281485c1e732ae 100644 --- a/tensorflow/python/keras/_impl/keras/engine/training.py +++ b/tensorflow/python/keras/_impl/keras/engine/training.py @@ -20,6 +20,8 @@ from __future__ import print_function import numpy as np +from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.data.ops import iterator_ops from tensorflow.python.eager import context from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_util @@ -31,10 +33,10 @@ from tensorflow.python.keras._impl.keras.engine import training_arrays 
from tensorflow.python.keras._impl.keras.engine import training_eager from tensorflow.python.keras._impl.keras.engine import training_generator from tensorflow.python.keras._impl.keras.engine import training_utils +from tensorflow.python.keras._impl.keras.engine.base_layer import DeferredTensor from tensorflow.python.keras._impl.keras.engine.base_layer import Layer from tensorflow.python.keras._impl.keras.engine.network import Network from tensorflow.python.keras._impl.keras.utils.generic_utils import slice_arrays -from tensorflow.python.layers.base import _DeferredTensor from tensorflow.python.ops import array_ops from tensorflow.python.platform import tf_logging as logging from tensorflow.python.training import optimizer as tf_optimizer_module @@ -276,6 +278,8 @@ class Model(Network): self.metrics_names.append(self.output_names[i] + '_loss') self.nested_metrics = training_utils.collect_metrics(metrics, self.output_names) + with K.name_scope('metrics'): + training_utils.populate_metric_names(self) self._feed_sample_weight_modes = [] for i in range(len(self.outputs)): self._feed_sample_weight_modes.append(None) @@ -462,7 +466,6 @@ class Model(Network): output_weighted_metrics = nested_weighted_metrics[i] def handle_metrics(metrics, weights=None): - metric_name_prefix = 'weighted_' if weights is not None else '' for metric in metrics: if metric in ('accuracy', 'acc', 'crossentropy', 'ce'): @@ -489,39 +492,19 @@ class Model(Network): metric_fn = metrics_module.categorical_accuracy elif metric in ('crossentropy', 'ce'): metric_fn = metrics_module.categorical_crossentropy - if metric in ('accuracy', 'acc'): - suffix = 'acc' - elif metric in ('crossentropy', 'ce'): - suffix = 'ce' weighted_metric_fn = training_utils.weighted_masked_objective( metric_fn) - metric_name = metric_name_prefix + suffix else: metric_fn = metrics_module.get(metric) weighted_metric_fn = training_utils.weighted_masked_objective( metric_fn) - # Get metric name as string - if hasattr(metric_fn, 
'name'): - metric_name = metric_fn.name - else: - metric_name = metric_fn.__name__ - metric_name = metric_name_prefix + metric_name - + metric_name = training_utils.get_base_metric_name( + metric, weighted=weights is not None) with K.name_scope(metric_name): metric_result = weighted_metric_fn( y_true, y_pred, weights=weights, mask=masks[i]) - # Append to self.metrics_names, self.metric_tensors, - # self.stateful_metric_names - if len(self.output_names) > 1: - metric_name = '%s_%s' % (self.output_names[i], metric_name) - # Dedupe name - j = 1 - base_metric_name = metric_name - while metric_name in self.metrics_names: - metric_name = '%s_%d' % (base_metric_name, j) - j += 1 - self.metrics_names.append(metric_name) + training_utils.add_metric_name(self, metric_name, i) self.metrics_tensors.append(metric_result) # Keep track of state updates created by @@ -653,12 +636,20 @@ class Model(Network): This is a purely internal method, subject to refactoring at any time. Args: - x: An array or list of arrays, to be used as input data. If the model - has known, named inputs, this could also be a dict mapping input names - to the corresponding array. - y: An array or list of arrays, to be used as target data. If the model - has known, named outputs, this could also be a dict mapping output names - to the corresponding array. + x: Input data. It could be: + - A Numpy array (or array-like), or a list of arrays + (in case the model has multiple inputs). + - A TensorFlow tensor, or a list of tensors + (in case the model has multiple inputs). + - A dict mapping input names to the corresponding array/tensors, + if the model has named inputs. + - A `tf.data` dataset iterator. + y: Target data. Like the input data `x`, + it could be either Numpy array(s) or TensorFlow tensor(s). + It should be consistent with `x` (you cannot have Numpy inputs and + tensor targets, or inversely). 
If `x` is a dataset iterator, + `y` should not be specified + (since targets will be obtained from the iterator). sample_weight: An optional sample-weight array passed by the user to weight the importance of each sample in `x`. class_weight: An optional class-weight array by the user to @@ -678,6 +669,31 @@ class Model(Network): RuntimeError: If the model was never compiled. """ # First, we build/compile the model on the fly if necessary. + if isinstance(x, dataset_ops.Dataset): + raise ValueError('You passed a `Dataset` instance to your model (%s), ' + 'which is not supported. Instead, pass an `Iterator`, ' + 'which you can obtain e.g. via ' + '`dataset.make_one_shot_iterator()` (the exact method ' + 'to use will depend on your specific dataset).' % x) + if isinstance(x, iterator_ops.Iterator): + if y is not None: + raise ValueError('You passed a dataset iterator (%s) as input `x` to ' + 'your model. In that case, you should not specify ' + 'a target (`y`) argument, since the dataset iterator ' + 'generates both input data and target data. ' + 'Received: %s' % (x, y)) + if not context.executing_eagerly(): + x, y = x.get_next() + # TODO(fchollet): handle case of `get_next` not returning 2 tensors? + else: + # TODO(psv): implement this. The way to support it will be to typecheck + # for `iterator` before `_standardize_user_data` is called and redirect + # to new training/eval functions in `training_eager.py`. The model + # may need to get built using the specs of the data from the first batch + # drawn from the iterator. + raise ValueError('Dataset iterators are not supported ' + 'with eager execution yet.') + all_inputs = [] if not self.built: # We need to use `x` to set the model inputs. 
@@ -891,15 +907,6 @@ class Model(Network): else: self._symbolic_set_inputs(inputs, training=training) - def _set_scope(self, scope=None): - """Modify the Layer scope creation logic to create ResourceVariables.""" - super(Model, self)._set_scope(scope=scope) - # Subclassed Models create ResourceVariables by default. This makes it - # easier to use Models in an eager/graph agnostic way (since eager execution - # always uses ResourceVariables). - if not self._is_graph_network: - self._scope.set_use_resource(True) - def _eager_set_inputs(self, inputs): """Set model's input and output specs based on the input data received. @@ -933,11 +940,11 @@ class Model(Network): else: dummy_output_values = [dummy_output_values] self.outputs = [ - _DeferredTensor(shape=(None for _ in v.shape), - dtype=v.dtype) for v in dummy_output_values] + DeferredTensor(shape=(None for _ in v.shape), + dtype=v.dtype) for v in dummy_output_values] self.inputs = [ - _DeferredTensor(shape=(None for _ in v.shape), - dtype=v.dtype) for v in dummy_input_values] + DeferredTensor(shape=(None for _ in v.shape), + dtype=v.dtype) for v in dummy_input_values] self.input_names = [ 'input_%d' % (i + 1) for i in range(len(dummy_input_values))] self.output_names = [ @@ -1044,22 +1051,26 @@ class Model(Network): """Trains the model for a fixed number of epochs (iterations on a dataset). Arguments: - x: Numpy array of training data (if the model has a single input), - or list of Numpy arrays (if the model has multiple inputs). - If input layers in the model are named, you can also pass a - dictionary mapping input names to Numpy arrays. - `x` can be `None` (default) if feeding from - TensorFlow data tensors. - y: Numpy array of target (label) data - (if the model has a single output), - or list of Numpy arrays (if the model has multiple outputs). - If output layers in the model are named, you can also pass a - dictionary mapping output names to Numpy arrays. 
- `y` can be `None` (default) if feeding from - TensorFlow data tensors. + x: Input data. It could be: + - A Numpy array (or array-like), or a list of arrays + (in case the model has multiple inputs). + - A TensorFlow tensor, or a list of tensors + (in case the model has multiple inputs). + - A dict mapping input names to the corresponding array/tensors, + if the model has named inputs. + - A `tf.data` dataset iterator. + y: Target data. Like the input data `x`, + it could be either Numpy array(s) or TensorFlow tensor(s). + It should be consistent with `x` (you cannot have Numpy inputs and + tensor targets, or inversely). If `x` is a dataset iterator, + `y` should not be specified + (since targets will be obtained from the iterator). batch_size: Integer or `None`. Number of samples per gradient update. If unspecified, `batch_size` will default to 32. + Do not specify the `batch_size` if your data is in the + form of symbolic tensors or dataset iterators (since they generate + batches). epochs: Integer. Number of epochs to train the model. An epoch is an iteration over the entire `x` and `y` data provided. @@ -1081,11 +1092,14 @@ class Model(Network): on this data at the end of each epoch. The validation data is selected from the last samples in the `x` and `y` data provided, before shuffling. - validation_data: tuple `(x_val, y_val)` or tuple - `(x_val, y_val, val_sample_weights)` on which to evaluate + validation_data: Data on which to evaluate the loss and any model metrics at the end of each epoch. The model will not be trained on this data. `validation_data` will override `validation_split`. + `validation_data` could be: + - tuple `(x_val, y_val)` of Numpy arrays or tensors + - tuple `(x_val, y_val, val_sample_weights)` of Numpy arrays + - dataset iterator shuffle: Boolean (whether to shuffle the training data before each epoch) or str (for 'batch').
'batch' is a special option for dealing with the @@ -1162,17 +1176,22 @@ class Model(Network): batch_size=batch_size) # Prepare validation data. if validation_data: - if len(validation_data) == 2: + if isinstance(validation_data, iterator_ops.Iterator): + val_x = validation_data + val_y = None + val_sample_weight = None + elif len(validation_data) == 2: val_x, val_y = validation_data # pylint: disable=unpacking-non-sequence val_sample_weight = None elif len(validation_data) == 3: val_x, val_y, val_sample_weight = validation_data # pylint: disable=unpacking-non-sequence else: raise ValueError( - 'When passing validation_data, ' - 'it must contain 2 (x_val, y_val) ' - 'or 3 (x_val, y_val, val_sample_weights) ' - 'items, however it contains %d items' % len(validation_data)) + 'When passing a `validation_data` argument, ' + 'it must contain either 2 items (x_val, y_val), ' + 'or 3 items (x_val, y_val, val_sample_weights), ' + 'or alternatively it could be a dataset iterator. However we ' + 'received `validation_data=%s`' % (validation_data,)) val_x, val_y, val_sample_weights = self._standardize_user_data( val_x, @@ -1181,6 +1200,9 @@ class Model(Network): batch_size=batch_size) elif validation_split and 0. < validation_split < 1.: + if training_utils.has_symbolic_tensors(x): + raise ValueError('If your data is in the form of symbolic tensors, ' + 'you cannot use `validation_split`.') if hasattr(x[0], 'shape'): split_at = int(x[0].shape[0] * (1. - validation_split)) else: @@ -1243,22 +1265,26 @@ class Model(Network): Computation is done in batches. Arguments: - x: Numpy array of test data (if the model has a single input), - or list of Numpy arrays (if the model has multiple inputs). - If input layers in the model are named, you can also pass a - dictionary mapping input names to Numpy arrays. - `x` can be `None` (default) if feeding from - TensorFlow data tensors.
- y: Numpy array of target (label) data - (if the model has a single output), - or list of Numpy arrays (if the model has multiple outputs). - If output layers in the model are named, you can also pass a - dictionary mapping output names to Numpy arrays. - `y` can be `None` (default) if feeding from - TensorFlow data tensors. + x: Input data. It could be: + - A Numpy array (or array-like), or a list of arrays + (in case the model has multiple inputs). + - A TensorFlow tensor, or a list of tensors + (in case the model has multiple inputs). + - A dict mapping input names to the corresponding array/tensors, + if the model has named inputs. + - A `tf.data` dataset iterator. + y: Target data. Like the input data `x`, + it could be either Numpy array(s) or TensorFlow tensor(s). + It should be consistent with `x` (you cannot have Numpy inputs and + tensor targets, or inversely). If `x` is a dataset iterator, + `y` should not be specified + (since targets will be obtained from the iterator). batch_size: Integer or `None`. - Number of samples per evaluation step. + Number of samples per evaluation batch. If unspecified, `batch_size` will default to 32. + Do not specify the `batch_size` if your data is in the + form of symbolic tensors or dataset iterators (since they generate + batches). verbose: 0 or 1. Verbosity mode. 0 = silent, 1 = progress bar. sample_weight: Optional Numpy array of weights for @@ -1316,9 +1342,13 @@ class Model(Network): Computation is done in batches. Arguments: - x: The input data, as a Numpy array - (or list of Numpy arrays if the model has multiple outputs). - batch_size: Integer. If unspecified, it will default to 32. + x: Input samples, as Numpy array(s) or tensor(s). + batch_size: Integer or `None`. + Number of samples per prediction batch. + If unspecified, `batch_size` will default to 32. + Do not specify the `batch_size` if your data is in the + form of symbolic tensors or dataset iterators (since they generate + batches).
verbose: Verbosity mode, 0 or 1. steps: Total number of steps (batches of samples) before declaring the prediction round finished. @@ -1349,20 +1379,24 @@ class Model(Network): return training_arrays.predict_loop( self, x, batch_size=batch_size, verbose=verbose, steps=steps) - def train_on_batch(self, x, y, sample_weight=None, class_weight=None): + def train_on_batch(self, x, y=None, sample_weight=None, class_weight=None): """Runs a single gradient update on a single batch of data. Arguments: - x: Numpy array of training data, - or list of Numpy arrays if the model has multiple inputs. - If all inputs in the model are named, - you can also pass a dictionary - mapping input names to Numpy arrays. - y: Numpy array of target data, - or list of Numpy arrays if the model has multiple outputs. - If all outputs in the model are named, - you can also pass a dictionary - mapping output names to Numpy arrays. + x: Input data. It could be: + - A Numpy array (or array-like), or a list of arrays + (in case the model has multiple inputs). + - A TensorFlow tensor, or a list of tensors + (in case the model has multiple inputs). + - A dict mapping input names to the corresponding array/tensors, + if the model has named inputs. + - A `tf.data` dataset iterator. + y: Target data. Like the input data `x`, + it could be either Numpy array(s) or TensorFlow tensor(s). + It should be consistent with `x` (you cannot have Numpy inputs and + tensor targets, or inversely). If `x` is a dataset iterator, + `y` should not be specified + (since targets will be obtained from the iterator). sample_weight: Optional array of the same length as x, containing weights to apply to the model's loss for each sample. In the case of temporal data, you can pass a 2D array @@ -1409,20 +1443,24 @@ class Model(Network): return outputs[0] return outputs - def test_on_batch(self, x, y, sample_weight=None): + def test_on_batch(self, x, y=None, sample_weight=None): """Test the model on a single batch of samples. 
Arguments: - x: Numpy array of test data, - or list of Numpy arrays if the model has multiple inputs. - If all inputs in the model are named, - you can also pass a dictionary - mapping input names to Numpy arrays. - y: Numpy array of target data, - or list of Numpy arrays if the model has multiple outputs. - If all outputs in the model are named, - you can also pass a dictionary - mapping output names to Numpy arrays. + x: Input data. It could be: + - A Numpy array (or array-like), or a list of arrays + (in case the model has multiple inputs). + - A TensorFlow tensor, or a list of tensors + (in case the model has multiple inputs). + - A dict mapping input names to the corresponding array/tensors, + if the model has named inputs. + - A `tf.data` dataset iterator. + y: Target data. Like the input data `x`, + it could be either Numpy array(s) or TensorFlow tensor(s). + It should be consistent with `x` (you cannot have Numpy inputs and + tensor targets, or inversely). If `x` is a dataset iterator, + `y` should not be specified + (since targets will be obtained from the iterator). sample_weight: Optional array of the same length as x, containing weights to apply to the model's loss for each sample. In the case of temporal data, you can pass a 2D array @@ -1462,7 +1500,7 @@ class Model(Network): """Returns predictions for a single batch of samples. Arguments: - x: Input samples, as a Numpy array. + x: Input samples, as Numpy array(s) or tensor(s). Returns: Numpy array(s) of predictions. 
diff --git a/tensorflow/python/keras/_impl/keras/engine/training_arrays.py b/tensorflow/python/keras/_impl/keras/engine/training_arrays.py index 18116e3a14d6b1365f1a9db1a23243cd07763a62..4164cae864c7f8aa8f59bb66d585aaa682fc8ff8 100644 --- a/tensorflow/python/keras/_impl/keras/engine/training_arrays.py +++ b/tensorflow/python/keras/_impl/keras/engine/training_arrays.py @@ -23,6 +23,7 @@ import copy import numpy as np +from tensorflow.python.framework import errors from tensorflow.python.keras._impl.keras import backend as K from tensorflow.python.keras._impl.keras import callbacks as cbks from tensorflow.python.keras._impl.keras.engine import training_utils @@ -30,6 +31,7 @@ from tensorflow.python.keras._impl.keras.engine.base_layer import Layer from tensorflow.python.keras._impl.keras.utils.generic_utils import make_batches from tensorflow.python.keras._impl.keras.utils.generic_utils import Progbar from tensorflow.python.keras._impl.keras.utils.generic_utils import slice_arrays +from tensorflow.python.platform import tf_logging as logging try: from scipy.sparse import issparse # pylint: disable=g-import-not-at-top @@ -190,7 +192,15 @@ def fit_loop(model, batch_logs['batch'] = step_index batch_logs['size'] = 1 callbacks.on_batch_begin(step_index, batch_logs) - outs = f(ins) + try: + outs = f(ins) + except errors.OutOfRangeError: + logging.warning('Your dataset iterator ran out of data; ' + 'interrupting training. Make sure that your dataset ' + 'can generate at least `steps_per_epoch * epochs` ' + 'batches (in this case, %d batches).' 
% + (steps_per_epoch * epochs)) + break if not isinstance(outs, list): outs = [outs] diff --git a/tensorflow/python/keras/_impl/keras/engine/training_eager.py b/tensorflow/python/keras/_impl/keras/engine/training_eager.py index 67858a578c5c95b3099e1e6713f3287748fc861f..ad239d6151e02a0c1b2753908cc775db8498d244 100644 --- a/tensorflow/python/keras/_impl/keras/engine/training_eager.py +++ b/tensorflow/python/keras/_impl/keras/engine/training_eager.py @@ -31,9 +31,8 @@ from tensorflow.python.keras._impl.keras import callbacks as cbks from tensorflow.python.keras._impl.keras import losses from tensorflow.python.keras._impl.keras import metrics as metrics_module from tensorflow.python.keras._impl.keras.engine import training_utils -from tensorflow.python.keras._impl.keras.utils.generic_utils import make_batches -from tensorflow.python.keras._impl.keras.utils.generic_utils import Progbar -from tensorflow.python.keras._impl.keras.utils.generic_utils import slice_arrays +from tensorflow.python.keras._impl.keras.utils import generic_utils +from tensorflow.python.ops import array_ops from tensorflow.python.platform import tf_logging as logging @@ -101,7 +100,7 @@ def _eager_metrics_fn(model, outputs, targets): metric_names.append(metric_name) metric_results.append(backend.mean(metric_result)) - return metric_names, metric_results + return metric_results def _model_loss(model, inputs, targets, sample_weights=None, training=False): @@ -151,8 +150,13 @@ def _model_loss(model, inputs, targets, sample_weights=None, training=False): weighted_masked_fn = training_utils.weighted_masked_objective(loss_fn) with backend.name_scope(model.output_names[i] + '_loss'): output_loss = weighted_masked_fn( - outs[i], targets[i], weights, mask=mask) - loss_metrics.append(backend.mean(output_loss)) + targets[i], outs[i], weights, mask=mask) + # If the number of outputs is 1 then we don't append the loss metric + # associated with each model output.
When there are multiple outputs + # associated with a model, each output's loss is calculated and returned + # as part of the loss_metrics. + if len(model.outputs) > 1: + loss_metrics.append(backend.mean(output_loss)) loss_weight = model.loss_weights_list[i] if total_loss is None: @@ -173,6 +177,41 @@ def _model_loss(model, inputs, targets, sample_weights=None, training=False): return outs, total_loss, loss_metrics +def slice_arrays(arrays, indices, contiguous=True): + """Slices batches out of provided arrays (workaround for eager tensors). + + Unfortunately eager tensors don't have the same slicing behavior as + Numpy arrays (they follow the same slicing behavior as symbolic TF tensors), + hence we cannot use `generic_utils.slice_arrays` directly + and we have to implement this workaround based on `concat`. This has a + performance cost. + + Arguments: + arrays: Single array or list of arrays. + indices: List of indices in the array that should be included in the output + batch. + contiguous: Boolean flag indicating whether the indices are contiguous. + + Returns: + Slice of data (either single array or list of arrays).
+ """ + if any(tensor_util.is_tensor(x) for x in arrays): + converted_to_list = False + if not isinstance(arrays, list): + converted_to_list = True + arrays = [arrays] + if not contiguous: + entries = [[x[i:i + 1] for i in indices] for x in arrays] + slices = [array_ops.concat(x, axis=0) for x in entries] + else: + slices = [x[indices[0]:indices[-1] + 1] for x in arrays] + if converted_to_list: + slices = slices[0] + return slices + else: + return generic_utils.slice_arrays(arrays, indices) + + def _process_single_batch(model, inputs, targets, @@ -240,7 +279,7 @@ def train_on_batch(model, inputs, targets, sample_weights=None): model, inputs, targets, sample_weights=sample_weights, training=True) if not isinstance(outs, list): outs = [outs] - _, metrics_results = _eager_metrics_fn( + metrics_results = _eager_metrics_fn( model, outs, targets) if not isinstance(loss, list): loss = [loss] @@ -270,9 +309,8 @@ def test_on_batch(model, inputs, targets, sample_weights=None): model, inputs, targets, sample_weights=sample_weights, training=False) if not isinstance(outs, list): outs = [outs] - metric_names, metrics_results = _eager_metrics_fn( + metrics_results = _eager_metrics_fn( model, outs, targets) - model.metrics_names.append(metric_names) if not isinstance(loss, list): loss = [loss] return loss + loss_metrics + metrics_results @@ -328,6 +366,12 @@ def fit_loop( Raises: ValueError: In case of invalid argument values. 
""" + if not batch_size: + raise ValueError('With eager execution, `batch_size` should be specified.') + if steps_per_epoch or validation_steps: + raise ValueError('With eager execution, `steps_per_epoch` and ' + '`validation_steps` are not valid arguments ' + '(set `batch_size` instead).') # Required for Eager mode with backend.learning_phase_scope(1): do_validation = False @@ -410,15 +454,18 @@ def fit_loop( elif shuffle: np.random.shuffle(index_array) - batches = make_batches(num_train_samples, batch_size) + batches = generic_utils.make_batches(num_train_samples, batch_size) for batch_index, (batch_start, batch_end) in enumerate(batches): batch_ids = index_array[batch_start:batch_end] try: - inputs_batch = slice_arrays(inputs, batch_ids) - targets_batch = slice_arrays(targets, batch_ids) + inputs_batch = slice_arrays(inputs, batch_ids, + contiguous=not shuffle) + targets_batch = slice_arrays(targets, batch_ids, + contiguous=not shuffle) if sample_weights: - sample_weights_batch = slice_arrays(sample_weights, batch_ids) + sample_weights_batch = slice_arrays(sample_weights, batch_ids, + contiguous=not shuffle) else: sample_weights_batch = None except TypeError: @@ -456,34 +503,12 @@ def fit_loop( for l, o in zip(out_labels, outs): batch_logs[l] = o # Required for Eager mode - metrics_names, metrics_results = _eager_metrics_fn( - model, outs, targets_batch) + metrics_results = _eager_metrics_fn(model, outs, targets_batch) batch_logs['loss'] = tensor_util.constant_value(backend.mean(loss)) - # TODO(anjalisridhar): Move this to compile to avoid duplicate code. - # In graph mode we set the metric names in compile. However in - # Eager mode we calculate the metrics for each batch in fit_loop. - # We could calculate the metric names and functions in compile. - # This would avoid setting the callback parameters separately. 
- # We need to do this for the first iteration alone - for m in metrics_names: - if m not in callback_metrics: - callback_metrics.append(m) - - callbacks.set_params({ - 'batch_size': batch_size, - 'epochs': epochs, - 'steps': steps_per_epoch, - 'samples': num_train_samples, - 'verbose': verbose, - 'do_validation': do_validation, - 'metrics': callback_metrics or [], - }) - for k, v in zip(model.metrics_names, [backend.mean(loss)] + loss_metrics + metrics_results): batch_logs[k] = tensor_util.constant_value(v) - callbacks.on_batch_end(batch_index, batch_logs) if callback_model.stop_training: break @@ -539,8 +564,8 @@ def test_loop(model, inputs, targets, feed_data, batch_size=batch_size, steps=steps, steps_name='steps') outs = [] if verbose == 1: - progbar = Progbar(target=num_samples) - batches = make_batches(num_samples, batch_size) + progbar = generic_utils.Progbar(target=num_samples) + batches = generic_utils.make_batches(num_samples, batch_size) index_array = np.arange(num_samples) for batch_index, (batch_start, batch_end) in enumerate(batches): batch_ids = index_array[batch_start:batch_end] @@ -569,7 +594,7 @@ def test_loop(model, inputs, targets, targets_batch, sample_weights=sample_weights_batch, training=False) - _, metrics_results = _eager_metrics_fn(model, loss_outs, targets_batch) + metrics_results = _eager_metrics_fn(model, loss_outs, targets_batch) batch_outs = [] for _, v in zip(model.metrics_names, [backend.mean(loss)] + loss_metrics + metrics_results): @@ -620,12 +645,12 @@ def predict_loop(model, inputs, inputs, batch_size, steps, 'steps') if verbose == 1: if steps is not None: - progbar = Progbar(target=steps) + progbar = generic_utils.Progbar(target=steps) else: - progbar = Progbar(target=num_samples) + progbar = generic_utils.Progbar(target=num_samples) outs = [] - batches = make_batches(num_samples, batch_size) + batches = generic_utils.make_batches(num_samples, batch_size) index_array = np.arange(num_samples) for batch_index, (batch_start, 
batch_end) in enumerate(batches): batch_ids = index_array[batch_start:batch_end] diff --git a/tensorflow/python/keras/_impl/keras/engine/training_eager_test.py b/tensorflow/python/keras/_impl/keras/engine/training_eager_test.py index 8848b393d5e602e564cb357c32a937eaabd68203..deaf1d13064e285e9b85befc699074cf33dd218f 100644 --- a/tensorflow/python/keras/_impl/keras/engine/training_eager_test.py +++ b/tensorflow/python/keras/_impl/keras/engine/training_eager_test.py @@ -18,10 +18,10 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import os import numpy as np from tensorflow.python.framework import ops +from tensorflow.python.framework import test_util as tf_test_util from tensorflow.python.keras._impl import keras from tensorflow.python.keras._impl.keras import testing_utils from tensorflow.python.platform import test @@ -212,7 +212,7 @@ class TrainingTest(test.TestCase): optimizer = RMSPropOptimizer(learning_rate=0.001) loss = 'mse' loss_weights = [1., 0.5] - metrics = ['mae'] + metrics = ['acc', 'mae'] model.compile( optimizer, loss, @@ -231,20 +231,20 @@ class TrainingTest(test.TestCase): [input_a_np, input_b_np], [output_d_np, output_e_np], batch_size=5, verbose=0) - self.assertEqual(len(out), 5) + self.assertEqual(len(out), 7) out = model.evaluate( [input_a_np, input_b_np], [output_d_np, output_e_np], batch_size=5, verbose=1) - self.assertEqual(len(out), 5) + self.assertEqual(len(out), 7) out = model.evaluate( [input_a_np, input_b_np], [output_d_np, output_e_np], batch_size=5, verbose=2) - self.assertEqual(len(out), 5) + self.assertEqual(len(out), 7) out = model.test_on_batch([input_a_np, input_b_np], [output_d_np, output_e_np]) - self.assertEqual(len(out), 5) + self.assertEqual(len(out), 7) # Test evaluate with dictionary inputs model.evaluate( @@ -308,6 +308,100 @@ class TrainingTest(test.TestCase): model.compile(loss=None, optimizer='rms') + def 
test_model_methods_with_eager_tensors_multi_io(self): + a = keras.layers.Input(shape=(3,), name='input_a') + b = keras.layers.Input(shape=(3,), name='input_b') + + dense = keras.layers.Dense(4, name='dense') + c = dense(a) + d = dense(b) + e = keras.layers.Dropout(0.5, name='dropout')(c) + + model = keras.models.Model([a, b], [d, e]) + + optimizer = RMSPropOptimizer(learning_rate=0.001) + loss = 'mse' + loss_weights = [1., 0.5] + metrics = ['mae'] + model.compile( + optimizer, + loss, + metrics=metrics, + loss_weights=loss_weights, + sample_weight_mode=None) + + input_a = keras.backend.zeros(shape=(10, 3)) + input_b = keras.backend.zeros(shape=(10, 3)) + target_d = keras.backend.zeros(shape=(10, 4)) + target_e = keras.backend.zeros(shape=(10, 4)) + + model.fit( + [input_a, input_b], [target_d, target_e], + epochs=1, + batch_size=5, + verbose=0) + # Test: no shuffle. + model.fit( + [input_a, input_b], [target_d, target_e], + epochs=1, + batch_size=5, + verbose=0, + shuffle=False) + # Test: validation data. + model.fit([input_a, input_b], [target_d, target_e], + epochs=1, batch_size=2, verbose=0, + validation_data=([input_a, input_b], [target_d, target_e])) + model.train_on_batch([input_a, input_b], [target_d, target_e]) + model.predict([input_a, input_b], batch_size=5) + model.evaluate([input_a, input_b], [target_d, target_e], + batch_size=2, verbose=0) + model.test_on_batch([input_a, input_b], [target_d, target_e]) + + # Test: mix np and tensors. 
+ input_b = np.zeros(shape=(10, 3)).astype('float32') + target_e = np.zeros(shape=(10, 4)).astype('float32') + model.fit( + [input_a, input_b], [target_d, target_e], + epochs=1, + batch_size=5, + verbose=0) + model.fit([input_a, input_b], [target_d, target_e], + epochs=1, batch_size=2, verbose=0, + validation_data=([input_a, input_b], [target_d, target_e])) + model.fit( + [input_a, input_b], [target_d, target_e], + epochs=1, + batch_size=5, + verbose=0, + shuffle=False) + model.train_on_batch([input_a, input_b], [target_d, target_e]) + model.predict([input_a, input_b], batch_size=5) + model.evaluate([input_a, input_b], [target_d, target_e], + batch_size=2, verbose=0) + model.test_on_batch([input_a, input_b], [target_d, target_e]) + + def test_model_methods_with_eager_tensors_single_io(self): + x = keras.layers.Input(shape=(3,), name='input') + y = keras.layers.Dense(4, name='dense')(x) + model = keras.Model(x, y) + + optimizer = RMSPropOptimizer(learning_rate=0.001) + loss = 'mse' + metrics = ['mae'] + model.compile(optimizer, loss, metrics=metrics) + + inputs = keras.backend.zeros(shape=(10, 3)) + targets = keras.backend.zeros(shape=(10, 4)) + + model.fit(inputs, targets, epochs=1, batch_size=2, verbose=0) + model.fit(inputs, targets, epochs=1, batch_size=3, verbose=0, shuffle=False) + model.fit(inputs, targets, epochs=1, batch_size=4, verbose=0, + validation_data=(inputs, targets)) + model.evaluate(inputs, targets, batch_size=2, verbose=0) + model.predict(inputs, batch_size=2) + model.train_on_batch(inputs, targets) + model.test_on_batch(inputs, targets) + class LossWeightingTest(test.TestCase): @@ -531,16 +625,29 @@ class LossWeightingTest(test.TestCase): bad_w_np = np.random.random((10, 2, 2)) model.fit(x_np, [y_np, y_np], epochs=1, sample_weight={'1': bad_w_np}) +class CorrectnessTest(test.TestCase): + + @tf_test_util.run_in_graph_and_eager_modes() + def test_loss_correctness(self): + # Test that training loss is the same in eager and graph + # (by comparing 
it to a reference value in a deterministic case) + model = keras.Sequential() + model.add(keras.layers.Dense(3, + activation='relu', + input_dim=4, + kernel_initializer='ones')) + model.add(keras.layers.Dense(2, + activation='softmax', + kernel_initializer='ones')) + model.compile(loss='sparse_categorical_crossentropy', + optimizer=RMSPropOptimizer(learning_rate=0.001)) + x = np.ones((100, 4)) + np.random.seed(123) + y = np.random.randint(0, 1, size=(100, 1)) + history = model.fit(x, y, epochs=1, batch_size=10) + self.assertEqual( + np.around(history.history['loss'][-1], decimals=4), 0.6173) if __name__ == '__main__': - # Bazel sets these environment variables to very long paths. - # Tempfile uses them to create long paths, and in turn multiprocessing - # library tries to create sockets named after paths. Delete whatever bazel - # writes to these to avoid tests failing due to socket addresses being too - # long. - for var in ('TMPDIR', 'TMP', 'TEMP'): - if var in os.environ: - del os.environ[var] - ops.enable_eager_execution() test.main() diff --git a/tensorflow/python/keras/_impl/keras/engine/training_test.py b/tensorflow/python/keras/_impl/keras/engine/training_test.py index fd91dbba52ff7d152335514085ef3b057ae5eec4..58011a141268e588e6b746afca982f36f4d4d176 100644 --- a/tensorflow/python/keras/_impl/keras/engine/training_test.py +++ b/tensorflow/python/keras/_impl/keras/engine/training_test.py @@ -23,11 +23,17 @@ import unittest import numpy as np +from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.framework import tensor_shape +from tensorflow.python.framework import test_util as tf_test_util from tensorflow.python.keras._impl import keras from tensorflow.python.keras._impl.keras import testing_utils from tensorflow.python.keras._impl.keras.engine.training_utils import weighted_masked_objective from tensorflow.python.keras._impl.keras.utils.generic_utils import slice_arrays +from tensorflow.python.ops import array_ops from 
tensorflow.python.platform import test +from tensorflow.python.platform import tf_logging as logging +from tensorflow.python.training.rmsprop import RMSPropOptimizer try: import scipy.sparse as scipy_sparse # pylint: disable=g-import-not-at-top @@ -1117,6 +1123,136 @@ class TestTrainingUtils(test.TestCase): class TestTrainingWithDataTensors(test.TestCase): + def test_training_and_eval_methods_on_symbolic_tensors_single_io(self): + with self.test_session(): + x = keras.layers.Input(shape=(3,), name='input') + y = keras.layers.Dense(4, name='dense')(x) + model = keras.Model(x, y) + + optimizer = 'rmsprop' + loss = 'mse' + metrics = ['mae'] + model.compile(optimizer, loss, metrics=metrics) + + inputs = keras.backend.zeros(shape=(10, 3)) + targets = keras.backend.zeros(shape=(10, 4)) + + model.fit(inputs, targets, epochs=1, steps_per_epoch=2, verbose=0) + model.evaluate(inputs, targets, steps=2, verbose=0) + model.predict(inputs, steps=2) + model.train_on_batch(inputs, targets) + model.test_on_batch(inputs, targets) + model.fit(inputs, targets, + epochs=1, steps_per_epoch=2, verbose=0, + validation_data=(inputs, targets), validation_steps=2) + + # Test with dynamic shape + inputs = array_ops.placeholder_with_default( + np.zeros((2, 3)), shape=tensor_shape.TensorShape([None, 3])) + targets = array_ops.placeholder_with_default( + np.zeros((2, 4)), shape=tensor_shape.TensorShape([None, 4])) + self.assertEqual(inputs.shape[0].value, None) + model.fit(inputs, targets, epochs=1, steps_per_epoch=2, verbose=0) + model.evaluate(inputs, targets, steps=2, verbose=0) + model.predict(inputs, steps=2) + model.train_on_batch(inputs, targets) + model.test_on_batch(inputs, targets) + model.fit(inputs, targets, + epochs=1, steps_per_epoch=2, verbose=0, + validation_data=(inputs, targets), validation_steps=2) + + def test_training_and_eval_methods_on_symbolic_tensors_multi_io(self): + with self.test_session(): + a = keras.layers.Input(shape=(3,), name='input_a') + b = 
keras.layers.Input(shape=(3,), name='input_b') + + dense = keras.layers.Dense(4, name='dense') + c = dense(a) + d = dense(b) + e = keras.layers.Dropout(0.5, name='dropout')(c) + + model = keras.models.Model([a, b], [d, e]) + + optimizer = 'rmsprop' + loss = 'mse' + loss_weights = [1., 0.5] + metrics = ['mae'] + model.compile(optimizer, loss, metrics=metrics, loss_weights=loss_weights) + + input_a_tf = keras.backend.zeros(shape=(10, 3)) + input_b_tf = keras.backend.zeros(shape=(10, 3)) + + output_d_tf = keras.backend.zeros(shape=(10, 4)) + output_e_tf = keras.backend.zeros(shape=(10, 4)) + + model.fit( + [input_a_tf, input_b_tf], [output_d_tf, output_e_tf], + epochs=1, + steps_per_epoch=2, + verbose=0) + with self.assertRaisesRegexp(ValueError, + 'should specify the `steps_per_epoch`'): + model.fit( + [input_a_tf, input_b_tf], [output_d_tf, output_e_tf], + epochs=1, + batch_size=5, + verbose=0) + model.train_on_batch([input_a_tf, input_b_tf], [output_d_tf, output_e_tf]) + + # Test with dictionary inputs + model.fit( + {'input_a': input_a_tf, + 'input_b': input_b_tf}, + {'dense': output_d_tf, + 'dropout': output_e_tf}, + epochs=1, + steps_per_epoch=2, + verbose=0) + model.fit( + {'input_a': input_a_tf, + 'input_b': input_b_tf}, + {'dense': output_d_tf, + 'dropout': output_e_tf}, + validation_data=({'input_a': input_a_tf, + 'input_b': input_b_tf}, + {'dense': output_d_tf, + 'dropout': output_e_tf}), + epochs=1, + steps_per_epoch=2, + validation_steps=2, + verbose=0) + model.train_on_batch( + {'input_a': input_a_tf, + 'input_b': input_b_tf}, + {'dense': output_d_tf, + 'dropout': output_e_tf}) + + # Test with validation data + model.fit( + [input_a_tf, input_b_tf], [output_d_tf, output_e_tf], + validation_data=([input_a_tf, input_b_tf], + [output_d_tf, output_e_tf]), + epochs=1, + steps_per_epoch=2, + validation_steps=2, + verbose=0) + # Test with validation split + with self.assertRaisesRegexp(ValueError, + 'you cannot use `validation_split`'): + model.fit( + 
[input_a_tf, input_b_tf], [output_d_tf, output_e_tf], + epochs=2, + steps_per_epoch=2, + verbose=0, + validation_split=0.2, + validation_steps=2) + + # Test evaluation / prediction methods + model.evaluate([input_a_tf, input_b_tf], [output_d_tf, output_e_tf], + steps=2, verbose=0) + model.predict([input_a_tf, input_b_tf], steps=2) + model.test_on_batch([input_a_tf, input_b_tf], [output_d_tf, output_e_tf]) + def test_model_with_input_feed_tensor(self): """We test building a model with a TF variable as input. @@ -1552,15 +1688,101 @@ class TestTrainingWithDataTensors(test.TestCase): model.train_on_batch([input_a_np, input_b_np], [output_a_np, output_b_np]) + @tf_test_util.run_in_graph_and_eager_modes() + def test_metric_names_are_identical_in_graph_and_eager(self): + a = keras.layers.Input(shape=(3,), name='input_a') + b = keras.layers.Input(shape=(3,), name='input_b') -if __name__ == '__main__': - # Bazel sets these environment variables to very long paths. - # Tempfile uses them to create long paths, and in turn multiprocessing - # library tries to create sockets named after paths. Delete whatever bazel - # writes to these to avoid tests failing due to socket addresses being too - # long. 
- for var in ('TMPDIR', 'TMP', 'TEMP'): - if var in os.environ: - del os.environ[var] + dense = keras.layers.Dense(4, name='dense') + c = dense(a) + d = dense(b) + e = keras.layers.Dropout(0.5, name='dropout')(c) + + model = keras.models.Model([a, b], [d, e]) + + optimizer = RMSPropOptimizer(learning_rate=0.001) + loss = 'mse' + loss_weights = [1., 0.5] + metrics = ['mae', 'acc'] + model.compile(optimizer, loss, metrics=metrics, loss_weights=loss_weights) + reference_metric_names = ['loss', 'dense_loss', 'dropout_loss', + 'dense_mean_absolute_error', + 'dense_acc', + 'dropout_mean_absolute_error', + 'dropout_acc'] + self.assertEqual(reference_metric_names, model.metrics_names) + + +class TestTrainingWithDatasetIterators(test.TestCase): + + def test_training_and_eval_methods_on_iterators_single_io(self): + with self.test_session(): + x = keras.layers.Input(shape=(3,), name='input') + y = keras.layers.Dense(4, name='dense')(x) + model = keras.Model(x, y) + optimizer = 'rmsprop' + loss = 'mse' + metrics = ['mae'] + model.compile(optimizer, loss, metrics=metrics) + + inputs = np.zeros((10, 3)) + targets = np.zeros((10, 4)) + dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets)) + dataset = dataset.repeat(100) + dataset = dataset.batch(10) + iterator = dataset.make_one_shot_iterator() + + model.fit(iterator, epochs=1, steps_per_epoch=2, verbose=0) + model.evaluate(iterator, steps=2, verbose=0) + model.predict(iterator, steps=2) + model.train_on_batch(iterator) + model.test_on_batch(iterator) + # Test with validation data + model.fit(iterator, + epochs=1, steps_per_epoch=2, verbose=0, + validation_data=iterator, validation_steps=2) + # Test with validation split + with self.assertRaisesRegexp(ValueError, + 'you cannot use `validation_split`'): + model.fit(iterator, + epochs=1, steps_per_epoch=2, verbose=0, + validation_split=0.5, validation_steps=2) + + # Test invalid usage + with self.assertRaisesRegexp(ValueError, + 'Instead, pass an `Iterator`'): + 
model.fit(dataset, + epochs=1, steps_per_epoch=2, verbose=0) + with self.assertRaisesRegexp(ValueError, + 'you should not specify a target'): + model.fit(iterator, iterator, + epochs=1, steps_per_epoch=2, verbose=0) + + def test_iterators_running_out_of_data(self): + with self.test_session(): + x = keras.layers.Input(shape=(3,), name='input') + y = keras.layers.Dense(4, name='dense')(x) + model = keras.Model(x, y) + + optimizer = 'rmsprop' + loss = 'mse' + metrics = ['mae'] + model.compile(optimizer, loss, metrics=metrics) + + inputs = np.zeros((10, 3)) + targets = np.zeros((10, 4)) + dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets)) + dataset = dataset.repeat(2) + dataset = dataset.batch(10) + iterator = dataset.make_one_shot_iterator() + + with test.mock.patch.object(logging, 'warning') as mock_log: + model.fit(iterator, epochs=1, steps_per_epoch=3, verbose=0) + self.assertRegexpMatches( + str(mock_log.call_args), + 'dataset iterator ran out of data') + + +if __name__ == '__main__': test.main() diff --git a/tensorflow/python/keras/_impl/keras/engine/training_utils.py b/tensorflow/python/keras/_impl/keras/engine/training_utils.py index 105638ce1087e8668b49b6653a847667e8f9157d..662938f421b3a338d6720b2911347664711c1ac1 100644 --- a/tensorflow/python/keras/_impl/keras/engine/training_utils.py +++ b/tensorflow/python/keras/_impl/keras/engine/training_utils.py @@ -22,9 +22,12 @@ import copy import numpy as np +from tensorflow.python.eager import context from tensorflow.python.framework import tensor_util from tensorflow.python.keras._impl.keras import backend as K from tensorflow.python.keras._impl.keras import losses +from tensorflow.python.keras._impl.keras import metrics as metrics_module +from tensorflow.python.ops import math_ops def check_num_samples(ins, @@ -59,18 +62,31 @@ def check_num_samples(ins, Raises: ValueError: In case of invalid arguments. 
""" - if steps is not None: - num_samples = None - if batch_size is not None: - raise ValueError( - 'If ' + steps_name + ' is set, the `batch_size` must be None.') - elif ins and hasattr(ins[0], 'shape'): - num_samples = ins[0].shape[0] - else: + if steps is not None and batch_size is not None: raise ValueError( - 'Either the input data should have ' - 'a defined shape, or ' + steps_name + ' should be specified.') - return num_samples + 'If ' + steps_name + ' is set, the `batch_size` must be None.') + + if not ins or has_symbolic_tensors(ins): + if steps is None: + raise ValueError('If your data is in the form of symbolic tensors, ' + 'you should specify the `' + steps_name + '` argument ' + '(instead of the `batch_size` argument, ' + 'because symbolic tensors are expected to produce ' + 'batches of input data).') + return None + if hasattr(ins[0], 'shape'): + return int(ins[0].shape[0]) + return None # Edge case where ins == [static_learning_phase] + + +def standardize_single_array(x): + if x is None: + return None + elif tensor_util.is_tensor(x): + return x + elif x.ndim == 1: + x = np.expand_dims(x, 1) + return x def standardize_input_data(data, @@ -130,9 +146,7 @@ def standardize_input_data(data, else: data = data.values if data.__class__.__name__ == 'DataFrame' else data data = [data] - data = [ - np.expand_dims(x, 1) if x is not None and x.ndim == 1 else x for x in data - ] + data = [standardize_single_array(x) for x in data] if len(data) != len(names): if data and hasattr(data[0], 'shape'): @@ -158,7 +172,7 @@ def standardize_input_data(data, # Check shapes compatibility. 
if shapes: for i in range(len(names)): - if shapes[i] is not None: + if shapes[i] is not None and not tensor_util.is_tensor(data[i]): data_shape = data[i].shape shape = shapes[i] if data[i].ndim != len(shape): @@ -245,12 +259,13 @@ def check_array_lengths(inputs, targets, weights=None): """ def set_of_lengths(x): - # return a set with the variation between + # Returns a set with the variation between # different shapes, with None => 0 if x is None: return {} else: - return set([y.shape[0] for y in x if y is not None]) + return set([y.shape[0] for y in x + if y is not None and not tensor_util.is_tensor(y)]) set_x = set_of_lengths(inputs) set_y = set_of_lengths(targets) @@ -422,7 +437,7 @@ def weighted_masked_objective(fn): score_array = fn(y_true, y_pred) if mask is not None: # Cast the mask to floatX to avoid float64 upcasting in theano - mask = K.cast(mask, K.floatx()) + mask = math_ops.cast(mask, K.floatx()) # mask should have the same shape as score_array score_array *= mask # the loss per batch should be proportional @@ -436,7 +451,8 @@ def weighted_masked_objective(fn): weight_ndim = K.ndim(weights) score_array = K.mean(score_array, axis=list(range(weight_ndim, ndim))) score_array *= weights - score_array /= K.mean(K.cast(K.not_equal(weights, 0), K.floatx())) + score_array /= K.mean( + math_ops.cast(math_ops.not_equal(weights, 0), K.floatx())) return K.mean(score_array) return weighted @@ -532,3 +548,69 @@ def standardize_weights(y, return weights else: return None + + +def has_symbolic_tensors(ls): + return (any(tensor_util.is_tensor(v) for v in ls) + and not context.executing_eagerly()) + + +def populate_metric_names(model): + for i in range(len(model.outputs)): + metrics = model.nested_metrics[i] + for metric in metrics: + base_metric_name = get_base_metric_name(metric) + add_metric_name(model, base_metric_name, i) + + +def get_base_metric_name(metric, weighted=False): + """Returns the metric name given the metric function. 
+ + Arguments: + metric: Metric function name or reference. + weighted: Boolean indicating if the metric for which we are adding + names is weighted. + + Returns: + a metric name. + """ + metric_name_prefix = 'weighted_' if weighted else '' + if metric in ('accuracy', 'acc', 'crossentropy', 'ce'): + if metric in ('accuracy', 'acc'): + suffix = 'acc' + elif metric in ('crossentropy', 'ce'): + suffix = 'ce' + metric_name = metric_name_prefix + suffix + else: + metric_fn = metrics_module.get(metric) + # Get metric name as string + if hasattr(metric_fn, 'name'): + metric_name = metric_fn.name + else: + metric_name = metric_fn.__name__ + metric_name = metric_name_prefix + metric_name + + return metric_name + + +def add_metric_name(model, metric_name, index): + """Makes the metric name unique and adds it to the model's metric name list. + + If there are multiple outputs for which the metrics are calculated, the + metric names have to be made unique by appending an integer. + + Arguments: + model: Model to which we are adding metric names. + metric_name: Metric name that corresponds to the metric specified by the + user. For example: 'acc' + index: The index of the model output for which the metric name is being + added. 
+ """ + if len(model.output_names) > 1: + metric_name = '%s_%s' % (model.output_names[index], metric_name) + j = 1 + base_metric_name = metric_name + while metric_name in model.metrics_names: + metric_name = '%s_%d' % (base_metric_name, j) + j += 1 + model.metrics_names.append(metric_name) diff --git a/tensorflow/python/keras/_impl/keras/estimator.py b/tensorflow/python/keras/_impl/keras/estimator.py index 8426d84df964092435b10c9e28e1843df7e423f4..c3c3fceb454773f18400bd31fe8b649c557fc013 100644 --- a/tensorflow/python/keras/_impl/keras/estimator.py +++ b/tensorflow/python/keras/_impl/keras/estimator.py @@ -29,14 +29,17 @@ from tensorflow.python.estimator import run_config as run_config_lib from tensorflow.python.framework import ops from tensorflow.python.framework import random_seed from tensorflow.python.framework import sparse_tensor as sparse_tensor_lib +from tensorflow.python.framework import tensor_util from tensorflow.python.keras._impl.keras import backend as K from tensorflow.python.keras._impl.keras import models from tensorflow.python.keras._impl.keras import optimizers from tensorflow.python.keras._impl.keras.engine.base_layer import Layer from tensorflow.python.keras._impl.keras.engine.network import Network from tensorflow.python.keras._impl.keras.utils.generic_utils import CustomObjectScope +from tensorflow.python.ops import check_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import metrics as metrics_module +from tensorflow.python.ops import variables as variables_module from tensorflow.python.platform import tf_logging as logging from tensorflow.python.saved_model import signature_constants from tensorflow.python.training import saver as saver_lib @@ -54,6 +57,30 @@ def _cast_tensor_to_floatx(x): return math_ops.cast(x, K.floatx()) +def _convert_tensor(x): + """Create or cast tensor if needed.""" + if not tensor_util.is_tensor(x): + # x is a numpy array + x = sparse_tensor_lib.convert_to_tensor_or_sparse_tensor(x) + if 
check_ops.is_numeric_tensor(x): + # is_numeric_tensor returns False if provided with a numpy array + x = _cast_tensor_to_floatx(x) + return x + + +def _any_variable_initalized(): + """Check if any variable has been initialized in the Keras model. + + Returns: + boolean, True if at least one variable has been initalized, else False. + """ + variables = variables_module.global_variables() + for v in variables: + if getattr(v, '_keras_initialized', False): + return True + return False + + def _create_ordered_io(keras_model, estimator_io, is_input=True): """Create a list of tensors from IO dictionary based on Keras IO order. @@ -72,7 +99,7 @@ def _create_ordered_io(keras_model, estimator_io, is_input=True): if isinstance(estimator_io, (list, tuple)): # Case currently not supported by most built-in input_fn, # but it's good to have for sanity - return [_cast_tensor_to_floatx(x) for x in estimator_io] + return [_convert_tensor(x) for x in estimator_io] elif isinstance(estimator_io, dict): if is_input: if keras_model._is_graph_network: @@ -94,12 +121,12 @@ def _create_ordered_io(keras_model, estimator_io, is_input=True): 'It needs to match one ' 'of the following: %s' % ('input' if is_input else 'output', key, ', '.join(keras_io_names))) - tensors = [_cast_tensor_to_floatx(estimator_io[io_name]) + tensors = [_convert_tensor(estimator_io[io_name]) for io_name in keras_io_names] return tensors else: # Plain array. 
- return _cast_tensor_to_floatx(estimator_io) + return _convert_tensor(estimator_io) def _in_place_subclassed_model_reset(model): @@ -260,8 +287,7 @@ def _clone_and_build_model(mode, is_input=False) else: target_tensors = [ - _cast_tensor_to_floatx( - sparse_tensor_lib.convert_to_tensor_or_sparse_tensor(labels)) + _convert_tensor(labels) ] if keras_model._is_graph_network: @@ -395,7 +421,8 @@ def _save_first_checkpoint(keras_model, estimator, custom_objects, custom_objects) # save to checkpoint with session.Session(config=estimator._session_config) as sess: - model.set_weights(keras_weights) + if keras_weights: + model.set_weights(keras_weights) # Make update ops and initialize all variables. if not model.train_function: # pylint: disable=protected-access @@ -465,11 +492,22 @@ def model_to_estimator(keras_model=None, estimator = estimator_lib.Estimator( keras_model_fn, model_dir=model_dir, config=config) - # Pass the config into keras backend's default session. - with session.Session(config=estimator._session_config) as sess: + # Check if we need to call get_weights: + if _any_variable_initalized(): + keras_weights = keras_model.get_weights() + # Warn if config passed to estimator tries to update GPUOptions. If a + # session has already been created, the GPUOptions passed to the first + # session sticks. + if estimator._session_config.HasField('gpu_options'): + logging.warning( + 'The Keras backend session has already been set. ' + 'The _session_config passed to model_to_estimator will not be used.') + else: + # Pass the config into keras backend's default session. 
+ sess = session.Session(config=estimator._session_config) K.set_session(sess) + keras_weights = None - keras_weights = keras_model.get_weights() if keras_model._is_graph_network: # TODO(yifeif): move checkpoint initialization to scaffold.init_fn _save_first_checkpoint(keras_model, diff --git a/tensorflow/python/keras/_impl/keras/estimator_test.py b/tensorflow/python/keras/_impl/keras/estimator_test.py index e076dc25b16900636313f0ddd85a61b8d917fc91..80fa87d0410871c30b8a7c46e7ff02bc81c96f3b 100644 --- a/tensorflow/python/keras/_impl/keras/estimator_test.py +++ b/tensorflow/python/keras/_impl/keras/estimator_test.py @@ -27,10 +27,13 @@ import numpy as np from tensorflow.core.protobuf import config_pb2 from tensorflow.python.estimator import run_config as run_config_lib from tensorflow.python.estimator.inputs import numpy_io +from tensorflow.python.framework import ops from tensorflow.python.framework import test_util from tensorflow.python.keras._impl import keras +from tensorflow.python.keras._impl.keras import backend as K from tensorflow.python.keras._impl.keras import testing_utils from tensorflow.python.keras._impl.keras.applications import mobilenet +from tensorflow.python.keras._impl.keras.optimizers import SGD from tensorflow.python.platform import gfile from tensorflow.python.platform import test from tensorflow.python.summary.writer import writer_cache @@ -140,16 +143,20 @@ def randomize_io_type(array, name): def multi_inputs_multi_outputs_model(): - # test multi-input layer a = keras.layers.Input(shape=(16,), name='input_a') b = keras.layers.Input(shape=(16,), name='input_b') + m = keras.layers.Input(shape=(8,), dtype='bool', name='input_m') dense = keras.layers.Dense(8, name='dense_1') + a_2 = dense(a) + # Apply a mask + s_2 = keras.layers.Lambda(lambda k: + K.switch(k[0], k[1], K.zeros_like(k[1])))([m, a_2]) b_2 = dense(b) - merged = keras.layers.concatenate([a_2, b_2], name='merge') + merged = keras.layers.concatenate([s_2, b_2], name='merge') c = 
keras.layers.Dense(3, activation='softmax', name='dense_2')(merged) d = keras.layers.Dense(2, activation='softmax', name='dense_3')(merged) - model = keras.models.Model(inputs=[a, b], outputs=[c, d]) + model = keras.models.Model(inputs=[a, b, m], outputs=[c, d]) model.compile( loss='categorical_crossentropy', optimizer='rmsprop', @@ -350,18 +357,27 @@ class TestKerasEstimator(test_util.TensorFlowTestCase): test_samples=50, input_shape=(16,), num_classes=2) + np.random.seed(_RANDOM_SEED) + (input_m_train, _), (input_m_test, _) = testing_utils.get_test_data( + train_samples=_TRAIN_SIZE, + test_samples=50, + input_shape=(8,), + num_classes=2) + c_train = keras.utils.to_categorical(c_train) c_test = keras.utils.to_categorical(c_test) d_train = keras.utils.to_categorical(d_train) d_test = keras.utils.to_categorical(d_test) def train_input_fn(): - input_dict = {'input_a': a_train, 'input_b': b_train} + input_dict = {'input_a': a_train, 'input_b': b_train, + 'input_m': input_m_train > 0} output_dict = {'dense_2': c_train, 'dense_3': d_train} return input_dict, output_dict def eval_input_fn(): - input_dict = {'input_a': a_test, 'input_b': b_test} + input_dict = {'input_a': a_test, 'input_b': b_test, + 'input_m': input_m_test > 0} output_dict = {'dense_2': c_test, 'dense_3': d_test} return input_dict, output_dict @@ -443,8 +459,9 @@ class TestKerasEstimator(test_util.TensorFlowTestCase): model = simple_functional_model() model.compile( loss='categorical_crossentropy', optimizer='adam', metrics=['acc']) - est_keras = keras.estimator.model_to_estimator( - keras_model=model, config=self._config) + with self.test_session(): + est_keras = keras.estimator.model_to_estimator( + keras_model=model, config=self._config) with self.test_session(): with self.assertRaises(ValueError): @@ -497,20 +514,42 @@ class TestKerasEstimator(test_util.TensorFlowTestCase): model_dir=tempfile.mkdtemp(dir=self._base_dir)) def test_gpu_config(self): + with ops.Graph().as_default(): + keras_model, (_, 
_), (_, _), _, _ = get_resource_for_simple_model() + keras_model.compile( + loss='categorical_crossentropy', + optimizer='rmsprop', + metrics=['mse', keras.metrics.categorical_accuracy]) + + gpu_options = config_pb2.GPUOptions(per_process_gpu_memory_fraction=0.3) + sess_config = config_pb2.ConfigProto(gpu_options=gpu_options) + self._config._session_config = sess_config + keras.estimator.model_to_estimator( + keras_model=keras_model, config=self._config) + self.assertEqual( + keras.backend.get_session() + ._config.gpu_options.per_process_gpu_memory_fraction, + gpu_options.per_process_gpu_memory_fraction) + + def test_pretrained_weights(self): keras_model, (_, _), (_, _), _, _ = get_resource_for_simple_model() keras_model.compile( loss='categorical_crossentropy', - optimizer='rmsprop', + optimizer=rmsprop.RMSPropOptimizer(1e-3), metrics=['mse', keras.metrics.categorical_accuracy]) - - gpu_options = config_pb2.GPUOptions(per_process_gpu_memory_fraction=0.3) - sess_config = config_pb2.ConfigProto(gpu_options=gpu_options) - self._config._session_config = sess_config - keras.estimator.model_to_estimator( - keras_model=keras_model, config=self._config) - self.assertEqual(keras.backend.get_session() - ._config.gpu_options.per_process_gpu_memory_fraction, - gpu_options.per_process_gpu_memory_fraction) + with self.test_session(): + keras_model.train_on_batch( + np.random.random((10,) + _INPUT_SIZE), + np.random.random((10, _NUM_CLASS))) + weights = keras_model.get_weights() + keras_model, (_, _), (_, _), _, _ = get_resource_for_simple_model() + keras_model.set_weights(weights) + keras_model.compile( + loss='categorical_crossentropy', + optimizer=SGD(lr=0.0001, momentum=0.9), + metrics=['mse', keras.metrics.categorical_accuracy]) + keras.estimator.model_to_estimator( + keras_model=keras_model, config=self._config) if __name__ == '__main__': diff --git a/tensorflow/python/keras/_impl/keras/initializers.py b/tensorflow/python/keras/_impl/keras/initializers.py index 
300bed5e1437074d010760c427c14f68e58ac363..ecb71d00e2c78ced6095aaa3a0180b454b04917a 100644 --- a/tensorflow/python/keras/_impl/keras/initializers.py +++ b/tensorflow/python/keras/_impl/keras/initializers.py @@ -201,6 +201,8 @@ def deserialize(config, custom_objects=None): @tf_export('keras.initializers.get') def get(identifier): + if identifier is None: + return None if isinstance(identifier, dict): return deserialize(identifier) elif isinstance(identifier, six.string_types): diff --git a/tensorflow/python/keras/_impl/keras/integration_test.py b/tensorflow/python/keras/_impl/keras/integration_test.py index 280f7ed1b11e2026ac196eb319f7d5da8301f060..43aff67ef93c8ec495beafdd17c5557b6398671f 100644 --- a/tensorflow/python/keras/_impl/keras/integration_test.py +++ b/tensorflow/python/keras/_impl/keras/integration_test.py @@ -29,16 +29,15 @@ from tensorflow.python.platform import test class KerasIntegrationTest(test.TestCase): - def test_vector_classification_declarative(self): + def test_vector_classification_sequential(self): with self.test_session(): np.random.seed(1337) - (x_train, y_train), (x_test, y_test) = testing_utils.get_test_data( - train_samples=200, - test_samples=100, + (x_train, y_train), _ = testing_utils.get_test_data( + train_samples=100, + test_samples=0, input_shape=(10,), num_classes=2) y_train = keras.utils.to_categorical(y_train) - y_test = keras.utils.to_categorical(y_test) model = keras.models.Sequential([ keras.layers.Dense(16, @@ -48,23 +47,22 @@ class KerasIntegrationTest(test.TestCase): keras.layers.Dense(y_train.shape[-1], activation='softmax') ]) model.compile(loss='categorical_crossentropy', - optimizer='rmsprop', + optimizer=keras.optimizers.Adam(lr=0.1), metrics=['accuracy']) history = model.fit(x_train, y_train, epochs=10, batch_size=16, - validation_data=(x_test, y_test), + validation_data=(x_train, y_train), verbose=2) - self.assertGreater(history.history['val_acc'][-1], 0.85) + self.assertGreater(history.history['val_acc'][-1], 0.7) 
def test_vector_classification_functional(self): with self.test_session(): np.random.seed(1337) - (x_train, y_train), (x_test, y_test) = testing_utils.get_test_data( - train_samples=200, - test_samples=100, - input_shape=(10,), + (x_train, y_train), _ = testing_utils.get_test_data( + train_samples=100, + test_samples=0, + input_shape=(20,), num_classes=2) y_train = keras.utils.to_categorical(y_train) - y_test = keras.utils.to_categorical(y_test) inputs = keras.layers.Input(shape=x_train.shape[1:]) x = keras.layers.Dense(16, activation='relu')(inputs) @@ -73,77 +71,78 @@ class KerasIntegrationTest(test.TestCase): model = keras.models.Model(inputs, outputs) model.compile(loss='categorical_crossentropy', - optimizer='rmsprop', + optimizer=keras.optimizers.Adam(lr=0.1), metrics=['accuracy']) history = model.fit(x_train, y_train, epochs=10, batch_size=16, - validation_data=(x_test, y_test), + validation_data=(x_train, y_train), verbose=2) - self.assertGreater(history.history['val_acc'][-1], 0.85) + self.assertGreater(history.history['val_acc'][-1], 0.7) - def test_temporal_classification_declarative(self): + def test_temporal_classification_sequential(self): with self.test_session(): - np.random.seed(1336) - (x_train, y_train), (x_test, y_test) = testing_utils.get_test_data( - train_samples=200, - test_samples=100, - input_shape=(4, 8), + np.random.seed(1337) + (x_train, y_train), _ = testing_utils.get_test_data( + train_samples=100, + test_samples=0, + input_shape=(4, 10), num_classes=2) y_train = keras.utils.to_categorical(y_train) - y_test = keras.utils.to_categorical(y_test) model = keras.models.Sequential() model.add(keras.layers.LSTM(5, return_sequences=True, input_shape=x_train.shape[1:])) model.add(keras.layers.GRU(y_train.shape[-1], activation='softmax')) model.compile(loss='categorical_crossentropy', - optimizer='adam', + optimizer=keras.optimizers.Adam(lr=0.1), metrics=['accuracy']) - history = model.fit(x_train, y_train, epochs=10, batch_size=16, - 
validation_data=(x_test, y_test), + history = model.fit(x_train, y_train, epochs=15, batch_size=16, + validation_data=(x_train, y_train), verbose=2) - self.assertGreater(history.history['val_acc'][-1], 0.85) + self.assertGreater(history.history['val_acc'][-1], 0.7) - def test_image_classification_declarative(self): + def test_image_classification_sequential(self): with self.test_session(): np.random.seed(1337) - (x_train, y_train), (x_test, y_test) = testing_utils.get_test_data( - train_samples=200, - test_samples=100, - input_shape=(8, 8, 3), + (x_train, y_train), _ = testing_utils.get_test_data( + train_samples=100, + test_samples=0, + input_shape=(12, 12, 3), num_classes=2) y_train = keras.utils.to_categorical(y_train) - y_test = keras.utils.to_categorical(y_test) model = keras.models.Sequential() model.add(keras.layers.Conv2D( - 8, 3, + 4, 3, + padding='same', activation='relu', input_shape=x_train.shape[1:])) - model.add(keras.layers.BatchNormalization()) model.add(keras.layers.Conv2D( 8, 3, padding='same', activation='relu')) - model.add(keras.layers.GlobalMaxPooling2D()) + model.add(keras.layers.Conv2D( + 16, 3, + padding='same', + activation='relu')) + model.add(keras.layers.Flatten()) model.add(keras.layers.Dense(y_train.shape[-1], activation='softmax')) model.compile(loss='categorical_crossentropy', - optimizer='adam', + optimizer=keras.optimizers.SGD(lr=0.01, momentum=0.8), metrics=['accuracy']) history = model.fit(x_train, y_train, epochs=10, batch_size=16, - validation_data=(x_test, y_test), + validation_data=(x_train, y_train), verbose=2) - self.assertGreater(history.history['val_acc'][-1], 0.85) + self.assertGreater(history.history['val_acc'][-1], 0.7) def test_video_classification_functional(self): with self.test_session(): np.random.seed(1337) - (x_train, y_train), (x_test, y_test) = testing_utils.get_test_data( - train_samples=200, - test_samples=100, + (x_train, y_train), _ = testing_utils.get_test_data( + train_samples=100, + test_samples=0, 
input_shape=(4, 8, 8, 3), num_classes=3) y_train = keras.utils.to_categorical(y_train) - y_test = keras.utils.to_categorical(y_test) inputs = keras.layers.Input(shape=x_train.shape[1:]) x = keras.layers.TimeDistributed( @@ -159,22 +158,21 @@ class KerasIntegrationTest(test.TestCase): optimizer=keras.optimizers.SGD(lr=0.01, momentum=0.8), metrics=['accuracy']) history = model.fit(x_train, y_train, epochs=10, batch_size=16, - validation_data=(x_test, y_test), + validation_data=(x_train, y_train), verbose=2) - self.assertGreater(history.history['val_acc'][-1], 0.70) + self.assertGreater(history.history['val_acc'][-1], 0.7) def test_vector_classification_shared_sequential(self): # Test that Sequential models that feature internal updates # and internal losses can be shared. with self.test_session(): np.random.seed(1337) - (x_train, y_train), (x_test, y_test) = testing_utils.get_test_data( - train_samples=200, - test_samples=100, + (x_train, y_train), _ = testing_utils.get_test_data( + train_samples=100, + test_samples=0, input_shape=(10,), num_classes=2) y_train = keras.utils.to_categorical(y_train) - y_test = keras.utils.to_categorical(y_test) base_model = keras.models.Sequential([ keras.layers.Dense(16, @@ -189,27 +187,26 @@ class KerasIntegrationTest(test.TestCase): y = keras.layers.Dense(y_train.shape[-1], activation='softmax')(y) model = keras.models.Model(x, y) model.compile(loss='categorical_crossentropy', - optimizer='rmsprop', + optimizer=keras.optimizers.Adam(lr=0.1), metrics=['accuracy']) self.assertEqual(len(model.losses), 2) self.assertEqual(len(model.updates), 2) history = model.fit(x_train, y_train, epochs=10, batch_size=16, - validation_data=(x_test, y_test), + validation_data=(x_train, y_train), verbose=2) - self.assertGreater(history.history['val_acc'][-1], 0.84) + self.assertGreater(history.history['val_acc'][-1], 0.7) def test_vector_classification_shared_model(self): # Test that functional models that feature internal updates # and internal losses 
can be shared. with self.test_session(): np.random.seed(1337) - (x_train, y_train), (x_test, y_test) = testing_utils.get_test_data( - train_samples=200, - test_samples=100, + (x_train, y_train), _ = testing_utils.get_test_data( + train_samples=100, + test_samples=0, input_shape=(10,), num_classes=2) y_train = keras.utils.to_categorical(y_train) - y_test = keras.utils.to_categorical(y_test) inputs = keras.layers.Input(x_train.shape[1:]) x = keras.layers.Dense(16, @@ -225,12 +222,12 @@ class KerasIntegrationTest(test.TestCase): y = keras.layers.Dense(y_train.shape[-1], activation='softmax')(y) model = keras.models.Model(x, y) model.compile(loss='categorical_crossentropy', - optimizer='rmsprop', + optimizer=keras.optimizers.Adam(lr=0.1), metrics=['accuracy']) history = model.fit(x_train, y_train, epochs=10, batch_size=16, - validation_data=(x_test, y_test), + validation_data=(x_train, y_train), verbose=2) - self.assertGreater(history.history['val_acc'][-1], 0.85) + self.assertGreater(history.history['val_acc'][-1], 0.7) def test_embedding_with_clipnorm(self): with self.test_session(): @@ -242,9 +239,9 @@ class KerasIntegrationTest(test.TestCase): def test_using_tf_layers_in_keras_sequential_model(self): with self.test_session(): np.random.seed(1337) - (x_train, y_train), (x_test, y_test) = testing_utils.get_test_data( - train_samples=200, - test_samples=100, + (x_train, y_train), _ = testing_utils.get_test_data( + train_samples=100, + test_samples=0, input_shape=(10,), num_classes=2) @@ -254,25 +251,23 @@ class KerasIntegrationTest(test.TestCase): model.summary() y_train = keras.utils.to_categorical(y_train) - y_test = keras.utils.to_categorical(y_test) model.compile(loss='categorical_crossentropy', - optimizer='adam', + optimizer=keras.optimizers.Adam(lr=0.1), metrics=['accuracy']) history = model.fit(x_train, y_train, epochs=10, batch_size=16, - validation_data=(x_test, y_test), + validation_data=(x_train, y_train), verbose=0) - 
self.assertGreater(history.history['val_acc'][-1], 0.85) + self.assertGreater(history.history['val_acc'][-1], 0.7) def test_using_tf_layers_in_keras_functional_model(self): with self.test_session(): np.random.seed(1337) - (x_train, y_train), (x_test, y_test) = testing_utils.get_test_data( - train_samples=200, - test_samples=100, + (x_train, y_train), _ = testing_utils.get_test_data( + train_samples=100, + test_samples=0, input_shape=(10,), num_classes=2) y_train = keras.utils.to_categorical(y_train) - y_test = keras.utils.to_categorical(y_test) inputs = keras.Input(shape=(10,)) x = tf_core_layers.Dense(32, activation=nn.relu)(inputs) @@ -281,12 +276,12 @@ class KerasIntegrationTest(test.TestCase): model.summary() model.compile(loss='categorical_crossentropy', - optimizer='adam', + optimizer=keras.optimizers.Adam(lr=0.1), metrics=['accuracy']) history = model.fit(x_train, y_train, epochs=10, batch_size=16, - validation_data=(x_test, y_test), + validation_data=(x_train, y_train), verbose=0) - self.assertGreater(history.history['val_acc'][-1], 0.85) + self.assertGreater(history.history['val_acc'][-1], 0.7) if __name__ == '__main__': diff --git a/tensorflow/python/keras/_impl/keras/layers/advanced_activations.py b/tensorflow/python/keras/_impl/keras/layers/advanced_activations.py index c40ee109aaea7dacea72e095b1d8cea3ed2e9bf8..11ca89d625bebb607b2bddbe65b8251f52aa6e4c 100644 --- a/tensorflow/python/keras/_impl/keras/layers/advanced_activations.py +++ b/tensorflow/python/keras/_impl/keras/layers/advanced_activations.py @@ -26,6 +26,7 @@ from tensorflow.python.keras._impl.keras import regularizers from tensorflow.python.keras._impl.keras.engine import InputSpec from tensorflow.python.keras._impl.keras.engine import Layer from tensorflow.python.keras._impl.keras.engine.base_layer import shape_type_conversion +from tensorflow.python.ops import math_ops from tensorflow.python.util.tf_export import tf_export @@ -146,7 +147,7 @@ class PReLU(Layer): if K.backend() == 'theano': 
neg = ( K.pattern_broadcast(self.alpha, self.param_broadcast) * - (inputs - K.abs(inputs)) * 0.5) + (inputs - math_ops.abs(inputs)) * 0.5) else: neg = -self.alpha * K.relu(-inputs) return pos + neg @@ -232,7 +233,8 @@ class ThresholdedReLU(Layer): self.theta = K.cast_to_floatx(theta) def call(self, inputs, mask=None): - return inputs * K.cast(K.greater(inputs, self.theta), K.floatx()) + return inputs * math_ops.cast( + math_ops.greater(inputs, self.theta), K.floatx()) def get_config(self): config = {'theta': float(self.theta)} diff --git a/tensorflow/python/keras/_impl/keras/layers/convolutional.py b/tensorflow/python/keras/_impl/keras/layers/convolutional.py index 162ae6c28f1afae1dd8aaf70213b808d9ad9598f..12b965587f5695fe410d2363956f54386c9fa8cf 100644 --- a/tensorflow/python/keras/_impl/keras/layers/convolutional.py +++ b/tensorflow/python/keras/_impl/keras/layers/convolutional.py @@ -19,14 +19,16 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.python.eager import context from tensorflow.python.framework import tensor_shape from tensorflow.python.keras._impl.keras import activations -from tensorflow.python.keras._impl.keras import backend as K +from tensorflow.python.keras._impl.keras import backend from tensorflow.python.keras._impl.keras import constraints from tensorflow.python.keras._impl.keras import initializers from tensorflow.python.keras._impl.keras import regularizers from tensorflow.python.keras._impl.keras.engine import InputSpec from tensorflow.python.keras._impl.keras.engine import Layer +from tensorflow.python.keras._impl.keras.engine.base_layer import shape_type_conversion # imports for backwards namespace compatibility # pylint: disable=unused-import from tensorflow.python.keras._impl.keras.layers.pooling import AveragePooling1D @@ -37,12 +39,232 @@ from tensorflow.python.keras._impl.keras.layers.pooling import MaxPooling2D from 
tensorflow.python.keras._impl.keras.layers.pooling import MaxPooling3D # pylint: enable=unused-import from tensorflow.python.keras._impl.keras.utils import conv_utils -from tensorflow.python.layers import convolutional as tf_convolutional_layers +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import nn +from tensorflow.python.ops import nn_ops from tensorflow.python.util.tf_export import tf_export +class Conv(Layer): + """Abstract nD convolution layer (private, used as implementation base). + + This layer creates a convolution kernel that is convolved + (actually cross-correlated) with the layer input to produce a tensor of + outputs. If `use_bias` is True (and a `bias_initializer` is provided), + a bias vector is created and added to the outputs. Finally, if + `activation` is not `None`, it is applied to the outputs as well. + + Arguments: + rank: An integer, the rank of the convolution, e.g. "2" for 2D convolution. + filters: Integer, the dimensionality of the output space (i.e. the number + of filters in the convolution). + kernel_size: An integer or tuple/list of n integers, specifying the + length of the convolution window. + strides: An integer or tuple/list of n integers, + specifying the stride length of the convolution. + Specifying any stride value != 1 is incompatible with specifying + any `dilation_rate` value != 1. + padding: One of `"valid"` or `"same"` (case-insensitive). + data_format: A string, one of `channels_last` (default) or `channels_first`. + The ordering of the dimensions in the inputs. + `channels_last` corresponds to inputs with shape + `(batch, ..., channels)` while `channels_first` corresponds to + inputs with shape `(batch, channels, ...)`. + dilation_rate: An integer or tuple/list of n integers, specifying + the dilation rate to use for dilated convolution. + Currently, specifying any `dilation_rate` value != 1 is + incompatible with specifying any `strides` value != 1. + activation: Activation function. 
Set it to None to maintain a + linear activation. + use_bias: Boolean, whether the layer uses a bias. + kernel_initializer: An initializer for the convolution kernel. + bias_initializer: An initializer for the bias vector. If None, the default + initializer will be used. + kernel_regularizer: Optional regularizer for the convolution kernel. + bias_regularizer: Optional regularizer for the bias vector. + activity_regularizer: Optional regularizer function for the output. + kernel_constraint: Optional projection function to be applied to the + kernel after being updated by an `Optimizer` (e.g. used to implement + norm constraints or value constraints for layer weights). The function + must take as input the unprojected variable and must return the + projected variable (which must have the same shape). Constraints are + not safe to use when doing asynchronous distributed training. + bias_constraint: Optional projection function to be applied to the + bias after being updated by an `Optimizer`. + trainable: Boolean, if `True` also add variables to the graph collection + `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`). + name: A string, the name of the layer. 
+ """ + + def __init__(self, rank, + filters, + kernel_size, + strides=1, + padding='valid', + data_format=None, + dilation_rate=1, + activation=None, + use_bias=True, + kernel_initializer='glorot_uniform', + bias_initializer='zeros', + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + trainable=True, + name=None, + **kwargs): + super(Conv, self).__init__( + trainable=trainable, + name=name, + activity_regularizer=regularizers.get(activity_regularizer), + **kwargs) + self.rank = rank + self.filters = filters + self.kernel_size = conv_utils.normalize_tuple( + kernel_size, rank, 'kernel_size') + self.strides = conv_utils.normalize_tuple(strides, rank, 'strides') + self.padding = conv_utils.normalize_padding(padding) + self.data_format = conv_utils.normalize_data_format(data_format) + self.dilation_rate = conv_utils.normalize_tuple( + dilation_rate, rank, 'dilation_rate') + self.activation = activations.get(activation) + self.use_bias = use_bias + self.kernel_initializer = initializers.get(kernel_initializer) + self.bias_initializer = initializers.get(bias_initializer) + self.kernel_regularizer = regularizers.get(kernel_regularizer) + self.bias_regularizer = regularizers.get(bias_regularizer) + self.kernel_constraint = constraints.get(kernel_constraint) + self.bias_constraint = constraints.get(bias_constraint) + self.input_spec = InputSpec(ndim=self.rank + 2) + + def build(self, input_shape): + input_shape = tensor_shape.TensorShape(input_shape) + if self.data_format == 'channels_first': + channel_axis = 1 + else: + channel_axis = -1 + if input_shape[channel_axis].value is None: + raise ValueError('The channel dimension of the inputs ' + 'should be defined. 
Found `None`.') + input_dim = int(input_shape[channel_axis]) + kernel_shape = self.kernel_size + (input_dim, self.filters) + + self.kernel = self.add_variable(name='kernel', + shape=kernel_shape, + initializer=self.kernel_initializer, + regularizer=self.kernel_regularizer, + constraint=self.kernel_constraint, + trainable=True, + dtype=self.dtype) + if self.use_bias: + self.bias = self.add_variable(name='bias', + shape=(self.filters,), + initializer=self.bias_initializer, + regularizer=self.bias_regularizer, + constraint=self.bias_constraint, + trainable=True, + dtype=self.dtype) + else: + self.bias = None + self.input_spec = InputSpec(ndim=self.rank + 2, + axes={channel_axis: input_dim}) + self._convolution_op = nn_ops.Convolution( + input_shape, + filter_shape=self.kernel.get_shape(), + dilation_rate=self.dilation_rate, + strides=self.strides, + padding=self.padding.upper(), + data_format=conv_utils.convert_data_format(self.data_format, + self.rank + 2)) + self.built = True + + def call(self, inputs): + outputs = self._convolution_op(inputs, self.kernel) + + if self.use_bias: + if self.data_format == 'channels_first': + if self.rank == 1: + # nn.bias_add does not accept a 1D input tensor. + bias = array_ops.reshape(self.bias, (1, self.filters, 1)) + outputs += bias + if self.rank == 2: + outputs = nn.bias_add(outputs, self.bias, data_format='NCHW') + if self.rank == 3: + # As of Mar 2017, direct addition is significantly slower than + # bias_add when computing gradients. To use bias_add, we collapse Z + # and Y into a single dimension to obtain a 4D input tensor. 
+ outputs_shape = outputs.shape.as_list() + if outputs_shape[0] is None: + outputs_shape[0] = -1 + outputs_4d = array_ops.reshape(outputs, + [outputs_shape[0], outputs_shape[1], + outputs_shape[2] * outputs_shape[3], + outputs_shape[4]]) + outputs_4d = nn.bias_add(outputs_4d, self.bias, data_format='NCHW') + outputs = array_ops.reshape(outputs_4d, outputs_shape) + else: + outputs = nn.bias_add(outputs, self.bias, data_format='NHWC') + + if self.activation is not None: + return self.activation(outputs) + return outputs + + def compute_output_shape(self, input_shape): + input_shape = tensor_shape.TensorShape(input_shape).as_list() + if self.data_format == 'channels_last': + space = input_shape[1:-1] + new_space = [] + for i in range(len(space)): + new_dim = conv_utils.conv_output_length( + space[i], + self.kernel_size[i], + padding=self.padding, + stride=self.strides[i], + dilation=self.dilation_rate[i]) + new_space.append(new_dim) + return tensor_shape.TensorShape([input_shape[0]] + new_space + + [self.filters]) + else: + space = input_shape[2:] + new_space = [] + for i in range(len(space)): + new_dim = conv_utils.conv_output_length( + space[i], + self.kernel_size[i], + padding=self.padding, + stride=self.strides[i], + dilation=self.dilation_rate[i]) + new_space.append(new_dim) + return tensor_shape.TensorShape([input_shape[0], self.filters] + + new_space) + + def get_config(self): + config = { + 'filters': self.filters, + 'kernel_size': self.kernel_size, + 'strides': self.strides, + 'padding': self.padding, + 'data_format': self.data_format, + 'dilation_rate': self.dilation_rate, + 'activation': activations.serialize(self.activation), + 'use_bias': self.use_bias, + 'kernel_initializer': initializers.serialize(self.kernel_initializer), + 'bias_initializer': initializers.serialize(self.bias_initializer), + 'kernel_regularizer': regularizers.serialize(self.kernel_regularizer), + 'bias_regularizer': regularizers.serialize(self.bias_regularizer), + 
'activity_regularizer': + regularizers.serialize(self.activity_regularizer), + 'kernel_constraint': constraints.serialize(self.kernel_constraint), + 'bias_constraint': constraints.serialize(self.bias_constraint) + } + base_config = super(Conv, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + @tf_export('keras.layers.Conv1D', 'keras.layers.Convolution1D') -class Conv1D(tf_convolutional_layers.Conv1D, Layer): +class Conv1D(Conv): """1D convolution layer (e.g. temporal convolution). This layer creates a convolution kernel that is convolved @@ -73,6 +295,8 @@ class Conv1D(tf_convolutional_layers.Conv1D, Layer): where the model should not violate the temporal order. See [WaveNet: A Generative Model for Raw Audio, section 2.1](https://arxiv.org/abs/1609.03499). + data_format: A string, + one of `channels_last` (default) or `channels_first`. dilation_rate: an integer or tuple/list of a single integer, specifying the dilation rate to use for dilated convolution. 
Currently, specifying any `dilation_rate` value != 1 is @@ -104,6 +328,7 @@ class Conv1D(tf_convolutional_layers.Conv1D, Layer): kernel_size, strides=1, padding='valid', + data_format='channels_last', dilation_rate=1, activation=None, use_bias=True, @@ -116,11 +341,12 @@ class Conv1D(tf_convolutional_layers.Conv1D, Layer): bias_constraint=None, **kwargs): super(Conv1D, self).__init__( + rank=1, filters=filters, kernel_size=kernel_size, strides=strides, padding=padding, - data_format='channels_last', + data_format=data_format, dilation_rate=dilation_rate, activation=activations.get(activation), use_bias=use_bias, @@ -133,30 +359,9 @@ class Conv1D(tf_convolutional_layers.Conv1D, Layer): bias_constraint=constraints.get(bias_constraint), **kwargs) - def get_config(self): - config = { - 'filters': self.filters, - 'kernel_size': self.kernel_size, - 'strides': self.strides, - 'padding': self.padding, - 'dilation_rate': self.dilation_rate, - 'activation': activations.serialize(self.activation), - 'use_bias': self.use_bias, - 'kernel_initializer': initializers.serialize(self.kernel_initializer), - 'bias_initializer': initializers.serialize(self.bias_initializer), - 'kernel_regularizer': regularizers.serialize(self.kernel_regularizer), - 'bias_regularizer': regularizers.serialize(self.bias_regularizer), - 'activity_regularizer': - regularizers.serialize(self.activity_regularizer), - 'kernel_constraint': constraints.serialize(self.kernel_constraint), - 'bias_constraint': constraints.serialize(self.bias_constraint) - } - base_config = super(Conv1D, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - @tf_export('keras.layers.Conv2D', 'keras.layers.Convolution2D') -class Conv2D(tf_convolutional_layers.Conv2D, Layer): +class Conv2D(Conv): """2D convolution layer (e.g. spatial convolution over images). 
This layer creates a convolution kernel that is convolved @@ -246,9 +451,8 @@ class Conv2D(tf_convolutional_layers.Conv2D, Layer): kernel_constraint=None, bias_constraint=None, **kwargs): - if data_format is None: - data_format = K.image_data_format() super(Conv2D, self).__init__( + rank=2, filters=filters, kernel_size=kernel_size, strides=strides, @@ -266,31 +470,9 @@ class Conv2D(tf_convolutional_layers.Conv2D, Layer): bias_constraint=constraints.get(bias_constraint), **kwargs) - def get_config(self): - config = { - 'filters': self.filters, - 'kernel_size': self.kernel_size, - 'strides': self.strides, - 'padding': self.padding, - 'data_format': self.data_format, - 'dilation_rate': self.dilation_rate, - 'activation': activations.serialize(self.activation), - 'use_bias': self.use_bias, - 'kernel_initializer': initializers.serialize(self.kernel_initializer), - 'bias_initializer': initializers.serialize(self.bias_initializer), - 'kernel_regularizer': regularizers.serialize(self.kernel_regularizer), - 'bias_regularizer': regularizers.serialize(self.bias_regularizer), - 'activity_regularizer': - regularizers.serialize(self.activity_regularizer), - 'kernel_constraint': constraints.serialize(self.kernel_constraint), - 'bias_constraint': constraints.serialize(self.bias_constraint) - } - base_config = super(Conv2D, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - @tf_export('keras.layers.Conv3D', 'keras.layers.Convolution3D') -class Conv3D(tf_convolutional_layers.Conv3D, Layer): +class Conv3D(Conv): """3D convolution layer (e.g. spatial convolution over volumes). 
This layer creates a convolution kernel that is convolved @@ -387,9 +569,8 @@ class Conv3D(tf_convolutional_layers.Conv3D, Layer): kernel_constraint=None, bias_constraint=None, **kwargs): - if data_format is None: - data_format = K.image_data_format() super(Conv3D, self).__init__( + rank=3, filters=filters, kernel_size=kernel_size, strides=strides, @@ -407,32 +588,10 @@ class Conv3D(tf_convolutional_layers.Conv3D, Layer): bias_constraint=constraints.get(bias_constraint), **kwargs) - def get_config(self): - config = { - 'filters': self.filters, - 'kernel_size': self.kernel_size, - 'strides': self.strides, - 'padding': self.padding, - 'data_format': self.data_format, - 'dilation_rate': self.dilation_rate, - 'activation': activations.serialize(self.activation), - 'use_bias': self.use_bias, - 'kernel_initializer': initializers.serialize(self.kernel_initializer), - 'bias_initializer': initializers.serialize(self.bias_initializer), - 'kernel_regularizer': regularizers.serialize(self.kernel_regularizer), - 'bias_regularizer': regularizers.serialize(self.bias_regularizer), - 'activity_regularizer': - regularizers.serialize(self.activity_regularizer), - 'kernel_constraint': constraints.serialize(self.kernel_constraint), - 'bias_constraint': constraints.serialize(self.bias_constraint) - } - base_config = super(Conv3D, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - @tf_export('keras.layers.Conv2DTranspose', 'keras.layers.Convolution2DTranspose') -class Conv2DTranspose(tf_convolutional_layers.Conv2DTranspose, Layer): +class Conv2DTranspose(Conv2D): """Transposed convolution layer (sometimes called Deconvolution). 
The need for transposed convolutions generally arises @@ -528,8 +687,6 @@ class Conv2DTranspose(tf_convolutional_layers.Conv2DTranspose, Layer): kernel_constraint=None, bias_constraint=None, **kwargs): - if data_format is None: - data_format = K.image_data_format() super(Conv2DTranspose, self).__init__( filters=filters, kernel_size=kernel_size, @@ -547,31 +704,124 @@ class Conv2DTranspose(tf_convolutional_layers.Conv2DTranspose, Layer): bias_constraint=constraints.get(bias_constraint), **kwargs) - def get_config(self): - config = { - 'filters': self.filters, - 'kernel_size': self.kernel_size, - 'strides': self.strides, - 'padding': self.padding, - 'data_format': self.data_format, - 'activation': activations.serialize(self.activation), - 'use_bias': self.use_bias, - 'kernel_initializer': initializers.serialize(self.kernel_initializer), - 'bias_initializer': initializers.serialize(self.bias_initializer), - 'kernel_regularizer': regularizers.serialize(self.kernel_regularizer), - 'bias_regularizer': regularizers.serialize(self.bias_regularizer), - 'activity_regularizer': - regularizers.serialize(self.activity_regularizer), - 'kernel_constraint': constraints.serialize(self.kernel_constraint), - 'bias_constraint': constraints.serialize(self.bias_constraint) - } - base_config = super(Conv2DTranspose, self).get_config() - return dict(list(base_config.items()) + list(config.items())) + def build(self, input_shape): + input_shape = tensor_shape.TensorShape(input_shape) + if len(input_shape) != 4: + raise ValueError('Inputs should have rank 4. Received input shape: ' + + str(input_shape)) + if self.data_format == 'channels_first': + channel_axis = 1 + else: + channel_axis = -1 + if input_shape[channel_axis].value is None: + raise ValueError('The channel dimension of the inputs ' + 'should be defined. 
Found `None`.') + input_dim = int(input_shape[channel_axis]) + self.input_spec = InputSpec(ndim=4, axes={channel_axis: input_dim}) + kernel_shape = self.kernel_size + (self.filters, input_dim) + + self.kernel = self.add_variable(name='kernel', + shape=kernel_shape, + initializer=self.kernel_initializer, + regularizer=self.kernel_regularizer, + constraint=self.kernel_constraint, + trainable=True, + dtype=self.dtype) + if self.use_bias: + self.bias = self.add_variable(name='bias', + shape=(self.filters,), + initializer=self.bias_initializer, + regularizer=self.bias_regularizer, + constraint=self.bias_constraint, + trainable=True, + dtype=self.dtype) + else: + self.bias = None + self.built = True + + def call(self, inputs): + inputs_shape = array_ops.shape(inputs) + batch_size = inputs_shape[0] + if self.data_format == 'channels_first': + c_axis, h_axis, w_axis = 1, 2, 3 + else: + c_axis, h_axis, w_axis = 3, 1, 2 + + height, width = inputs_shape[h_axis], inputs_shape[w_axis] + kernel_h, kernel_w = self.kernel_size + stride_h, stride_w = self.strides + + # Infer the dynamic output shape: + out_height = conv_utils.deconv_output_length(height, + kernel_h, + self.padding, + stride_h) + out_width = conv_utils.deconv_output_length(width, + kernel_w, + self.padding, + stride_w) + if self.data_format == 'channels_first': + output_shape = (batch_size, self.filters, out_height, out_width) + strides = (1, 1, stride_h, stride_w) + else: + output_shape = (batch_size, out_height, out_width, self.filters) + strides = (1, stride_h, stride_w, 1) + + output_shape_tensor = array_ops.stack(output_shape) + outputs = nn.conv2d_transpose( + inputs, + self.kernel, + output_shape_tensor, + strides, + padding=self.padding.upper(), + data_format=conv_utils.convert_data_format(self.data_format, ndim=4)) + + if not context.executing_eagerly(): + # Infer the static output shape: + out_shape = inputs.get_shape().as_list() + out_shape[c_axis] = self.filters + out_shape[h_axis] = 
conv_utils.deconv_output_length(out_shape[h_axis], + kernel_h, + self.padding, + stride_h) + out_shape[w_axis] = conv_utils.deconv_output_length(out_shape[w_axis], + kernel_w, + self.padding, + stride_w) + outputs.set_shape(out_shape) + + if self.use_bias: + outputs = nn.bias_add( + outputs, + self.bias, + data_format=conv_utils.convert_data_format(self.data_format, ndim=4)) + + if self.activation is not None: + return self.activation(outputs) + return outputs + + def compute_output_shape(self, input_shape): + input_shape = tensor_shape.TensorShape(input_shape).as_list() + output_shape = list(input_shape) + if self.data_format == 'channels_first': + c_axis, h_axis, w_axis = 1, 2, 3 + else: + c_axis, h_axis, w_axis = 3, 1, 2 + + kernel_h, kernel_w = self.kernel_size + stride_h, stride_w = self.strides + + output_shape[c_axis] = self.filters + output_shape[h_axis] = conv_utils.deconv_output_length( + output_shape[h_axis], kernel_h, self.padding, stride_h) + output_shape[w_axis] = conv_utils.deconv_output_length( + output_shape[w_axis], kernel_w, self.padding, stride_w) + return tensor_shape.TensorShape(output_shape) @tf_export('keras.layers.Conv3DTranspose', 'keras.layers.Convolution3DTranspose') -class Conv3DTranspose(tf_convolutional_layers.Conv3DTranspose, Layer): +class Conv3DTranspose(Conv3D): """Transposed convolution layer (sometimes called Deconvolution). 
The need for transposed convolutions generally arises @@ -678,8 +928,6 @@ class Conv3DTranspose(tf_convolutional_layers.Conv3DTranspose, Layer): kernel_constraint=None, bias_constraint=None, **kwargs): - if data_format is None: - data_format = K.image_data_format() super(Conv3DTranspose, self).__init__( filters=filters, kernel_size=kernel_size, @@ -697,6 +945,314 @@ class Conv3DTranspose(tf_convolutional_layers.Conv3DTranspose, Layer): bias_constraint=constraints.get(bias_constraint), **kwargs) + def build(self, input_shape): + input_shape = tensor_shape.TensorShape(input_shape) + if len(input_shape) != 5: + raise ValueError('Inputs should have rank 5, received input shape:', + str(input_shape)) + if self.data_format == 'channels_first': + channel_axis = 1 + else: + channel_axis = -1 + if input_shape[channel_axis].value is None: + raise ValueError('The channel dimension of the inputs ' + 'should be defined, found None: ' + str(input_shape)) + input_dim = int(input_shape[channel_axis]) + kernel_shape = self.kernel_size + (self.filters, input_dim) + self.input_spec = InputSpec(ndim=5, axes={channel_axis: input_dim}) + + self.kernel = self.add_variable( + 'kernel', + shape=kernel_shape, + initializer=self.kernel_initializer, + regularizer=self.kernel_regularizer, + constraint=self.kernel_constraint, + trainable=True, + dtype=self.dtype) + if self.use_bias: + self.bias = self.add_variable( + 'bias', + shape=(self.filters,), + initializer=self.bias_initializer, + regularizer=self.bias_regularizer, + constraint=self.bias_constraint, + trainable=True, + dtype=self.dtype) + else: + self.bias = None + self.built = True + + def call(self, inputs): + inputs_shape = array_ops.shape(inputs) + batch_size = inputs_shape[0] + if self.data_format == 'channels_first': + c_axis, d_axis, h_axis, w_axis = 1, 2, 3, 4 + else: + c_axis, d_axis, h_axis, w_axis = 4, 1, 2, 3 + + self.input_spec = InputSpec(ndim=5, axes={c_axis: inputs_shape[c_axis]}) + + depth = inputs_shape[d_axis] + height 
= inputs_shape[h_axis] + width = inputs_shape[w_axis] + + kernel_d, kernel_h, kernel_w = self.kernel_size + stride_d, stride_h, stride_w = self.strides + + # Infer the dynamic output shape: + out_depth = conv_utils.deconv_output_length(depth, + kernel_d, + self.padding, + stride_d) + out_height = conv_utils.deconv_output_length(height, + kernel_h, + self.padding, + stride_h) + out_width = conv_utils.deconv_output_length(width, + kernel_w, + self.padding, + stride_w) + if self.data_format == 'channels_first': + output_shape = (batch_size, self.filters, out_depth, out_height, + out_width) + strides = (1, 1, stride_d, stride_h, stride_w) + else: + output_shape = (batch_size, out_depth, out_height, out_width, + self.filters) + strides = (1, stride_d, stride_h, stride_w, 1) + + output_shape_tensor = array_ops.stack(output_shape) + outputs = nn.conv3d_transpose( + inputs, + self.kernel, + output_shape_tensor, + strides, + data_format=conv_utils.convert_data_format(self.data_format, ndim=5), + padding=self.padding.upper()) + + if not context.executing_eagerly(): + # Infer the static output shape: + out_shape = inputs.get_shape().as_list() + out_shape[c_axis] = self.filters + out_shape[d_axis] = conv_utils.deconv_output_length(out_shape[d_axis], + kernel_d, + self.padding, + stride_d) + out_shape[h_axis] = conv_utils.deconv_output_length(out_shape[h_axis], + kernel_h, + self.padding, + stride_h) + out_shape[w_axis] = conv_utils.deconv_output_length(out_shape[w_axis], + kernel_w, + self.padding, + stride_w) + outputs.set_shape(out_shape) + + if self.use_bias: + outputs_shape = outputs.shape.as_list() + if outputs_shape[0] is None: + outputs_shape[0] = -1 + if self.data_format == 'channels_first': + outputs_4d = array_ops.reshape(outputs, [ + outputs_shape[0], outputs_shape[1], + outputs_shape[2] * outputs_shape[3], outputs_shape[4] + ]) + else: + outputs_4d = array_ops.reshape(outputs, [ + outputs_shape[0], outputs_shape[1] * outputs_shape[2], + outputs_shape[3], 
outputs_shape[4] + ]) + outputs_4d = nn.bias_add( + outputs_4d, + self.bias, + data_format=conv_utils.convert_data_format(self.data_format, ndim=4)) + outputs = array_ops.reshape(outputs_4d, outputs_shape) + + if self.activation is not None: + return self.activation(outputs) + return outputs + + def compute_output_shape(self, input_shape): + input_shape = tensor_shape.TensorShape(input_shape).as_list() + output_shape = list(input_shape) + if self.data_format == 'channels_first': + c_axis, d_axis, h_axis, w_axis = 1, 2, 3, 4 + else: + c_axis, d_axis, h_axis, w_axis = 4, 1, 2, 3 + + kernel_d, kernel_h, kernel_w = self.kernel_size + stride_d, stride_h, stride_w = self.strides + + output_shape[c_axis] = self.filters + output_shape[d_axis] = conv_utils.deconv_output_length( + output_shape[d_axis], kernel_d, self.padding, stride_d) + output_shape[h_axis] = conv_utils.deconv_output_length( + output_shape[h_axis], kernel_h, self.padding, stride_h) + output_shape[w_axis] = conv_utils.deconv_output_length( + output_shape[w_axis], kernel_w, self.padding, stride_w) + return tensor_shape.TensorShape(output_shape) + + +class SeparableConv(Conv): + """Abstract base layer for separable nD convolution. + + This layer performs a depthwise convolution that acts separately on + channels, followed by a pointwise convolution that mixes channels. + If `use_bias` is True and a bias initializer is provided, + it adds a bias vector to the output. + It then optionally applies an activation function to produce the final output. + + Arguments: + rank: An integer, the rank of the convolution, e.g. "2" for 2D convolution. + filters: Integer, the dimensionality of the output space (i.e. the number + of filters in the convolution). + kernel_size: A tuple or list of integers specifying the spatial + dimensions of the filters. Can be a single integer to specify the same + value for all spatial dimensions. + strides: A tuple or list of integers specifying the strides + of the convolution. 
Can be a single integer to specify the same value for + all spatial dimensions. + Specifying any `stride` value != 1 is incompatible with specifying + any `dilation_rate` value != 1. + padding: One of `"valid"` or `"same"` (case-insensitive). + data_format: A string, one of `channels_last` (default) or `channels_first`. + The ordering of the dimensions in the inputs. + `channels_last` corresponds to inputs with shape + `(batch, ..., channels)` while `channels_first` corresponds to + inputs with shape `(batch, channels, ...)`. + dilation_rate: An integer or tuple/list of 2 integers, specifying + the dilation rate to use for dilated convolution. + Can be a single integer to specify the same value for + all spatial dimensions. + Currently, specifying any `dilation_rate` value != 1 is + incompatible with specifying any stride value != 1. + depth_multiplier: The number of depthwise convolution output channels for + each input channel. The total number of depthwise convolution output + channels will be equal to `num_filters_in * depth_multiplier`. + activation: Activation function. Set it to None to maintain a + linear activation. + use_bias: Boolean, whether the layer uses a bias. + depthwise_initializer: An initializer for the depthwise convolution kernel. + pointwise_initializer: An initializer for the pointwise convolution kernel. + bias_initializer: An initializer for the bias vector. If None, the default + initializer will be used. + depthwise_regularizer: Optional regularizer for the depthwise + convolution kernel. + pointwise_regularizer: Optional regularizer for the pointwise + convolution kernel. + bias_regularizer: Optional regularizer for the bias vector. + activity_regularizer: Optional regularizer function for the output. + depthwise_constraint: Optional projection function to be applied to the + depthwise kernel after being updated by an `Optimizer` (e.g. used for + norm constraints or value constraints for layer weights). 
The function + must take as input the unprojected variable and must return the + projected variable (which must have the same shape). Constraints are + not safe to use when doing asynchronous distributed training. + pointwise_constraint: Optional projection function to be applied to the + pointwise kernel after being updated by an `Optimizer`. + bias_constraint: Optional projection function to be applied to the + bias after being updated by an `Optimizer`. + trainable: Boolean, if `True` also add variables to the graph collection + `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`). + name: A string, the name of the layer. + """ + + def __init__(self, + rank, + filters, + kernel_size, + strides=1, + padding='valid', + data_format=None, + dilation_rate=1, + depth_multiplier=1, + activation=None, + use_bias=True, + depthwise_initializer='glorot_uniform', + pointwise_initializer='glorot_uniform', + bias_initializer='zeros', + depthwise_regularizer=None, + pointwise_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + depthwise_constraint=None, + pointwise_constraint=None, + bias_constraint=None, + trainable=True, + name=None, + **kwargs): + super(SeparableConv, self).__init__( + rank=rank, + filters=filters, + kernel_size=kernel_size, + strides=strides, + padding=padding, + data_format=data_format, + dilation_rate=dilation_rate, + activation=activations.get(activation), + use_bias=use_bias, + bias_regularizer=regularizers.get(bias_regularizer), + activity_regularizer=regularizers.get(activity_regularizer), + bias_constraint=bias_constraint, + trainable=trainable, + name=name, + **kwargs) + self.depth_multiplier = depth_multiplier + self.depthwise_initializer = initializers.get(depthwise_initializer) + self.pointwise_initializer = initializers.get(pointwise_initializer) + self.depthwise_regularizer = regularizers.get(depthwise_regularizer) + self.pointwise_regularizer = regularizers.get(pointwise_regularizer) + self.depthwise_constraint = 
constraints.get(depthwise_constraint) + self.pointwise_constraint = constraints.get(pointwise_constraint) + + def build(self, input_shape): + input_shape = tensor_shape.TensorShape(input_shape) + if self.data_format == 'channels_first': + channel_axis = 1 + else: + channel_axis = -1 + if input_shape[channel_axis].value is None: + raise ValueError('The channel dimension of the inputs ' + 'should be defined. Found `None`.') + input_dim = int(input_shape[channel_axis]) + self.input_spec = InputSpec(ndim=self.rank + 2, + axes={channel_axis: input_dim}) + depthwise_kernel_shape = self.kernel_size + (input_dim, + self.depth_multiplier) + pointwise_kernel_shape = ( + 1,) * self.rank + (self.depth_multiplier * input_dim, self.filters) + + self.depthwise_kernel = self.add_variable( + name='depthwise_kernel', + shape=depthwise_kernel_shape, + initializer=self.depthwise_initializer, + regularizer=self.depthwise_regularizer, + constraint=self.depthwise_constraint, + trainable=True, + dtype=self.dtype) + self.pointwise_kernel = self.add_variable( + name='pointwise_kernel', + shape=pointwise_kernel_shape, + initializer=self.pointwise_initializer, + regularizer=self.pointwise_regularizer, + constraint=self.pointwise_constraint, + trainable=True, + dtype=self.dtype) + if self.use_bias: + self.bias = self.add_variable(name='bias', + shape=(self.filters,), + initializer=self.bias_initializer, + regularizer=self.bias_regularizer, + constraint=self.bias_constraint, + trainable=True, + dtype=self.dtype) + else: + self.bias = None + self.built = True + + def call(self, inputs): + raise NotImplementedError + def get_config(self): config = { 'filters': self.filters, @@ -704,24 +1260,34 @@ class Conv3DTranspose(tf_convolutional_layers.Conv3DTranspose, Layer): 'strides': self.strides, 'padding': self.padding, 'data_format': self.data_format, + 'dilation_rate': self.dilation_rate, 'activation': activations.serialize(self.activation), 'use_bias': self.use_bias, - 'kernel_initializer': 
initializers.serialize(self.kernel_initializer), + 'depthwise_initializer': + initializers.serialize(self.depthwise_initializer), + 'pointwise_initializer': + initializers.serialize(self.pointwise_initializer), 'bias_initializer': initializers.serialize(self.bias_initializer), - 'kernel_regularizer': regularizers.serialize(self.kernel_regularizer), + 'depthwise_regularizer': + regularizers.serialize(self.depthwise_regularizer), + 'pointwise_regularizer': + regularizers.serialize(self.pointwise_regularizer), 'bias_regularizer': regularizers.serialize(self.bias_regularizer), 'activity_regularizer': regularizers.serialize(self.activity_regularizer), - 'kernel_constraint': constraints.serialize(self.kernel_constraint), + 'depthwise_constraint': + constraints.serialize(self.depthwise_constraint), + 'pointwise_constraint': + constraints.serialize(self.pointwise_constraint), 'bias_constraint': constraints.serialize(self.bias_constraint) } - base_config = super(Conv3DTranspose, self).get_config() + base_config = super(SeparableConv, self).get_config() return dict(list(base_config.items()) + list(config.items())) @tf_export('keras.layers.SeparableConv1D', 'keras.layers.SeparableConvolution1D') -class SeparableConv1D(tf_convolutional_layers.SeparableConv1D, Layer): +class SeparableConv1D(SeparableConv): """Depthwise separable 1D convolution. 
This layer performs a depthwise convolution that acts separately on @@ -801,15 +1367,15 @@ class SeparableConv1D(tf_convolutional_layers.SeparableConv1D, Layer): pointwise_constraint=None, bias_constraint=None, **kwargs): - if data_format is None: - data_format = K.image_data_format() super(SeparableConv1D, self).__init__( + rank=1, filters=filters, kernel_size=kernel_size, strides=strides, padding=padding, data_format=data_format, dilation_rate=dilation_rate, + depth_multiplier=depth_multiplier, activation=activations.get(activation), use_bias=use_bias, depthwise_initializer=initializers.get(depthwise_initializer), @@ -824,44 +1390,46 @@ class SeparableConv1D(tf_convolutional_layers.SeparableConv1D, Layer): bias_constraint=constraints.get(bias_constraint), **kwargs) - def get_config(self): - config = { - 'filters': self.filters, - 'kernel_size': self.kernel_size, - 'strides': self.strides, - 'padding': self.padding, - 'data_format': self.data_format, - 'dilation_rate': self.dilation_rate, - 'activation': activations.serialize(self.activation), - 'use_bias': self.use_bias, - 'depthwise_initializer': - initializers.serialize(self.depthwise_initializer), - 'pointwise_initializer': - initializers.serialize(self.pointwise_initializer), - 'bias_initializer': - initializers.serialize(self.bias_initializer), - 'depthwise_regularizer': - regularizers.serialize(self.depthwise_regularizer), - 'pointwise_regularizer': - regularizers.serialize(self.pointwise_regularizer), - 'bias_regularizer': - regularizers.serialize(self.bias_regularizer), - 'activity_regularizer': - regularizers.serialize(self.activity_regularizer), - 'depthwise_constraint': - constraints.serialize(self.depthwise_constraint), - 'pointwise_constraint': - constraints.serialize(self.pointwise_constraint), - 'bias_constraint': - constraints.serialize(self.bias_constraint) - } - base_config = super(SeparableConv1D, self).get_config() - return dict(list(base_config.items()) + list(config.items())) + def 
call(self, inputs): + if self.data_format == 'channels_last': + strides = (1,) + self.strides * 2 + (1,) + spatial_start_dim = 1 + else: + strides = (1, 1) + self.strides * 2 + spatial_start_dim = 2 + + # Explicitly broadcast inputs and kernels to 4D. + # TODO(fchollet): refactor when a native separable_conv1d op is available. + inputs = array_ops.expand_dims(inputs, spatial_start_dim) + depthwise_kernel = array_ops.expand_dims(self.depthwise_kernel, 0) + pointwise_kernel = array_ops.expand_dims(self.pointwise_kernel, 0) + dilation_rate = (1,) + self.dilation_rate + + outputs = nn.separable_conv2d( + inputs, + depthwise_kernel, + pointwise_kernel, + strides=strides, + padding=self.padding.upper(), + rate=dilation_rate, + data_format=conv_utils.convert_data_format(self.data_format, ndim=4)) + + if self.use_bias: + outputs = nn.bias_add( + outputs, + self.bias, + data_format=conv_utils.convert_data_format(self.data_format, ndim=4)) + + outputs = array_ops.squeeze(outputs, [spatial_start_dim]) + + if self.activation is not None: + return self.activation(outputs) + return outputs @tf_export('keras.layers.SeparableConv2D', 'keras.layers.SeparableConvolution2D') -class SeparableConv2D(tf_convolutional_layers.SeparableConv2D, Layer): +class SeparableConv2D(SeparableConv): """Depthwise separable 2D convolution. 
Separable convolutions consist in first performing @@ -958,15 +1526,15 @@ class SeparableConv2D(tf_convolutional_layers.SeparableConv2D, Layer): pointwise_constraint=None, bias_constraint=None, **kwargs): - if data_format is None: - data_format = K.image_data_format() super(SeparableConv2D, self).__init__( + rank=2, filters=filters, kernel_size=kernel_size, strides=strides, padding=padding, data_format=data_format, dilation_rate=dilation_rate, + depth_multiplier=depth_multiplier, activation=activations.get(activation), use_bias=use_bias, depthwise_initializer=initializers.get(depthwise_initializer), @@ -981,47 +1549,224 @@ class SeparableConv2D(tf_convolutional_layers.SeparableConv2D, Layer): bias_constraint=constraints.get(bias_constraint), **kwargs) + def call(self, inputs): + # Apply the actual ops. + if self.data_format == 'channels_last': + strides = (1,) + self.strides + (1,) + else: + strides = (1, 1) + self.strides + outputs = nn.separable_conv2d( + inputs, + self.depthwise_kernel, + self.pointwise_kernel, + strides=strides, + padding=self.padding.upper(), + rate=self.dilation_rate, + data_format=conv_utils.convert_data_format(self.data_format, ndim=4)) + + if self.use_bias: + outputs = nn.bias_add( + outputs, + self.bias, + data_format=conv_utils.convert_data_format(self.data_format, ndim=4)) + + if self.activation is not None: + return self.activation(outputs) + return outputs + + +@tf_export('keras.layers.DepthwiseConv2D') +class DepthwiseConv2D(Conv2D): + """Depthwise separable 2D convolution. + + Depthwise Separable convolutions consists in performing + just the first step in a depthwise spatial convolution + (which acts on each input channel separately). + The `depth_multiplier` argument controls how many + output channels are generated per input channel in the depthwise step. + + Arguments: + kernel_size: An integer or tuple/list of 2 integers, specifying the + width and height of the 2D convolution window. 
+ Can be a single integer to specify the same value for + all spatial dimensions. + strides: An integer or tuple/list of 2 integers, + specifying the strides of the convolution along the width and height. + Can be a single integer to specify the same value for + all spatial dimensions. + Specifying any stride value != 1 is incompatible with specifying + any `dilation_rate` value != 1. + padding: One of `'valid'` or `'same'` (case-insensitive). + depth_multiplier: The number of depthwise convolution output channels + for each input channel. + The total number of depthwise convolution output + channels will be equal to `filters_in * depth_multiplier`. + data_format: A string, + one of `channels_last` (default) or `channels_first`. + The ordering of the dimensions in the inputs. + `channels_last` corresponds to inputs with shape + `(batch, height, width, channels)` while `channels_first` + corresponds to inputs with shape + `(batch, channels, height, width)`. + It defaults to the `image_data_format` value found in your + Keras config file at `~/.keras/keras.json`. + If you never set it, then it will be 'channels_last'. + activation: Activation function to use. + If you don't specify anything, no activation is applied + (i.e. 'linear' activation: `a(x) = x`). + use_bias: Boolean, whether the layer uses a bias vector. + depthwise_initializer: Initializer for the depthwise kernel matrix. + bias_initializer: Initializer for the bias vector. + depthwise_regularizer: Regularizer function applied to + the depthwise kernel matrix. + bias_regularizer: Regularizer function applied to the bias vector. + activity_regularizer: Regularizer function applied to + the output of the layer (its 'activation'). + depthwise_constraint: Constraint function applied to + the depthwise kernel matrix. + bias_constraint: Constraint function applied to the bias vector.
+ + Input shape: + 4D tensor with shape: + `[batch, channels, rows, cols]` if data_format='channels_first' + or 4D tensor with shape: + `[batch, rows, cols, channels]` if data_format='channels_last'. + + Output shape: + 4D tensor with shape: + `[batch, filters, new_rows, new_cols]` if data_format='channels_first' + or 4D tensor with shape: + `[batch, new_rows, new_cols, filters]` if data_format='channels_last'. + `rows` and `cols` values might have changed due to padding. + """ + + def __init__(self, + kernel_size, + strides=(1, 1), + padding='valid', + depth_multiplier=1, + data_format=None, + activation=None, + use_bias=True, + depthwise_initializer='glorot_uniform', + bias_initializer='zeros', + depthwise_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + depthwise_constraint=None, + bias_constraint=None, + **kwargs): + super(DepthwiseConv2D, self).__init__( + filters=None, + kernel_size=kernel_size, + strides=strides, + padding=padding, + data_format=data_format, + activation=activation, + use_bias=use_bias, + bias_regularizer=bias_regularizer, + activity_regularizer=activity_regularizer, + bias_constraint=bias_constraint, + **kwargs) + self.depth_multiplier = depth_multiplier + self.depthwise_initializer = initializers.get(depthwise_initializer) + self.depthwise_regularizer = regularizers.get(depthwise_regularizer) + self.depthwise_constraint = constraints.get(depthwise_constraint) + self.bias_initializer = initializers.get(bias_initializer) + + def build(self, input_shape): + if len(input_shape) < 4: + raise ValueError('Inputs to `DepthwiseConv2D` should have rank 4. ' + 'Received input shape:', str(input_shape)) + if self.data_format == 'channels_first': + channel_axis = 1 + else: + channel_axis = 3 + if input_shape[channel_axis] is None: + raise ValueError('The channel dimension of the inputs to ' + '`DepthwiseConv2D` ' + 'should be defined. 
Found `None`.') + input_dim = int(input_shape[channel_axis]) + depthwise_kernel_shape = (self.kernel_size[0], + self.kernel_size[1], + input_dim, + self.depth_multiplier) + + self.depthwise_kernel = self.add_weight( + shape=depthwise_kernel_shape, + initializer=self.depthwise_initializer, + name='depthwise_kernel', + regularizer=self.depthwise_regularizer, + constraint=self.depthwise_constraint) + + if self.use_bias: + self.bias = self.add_weight(shape=(input_dim * self.depth_multiplier,), + initializer=self.bias_initializer, + name='bias', + regularizer=self.bias_regularizer, + constraint=self.bias_constraint) + else: + self.bias = None + # Set input spec. + self.input_spec = InputSpec(ndim=4, axes={channel_axis: input_dim}) + self.built = True + + def call(self, inputs, training=None): + outputs = backend.depthwise_conv2d( + inputs, + self.depthwise_kernel, + strides=self.strides, + padding=self.padding, + dilation_rate=self.dilation_rate, + data_format=self.data_format) + + if self.bias: + outputs = backend.bias_add( + outputs, + self.bias, + data_format=self.data_format) + + if self.activation is not None: + return self.activation(outputs) + + return outputs + + @shape_type_conversion + def compute_output_shape(self, input_shape): + if self.data_format == 'channels_first': + rows = input_shape[2] + cols = input_shape[3] + out_filters = input_shape[1] * self.depth_multiplier + elif self.data_format == 'channels_last': + rows = input_shape[1] + cols = input_shape[2] + out_filters = input_shape[3] * self.depth_multiplier + + rows = conv_utils.conv_output_length(rows, self.kernel_size[0], + self.padding, + self.strides[0]) + cols = conv_utils.conv_output_length(cols, self.kernel_size[1], + self.padding, + self.strides[1]) + if self.data_format == 'channels_first': + return (input_shape[0], out_filters, rows, cols) + elif self.data_format == 'channels_last': + return (input_shape[0], rows, cols, out_filters) + def get_config(self): - config = { - 'filters': - 
self.filters, - 'kernel_size': - self.kernel_size, - 'strides': - self.strides, - 'padding': - self.padding, - 'data_format': - self.data_format, - 'dilation_rate': - self.dilation_rate, - 'activation': - activations.serialize(self.activation), - 'use_bias': - self.use_bias, - 'depthwise_initializer': - initializers.serialize(self.depthwise_initializer), - 'pointwise_initializer': - initializers.serialize(self.pointwise_initializer), - 'bias_initializer': - initializers.serialize(self.bias_initializer), - 'depthwise_regularizer': - regularizers.serialize(self.depthwise_regularizer), - 'pointwise_regularizer': - regularizers.serialize(self.pointwise_regularizer), - 'bias_regularizer': - regularizers.serialize(self.bias_regularizer), - 'activity_regularizer': - regularizers.serialize(self.activity_regularizer), - 'depthwise_constraint': - constraints.serialize(self.depthwise_constraint), - 'pointwise_constraint': - constraints.serialize(self.pointwise_constraint), - 'bias_constraint': - constraints.serialize(self.bias_constraint) - } - base_config = super(SeparableConv2D, self).get_config() - return dict(list(base_config.items()) + list(config.items())) + config = super(DepthwiseConv2D, self).get_config() + config.pop('filters') + config.pop('kernel_initializer') + config.pop('kernel_regularizer') + config.pop('kernel_constraint') + config['depth_multiplier'] = self.depth_multiplier + config['depthwise_initializer'] = initializers.serialize( + self.depthwise_initializer) + config['depthwise_regularizer'] = regularizers.serialize( + self.depthwise_regularizer) + config['depthwise_constraint'] = constraints.serialize( + self.depthwise_constraint) + return config @tf_export('keras.layers.UpSampling1D') @@ -1051,7 +1796,7 @@ class UpSampling1D(Layer): return tensor_shape.TensorShape([input_shape[0], size, input_shape[2]]) def call(self, inputs): - output = K.repeat_elements(inputs, self.size, axis=1) + output = backend.repeat_elements(inputs, self.size, axis=1) return 
output def get_config(self): @@ -1120,7 +1865,8 @@ class UpSampling2D(Layer): [input_shape[0], height, width, input_shape[3]]) def call(self, inputs): - return K.resize_images(inputs, self.size[0], self.size[1], self.data_format) + return backend.resize_images( + inputs, self.size[0], self.size[1], self.data_format) def get_config(self): config = {'size': self.size, 'data_format': self.data_format} @@ -1192,8 +1938,8 @@ class UpSampling3D(Layer): [input_shape[0], dim1, dim2, dim3, input_shape[4]]) def call(self, inputs): - return K.resize_volumes(inputs, self.size[0], self.size[1], self.size[2], - self.data_format) + return backend.resize_volumes( + inputs, self.size[0], self.size[1], self.size[2], self.data_format) def get_config(self): config = {'size': self.size, 'data_format': self.data_format} @@ -1234,7 +1980,7 @@ class ZeroPadding1D(Layer): return tensor_shape.TensorShape([input_shape[0], length, input_shape[2]]) def call(self, inputs): - return K.temporal_padding(inputs, padding=self.padding) + return backend.temporal_padding(inputs, padding=self.padding) def get_config(self): config = {'padding': self.padding} @@ -1335,7 +2081,7 @@ class ZeroPadding2D(Layer): [input_shape[0], rows, cols, input_shape[3]]) def call(self, inputs): - return K.spatial_2d_padding( + return backend.spatial_2d_padding( inputs, padding=self.padding, data_format=self.data_format) def get_config(self): @@ -1453,7 +2199,7 @@ class ZeroPadding3D(Layer): [input_shape[0], dim1, dim2, dim3, input_shape[4]]) def call(self, inputs): - return K.spatial_3d_padding( + return backend.spatial_3d_padding( inputs, padding=self.padding, data_format=self.data_format) def get_config(self): diff --git a/tensorflow/python/keras/_impl/keras/layers/convolutional_recurrent.py b/tensorflow/python/keras/_impl/keras/layers/convolutional_recurrent.py index d95a0942452afa82e277c358be5c3b2ba061ac98..6b2a1d98fe736744755134aa3aef26c433a604bc 100644 --- 
a/tensorflow/python/keras/_impl/keras/layers/convolutional_recurrent.py +++ b/tensorflow/python/keras/_impl/keras/layers/convolutional_recurrent.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== +# pylint: disable=protected-access """Convolutional-recurrent layers. """ from __future__ import absolute_import @@ -26,179 +27,456 @@ from tensorflow.python.keras._impl.keras import constraints from tensorflow.python.keras._impl.keras import initializers from tensorflow.python.keras._impl.keras import regularizers from tensorflow.python.keras._impl.keras.engine import InputSpec +from tensorflow.python.keras._impl.keras.engine import Layer from tensorflow.python.keras._impl.keras.engine.base_layer import shape_type_conversion -from tensorflow.python.keras._impl.keras.layers.recurrent import Recurrent +from tensorflow.python.keras._impl.keras.layers.recurrent import _generate_dropout_mask +from tensorflow.python.keras._impl.keras.layers.recurrent import RNN from tensorflow.python.keras._impl.keras.utils import conv_utils +from tensorflow.python.keras._impl.keras.utils import generic_utils from tensorflow.python.util.tf_export import tf_export -class ConvRecurrent2D(Recurrent): - """Abstract base class for convolutional recurrent layers. - - Do not use in a model -- it's not a functional layer! +class ConvRNN2D(RNN): + """Base class for convolutional-recurrent layers. Arguments: - filters: Integer, the dimensionality of the output space - (i.e. the number of output filters in the convolution). - kernel_size: An integer or tuple/list of n integers, specifying the - dimensions of the convolution window. - strides: An integer or tuple/list of n integers, - specifying the strides of the convolution. - Specifying any stride value != 1 is incompatible with specifying - any `dilation_rate` value != 1. 
- padding: One of `"valid"` or `"same"` (case-insensitive). - data_format: A string, - one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape - `(batch, time, ..., channels)` - while `channels_first` corresponds to - inputs with shape `(batch, time, channels, ...)`. - It defaults to the `image_data_format` value found in your - Keras config file at `~/.keras/keras.json`. - If you never set it, then it will be "channels_last". - dilation_rate: An integer or tuple/list of n integers, specifying - the dilation rate to use for dilated convolution. - Currently, specifying any `dilation_rate` value != 1 is - incompatible with specifying any `strides` value != 1. - return_sequences: Boolean. Whether to return the last output - in the output sequence, or the full sequence. - go_backwards: Boolean (default False). - If True, rocess the input sequence backwards. - stateful: Boolean (default False). If True, the last state - for each sample at index i in a batch will be used as initial - state for the sample of index i in the following batch. + cell: A RNN cell instance. A RNN cell is a class that has: + - a `call(input_at_t, states_at_t)` method, returning + `(output_at_t, states_at_t_plus_1)`. The call method of the + cell can also take the optional argument `constants`, see + section "Note on passing external constants" below. + - a `state_size` attribute. This can be a single integer + (single state) in which case it is + the number of channels of the recurrent state + (which should be the same as the number of channels of the cell + output). This can also be a list/tuple of integers + (one size per state). In this case, the first entry + (`state_size[0]`) should be the same as + the size of the cell output. + return_sequences: Boolean. Whether to return the last output + in the output sequence, or the full sequence. + return_state: Boolean.
Whether to return the last state + in addition to the output. + go_backwards: Boolean (default False). + If True, process the input sequence backwards and return the + reversed sequence. + stateful: Boolean (default False). If True, the last state + for each sample at index i in a batch will be used as initial + state for the sample of index i in the following batch. + input_shape: Use this argument to specify the shape of the + input when this layer is the first one in a model. Input shape: - 5D tensor with shape `(num_samples, timesteps, channels, rows, cols)`. + 5D tensor with shape: + `(samples, timesteps, channels, rows, cols)` + if data_format='channels_first' or 5D tensor with shape: + `(samples, timesteps, rows, cols, channels)` + if data_format='channels_last'. Output shape: - - if `return_sequences`: 5D tensor with shape - `(num_samples, timesteps, channels, rows, cols)`. - - else, 4D tensor with shape `(num_samples, channels, rows, cols)`. - - # Masking - This layer supports masking for input data with a variable number - of timesteps. To introduce masks to your data, - use an `Embedding` layer with the `mask_zero` parameter - set to `True`. - **Note:** for the time being, masking is only supported with Theano. - - # Note on using statefulness in RNNs - You can set RNN layers to be 'stateful', which means that the states - computed for the samples in one batch will be reused as initial states - for the samples in the next batch. - This assumes a one-to-one mapping between - samples in different successive batches. - - To enable statefulness: - - specify `stateful=True` in the layer constructor. - - specify a fixed batch size for your model, by passing - a `batch_input_size=(...)` to the first layer in your model. - This is the expected shape of your inputs *including the batch - size*. - It should be a tuple of integers, e.g. `(32, 10, 100)`. 
- - To reset the states of your model, call `.reset_states()` on either - a specific layer, or on your entire model. + - if `return_state`: a list of tensors. The first tensor is + the output. The remaining tensors are the last states, + each 5D tensor with shape: + `(samples, timesteps, filters, new_rows, new_cols)` + if data_format='channels_first' + or 5D tensor with shape: + `(samples, timesteps, new_rows, new_cols, filters)` + if data_format='channels_last'. + `rows` and `cols` values might have changed due to padding. + - if `return_sequences`: 5D tensor with shape: + `(samples, timesteps, filters, new_rows, new_cols)` + if data_format='channels_first' + or 5D tensor with shape: + `(samples, timesteps, new_rows, new_cols, filters)` + if data_format='channels_last'. + - else, 4D tensor with shape: + `(samples, filters, new_rows, new_cols)` + if data_format='channels_first' + or 4D tensor with shape: + `(samples, new_rows, new_cols, filters)` + if data_format='channels_last'. + + Masking: + This layer supports masking for input data with a variable number + of timesteps. To introduce masks to your data, + use an Embedding layer with the `mask_zero` parameter + set to `True`. + + Note on using statefulness in RNNs: + You can set RNN layers to be 'stateful', which means that the states + computed for the samples in one batch will be reused as initial states + for the samples in the next batch. This assumes a one-to-one mapping + between samples in different successive batches. + To enable statefulness: + - specify `stateful=True` in the layer constructor. + - specify a fixed batch size for your model, by passing + - if sequential model: + `batch_input_shape=(...)` to the first layer in your model. + - if functional model with 1 or more Input layers: + `batch_shape=(...)` to all the first layers in your model. + This is the expected shape of your inputs + *including the batch size*. + It should be a tuple of integers, + e.g. `(32, 10, 100, 100, 32)`. 
+ Note that the number of rows and columns should be specified + too. + - specify `shuffle=False` when calling fit(). + To reset the states of your model, call `.reset_states()` on either + a specific layer, or on your entire model. + + Note on specifying the initial state of RNNs: + You can specify the initial state of RNN layers symbolically by + calling them with the keyword argument `initial_state`. The value of + `initial_state` should be a tensor or list of tensors representing + the initial state of the RNN layer. + You can specify the initial state of RNN layers numerically by + calling `reset_states` with the keyword argument `states`. The value of + `states` should be a numpy array or list of numpy arrays representing + the initial state of the RNN layer. + + Note on passing external constants to RNNs: + You can pass "external" constants to the cell using the `constants` + keyword argument of `RNN.__call__` (as well as `RNN.call`) method. This + requires that the `cell.call` method accepts the same keyword argument + `constants`. Such constants can be used to condition the cell + transformation on additional static inputs (not changing over time), + a.k.a. an attention mechanism. 
""" def __init__(self, - filters, - kernel_size, - strides=(1, 1), - padding='valid', - data_format=None, - dilation_rate=(1, 1), + cell, return_sequences=False, + return_state=False, go_backwards=False, stateful=False, + unroll=False, **kwargs): - super(ConvRecurrent2D, self).__init__(**kwargs) - self.filters = filters - self.kernel_size = conv_utils.normalize_tuple(kernel_size, 2, 'kernel_size') - self.strides = conv_utils.normalize_tuple(strides, 2, 'strides') - self.padding = conv_utils.normalize_padding(padding) - self.data_format = conv_utils.normalize_data_format(data_format) - self.dilation_rate = conv_utils.normalize_tuple(dilation_rate, 2, - 'dilation_rate') - self.return_sequences = return_sequences - self.go_backwards = go_backwards - self.stateful = stateful + if unroll: + raise TypeError('Unrolling isn\'t possible with ' + 'convolutional RNNs.') + if isinstance(cell, (list, tuple)): + # The StackedConvRNN2DCells isn't implemented yet. + raise TypeError('It is not possible at the moment to' + 'stack convolutional cells.') + super(ConvRNN2D, self).__init__(cell, + return_sequences, + return_state, + go_backwards, + stateful, + unroll, + **kwargs) self.input_spec = [InputSpec(ndim=5)] - self.state_spec = None + self.states = None @shape_type_conversion def compute_output_shape(self, input_shape): if isinstance(input_shape, list): input_shape = input_shape[0] - if self.data_format == 'channels_first': + + cell = self.cell + if cell.data_format == 'channels_first': rows = input_shape[3] cols = input_shape[4] - elif self.data_format == 'channels_last': + elif cell.data_format == 'channels_last': rows = input_shape[2] cols = input_shape[3] - rows = conv_utils.conv_output_length( - rows, - self.kernel_size[0], - padding=self.padding, - stride=self.strides[0], - dilation=self.dilation_rate[0]) - cols = conv_utils.conv_output_length( - cols, - self.kernel_size[1], - padding=self.padding, - stride=self.strides[1], - dilation=self.dilation_rate[1]) + rows = 
conv_utils.conv_output_length(rows, + cell.kernel_size[0], + padding=cell.padding, + stride=cell.strides[0], + dilation=cell.dilation_rate[0]) + cols = conv_utils.conv_output_length(cols, + cell.kernel_size[1], + padding=cell.padding, + stride=cell.strides[1], + dilation=cell.dilation_rate[1]) + + if cell.data_format == 'channels_first': + output_shape = input_shape[:2] + (cell.filters, rows, cols) + elif cell.data_format == 'channels_last': + output_shape = input_shape[:2] + (rows, cols, cell.filters) + + if not self.return_sequences: + output_shape = output_shape[:1] + output_shape[2:] + + if self.return_state: + output_shape = [output_shape] + if cell.data_format == 'channels_first': + output_shape += [(input_shape[0], cell.filters, rows, cols) + for _ in range(2)] + elif cell.data_format == 'channels_last': + output_shape += [(input_shape[0], rows, cols, cell.filters) + for _ in range(2)] + return output_shape + + @shape_type_conversion + def build(self, input_shape): + # Note input_shape will be list of shapes of initial states and + # constants if these are passed in __call__. 
+ if self._num_constants is not None: + constants_shape = input_shape[-self._num_constants:] + else: + constants_shape = None + + if isinstance(input_shape, list): + input_shape = input_shape[0] + + batch_size = input_shape[0] if self.stateful else None + self.input_spec[0] = InputSpec(shape=(batch_size, None) + input_shape[2:5]) + + # allow cell (if layer) to build before we set or validate state_spec + if isinstance(self.cell, Layer): + step_input_shape = (input_shape[0],) + input_shape[2:] + if constants_shape is not None: + self.cell.build([step_input_shape] + constants_shape) + else: + self.cell.build(step_input_shape) + + # set or validate state_spec + if hasattr(self.cell.state_size, '__len__'): + state_size = list(self.cell.state_size) + else: + state_size = [self.cell.state_size] + + if self.state_spec is not None: + # initial_state was passed in call, check compatibility + if self.cell.data_format == 'channels_first': + ch_dim = 1 + elif self.cell.data_format == 'channels_last': + ch_dim = 3 + if [spec.shape[ch_dim] for spec in self.state_spec] != state_size: + raise ValueError( + 'An initial_state was passed that is not compatible with ' + '`cell.state_size`. 
Received `state_spec`={}; ' + 'However `cell.state_size` is ' + '{}'.format([spec.shape for spec in self.state_spec], + self.cell.state_size)) + else: + if self.cell.data_format == 'channels_first': + self.state_spec = [InputSpec(shape=(None, dim, None, None)) + for dim in state_size] + elif self.cell.data_format == 'channels_last': + self.state_spec = [InputSpec(shape=(None, None, None, dim)) + for dim in state_size] + if self.stateful: + self.reset_states() + self.built = True + + def get_initial_state(self, inputs): + # (samples, timesteps, rows, cols, filters) + initial_state = K.zeros_like(inputs) + # (samples, rows, cols, filters) + initial_state = K.sum(initial_state, axis=1) + shape = list(self.cell.kernel_shape) + shape[-1] = self.cell.filters + initial_state = self.cell.input_conv(initial_state, + K.zeros(tuple(shape)), + padding=self.cell.padding) + + if hasattr(self.cell.state_size, '__len__'): + return [initial_state for _ in self.cell.state_size] + else: + return [initial_state] + + def __call__(self, inputs, initial_state=None, constants=None, **kwargs): + inputs, initial_state, constants = self._standardize_args( + inputs, initial_state, constants) + + if initial_state is None and constants is None: + return super(ConvRNN2D, self).__call__(inputs, **kwargs) + + # If any of `initial_state` or `constants` are specified and are Keras + # tensors, then add them to the inputs and temporarily modify the + # input_spec to include them. 
+ + additional_inputs = [] + additional_specs = [] + if initial_state is not None: + kwargs['initial_state'] = initial_state + additional_inputs += initial_state + self.state_spec = [] + for state in initial_state: + shape = K.int_shape(state) + self.state_spec.append(InputSpec(shape=shape)) + + additional_specs += self.state_spec + if constants is not None: + kwargs['constants'] = constants + additional_inputs += constants + self.constants_spec = [InputSpec(shape=K.int_shape(constant)) + for constant in constants] + self._num_constants = len(constants) + additional_specs += self.constants_spec + # at this point additional_inputs cannot be empty + for tensor in additional_inputs: + if K.is_keras_tensor(tensor) != K.is_keras_tensor(additional_inputs[0]): + raise ValueError('The initial state or constants of an RNN' + ' layer cannot be specified with a mix of' + ' Keras tensors and non-Keras tensors') + + if K.is_keras_tensor(additional_inputs[0]): + # Compute the full input spec, including state and constants + full_input = [inputs] + additional_inputs + full_input_spec = self.input_spec + additional_specs + # Perform the call with temporarily replaced input_spec + original_input_spec = self.input_spec + self.input_spec = full_input_spec + output = super(ConvRNN2D, self).__call__(full_input, **kwargs) + self.input_spec = original_input_spec + return output + else: + return super(ConvRNN2D, self).__call__(inputs, **kwargs) + + def call(self, + inputs, + mask=None, + training=None, + initial_state=None, + constants=None): + # note that the .build() method of subclasses MUST define + # self.input_spec and self.state_spec with complete input shapes. 
+ if isinstance(inputs, list): + inputs = inputs[0] + if initial_state is not None: + pass + elif self.stateful: + initial_state = self.states + else: + initial_state = self.get_initial_state(inputs) + + if isinstance(mask, list): + mask = mask[0] + + if len(initial_state) != len(self.states): + raise ValueError('Layer has ' + str(len(self.states)) + + ' states but was passed ' + + str(len(initial_state)) + + ' initial states.') + timesteps = K.int_shape(inputs)[1] + + kwargs = {} + if generic_utils.has_arg(self.cell.call, 'training'): + kwargs['training'] = training + + if constants: + if not generic_utils.has_arg(self.cell.call, 'constants'): + raise ValueError('RNN cell does not support constants') + + def step(inputs, states): + constants = states[-self._num_constants:] + states = states[:-self._num_constants] + return self.cell.call(inputs, states, constants=constants, + **kwargs) + else: + def step(inputs, states): + return self.cell.call(inputs, states, **kwargs) + + last_output, outputs, states = K.rnn(step, + inputs, + initial_state, + constants=constants, + go_backwards=self.go_backwards, + mask=mask, + input_length=timesteps) + if self.stateful: + updates = [] + for i in range(len(states)): + updates.append(K.update(self.states[i], states[i])) + self.add_update(updates, inputs=True) + if self.return_sequences: - if self.data_format == 'channels_first': - output_shape = (input_shape[0], input_shape[1], self.filters, rows, - cols) - elif self.data_format == 'channels_last': - output_shape = (input_shape[0], input_shape[1], rows, cols, - self.filters) + output = outputs else: - if self.data_format == 'channels_first': - output_shape = (input_shape[0], self.filters, rows, cols) - elif self.data_format == 'channels_last': - output_shape = (input_shape[0], rows, cols, self.filters) + output = last_output + + # Properly set learning phase + if getattr(last_output, '_uses_learning_phase', False): + output._uses_learning_phase = True if self.return_state: - if 
self.data_format == 'channels_first': - output_shape = [output_shape] + [ - (input_shape[0], self.filters, rows, cols) for _ in range(2) - ] - elif self.data_format == 'channels_last': - output_shape = [output_shape] + [ - (input_shape[0], rows, cols, self.filters) for _ in range(2) - ] + if not isinstance(states, (list, tuple)): + states = [states] + else: + states = list(states) + return [output] + states + else: + return output - return output_shape + def reset_states(self, states=None): + if not self.stateful: + raise AttributeError('Layer must be stateful.') + input_shape = self.input_spec[0].shape + state_shape = self.compute_output_shape(input_shape) + if self.return_state: + state_shape = state_shape[0] + if self.return_sequences: + state_shape = state_shape[:1].concatenate(state_shape[2:]) + if None in state_shape: + raise ValueError('If a RNN is stateful, it needs to know ' + 'its batch size. Specify the batch size ' + 'of your input tensors: \n' + '- If using a Sequential model, ' + 'specify the batch size by passing ' + 'a `batch_input_shape` ' + 'argument to your first layer.\n' + '- If using the functional API, specify ' + 'the time dimension by passing a ' + '`batch_shape` argument to your Input layer.\n' + 'The same thing goes for the number of rows and ' + 'columns.') - def get_config(self): - config = { - 'filters': self.filters, - 'kernel_size': self.kernel_size, - 'strides': self.strides, - 'padding': self.padding, - 'data_format': self.data_format, - 'dilation_rate': self.dilation_rate, - 'return_sequences': self.return_sequences, - 'go_backwards': self.go_backwards, - 'stateful': self.stateful - } - base_config = super(ConvRecurrent2D, self).get_config() - return dict(list(base_config.items()) + list(config.items())) + # helper function + def get_tuple_shape(nb_channels): + result = list(state_shape) + if self.cell.data_format == 'channels_first': + result[1] = nb_channels + elif self.cell.data_format == 'channels_last': + result[3] = 
nb_channels + else: + raise KeyError + return tuple(result) + # initialize state if None + if self.states[0] is None: + if hasattr(self.cell.state_size, '__len__'): + self.states = [K.zeros(get_tuple_shape(dim)) + for dim in self.cell.state_size] + else: + self.states = [K.zeros(get_tuple_shape(self.cell.state_size))] + elif states is None: + if hasattr(self.cell.state_size, '__len__'): + for state, dim in zip(self.states, self.cell.state_size): + K.set_value(state, np.zeros(get_tuple_shape(dim))) + else: + K.set_value(self.states[0], + np.zeros(get_tuple_shape(self.cell.state_size))) + else: + if not isinstance(states, (list, tuple)): + states = [states] + if len(states) != len(self.states): + raise ValueError('Layer ' + self.name + ' expects ' + + str(len(self.states)) + ' states, ' + + 'but it received ' + str(len(states)) + + ' state values. Input received: ' + str(states)) + for index, (value, state) in enumerate(zip(states, self.states)): + if hasattr(self.cell.state_size, '__len__'): + dim = self.cell.state_size[index] + else: + dim = self.cell.state_size + if value.shape != get_tuple_shape(dim): + raise ValueError('State ' + str(index) + + ' is incompatible with layer ' + + self.name + ': expected shape=' + + str(get_tuple_shape(dim)) + + ', found shape=' + str(value.shape)) + # TODO(anjalisridhar): consider batch calls to `set_value`. + K.set_value(state, value) -@tf_export('keras.layers.ConvLSTM2D') -class ConvLSTM2D(ConvRecurrent2D): - """Convolutional LSTM. - It is similar to an LSTM layer, but the input transformations - and recurrent transformations are both convolutional. +class ConvLSTM2DCell(Layer): + """Cell class for the ConvLSTM2D layer. - Arguments: + # Arguments filters: Integer, the dimensionality of the output space (i.e. the number of output filters in the convolution). 
kernel_size: An integer or tuple/list of n integers, specifying the @@ -210,11 +488,6 @@ class ConvLSTM2D(ConvRecurrent2D): padding: One of `"valid"` or `"same"` (case-insensitive). data_format: A string, one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape - `(batch, time, ..., channels)` - while `channels_first` corresponds to - inputs with shape `(batch, time, channels, ...)`. It defaults to the `image_data_format` value found in your Keras config file at `~/.keras/keras.json`. If you never set it, then it will be "channels_last". @@ -229,71 +502,32 @@ class ConvLSTM2D(ConvRecurrent2D): for the recurrent step. use_bias: Boolean, whether the layer uses a bias vector. kernel_initializer: Initializer for the `kernel` weights matrix, - used for the linear transformation of the inputs.. + used for the linear transformation of the inputs. recurrent_initializer: Initializer for the `recurrent_kernel` weights matrix, - used for the linear transformation of the recurrent state.. + used for the linear transformation of the recurrent state. bias_initializer: Initializer for the bias vector. unit_forget_bias: Boolean. If True, add 1 to the bias of the forget gate at initialization. Use in combination with `bias_initializer="zeros"`. - This is recommended in [Jozefowicz et - al.](http://www.jmlr.org/proceedings/papers/v37/jozefowicz15.pdf) + This is recommended in [Jozefowicz et al.] + (http://www.jmlr.org/proceedings/papers/v37/jozefowicz15.pdf) kernel_regularizer: Regularizer function applied to the `kernel` weights matrix. recurrent_regularizer: Regularizer function applied to the `recurrent_kernel` weights matrix. bias_regularizer: Regularizer function applied to the bias vector. - activity_regularizer: Regularizer function applied to - the output of the layer (its "activation").. kernel_constraint: Constraint function applied to the `kernel` weights matrix. 
recurrent_constraint: Constraint function applied to the `recurrent_kernel` weights matrix. bias_constraint: Constraint function applied to the bias vector. - return_sequences: Boolean. Whether to return the last output - in the output sequence, or the full sequence. - go_backwards: Boolean (default False). - If True, rocess the input sequence backwards. - stateful: Boolean (default False). If True, the last state - for each sample at index i in a batch will be used as initial - state for the sample of index i in the following batch. dropout: Float between 0 and 1. Fraction of the units to drop for the linear transformation of the inputs. recurrent_dropout: Float between 0 and 1. Fraction of the units to drop for the linear transformation of the recurrent state. - - Input shape: - - if data_format='channels_first' - 5D tensor with shape: - `(samples,time, channels, rows, cols)` - - if data_format='channels_last' - 5D tensor with shape: - `(samples,time, rows, cols, channels)` - - Output shape: - - if `return_sequences` - - if data_format='channels_first' - 5D tensor with shape: - `(samples, time, filters, output_row, output_col)` - - if data_format='channels_last' - 5D tensor with shape: - `(samples, time, output_row, output_col, filters)` - - else - - if data_format ='channels_first' - 4D tensor with shape: - `(samples, filters, output_row, output_col)` - - if data_format='channels_last' - 4D tensor with shape: - `(samples, output_row, output_col, filters)` - where o_row and o_col depend on the shape of the filter and - the padding - - Raises: - ValueError: in case of invalid constructor arguments. 
- """ def __init__(self, @@ -313,27 +547,20 @@ class ConvLSTM2D(ConvRecurrent2D): kernel_regularizer=None, recurrent_regularizer=None, bias_regularizer=None, - activity_regularizer=None, kernel_constraint=None, recurrent_constraint=None, bias_constraint=None, - return_sequences=False, - go_backwards=False, - stateful=False, dropout=0., recurrent_dropout=0., **kwargs): - super(ConvLSTM2D, self).__init__( - filters, - kernel_size, - strides=strides, - padding=padding, - data_format=data_format, - dilation_rate=dilation_rate, - return_sequences=return_sequences, - go_backwards=go_backwards, - stateful=stateful, - **kwargs) + super(ConvLSTM2DCell, self).__init__(**kwargs) + self.filters = filters + self.kernel_size = conv_utils.normalize_tuple(kernel_size, 2, 'kernel_size') + self.strides = conv_utils.normalize_tuple(strides, 2, 'strides') + self.padding = conv_utils.normalize_padding(padding) + self.data_format = conv_utils.normalize_data_format(data_format) + self.dilation_rate = conv_utils.normalize_tuple(dilation_rate, 2, + 'dilation_rate') self.activation = activations.get(activation) self.recurrent_activation = activations.get(recurrent_activation) self.use_bias = use_bias @@ -346,7 +573,6 @@ class ConvLSTM2D(ConvRecurrent2D): self.kernel_regularizer = regularizers.get(kernel_regularizer) self.recurrent_regularizer = regularizers.get(recurrent_regularizer) self.bias_regularizer = regularizers.get(bias_regularizer) - self.activity_regularizer = regularizers.get(activity_regularizer) self.kernel_constraint = constraints.get(kernel_constraint) self.recurrent_constraint = constraints.get(recurrent_constraint) @@ -354,45 +580,29 @@ class ConvLSTM2D(ConvRecurrent2D): self.dropout = min(1., max(0., dropout)) self.recurrent_dropout = min(1., max(0., recurrent_dropout)) - self.state_spec = [InputSpec(ndim=4), InputSpec(ndim=4)] + self.state_size = (self.filters, self.filters) + self._dropout_mask = None + self._recurrent_dropout_mask = None - @shape_type_conversion def 
build(self, input_shape): - if isinstance(input_shape, list): - input_shape = input_shape[0] - batch_size = input_shape[0] if self.stateful else None - self.input_spec[0] = InputSpec(shape=(batch_size, None) + input_shape[2:]) - if self.stateful: - self.reset_states() - else: - # initial states: 2 all-zero tensor of shape (filters) - self.states = [None, None] if self.data_format == 'channels_first': - channel_axis = 2 + channel_axis = 1 else: channel_axis = -1 if input_shape[channel_axis] is None: raise ValueError('The channel dimension of the inputs ' 'should be defined. Found `None`.') input_dim = input_shape[channel_axis] - state_shape = [None] * 4 - state_shape[channel_axis] = input_dim - state_shape = tuple(state_shape) - self.state_spec = [ - InputSpec(shape=state_shape), - InputSpec(shape=state_shape) - ] kernel_shape = self.kernel_size + (input_dim, self.filters * 4) self.kernel_shape = kernel_shape recurrent_kernel_shape = self.kernel_size + (self.filters, self.filters * 4) - self.kernel = self.add_weight( - shape=kernel_shape, - initializer=self.kernel_initializer, - name='kernel', - regularizer=self.kernel_regularizer, - constraint=self.kernel_constraint) + self.kernel = self.add_weight(shape=kernel_shape, + initializer=self.kernel_initializer, + name='kernel', + regularizer=self.kernel_regularizer, + constraint=self.kernel_constraint) self.recurrent_kernel = self.add_weight( shape=recurrent_kernel_shape, initializer=self.recurrent_initializer, @@ -400,25 +610,24 @@ class ConvLSTM2D(ConvRecurrent2D): regularizer=self.recurrent_regularizer, constraint=self.recurrent_constraint) if self.use_bias: - self.bias = self.add_weight( - shape=(self.filters * 4,), - initializer=self.bias_initializer, - name='bias', - regularizer=self.bias_regularizer, - constraint=self.bias_constraint) + self.bias = self.add_weight(shape=(self.filters * 4,), + initializer=self.bias_initializer, + name='bias', + regularizer=self.bias_regularizer, + constraint=self.bias_constraint) 
if self.unit_forget_bias: bias_value = np.zeros((self.filters * 4,)) - bias_value[self.filters:self.filters * 2] = 1. + bias_value[self.filters: self.filters * 2] = 1. K.set_value(self.bias, bias_value) else: self.bias = None self.kernel_i = self.kernel[:, :, :, :self.filters] self.recurrent_kernel_i = self.recurrent_kernel[:, :, :, :self.filters] - self.kernel_f = self.kernel[:, :, :, self.filters:self.filters * 2] + self.kernel_f = self.kernel[:, :, :, self.filters: self.filters * 2] self.recurrent_kernel_f = self.recurrent_kernel[:, :, :, self.filters: self.filters * 2] - self.kernel_c = self.kernel[:, :, :, self.filters * 2:self.filters * 3] + self.kernel_c = self.kernel[:, :, :, self.filters * 2: self.filters * 3] self.recurrent_kernel_c = self.recurrent_kernel[:, :, :, self.filters * 2: self.filters * 3] self.kernel_o = self.kernel[:, :, :, self.filters * 3:] @@ -426,8 +635,8 @@ class ConvLSTM2D(ConvRecurrent2D): if self.use_bias: self.bias_i = self.bias[:self.filters] - self.bias_f = self.bias[self.filters:self.filters * 2] - self.bias_c = self.bias[self.filters * 2:self.filters * 3] + self.bias_f = self.bias[self.filters: self.filters * 2] + self.bias_c = self.bias[self.filters * 2: self.filters * 3] self.bias_o = self.bias[self.filters * 3:] else: self.bias_i = None @@ -436,166 +645,419 @@ class ConvLSTM2D(ConvRecurrent2D): self.bias_o = None self.built = True - def get_initial_state(self, inputs): - # (samples, timesteps, rows, cols, filters) - initial_state = K.zeros_like(inputs) - # (samples, rows, cols, filters) - initial_state = K.sum(initial_state, axis=1) - shape = list(self.kernel_shape) - shape[-1] = self.filters - initial_state = self.input_conv( - initial_state, K.zeros(tuple(shape)), padding=self.padding) - - initial_states = [initial_state for _ in range(2)] - return initial_states + def call(self, inputs, states, training=None): + if 0 < self.dropout < 1 and self._dropout_mask is None: + self._dropout_mask = _generate_dropout_mask( + 
K.ones_like(inputs), + self.dropout, + training=training, + count=4) + if (0 < self.recurrent_dropout < 1 and + self._recurrent_dropout_mask is None): + self._recurrent_dropout_mask = _generate_dropout_mask( + K.ones_like(states[1]), + self.recurrent_dropout, + training=training, + count=4) - def reset_states(self): - if not self.stateful: - raise RuntimeError('Layer must be stateful.') - input_shape = self.input_spec[0].shape + # dropout matrices for input units + dp_mask = self._dropout_mask + # dropout matrices for recurrent units + rec_dp_mask = self._recurrent_dropout_mask - if not input_shape[0]: - raise ValueError('If a RNN is stateful, a complete ' - 'input_shape must be provided ' - '(including batch size). ' - 'Got input shape: ' + str(input_shape)) + h_tm1 = states[0] # previous memory state + c_tm1 = states[1] # previous carry state - if self.return_state: - output_shape = tuple(self.compute_output_shape(input_shape)[0].as_list()) + if 0 < self.dropout < 1.: + inputs_i = inputs * dp_mask[0] + inputs_f = inputs * dp_mask[1] + inputs_c = inputs * dp_mask[2] + inputs_o = inputs * dp_mask[3] else: - output_shape = tuple(self.compute_output_shape(input_shape).as_list()) - if self.return_sequences: - output_shape = (input_shape[0],) + output_shape[2:] - else: - output_shape = (input_shape[0],) + output_shape[1:] + inputs_i = inputs + inputs_f = inputs + inputs_c = inputs + inputs_o = inputs - if hasattr(self, 'states'): - K.set_value(self.states[0], - np.zeros(output_shape)) - K.set_value(self.states[1], - np.zeros(output_shape)) - else: - self.states = [ - K.zeros(output_shape), - K.zeros(output_shape) - ] - - def get_constants(self, inputs, training=None): - constants = [] - if self.implementation == 0 and 0 < self.dropout < 1: - ones = K.zeros_like(inputs) - ones = K.sum(ones, axis=1) - ones += 1 - - def dropped_inputs(): - return K.dropout(ones, self.dropout) - - dp_mask = [ - K.in_train_phase(dropped_inputs, ones, training=training) - for _ in range(4) - 
] - constants.append(dp_mask) - else: - constants.append([K.cast_to_floatx(1.) for _ in range(4)]) - - if 0 < self.recurrent_dropout < 1: - shape = list(self.kernel_shape) - shape[-1] = self.filters - ones = K.zeros_like(inputs) - ones = K.sum(ones, axis=1) - ones = self.input_conv(ones, K.zeros(shape), padding=self.padding) - ones += 1. - - def dropped_inputs(): # pylint: disable=function-redefined - return K.dropout(ones, self.recurrent_dropout) - - rec_dp_mask = [ - K.in_train_phase(dropped_inputs, ones, training=training) - for _ in range(4) - ] - constants.append(rec_dp_mask) + if 0 < self.recurrent_dropout < 1.: + h_tm1_i = h_tm1 * rec_dp_mask[0] + h_tm1_f = h_tm1 * rec_dp_mask[1] + h_tm1_c = h_tm1 * rec_dp_mask[2] + h_tm1_o = h_tm1 * rec_dp_mask[3] else: - constants.append([K.cast_to_floatx(1.) for _ in range(4)]) - return constants + h_tm1_i = h_tm1 + h_tm1_f = h_tm1 + h_tm1_c = h_tm1 + h_tm1_o = h_tm1 + + x_i = self.input_conv(inputs_i, self.kernel_i, self.bias_i, + padding=self.padding) + x_f = self.input_conv(inputs_f, self.kernel_f, self.bias_f, + padding=self.padding) + x_c = self.input_conv(inputs_c, self.kernel_c, self.bias_c, + padding=self.padding) + x_o = self.input_conv(inputs_o, self.kernel_o, self.bias_o, + padding=self.padding) + h_i = self.recurrent_conv(h_tm1_i, + self.recurrent_kernel_i) + h_f = self.recurrent_conv(h_tm1_f, + self.recurrent_kernel_f) + h_c = self.recurrent_conv(h_tm1_c, + self.recurrent_kernel_c) + h_o = self.recurrent_conv(h_tm1_o, + self.recurrent_kernel_o) + + i = self.recurrent_activation(x_i + h_i) + f = self.recurrent_activation(x_f + h_f) + c = f * c_tm1 + i * self.activation(x_c + h_c) + o = self.recurrent_activation(x_o + h_o) + h = o * self.activation(c) + + if 0 < self.dropout + self.recurrent_dropout: + if training is None: + h._uses_learning_phase = True + + return h, [h, c] def input_conv(self, x, w, b=None, padding='valid'): - conv_out = K.conv2d( - x, - w, - strides=self.strides, - padding=padding, - 
data_format=self.data_format, - dilation_rate=self.dilation_rate) + conv_out = K.conv2d(x, w, strides=self.strides, + padding=padding, + data_format=self.data_format, + dilation_rate=self.dilation_rate) if b is not None: - conv_out = K.bias_add(conv_out, b, data_format=self.data_format) + conv_out = K.bias_add(conv_out, b, + data_format=self.data_format) return conv_out def recurrent_conv(self, x, w): - conv_out = K.conv2d( - x, w, strides=(1, 1), padding='same', data_format=self.data_format) + conv_out = K.conv2d(x, w, strides=(1, 1), + padding='same', + data_format=self.data_format) return conv_out - def step(self, inputs, states): - assert len(states) == 4 - h_tm1 = states[0] - c_tm1 = states[1] - dp_mask = states[2] - rec_dp_mask = states[3] - - x_i = self.input_conv( - inputs * dp_mask[0], self.kernel_i, self.bias_i, padding=self.padding) - x_f = self.input_conv( - inputs * dp_mask[1], self.kernel_f, self.bias_f, padding=self.padding) - x_c = self.input_conv( - inputs * dp_mask[2], self.kernel_c, self.bias_c, padding=self.padding) - x_o = self.input_conv( - inputs * dp_mask[3], self.kernel_o, self.bias_o, padding=self.padding) - h_i = self.recurrent_conv(h_tm1 * rec_dp_mask[0], self.recurrent_kernel_i) - h_f = self.recurrent_conv(h_tm1 * rec_dp_mask[1], self.recurrent_kernel_f) - h_c = self.recurrent_conv(h_tm1 * rec_dp_mask[2], self.recurrent_kernel_c) - h_o = self.recurrent_conv(h_tm1 * rec_dp_mask[3], self.recurrent_kernel_o) + def get_config(self): + config = {'filters': self.filters, + 'kernel_size': self.kernel_size, + 'strides': self.strides, + 'padding': self.padding, + 'data_format': self.data_format, + 'dilation_rate': self.dilation_rate, + 'activation': activations.serialize(self.activation), + 'recurrent_activation': activations.serialize( + self.recurrent_activation), + 'use_bias': self.use_bias, + 'kernel_initializer': initializers.serialize( + self.kernel_initializer), + 'recurrent_initializer': initializers.serialize( + 
self.recurrent_initializer), + 'bias_initializer': initializers.serialize(self.bias_initializer), + 'unit_forget_bias': self.unit_forget_bias, + 'kernel_regularizer': regularizers.serialize( + self.kernel_regularizer), + 'recurrent_regularizer': regularizers.serialize( + self.recurrent_regularizer), + 'bias_regularizer': regularizers.serialize(self.bias_regularizer), + 'kernel_constraint': constraints.serialize( + self.kernel_constraint), + 'recurrent_constraint': constraints.serialize( + self.recurrent_constraint), + 'bias_constraint': constraints.serialize(self.bias_constraint), + 'dropout': self.dropout, + 'recurrent_dropout': self.recurrent_dropout} + base_config = super(ConvLSTM2DCell, self).get_config() + return dict(list(base_config.items()) + list(config.items())) - i = self.recurrent_activation(x_i + h_i) - f = self.recurrent_activation(x_f + h_f) - c = f * c_tm1 + i * self.activation(x_c + h_c) - o = self.recurrent_activation(x_o + h_o) - h = o * self.activation(c) - return h, [h, c] + +@tf_export('keras.layers.ConvLSTM2D') +class ConvLSTM2D(ConvRNN2D): + """Convolutional LSTM. + + It is similar to an LSTM layer, but the input transformations + and recurrent transformations are both convolutional. + + Arguments: + filters: Integer, the dimensionality of the output space + (i.e. the number output of filters in the convolution). + kernel_size: An integer or tuple/list of n integers, specifying the + dimensions of the convolution window. + strides: An integer or tuple/list of n integers, + specifying the strides of the convolution. + Specifying any stride value != 1 is incompatible with specifying + any `dilation_rate` value != 1. + padding: One of `"valid"` or `"same"` (case-insensitive). + data_format: A string, + one of `channels_last` (default) or `channels_first`. + The ordering of the dimensions in the inputs. 
+ `channels_last` corresponds to inputs with shape + `(batch, time, ..., channels)` + while `channels_first` corresponds to + inputs with shape `(batch, time, channels, ...)`. + It defaults to the `image_data_format` value found in your + Keras config file at `~/.keras/keras.json`. + If you never set it, then it will be "channels_last". + dilation_rate: An integer or tuple/list of n integers, specifying + the dilation rate to use for dilated convolution. + Currently, specifying any `dilation_rate` value != 1 is + incompatible with specifying any `strides` value != 1. + activation: Activation function to use. + If you don't specify anything, no activation is applied + (ie. "linear" activation: `a(x) = x`). + recurrent_activation: Activation function to use + for the recurrent step. + use_bias: Boolean, whether the layer uses a bias vector. + kernel_initializer: Initializer for the `kernel` weights matrix, + used for the linear transformation of the inputs. + recurrent_initializer: Initializer for the `recurrent_kernel` + weights matrix, + used for the linear transformation of the recurrent state. + bias_initializer: Initializer for the bias vector. + unit_forget_bias: Boolean. + If True, add 1 to the bias of the forget gate at initialization. + Use in combination with `bias_initializer="zeros"`. + This is recommended in [Jozefowicz et al.] + (http://www.jmlr.org/proceedings/papers/v37/jozefowicz15.pdf) + kernel_regularizer: Regularizer function applied to + the `kernel` weights matrix. + recurrent_regularizer: Regularizer function applied to + the `recurrent_kernel` weights matrix. + bias_regularizer: Regularizer function applied to the bias vector. + activity_regularizer: Regularizer function applied to. + kernel_constraint: Constraint function applied to + the `kernel` weights matrix. + recurrent_constraint: Constraint function applied to + the `recurrent_kernel` weights matrix. + bias_constraint: Constraint function applied to the bias vector. 
+ return_sequences: Boolean. Whether to return the last output + in the output sequence, or the full sequence. + go_backwards: Boolean (default False). + If True, process the input sequence backwards. + stateful: Boolean (default False). If True, the last state + for each sample at index i in a batch will be used as initial + state for the sample of index i in the following batch. + dropout: Float between 0 and 1. + Fraction of the units to drop for + the linear transformation of the inputs. + recurrent_dropout: Float between 0 and 1. + Fraction of the units to drop for + the linear transformation of the recurrent state. + + Input shape: + - if data_format='channels_first' + 5D tensor with shape: + `(samples,time, channels, rows, cols)` + - if data_format='channels_last' + 5D tensor with shape: + `(samples,time, rows, cols, channels)` + + Output shape: + - if `return_sequences` + - if data_format='channels_first' + 5D tensor with shape: + `(samples, time, filters, output_row, output_col)` + - if data_format='channels_last' + 5D tensor with shape: + `(samples, time, output_row, output_col, filters)` + - else + - if data_format ='channels_first' + 4D tensor with shape: + `(samples, filters, output_row, output_col)` + - if data_format='channels_last' + 4D tensor with shape: + `(samples, output_row, output_col, filters)` + where o_row and o_col depend on the shape of the filter and + the padding + + Raises: + ValueError: in case of invalid constructor arguments. + + References: + - [Convolutional LSTM Network: A Machine Learning Approach for + Precipitation Nowcasting](http://arxiv.org/abs/1506.04214v1) + The current implementation does not include the feedback loop on the + cells output. 
+ + """ + + def __init__(self, + filters, + kernel_size, + strides=(1, 1), + padding='valid', + data_format=None, + dilation_rate=(1, 1), + activation='tanh', + recurrent_activation='hard_sigmoid', + use_bias=True, + kernel_initializer='glorot_uniform', + recurrent_initializer='orthogonal', + bias_initializer='zeros', + unit_forget_bias=True, + kernel_regularizer=None, + recurrent_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + recurrent_constraint=None, + bias_constraint=None, + return_sequences=False, + go_backwards=False, + stateful=False, + dropout=0., + recurrent_dropout=0., + **kwargs): + cell = ConvLSTM2DCell(filters=filters, + kernel_size=kernel_size, + strides=strides, + padding=padding, + data_format=data_format, + dilation_rate=dilation_rate, + activation=activation, + recurrent_activation=recurrent_activation, + use_bias=use_bias, + kernel_initializer=kernel_initializer, + recurrent_initializer=recurrent_initializer, + bias_initializer=bias_initializer, + unit_forget_bias=unit_forget_bias, + kernel_regularizer=kernel_regularizer, + recurrent_regularizer=recurrent_regularizer, + bias_regularizer=bias_regularizer, + kernel_constraint=kernel_constraint, + recurrent_constraint=recurrent_constraint, + bias_constraint=bias_constraint, + dropout=dropout, + recurrent_dropout=recurrent_dropout) + super(ConvLSTM2D, self).__init__(cell, + return_sequences=return_sequences, + go_backwards=go_backwards, + stateful=stateful, + **kwargs) + self.activity_regularizer = regularizers.get(activity_regularizer) + + def call(self, inputs, mask=None, training=None, initial_state=None): + return super(ConvLSTM2D, self).call(inputs, + mask=mask, + training=training, + initial_state=initial_state) + + @property + def filters(self): + return self.cell.filters + + @property + def kernel_size(self): + return self.cell.kernel_size + + @property + def strides(self): + return self.cell.strides + + @property + def padding(self): + 
return self.cell.padding + + @property + def data_format(self): + return self.cell.data_format + + @property + def dilation_rate(self): + return self.cell.dilation_rate + + @property + def activation(self): + return self.cell.activation + + @property + def recurrent_activation(self): + return self.cell.recurrent_activation + + @property + def use_bias(self): + return self.cell.use_bias + + @property + def kernel_initializer(self): + return self.cell.kernel_initializer + + @property + def recurrent_initializer(self): + return self.cell.recurrent_initializer + + @property + def bias_initializer(self): + return self.cell.bias_initializer + + @property + def unit_forget_bias(self): + return self.cell.unit_forget_bias + + @property + def kernel_regularizer(self): + return self.cell.kernel_regularizer + + @property + def recurrent_regularizer(self): + return self.cell.recurrent_regularizer + + @property + def bias_regularizer(self): + return self.cell.bias_regularizer + + @property + def kernel_constraint(self): + return self.cell.kernel_constraint + + @property + def recurrent_constraint(self): + return self.cell.recurrent_constraint + + @property + def bias_constraint(self): + return self.cell.bias_constraint + + @property + def dropout(self): + return self.cell.dropout + + @property + def recurrent_dropout(self): + return self.cell.recurrent_dropout def get_config(self): - config = { - 'activation': - activations.serialize(self.activation), - 'recurrent_activation': - activations.serialize(self.recurrent_activation), - 'use_bias': - self.use_bias, - 'kernel_initializer': - initializers.serialize(self.kernel_initializer), - 'recurrent_initializer': - initializers.serialize(self.recurrent_initializer), - 'bias_initializer': - initializers.serialize(self.bias_initializer), - 'unit_forget_bias': - self.unit_forget_bias, - 'kernel_regularizer': - regularizers.serialize(self.kernel_regularizer), - 'recurrent_regularizer': - 
regularizers.serialize(self.recurrent_regularizer), - 'bias_regularizer': - regularizers.serialize(self.bias_regularizer), - 'activity_regularizer': - regularizers.serialize(self.activity_regularizer), - 'kernel_constraint': - constraints.serialize(self.kernel_constraint), - 'recurrent_constraint': - constraints.serialize(self.recurrent_constraint), - 'bias_constraint': - constraints.serialize(self.bias_constraint), - 'dropout': - self.dropout, - 'recurrent_dropout': - self.recurrent_dropout - } + config = {'filters': self.filters, + 'kernel_size': self.kernel_size, + 'strides': self.strides, + 'padding': self.padding, + 'data_format': self.data_format, + 'dilation_rate': self.dilation_rate, + 'activation': activations.serialize(self.activation), + 'recurrent_activation': activations.serialize( + self.recurrent_activation), + 'use_bias': self.use_bias, + 'kernel_initializer': initializers.serialize( + self.kernel_initializer), + 'recurrent_initializer': initializers.serialize( + self.recurrent_initializer), + 'bias_initializer': initializers.serialize(self.bias_initializer), + 'unit_forget_bias': self.unit_forget_bias, + 'kernel_regularizer': regularizers.serialize( + self.kernel_regularizer), + 'recurrent_regularizer': regularizers.serialize( + self.recurrent_regularizer), + 'bias_regularizer': regularizers.serialize(self.bias_regularizer), + 'activity_regularizer': regularizers.serialize( + self.activity_regularizer), + 'kernel_constraint': constraints.serialize( + self.kernel_constraint), + 'recurrent_constraint': constraints.serialize( + self.recurrent_constraint), + 'bias_constraint': constraints.serialize(self.bias_constraint), + 'dropout': self.dropout, + 'recurrent_dropout': self.recurrent_dropout} base_config = super(ConvLSTM2D, self).get_config() + del base_config['cell'] return dict(list(base_config.items()) + list(config.items())) + + @classmethod + def from_config(cls, config): + return cls(**config) diff --git 
a/tensorflow/python/keras/_impl/keras/layers/convolutional_recurrent_test.py b/tensorflow/python/keras/_impl/keras/layers/convolutional_recurrent_test.py index 60137bdd724676af2c89bb7531cf4ea4e529b2a1..9e768b4e9552d126eac9c586d19810634fac9013 100644 --- a/tensorflow/python/keras/_impl/keras/layers/convolutional_recurrent_test.py +++ b/tensorflow/python/keras/_impl/keras/layers/convolutional_recurrent_test.py @@ -64,6 +64,7 @@ class ConvLSTMTest(test.TestCase): self.assertEqual(len(states), 2) model = keras.models.Model(x, states[0]) state = model.predict(inputs) + self.assertAllClose( keras.backend.eval(layer.states[0]), state, atol=1e-4) diff --git a/tensorflow/python/keras/_impl/keras/layers/convolutional_test.py b/tensorflow/python/keras/_impl/keras/layers/convolutional_test.py index f4a134b96cec0385cb24a208f3403db944b68edc..12b42676759d499c910707cb1b78e788e3c443fd 100644 --- a/tensorflow/python/keras/_impl/keras/layers/convolutional_test.py +++ b/tensorflow/python/keras/_impl/keras/layers/convolutional_test.py @@ -961,5 +961,43 @@ class CroppingTest(test.TestCase): keras.layers.Cropping3D(cropping=None) +class DepthwiseConv2DTest(test.TestCase): + + def _run_test(self, kwargs, arg, values): + num_samples = 2 + stack_size = 3 + num_row = 7 + num_col = 6 + + test_kwargs = copy.copy(kwargs) + for value in values: + test_kwargs[arg] = value + with self.test_session(use_gpu=True): + testing_utils.layer_test( + keras.layers.DepthwiseConv2D, + kwargs=test_kwargs, + input_shape=(num_samples, num_row, num_col, stack_size)) + + def test_depthwise_conv2d(self): + kwargs = {'kernel_size': (3, 3)} + + self._run_test(kwargs, 'padding', ['valid', 'same']) + self._run_test(kwargs, 'strides', [(2, 2)]) + if test.is_gpu_available(cuda_only=True): + self._run_test(kwargs, 'data_format', ['channels_first']) + self._run_test(kwargs, 'depth_multiplier', [1, 2]) + + kwargs = {'kernel_size': 3, + 'padding': 'valid', + 'data_format': 'channels_first', + 'activation': None, + 
'depthwise_regularizer': 'l2', + 'bias_regularizer': 'l2', + 'activity_regularizer': 'l2', + 'depthwise_constraint': 'unit_norm', + 'strides': (2, 2), + } + self._run_test(kwargs, 'depth_multiplier', [1]) + if __name__ == '__main__': test.main() diff --git a/tensorflow/python/keras/_impl/keras/layers/core.py b/tensorflow/python/keras/_impl/keras/layers/core.py index 73e4f15f7e259211c892fdc663e14dcb14aec58d..9c4cb0f4fda681ce3236222460cd87439ea67810 100644 --- a/tensorflow/python/keras/_impl/keras/layers/core.py +++ b/tensorflow/python/keras/_impl/keras/layers/core.py @@ -24,6 +24,7 @@ import types as python_types import numpy as np from tensorflow.python.eager import context +from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.keras._impl.keras import activations from tensorflow.python.keras._impl.keras import backend as K @@ -32,11 +33,14 @@ from tensorflow.python.keras._impl.keras import initializers from tensorflow.python.keras._impl.keras import regularizers from tensorflow.python.keras._impl.keras.engine import InputSpec from tensorflow.python.keras._impl.keras.engine import Layer -from tensorflow.python.keras._impl.keras.utils.generic_utils import deserialize_keras_object -from tensorflow.python.keras._impl.keras.utils.generic_utils import func_dump -from tensorflow.python.keras._impl.keras.utils.generic_utils import func_load -from tensorflow.python.keras._impl.keras.utils.generic_utils import has_arg -from tensorflow.python.layers import core as tf_core_layers +from tensorflow.python.keras._impl.keras.utils import generic_utils +from tensorflow.python.keras._impl.keras.utils import tf_utils +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gen_math_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import nn +from tensorflow.python.ops import nn_ops +from tensorflow.python.ops import standard_ops from tensorflow.python.util.tf_export 
import tf_export @@ -75,12 +79,12 @@ class Masking(Layer): self.mask_value = mask_value def compute_mask(self, inputs, mask=None): - return K.any(K.not_equal(inputs, self.mask_value), axis=-1) + return K.any(math_ops.not_equal(inputs, self.mask_value), axis=-1) def call(self, inputs): boolean_mask = K.any( - K.not_equal(inputs, self.mask_value), axis=-1, keepdims=True) - return inputs * K.cast(boolean_mask, inputs.dtype) + math_ops.not_equal(inputs, self.mask_value), axis=-1, keepdims=True) + return inputs * math_ops.cast(boolean_mask, inputs.dtype) def compute_output_shape(self, input_shape): return input_shape @@ -92,7 +96,7 @@ class Masking(Layer): @tf_export('keras.layers.Dropout') -class Dropout(tf_core_layers.Dropout, Layer): +class Dropout(Layer): """Applies Dropout to the input. Dropout consists in randomly setting @@ -111,23 +115,40 @@ class Dropout(tf_core_layers.Dropout, Layer): """ def __init__(self, rate, noise_shape=None, seed=None, **kwargs): - # Inheritance call order: - # 1) tf.layers.Dropout, 2) keras.layers.Layer, 3) tf.layers.Layer - super(Dropout, self).__init__(rate=rate, - noise_shape=noise_shape, - seed=seed, - **kwargs) + super(Dropout, self).__init__(**kwargs) + self.rate = rate + self.noise_shape = noise_shape + self.seed = seed self.supports_masking = True + def _get_noise_shape(self, inputs): + # Subclasses of `Dropout` may implement `_get_noise_shape(self, inputs)`, + # which will override `self.noise_shape`, and allows for custom noise + # shapes with dynamically sized inputs. 
+ if self.noise_shape is None: + return self.noise_shape + return nn_ops._get_noise_shape(inputs, self.noise_shape) # pylint: disable=protected-access + def call(self, inputs, training=None): + original_training_value = training if training is None: training = K.learning_phase() - output = super(Dropout, self).call(inputs, training=training) + + def dropped_inputs(): + return nn.dropout(inputs, 1 - self.rate, + noise_shape=self._get_noise_shape(inputs), + seed=self.seed) + output = tf_utils.smart_cond(training, + dropped_inputs, + lambda: array_ops.identity(inputs)) # EagerTensor object has no attribute _uses_learning_phase - if not context.executing_eagerly() and training is K.learning_phase(): + if not context.executing_eagerly() and original_training_value is None: output._uses_learning_phase = True # pylint: disable=protected-access return output + def compute_output_shape(self, input_shape): + return input_shape + def get_config(self): config = { 'rate': self.rate, @@ -170,7 +191,7 @@ class SpatialDropout1D(Dropout): self.input_spec = InputSpec(ndim=3) def _get_noise_shape(self, inputs): - input_shape = K.shape(inputs) + input_shape = array_ops.shape(inputs) noise_shape = (input_shape[0], 1, input_shape[2]) return noise_shape @@ -222,7 +243,7 @@ class SpatialDropout2D(Dropout): self.input_spec = InputSpec(ndim=4) def _get_noise_shape(self, inputs): - input_shape = K.shape(inputs) + input_shape = array_ops.shape(inputs) if self.data_format == 'channels_first': return (input_shape[0], input_shape[1], 1, 1) elif self.data_format == 'channels_last': @@ -275,7 +296,7 @@ class SpatialDropout3D(Dropout): self.input_spec = InputSpec(ndim=5) def _get_noise_shape(self, inputs): - input_shape = K.shape(inputs) + input_shape = array_ops.shape(inputs) if self.data_format == 'channels_first': return (input_shape[0], input_shape[1], 1, 1, 1) elif self.data_format == 'channels_last': @@ -414,7 +435,8 @@ class Reshape(Layer): return tensor_shape.TensorShape(output_shape) def 
call(self, inputs): - return K.reshape(inputs, (K.shape(inputs)[0],) + self.target_shape) + return array_ops.reshape(inputs, + (array_ops.shape(inputs)[0],) + self.target_shape) def get_config(self): config = {'target_shape': self.target_shape} @@ -467,7 +489,7 @@ class Permute(Layer): return tensor_shape.TensorShape(output_shape) def call(self, inputs): - return K.permute_dimensions(inputs, (0,) + self.dims) + return array_ops.transpose(inputs, perm=(0,) + self.dims) def get_config(self): config = {'dims': self.dims} @@ -476,7 +498,7 @@ class Permute(Layer): @tf_export('keras.layers.Flatten') -class Flatten(tf_core_layers.Flatten, Layer): +class Flatten(Layer): """Flattens the input. Does not affect the batch size. Example: @@ -492,7 +514,25 @@ class Flatten(tf_core_layers.Flatten, Layer): # now: model.output_shape == (None, 65536) ``` """ - pass + + def __init__(self, **kwargs): + super(Flatten, self).__init__(**kwargs) + self.input_spec = InputSpec(min_ndim=2) + + def call(self, inputs): + outputs = array_ops.reshape(inputs, (array_ops.shape(inputs)[0], -1)) + if not context.executing_eagerly(): + outputs.set_shape(self.compute_output_shape(inputs.get_shape())) + return outputs + + def compute_output_shape(self, input_shape): + input_shape = tensor_shape.TensorShape(input_shape).as_list() + output_shape = [input_shape[0]] + if all(input_shape[1:]): + output_shape += [np.prod(input_shape[1:])] + else: + output_shape += [None] + return tensor_shape.TensorShape(output_shape) @tf_export('keras.layers.RepeatVector') @@ -608,10 +648,12 @@ class Lambda(Layer): 'must be a list, a tuple, or a function.') self._output_shape = output_shape - def _compute_output_shape(self, input_shape): + def compute_output_shape(self, input_shape): input_shape = tuple(tensor_shape.TensorShape(input_shape).as_list()) if self._output_shape is None: + if context.executing_eagerly(): + raise NotImplementedError x = K.placeholder(shape=input_shape) x = self.call(x) if isinstance(x, list): @@ 
-637,7 +679,7 @@ class Lambda(Layer): def call(self, inputs, mask=None): arguments = self.arguments - if has_arg(self.function, 'mask'): + if generic_utils.has_arg(self.function, 'mask'): arguments['mask'] = mask return self.function(inputs, **arguments) @@ -648,14 +690,14 @@ class Lambda(Layer): def get_config(self): if isinstance(self.function, python_types.LambdaType): - function = func_dump(self.function) + function = generic_utils.func_dump(self.function) function_type = 'lambda' else: function = self.function.__name__ function_type = 'function' if isinstance(self._output_shape, python_types.LambdaType): - output_shape = func_dump(self._output_shape) + output_shape = generic_utils.func_dump(self._output_shape) output_shape_type = 'lambda' elif callable(self._output_shape): output_shape = self._output_shape.__name__ @@ -683,26 +725,27 @@ class Lambda(Layer): function_type = config.pop('function_type') if function_type == 'function': # Simple lookup in custom objects - function = deserialize_keras_object( + function = generic_utils.deserialize_keras_object( config['function'], custom_objects=custom_objects, printable_module_name='function in Lambda layer') elif function_type == 'lambda': # Unsafe deserialization from bytecode - function = func_load(config['function'], globs=globs) + function = generic_utils.func_load(config['function'], globs=globs) else: raise TypeError('Unknown function type:', function_type) output_shape_type = config.pop('output_shape_type') if output_shape_type == 'function': # Simple lookup in custom objects - output_shape = deserialize_keras_object( + output_shape = generic_utils.deserialize_keras_object( config['output_shape'], custom_objects=custom_objects, printable_module_name='output_shape function in Lambda layer') elif output_shape_type == 'lambda': # Unsafe deserialization from bytecode - output_shape = func_load(config['output_shape'], globs=globs) + output_shape = generic_utils.func_load(config['output_shape'], + globs=globs) 
else: output_shape = config['output_shape'] @@ -722,7 +765,7 @@ class Lambda(Layer): @tf_export('keras.layers.Dense') -class Dense(tf_core_layers.Dense, Layer): +class Dense(Layer): """Just your regular densely-connected NN layer. `Dense` implements the operation: @@ -792,21 +835,74 @@ class Dense(tf_core_layers.Dense, Layer): if 'input_shape' not in kwargs and 'input_dim' in kwargs: kwargs['input_shape'] = (kwargs.pop('input_dim'),) - # Inheritance call order: - # 1) tf.layers.Dense, 2) keras.layers.Layer, 3) tf.layers.Layer super(Dense, self).__init__( - units, - activation=activations.get(activation), - use_bias=use_bias, - kernel_initializer=initializers.get(kernel_initializer), - bias_initializer=initializers.get(bias_initializer), - kernel_regularizer=regularizers.get(kernel_regularizer), - bias_regularizer=regularizers.get(bias_regularizer), - activity_regularizer=regularizers.get(activity_regularizer), - kernel_constraint=constraints.get(kernel_constraint), - bias_constraint=constraints.get(bias_constraint), - **kwargs) + activity_regularizer=regularizers.get(activity_regularizer), **kwargs) + self.units = int(units) + self.activation = activations.get(activation) + self.use_bias = use_bias + self.kernel_initializer = initializers.get(kernel_initializer) + self.bias_initializer = initializers.get(bias_initializer) + self.kernel_regularizer = regularizers.get(kernel_regularizer) + self.bias_regularizer = regularizers.get(bias_regularizer) + self.kernel_constraint = constraints.get(kernel_constraint) + self.bias_constraint = constraints.get(bias_constraint) + self.supports_masking = True + self.input_spec = InputSpec(min_ndim=2) + + def build(self, input_shape): + input_shape = tensor_shape.TensorShape(input_shape) + if input_shape[-1].value is None: + raise ValueError('The last dimension of the inputs to `Dense` ' + 'should be defined. 
Found `None`.') + self.input_spec = InputSpec(min_ndim=2, + axes={-1: input_shape[-1].value}) + self.kernel = self.add_variable('kernel', + shape=[input_shape[-1].value, self.units], + initializer=self.kernel_initializer, + regularizer=self.kernel_regularizer, + constraint=self.kernel_constraint, + dtype=self.dtype, + trainable=True) + if self.use_bias: + self.bias = self.add_variable('bias', + shape=[self.units,], + initializer=self.bias_initializer, + regularizer=self.bias_regularizer, + constraint=self.bias_constraint, + dtype=self.dtype, + trainable=True) + else: + self.bias = None + self.built = True + + def call(self, inputs): + inputs = ops.convert_to_tensor(inputs, dtype=self.dtype) + shape = inputs.get_shape().as_list() + if len(shape) > 2: + # Broadcasting is required for the inputs. + outputs = standard_ops.tensordot(inputs, self.kernel, [[len(shape) - 1], + [0]]) + # Reshape the output back to the original ndim of the input. + if not context.executing_eagerly(): + output_shape = shape[:-1] + [self.units] + outputs.set_shape(output_shape) + else: + outputs = gen_math_ops.mat_mul(inputs, self.kernel) + if self.use_bias: + outputs = nn.bias_add(outputs, self.bias) + if self.activation is not None: + return self.activation(outputs) # pylint: disable=not-callable + return outputs + + def compute_output_shape(self, input_shape): + input_shape = tensor_shape.TensorShape(input_shape) + input_shape = input_shape.with_rank_at_least(2) + if input_shape[-1].value is None: + raise ValueError( + 'The innermost dimension of input_shape must be defined, but saw: %s' + % input_shape) + return input_shape[:-1].concatenate(self.units) def get_config(self): config = { diff --git a/tensorflow/python/keras/_impl/keras/layers/core_test.py b/tensorflow/python/keras/_impl/keras/layers/core_test.py index 2ca816adbdcecaf371776d99f3da60d0d8790832..d22d8d12dc4e76998c177dbe96fb87e3fffa5175 100644 --- a/tensorflow/python/keras/_impl/keras/layers/core_test.py +++ 
b/tensorflow/python/keras/_impl/keras/layers/core_test.py @@ -23,6 +23,7 @@ import numpy as np from tensorflow.python.framework import test_util as tf_test_util from tensorflow.python.keras._impl import keras from tensorflow.python.keras._impl.keras import testing_utils +from tensorflow.python.ops import math_ops from tensorflow.python.platform import test @@ -128,7 +129,6 @@ class CoreLayersTest(test.TestCase): testing_utils.layer_test( keras.layers.RepeatVector, kwargs={'n': 3}, input_shape=(3, 2)) - @tf_test_util.run_in_graph_and_eager_modes() def test_lambda(self): testing_utils.layer_test( keras.layers.Lambda, @@ -159,7 +159,7 @@ class CoreLayersTest(test.TestCase): # test with lambda ld = keras.layers.Lambda( - lambda x: keras.backend.concatenate([keras.backend.square(x), x])) + lambda x: keras.backend.concatenate([math_ops.square(x), x])) config = ld.get_config() ld = keras.layers.Lambda.from_config(config) @@ -235,4 +235,3 @@ class CoreLayersTest(test.TestCase): if __name__ == '__main__': test.main() - diff --git a/tensorflow/python/keras/_impl/keras/layers/embeddings.py b/tensorflow/python/keras/_impl/keras/layers/embeddings.py index 006ecd3135be25d43133daed1603734ecd1be955..07b8726b8595f7c5bbf36e296df79c2e82afdbaf 100644 --- a/tensorflow/python/keras/_impl/keras/layers/embeddings.py +++ b/tensorflow/python/keras/_impl/keras/layers/embeddings.py @@ -24,6 +24,8 @@ from tensorflow.python.keras._impl.keras import initializers from tensorflow.python.keras._impl.keras import regularizers from tensorflow.python.keras._impl.keras.engine import Layer from tensorflow.python.keras._impl.keras.engine.base_layer import shape_type_conversion +from tensorflow.python.ops import embedding_ops +from tensorflow.python.ops import math_ops from tensorflow.python.util.tf_export import tf_export @@ -100,7 +102,8 @@ class Embedding(Layer): kwargs['input_shape'] = (input_length,) else: kwargs['input_shape'] = (None,) - super(Embedding, self).__init__(**kwargs) + dtype = 
kwargs.pop('dtype', K.floatx()) + super(Embedding, self).__init__(dtype=dtype, **kwargs) self.input_dim = input_dim self.output_dim = output_dim @@ -118,15 +121,14 @@ class Embedding(Layer): initializer=self.embeddings_initializer, name='embeddings', regularizer=self.embeddings_regularizer, - constraint=self.embeddings_constraint, - dtype=self.dtype) + constraint=self.embeddings_constraint) self.built = True def compute_mask(self, inputs, mask=None): if not self.mask_zero: return None else: - return K.not_equal(inputs, 0) + return math_ops.not_equal(inputs, 0) @shape_type_conversion def compute_output_shape(self, input_shape): @@ -152,8 +154,8 @@ class Embedding(Layer): def call(self, inputs): if K.dtype(inputs) != 'int32': - inputs = K.cast(inputs, 'int32') - out = K.gather(self.embeddings, inputs) + inputs = math_ops.cast(inputs, 'int32') + out = embedding_ops.embedding_lookup(self.embeddings, inputs) return out def get_config(self): diff --git a/tensorflow/python/keras/_impl/keras/layers/embeddings_test.py b/tensorflow/python/keras/_impl/keras/layers/embeddings_test.py index 26fd1f1c114587c2f1b3e0155f1259dd5f0dcf60..6ebf5dc94adb423abae7ec9e6910fb86439410f1 100644 --- a/tensorflow/python/keras/_impl/keras/layers/embeddings_test.py +++ b/tensorflow/python/keras/_impl/keras/layers/embeddings_test.py @@ -18,6 +18,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import numpy as np + from tensorflow.python.framework import test_util as tf_test_util from tensorflow.python.keras._impl import keras from tensorflow.python.keras._impl.keras import testing_utils @@ -26,7 +28,7 @@ from tensorflow.python.platform import test class EmbeddingTest(test.TestCase): - @tf_test_util.run_in_graph_and_eager_modes() + @tf_test_util.run_in_graph_and_eager_modes(use_gpu=False) def test_embedding(self): testing_utils.layer_test( keras.layers.Embedding, @@ -65,6 +67,17 @@ class EmbeddingTest(test.TestCase): 
input_dtype='int32', expected_output_dtype='float32') + def test_embedding_correctness(self): + with self.test_session(): + layer = keras.layers.Embedding(output_dim=2, input_dim=2) + layer.build((None, 2)) + matrix = np.array([[1, 1], [2, 2]]) + layer.set_weights([matrix]) + + inputs = keras.backend.constant([[0, 1, 0]], dtype='int32') + outputs = keras.backend.eval(layer(inputs)) + self.assertAllClose(outputs, [[[1, 1], [2, 2], [1, 1]]]) + if __name__ == '__main__': test.main() diff --git a/tensorflow/python/keras/_impl/keras/layers/merge.py b/tensorflow/python/keras/_impl/keras/layers/merge.py index c660cbd449b11a139f64cfa8b3a35310a597491c..7c87e6c0671138efacbf1bca02fdf6779e21537f 100644 --- a/tensorflow/python/keras/_impl/keras/layers/merge.py +++ b/tensorflow/python/keras/_impl/keras/layers/merge.py @@ -23,6 +23,9 @@ from __future__ import print_function from tensorflow.python.keras._impl.keras import backend as K from tensorflow.python.keras._impl.keras.engine.base_layer import Layer from tensorflow.python.keras._impl.keras.engine.base_layer import shape_type_conversion +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import nn from tensorflow.python.util.tf_export import tf_export @@ -127,7 +130,7 @@ class _Merge(Layer): for x in inputs: x_ndim = K.ndim(x) for _ in range(max_ndim - x_ndim): - x = K.expand_dims(x, 1) + x = array_ops.expand_dims(x, axis=1) reshaped_inputs.append(x) return self._merge_function(reshaped_inputs) else: @@ -137,19 +140,22 @@ class _Merge(Layer): for x in inputs: x_ndim = K.ndim(x) if x_ndim is None: - x_shape = K.shape(x) + x_shape = array_ops.shape(x) batch_size = x_shape[0] - new_shape = K.concatenate([x_shape[1:], K.expand_dims(batch_size)]) - x_transposed = K.reshape(x, - K.stack([batch_size, - K.prod(x_shape[1:])])) - x_transposed = K.permute_dimensions(x_transposed, (1, 0)) - x_transposed = K.reshape(x_transposed, new_shape) + new_shape = K.concatenate( + 
[x_shape[1:], + array_ops.expand_dims(batch_size, axis=-1)]) + x_transposed = array_ops.reshape( + x, + array_ops.stack( + [batch_size, math_ops.reduce_prod(x_shape[1:])], axis=0)) + x_transposed = array_ops.transpose(x_transposed, perm=(1, 0)) + x_transposed = array_ops.reshape(x_transposed, new_shape) reshaped_inputs.append(x_transposed) transposed = True elif x_ndim > 1: dims = list(range(1, x_ndim)) + [0] - reshaped_inputs.append(K.permute_dimensions(x, dims)) + reshaped_inputs.append(array_ops.transpose(x, perm=dims)) transposed = True else: # We don't transpose inputs if they are 1D vectors or scalars. @@ -159,17 +165,18 @@ class _Merge(Layer): if transposed: # If inputs have been transposed, we have to transpose the output too. if y_ndim is None: - y_shape = K.shape(y) - y_ndim = K.shape(y_shape)[0] + y_shape = array_ops.shape(y) + y_ndim = array_ops.shape(y_shape)[0] batch_size = y_shape[y_ndim - 1] - new_shape = K.concatenate( - [K.expand_dims(batch_size), y_shape[:y_ndim - 1]]) - y = K.reshape(y, (-1, batch_size)) - y = K.permute_dimensions(y, (1, 0)) - y = K.reshape(y, new_shape) + new_shape = K.concatenate([ + array_ops.expand_dims(batch_size, axis=-1), y_shape[:y_ndim - 1] + ]) + y = array_ops.reshape(y, (-1, batch_size)) + y = array_ops.transpose(y, perm=(1, 0)) + y = array_ops.reshape(y, new_shape) elif y_ndim > 1: dims = [y_ndim - 1] + list(range(y_ndim - 1)) - y = K.permute_dimensions(y, dims) + y = array_ops.transpose(y, perm=dims) return y else: return self._merge_function(inputs) @@ -207,7 +214,7 @@ class _Merge(Layer): 'should have the same length.') if all([m is None for m in mask]): return None - masks = [K.expand_dims(m, 0) for m in mask if m is not None] + masks = [array_ops.expand_dims(m, axis=0) for m in mask if m is not None] return K.all(K.concatenate(masks, axis=0), axis=0, keepdims=False) @@ -325,7 +332,7 @@ class Maximum(_Merge): def _merge_function(self, inputs): output = inputs[0] for i in range(1, len(inputs)): - output = 
K.maximum(output, inputs[i]) + output = math_ops.maximum(output, inputs[i]) return output @@ -340,7 +347,7 @@ class Minimum(_Merge): def _merge_function(self, inputs): output = inputs[0] for i in range(1, len(inputs)): - output = K.minimum(output, inputs[i]) + output = math_ops.minimum(output, inputs[i]) return output @@ -418,10 +425,10 @@ class Concatenate(_Merge): for input_i, mask_i in zip(inputs, mask): if mask_i is None: # Input is unmasked. Append all 1s to masks, - masks.append(K.ones_like(input_i, dtype='bool')) + masks.append(array_ops.ones_like(input_i, dtype='bool')) elif K.ndim(mask_i) < K.ndim(input_i): # Mask is smaller than the input, expand it - masks.append(K.expand_dims(mask_i)) + masks.append(array_ops.expand_dims(mask_i, axis=-1)) else: masks.append(mask_i) concatenated = K.concatenate(masks, axis=self.axis) @@ -511,8 +518,8 @@ class Dot(_Merge): else: axes.append(self.axes[i]) if self.normalize: - x1 = K.l2_normalize(x1, axis=axes[0]) - x2 = K.l2_normalize(x2, axis=axes[1]) + x1 = nn.l2_normalize(x1, axis=axes[0]) + x2 = nn.l2_normalize(x2, axis=axes[1]) output = K.batch_dot(x1, x2, axes) return output diff --git a/tensorflow/python/keras/_impl/keras/layers/noise.py b/tensorflow/python/keras/_impl/keras/layers/noise.py index e309d160e5a9be97ff5f5356dad9dfaf85430233..72dc7a1ff8b7887ed97ae44bddf8ae3cd32c408d 100644 --- a/tensorflow/python/keras/_impl/keras/layers/noise.py +++ b/tensorflow/python/keras/_impl/keras/layers/noise.py @@ -23,6 +23,8 @@ import numpy as np from tensorflow.python.keras._impl.keras import backend as K from tensorflow.python.keras._impl.keras.engine import Layer from tensorflow.python.keras._impl.keras.engine.base_layer import shape_type_conversion +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import math_ops from tensorflow.python.util.tf_export import tf_export @@ -58,7 +60,7 @@ class GaussianNoise(Layer): def noised(): return inputs + K.random_normal( - shape=K.shape(inputs), mean=0., 
stddev=self.stddev) + shape=array_ops.shape(inputs), mean=0., stddev=self.stddev) return K.in_train_phase(noised, inputs, training=training) @@ -104,7 +106,7 @@ class GaussianDropout(Layer): def noised(): stddev = np.sqrt(self.rate / (1.0 - self.rate)) return inputs * K.random_normal( - shape=K.shape(inputs), mean=1.0, stddev=stddev) + shape=array_ops.shape(inputs), mean=1.0, stddev=stddev) return K.in_train_phase(noised, inputs, training=training) return inputs @@ -153,7 +155,7 @@ class AlphaDropout(Layer): self.supports_masking = True def _get_noise_shape(self, inputs): - return self.noise_shape if self.noise_shape else K.shape(inputs) + return self.noise_shape if self.noise_shape else array_ops.shape(inputs) def call(self, inputs, training=None): if 0. < self.rate < 1.: @@ -164,9 +166,9 @@ class AlphaDropout(Layer): scale = 1.0507009873554804934193349852946 alpha_p = -alpha * scale - kept_idx = K.greater_equal( + kept_idx = math_ops.greater_equal( K.random_uniform(noise_shape, seed=seed), rate) - kept_idx = K.cast(kept_idx, K.floatx()) + kept_idx = math_ops.cast(kept_idx, K.floatx()) # Get affine transformation params a = ((1 - rate) * (1 + rate * alpha_p**2))**-0.5 diff --git a/tensorflow/python/keras/_impl/keras/layers/normalization.py b/tensorflow/python/keras/_impl/keras/layers/normalization.py index 3b44b20bf822429351002c0f81fe8f9596d595d3..c16fc07fb4ecda66bd8bcc70dce5d753c73f5dd9 100644 --- a/tensorflow/python/keras/_impl/keras/layers/normalization.py +++ b/tensorflow/python/keras/_impl/keras/layers/normalization.py @@ -19,17 +19,28 @@ from __future__ import division from __future__ import print_function from tensorflow.python.eager import context +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_shape from tensorflow.python.keras._impl.keras import backend as K from tensorflow.python.keras._impl.keras import constraints from tensorflow.python.keras._impl.keras 
import initializers from tensorflow.python.keras._impl.keras import regularizers +from tensorflow.python.keras._impl.keras.engine import InputSpec from tensorflow.python.keras._impl.keras.engine import Layer -from tensorflow.python.layers import normalization as tf_normalization_layers +from tensorflow.python.keras._impl.keras.utils import tf_utils +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import init_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import nn +from tensorflow.python.ops import state_ops +from tensorflow.python.platform import tf_logging as logging +from tensorflow.python.training import distribute as distribute_lib from tensorflow.python.util.tf_export import tf_export @tf_export('keras.layers.BatchNormalization') -class BatchNormalization(tf_normalization_layers.BatchNormalization, Layer): +class BatchNormalization(Layer): """Batch normalization layer (Ioffe and Szegedy, 2014). Normalize the activations of the previous layer at each batch, @@ -37,28 +48,63 @@ class BatchNormalization(tf_normalization_layers.BatchNormalization, Layer): close to 0 and the activation standard deviation close to 1. Arguments: - axis: Integer, the axis that should be normalized - (typically the features axis). - For instance, after a `Conv2D` layer with - `data_format="channels_first"`, - set `axis=1` in `BatchNormalization`. - momentum: Momentum for the moving average. - epsilon: Small float added to variance to avoid dividing by zero. - center: If True, add offset of `beta` to normalized tensor. - If False, `beta` is ignored. - scale: If True, multiply by `gamma`. - If False, `gamma` is not used. - When the next layer is linear (also e.g. `nn.relu`), - this can be disabled since the scaling - will be done by the next layer. - beta_initializer: Initializer for the beta weight. - gamma_initializer: Initializer for the gamma weight. - moving_mean_initializer: Initializer for the moving mean. 
- moving_variance_initializer: Initializer for the moving variance. - beta_regularizer: Optional regularizer for the beta weight. - gamma_regularizer: Optional regularizer for the gamma weight. - beta_constraint: Optional constraint for the beta weight. - gamma_constraint: Optional constraint for the gamma weight. + axis: Integer, the axis that should be normalized + (typically the features axis). + For instance, after a `Conv2D` layer with + `data_format="channels_first"`, + set `axis=1` in `BatchNormalization`. + momentum: Momentum for the moving average. + epsilon: Small float added to variance to avoid dividing by zero. + center: If True, add offset of `beta` to normalized tensor. + If False, `beta` is ignored. + scale: If True, multiply by `gamma`. + If False, `gamma` is not used. + When the next layer is linear (also e.g. `nn.relu`), + this can be disabled since the scaling + will be done by the next layer. + beta_initializer: Initializer for the beta weight. + gamma_initializer: Initializer for the gamma weight. + moving_mean_initializer: Initializer for the moving mean. + moving_variance_initializer: Initializer for the moving variance. + beta_regularizer: Optional regularizer for the beta weight. + gamma_regularizer: Optional regularizer for the gamma weight. + beta_constraint: Optional constraint for the beta weight. + gamma_constraint: Optional constraint for the gamma weight. + renorm: Whether to use Batch Renormalization + (https://arxiv.org/abs/1702.03275). This adds extra variables during + training. The inference is the same for either value of this parameter. + renorm_clipping: A dictionary that may map keys 'rmax', 'rmin', 'dmax' to + scalar `Tensors` used to clip the renorm correction. The correction + `(r, d)` is used as `corrected_value = normalized_value * r + d`, with + `r` clipped to [rmin, rmax], and `d` to [-dmax, dmax]. Missing rmax, rmin, + dmax are set to inf, 0, inf, respectively. 
+ renorm_momentum: Momentum used to update the moving means and standard + deviations with renorm. Unlike `momentum`, this affects training + and should be neither too small (which would add noise) nor too large + (which would give stale estimates). Note that `momentum` is still applied + to get the means and variances for inference. + fused: if `None` or `True`, use a faster, fused implementation if possible. + If `False`, use the system recommended implementation. + trainable: Boolean, if `True` also add variables to the graph collection + `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable). + virtual_batch_size: An `int`. By default, `virtual_batch_size` is `None`, + which means batch normalization is performed across the whole batch. When + `virtual_batch_size` is not `None`, instead perform "Ghost Batch + Normalization", which creates virtual sub-batches which are each + normalized separately (with shared gamma, beta, and moving statistics). + Must divide the actual batch size during execution. + adjustment: A function taking the `Tensor` containing the (dynamic) shape of + the input tensor and returning a pair (scale, bias) to apply to the + normalized values (before gamma and beta), only during training. For + example, if axis==-1, + `adjustment = lambda shape: ( + tf.random_uniform(shape[-1:], 0.93, 1.07), + tf.random_uniform(shape[-1:], -0.1, 0.1))` + will scale the normalized value by up to 7% up or down, then shift the + result by up to 0.1 (with independent scaling and bias for each feature + but shared across all examples), and finally apply gamma and/or beta. If + `None`, no adjustment is applied. Cannot be specified if + virtual_batch_size is specified. Input shape: Arbitrary. 
Use the keyword argument `input_shape` @@ -87,33 +133,508 @@ class BatchNormalization(tf_normalization_layers.BatchNormalization, Layer): gamma_regularizer=None, beta_constraint=None, gamma_constraint=None, + renorm=False, + renorm_clipping=None, + renorm_momentum=0.99, + fused=None, + trainable=True, + virtual_batch_size=None, + adjustment=None, + name=None, **kwargs): - self.supports_masking = True super(BatchNormalization, self).__init__( - axis=axis, - momentum=momentum, - epsilon=epsilon, - center=center, - scale=scale, - beta_initializer=initializers.get(beta_initializer), - gamma_initializer=initializers.get(gamma_initializer), - moving_mean_initializer=initializers.get(moving_mean_initializer), - moving_variance_initializer=initializers.get( - moving_variance_initializer), - beta_regularizer=regularizers.get(beta_regularizer), - gamma_regularizer=regularizers.get(gamma_regularizer), - beta_constraint=constraints.get(beta_constraint), - gamma_constraint=constraints.get(gamma_constraint), - **kwargs - ) + name=name, trainable=trainable, **kwargs) + if isinstance(axis, list): + self.axis = axis[:] + else: + self.axis = axis + self.momentum = momentum + self.epsilon = epsilon + self.center = center + self.scale = scale + self.beta_initializer = initializers.get(beta_initializer) + self.gamma_initializer = initializers.get(gamma_initializer) + self.moving_mean_initializer = initializers.get(moving_mean_initializer) + self.moving_variance_initializer = initializers.get( + moving_variance_initializer) + self.beta_regularizer = regularizers.get(beta_regularizer) + self.gamma_regularizer = regularizers.get(gamma_regularizer) + self.beta_constraint = constraints.get(beta_constraint) + self.gamma_constraint = constraints.get(gamma_constraint) + self.renorm = renorm + self.virtual_batch_size = virtual_batch_size + self.adjustment = adjustment + if fused is None: + fused = True + self.supports_masking = True + + self.fused = fused + self._bessels_correction_test_only = 
True + + if renorm: + renorm_clipping = renorm_clipping or {} + keys = ['rmax', 'rmin', 'dmax'] + if set(renorm_clipping) - set(keys): + raise ValueError('renorm_clipping %s contains keys not in %s' % + (renorm_clipping, keys)) + self.renorm_clipping = renorm_clipping + self.renorm_momentum = renorm_momentum + + def _add_tower_local_variable(self, *args, **kwargs): + tower_context = distribute_lib.get_tower_context() + with tower_context.tower_local_var_scope('mean'): + return self.add_variable(*args, **kwargs) + + def build(self, input_shape): + input_shape = tensor_shape.TensorShape(input_shape) + if not input_shape.ndims: + raise ValueError('Input has undefined rank:', input_shape) + ndims = len(input_shape) + + # Convert axis to list and resolve negatives + if isinstance(self.axis, int): + self.axis = [self.axis] + + if not isinstance(self.axis, list): + raise TypeError('axis must be int or list, type given: %s' + % type(self.axis)) + + for idx, x in enumerate(self.axis): + if x < 0: + self.axis[idx] = ndims + x + + # Validate axes + for x in self.axis: + if x < 0 or x >= ndims: + raise ValueError('Invalid axis: %d' % x) + if len(self.axis) != len(set(self.axis)): + raise ValueError('Duplicate axis: %s' % self.axis) + + if self.virtual_batch_size is not None: + if self.virtual_batch_size <= 0: + raise ValueError('virtual_batch_size must be a positive integer that ' + 'divides the true batch size of the input Tensor') + # If using virtual batches, the first dimension must be the batch + # dimension and cannot be the batch norm axis + if 0 in self.axis: + raise ValueError('When using virtual_batch_size, the batch dimension ' + 'must be 0 and thus axis cannot include 0') + if self.adjustment is not None: + raise ValueError('When using virtual_batch_size, adjustment cannot ' + 'be specified') + + if self.fused: + # Currently fused batch norm doesn't support renorm. It also only supports + # an input tensor of rank 4 and a channel dimension on axis 1 or 3. 
+ # TODO(yaozhang): if input is not 4D, reshape it to 4D and reshape the + # output back to its original shape accordingly. + self.fused = (not self.renorm and + ndims == 4 and + self.axis in [[1], [3]] and + self.virtual_batch_size is None and + self.adjustment is None) + # TODO(chrisying): fused batch norm is currently not supported for + # multi-axis batch norm and by extension virtual batches. In some cases, + # it might be possible to use fused batch norm but would require reshaping + # the Tensor to 4D with the axis in 1 or 3 (preferred 1) which is + # particularly tricky. A compromise might be to just support the most + # common use case (turning 5D w/ virtual batch to NCHW) + + if self.fused: + if self.axis == [1]: + self._data_format = 'NCHW' + elif self.axis == [3]: + self._data_format = 'NHWC' + else: + raise ValueError('Unsupported axis, fused batch norm only supports ' + 'axis == [1] or axis == [3]') + + # Raise parameters of fp16 batch norm to fp32 + if self.dtype == dtypes.float16 or self.dtype == dtypes.bfloat16: + param_dtype = dtypes.float32 + else: + param_dtype = self.dtype or dtypes.float32 + + axis_to_dim = {x: input_shape[x].value for x in self.axis} + for x in axis_to_dim: + if axis_to_dim[x] is None: + raise ValueError('Input has undefined `axis` dimension. 
Input shape: ', + input_shape) + self.input_spec = InputSpec(ndim=ndims, axes=axis_to_dim) + + if len(axis_to_dim) == 1 and self.virtual_batch_size is None: + # Single axis batch norm (most common/default use-case) + param_shape = (list(axis_to_dim.values())[0],) + else: + # Parameter shape is the original shape but with 1 in all non-axis dims + param_shape = [axis_to_dim[i] if i in axis_to_dim + else 1 for i in range(ndims)] + if self.virtual_batch_size is not None: + # When using virtual batches, add an extra dim at index 1 + param_shape.insert(1, 1) + for idx, x in enumerate(self.axis): + self.axis[idx] = x + 1 # Account for added dimension + + if self.scale: + self.gamma = self.add_variable( + name='gamma', + shape=param_shape, + dtype=param_dtype, + initializer=self.gamma_initializer, + regularizer=self.gamma_regularizer, + constraint=self.gamma_constraint, + trainable=True) + else: + self.gamma = None + if self.fused: + self._gamma_const = array_ops.constant( + 1.0, dtype=param_dtype, shape=param_shape) + + if self.center: + self.beta = self.add_variable( + name='beta', + shape=param_shape, + dtype=param_dtype, + initializer=self.beta_initializer, + regularizer=self.beta_regularizer, + constraint=self.beta_constraint, + trainable=True) + else: + self.beta = None + if self.fused: + self._beta_const = array_ops.constant( + 0.0, dtype=param_dtype, shape=param_shape) + + try: + # Disable variable partitioning when creating the moving mean and variance + if hasattr(self, '_scope') and self._scope: + partitioner = self._scope.partitioner + self._scope.set_partitioner(None) + else: + partitioner = None + self.moving_mean = self._add_tower_local_variable( + name='moving_mean', + shape=param_shape, + dtype=param_dtype, + initializer=self.moving_mean_initializer, + trainable=False) + + self.moving_variance = self._add_tower_local_variable( + name='moving_variance', + shape=param_shape, + dtype=param_dtype, + initializer=self.moving_variance_initializer, + 
trainable=False) + + if self.renorm: + # Create variables to maintain the moving mean and standard deviation. + # These are used in training and thus are different from the moving + # averages above. The renorm variables are colocated with moving_mean + # and moving_variance. + # NOTE: below, the outer `with device` block causes the current device + # stack to be cleared. The nested ones use a `lambda` to set the desired + # device and ignore any devices that may be set by the custom getter. + def _renorm_variable(name, shape): + var = self._add_tower_local_variable( + name=name, + shape=shape, + dtype=param_dtype, + initializer=init_ops.zeros_initializer(), + trainable=False) + return var + + with distribute_lib.get_distribution_strategy().colocate_vars_with( + self.moving_mean): + self.renorm_mean = _renorm_variable('renorm_mean', param_shape) + self.renorm_mean_weight = _renorm_variable('renorm_mean_weight', ()) + # We initialize renorm_stddev to 0, and maintain the (0-initialized) + # renorm_stddev_weight. This allows us to (1) mix the average + # stddev with the minibatch stddev early in training, and (2) compute + # the unbiased average stddev by dividing renorm_stddev by the weight. 
+ with distribute_lib.get_distribution_strategy().colocate_vars_with( + self.moving_variance): + self.renorm_stddev = _renorm_variable('renorm_stddev', param_shape) + self.renorm_stddev_weight = _renorm_variable('renorm_stddev_weight', + ()) + finally: + if partitioner: + self._scope.set_partitioner(partitioner) + self.built = True + + def _assign_moving_average(self, variable, value, momentum): + with ops.name_scope(None, 'AssignMovingAvg', + [variable, value, momentum]) as scope: + decay = ops.convert_to_tensor(1.0 - momentum, name='decay') + if decay.dtype != variable.dtype.base_dtype: + decay = math_ops.cast(decay, variable.dtype.base_dtype) + update_delta = (variable - value) * decay + return state_ops.assign_sub(variable, update_delta, name=scope) + + def _fused_batch_norm(self, inputs, training): + """Returns the output of fused batch norm.""" + beta = self.beta if self.center else self._beta_const + gamma = self.gamma if self.scale else self._gamma_const + + def _fused_batch_norm_training(): + return nn.fused_batch_norm( + inputs, + gamma, + beta, + epsilon=self.epsilon, + data_format=self._data_format) + + def _fused_batch_norm_inference(): + return nn.fused_batch_norm( + inputs, + gamma, + beta, + mean=self.moving_mean, + variance=self.moving_variance, + epsilon=self.epsilon, + is_training=False, + data_format=self._data_format) + + output, mean, variance = tf_utils.smart_cond( + training, _fused_batch_norm_training, _fused_batch_norm_inference) + if not self._bessels_correction_test_only: + # Remove Bessel's correction to be consistent with non-fused batch norm. + # Note that the variance computed by fused batch norm is + # with Bessel's correction. 
+ sample_size = math_ops.cast( + array_ops.size(inputs) / array_ops.size(variance), variance.dtype) + factor = (sample_size - math_ops.cast(1.0, variance.dtype)) / sample_size + variance *= factor + + training_value = tf_utils.constant_value(training) + if training_value is None: + momentum = tf_utils.smart_cond(training, + lambda: self.momentum, + lambda: 1.0) + else: + momentum = ops.convert_to_tensor(self.momentum) + if training_value or training_value is None: + mean_update = self._assign_moving_average(self.moving_mean, mean, + momentum) + variance_update = self._assign_moving_average(self.moving_variance, + variance, momentum) + self.add_update(mean_update, inputs=True) + self.add_update(variance_update, inputs=True) + + return output + + def _renorm_correction_and_moments(self, mean, variance, training): + """Returns the correction and update values for renorm.""" + stddev = math_ops.sqrt(variance + self.epsilon) + # Compute the average mean and standard deviation, as if they were + # initialized with this batch's moments. + mixed_renorm_mean = (self.renorm_mean + + (1. - self.renorm_mean_weight) * mean) + mixed_renorm_stddev = (self.renorm_stddev + + (1. - self.renorm_stddev_weight) * stddev) + # Compute the corrections for batch renorm. + r = stddev / mixed_renorm_stddev + d = (mean - mixed_renorm_mean) / mixed_renorm_stddev + # Ensure the corrections use pre-update moving averages. + with ops.control_dependencies([r, d]): + mean = array_ops.identity(mean) + stddev = array_ops.identity(stddev) + rmin, rmax, dmax = [self.renorm_clipping.get(key) + for key in ['rmin', 'rmax', 'dmax']] + if rmin is not None: + r = math_ops.maximum(r, rmin) + if rmax is not None: + r = math_ops.minimum(r, rmax) + if dmax is not None: + d = math_ops.maximum(d, -dmax) + d = math_ops.minimum(d, dmax) + # When not training, use r=1, d=0. 
+ r = tf_utils.smart_cond(training, lambda: r, lambda: array_ops.ones_like(r)) + d = tf_utils.smart_cond(training, + lambda: d, + lambda: array_ops.zeros_like(d)) + + def _update_renorm_variable(var, weight, value): + """Updates a moving average and weight, returns the unbiased value.""" + value = array_ops.identity(value) + def _do_update(): + """Updates the var and weight, returns their updated ratio.""" + # Update the variables without zero debiasing. The debiasing will be + # accomplished by dividing the exponential moving average by the weight. + # For example, after a single update, the moving average would be + # (1-decay) * value. and the weight will be 1-decay, with their ratio + # giving the value. + # Make sure the weight is not updated until before r and d computation. + with ops.control_dependencies([value]): + weight_value = array_ops.constant(1., dtype=weight.dtype) + new_var = self._assign_moving_average(var, value, self.renorm_momentum) + new_weight = self._assign_moving_average(weight, weight_value, + self.renorm_momentum) + # TODO(yuefengz): the updates to var and weighted can not be batched + # together if we fetch their updated values here. Consider calculating + # new values and delaying the updates. + return new_var / new_weight + + def _fake_update(): + return array_ops.identity(var) + return tf_utils.smart_cond(training, _do_update, _fake_update) + + # TODO(yuefengz): colocate the operations + new_mean = _update_renorm_variable(self.renorm_mean, + self.renorm_mean_weight, mean) + new_stddev = _update_renorm_variable(self.renorm_stddev, + self.renorm_stddev_weight, stddev) + # Make sqrt(moving_variance + epsilon) = new_stddev. 
+ new_variance = math_ops.square(new_stddev) - self.epsilon + + return (r, d, new_mean, new_variance) def call(self, inputs, training=None): + original_training_value = training if training is None: training = K.learning_phase() - output = super(BatchNormalization, self).call(inputs, training=training) - if not context.executing_eagerly() and training is K.learning_phase(): - output._uses_learning_phase = True # pylint: disable=protected-access - return output + + in_eager_mode = context.executing_eagerly() + if self.virtual_batch_size is not None: + # Virtual batches (aka ghost batches) can be simulated by reshaping the + # Tensor and reusing the existing batch norm implementation + original_shape = [-1] + inputs.shape.as_list()[1:] + expanded_shape = [self.virtual_batch_size, -1] + original_shape[1:] + + # Will cause errors if virtual_batch_size does not divide the batch size + inputs = array_ops.reshape(inputs, expanded_shape) + + def undo_virtual_batching(outputs): + outputs = array_ops.reshape(outputs, original_shape) + return outputs + + if self.fused: + outputs = self._fused_batch_norm(inputs, training=training) + if self.virtual_batch_size is not None: + # Currently never reaches here since fused_batch_norm does not support + # virtual batching + outputs = undo_virtual_batching(outputs) + if not context.executing_eagerly() and original_training_value is None: + outputs._uses_learning_phase = True # pylint: disable=protected-access + return outputs + + # Compute the axes along which to reduce the mean / variance + input_shape = inputs.get_shape() + ndims = len(input_shape) + reduction_axes = [i for i in range(ndims) if i not in self.axis] + if self.virtual_batch_size is not None: + del reduction_axes[1] # Do not reduce along virtual batch dim + + # Broadcasting only necessary for single-axis batch norm where the axis is + # not the last dimension + broadcast_shape = [1] * ndims + broadcast_shape[self.axis[0]] = input_shape[self.axis[0]].value + def 
_broadcast(v): + if (v is not None and + len(v.get_shape()) != ndims and + reduction_axes != list(range(ndims - 1))): + return array_ops.reshape(v, broadcast_shape) + return v + + scale, offset = _broadcast(self.gamma), _broadcast(self.beta) + + def _compose_transforms(scale, offset, then_scale, then_offset): + if then_scale is not None: + scale *= then_scale + offset *= then_scale + if then_offset is not None: + offset += then_offset + return (scale, offset) + + # Determine a boolean value for `training`: could be True, False, or None. + training_value = tf_utils.constant_value(training) + if training_value is not False: + if self.adjustment: + adj_scale, adj_bias = self.adjustment(array_ops.shape(inputs)) + # Adjust only during training. + adj_scale = tf_utils.smart_cond(training, + lambda: adj_scale, + lambda: array_ops.ones_like(adj_scale)) + adj_bias = tf_utils.smart_cond(training, + lambda: adj_bias, + lambda: array_ops.zeros_like(adj_bias)) + scale, offset = _compose_transforms(adj_scale, adj_bias, scale, offset) + + # Some of the computations here are not necessary when training==False + # but not a constant. However, this makes the code simpler. + keep_dims = self.virtual_batch_size is not None or len(self.axis) > 1 + mean, variance = nn.moments(inputs, reduction_axes, keep_dims=keep_dims) + + moving_mean = self.moving_mean + moving_variance = self.moving_variance + + mean = tf_utils.smart_cond(training, + lambda: mean, + lambda: moving_mean) + variance = tf_utils.smart_cond(training, + lambda: variance, + lambda: moving_variance) + + if self.renorm: + r, d, new_mean, new_variance = self._renorm_correction_and_moments( + mean, variance, training) + # When training, the normalized values (say, x) will be transformed as + # x * gamma + beta without renorm, and (x * r + d) * gamma + beta + # = x * (r * gamma) + (d * gamma + beta) with renorm. 
+ r = _broadcast(array_ops.stop_gradient(r, name='renorm_r')) + d = _broadcast(array_ops.stop_gradient(d, name='renorm_d')) + scale, offset = _compose_transforms(r, d, scale, offset) + else: + new_mean, new_variance = mean, variance + + if self.virtual_batch_size is not None: + # This isn't strictly correct since in ghost batch norm, you are + # supposed to sequentially update the moving_mean and moving_variance + # with each sub-batch. However, since the moving statistics are only + # used during evaluation, it is more efficient to just update in one + # step and should not make a significant difference in the result. + new_mean = math_ops.reduce_mean(new_mean, + axis=1, keepdims=True) + new_variance = math_ops.reduce_mean(new_variance, + axis=1, keepdims=True) + + def _do_update(var, value): + if in_eager_mode and not self.trainable: + return + + return self._assign_moving_average(var, value, self.momentum) + + mean_update = tf_utils.smart_cond( + training, + lambda: _do_update(self.moving_mean, new_mean), + lambda: self.moving_mean) + variance_update = tf_utils.smart_cond( + training, + lambda: _do_update(self.moving_variance, new_variance), + lambda: self.moving_variance) + if not context.executing_eagerly(): + self.add_update(mean_update, inputs=True) + self.add_update(variance_update, inputs=True) + + else: + mean, variance = self.moving_mean, self.moving_variance + + outputs = nn.batch_normalization(inputs, + _broadcast(mean), + _broadcast(variance), + offset, + scale, + self.epsilon) + # If some components of the shape got lost due to adjustments, fix that. 
+ outputs.set_shape(input_shape) + + if self.virtual_batch_size is not None: + outputs = undo_virtual_batching(outputs) + if not context.executing_eagerly() and original_training_value is None: + outputs._uses_learning_phase = True # pylint: disable=protected-access + return outputs + + def compute_output_shape(self, input_shape): + return input_shape def get_config(self): config = { @@ -133,5 +654,19 @@ class BatchNormalization(tf_normalization_layers.BatchNormalization, Layer): 'beta_constraint': constraints.serialize(self.beta_constraint), 'gamma_constraint': constraints.serialize(self.gamma_constraint) } + # Only add TensorFlow-specific parameters if they are set, so as to preserve + # model compatibility with external Keras. + if self.renorm: + config['renorm'] = True + config['renorm_clipping'] = self.renorm_clipping + config['renorm_momentum'] = self.renorm_momentum + if self.virtual_batch_size is not None: + config['virtual_batch_size'] = self.virtual_batch_size + # Note: adjustment is not serializable. + if self.adjustment is not None: + logging.warning('The `adjustment` function of this `BatchNormalization` ' + 'layer cannot be serialized and has been omitted from ' + 'the layer config. 
It will not be included when ' + 're-creating the layer from the saved config.') base_config = super(BatchNormalization, self).get_config() return dict(list(base_config.items()) + list(config.items())) diff --git a/tensorflow/python/keras/_impl/keras/layers/normalization_test.py b/tensorflow/python/keras/_impl/keras/layers/normalization_test.py index 2b3628c3f1023612297465bdf3286246261992a2..fa9277e3d1e5bb0b9633abc46a96a11816dddb2d 100644 --- a/tensorflow/python/keras/_impl/keras/layers/normalization_test.py +++ b/tensorflow/python/keras/_impl/keras/layers/normalization_test.py @@ -114,6 +114,26 @@ class NormalizationLayersTest(test.TestCase): np.testing.assert_allclose(np.mean(out, axis=(0, 2, 3)), 0.0, atol=1e-1) np.testing.assert_allclose(np.std(out, axis=(0, 2, 3)), 1.0, atol=1e-1) + def test_batchnorm_convnet_channel_last(self): + with self.test_session(): + # keras.backend.set_learning_phase(True) + + model = keras.models.Sequential() + norm = keras.layers.BatchNormalization( + axis=-1, input_shape=(4, 4, 3), momentum=0.8) + model.add(norm) + model.compile(loss='mse', optimizer='sgd') + + # centered on 5.0, variance 10.0 + x = np.random.normal(loc=5.0, scale=10.0, size=(1000, 4, 4, 3)) + model.fit(x, x, epochs=4, verbose=0) + out = model.predict(x) + out -= np.reshape(keras.backend.eval(norm.beta), (1, 1, 1, 3)) + out /= np.reshape(keras.backend.eval(norm.gamma), (1, 1, 1, 3)) + + np.testing.assert_allclose(np.mean(out, axis=(0, 1, 2)), 0.0, atol=1e-1) + np.testing.assert_allclose(np.std(out, axis=(0, 1, 2)), 1.0, atol=1e-1) + def test_shared_batchnorm(self): """Test that a BN layer can be shared across different data streams. 
""" diff --git a/tensorflow/python/keras/_impl/keras/layers/pooling.py b/tensorflow/python/keras/_impl/keras/layers/pooling.py index 15d53379769d8142f5b2755a07479f60751346d2..86bc8a680a529a9ea17592a42207fab58adeebce 100644 --- a/tensorflow/python/keras/_impl/keras/layers/pooling.py +++ b/tensorflow/python/keras/_impl/keras/layers/pooling.py @@ -19,16 +19,98 @@ from __future__ import division from __future__ import print_function from tensorflow.python.framework import tensor_shape -from tensorflow.python.keras._impl.keras import backend as K +from tensorflow.python.keras._impl.keras import backend from tensorflow.python.keras._impl.keras.engine import InputSpec from tensorflow.python.keras._impl.keras.engine import Layer from tensorflow.python.keras._impl.keras.utils import conv_utils -from tensorflow.python.layers import pooling as tf_pooling_layers +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import nn from tensorflow.python.util.tf_export import tf_export +class Pooling1D(Layer): + """Pooling layer for arbitrary pooling functions, for 1D inputs. + + This class only exists for code reuse. It will never be an exposed API. + + Arguments: + pool_function: The pooling function to apply, e.g. `tf.nn.max_pool`. + pool_size: An integer or tuple/list of a single integer, + representing the size of the pooling window. + strides: An integer or tuple/list of a single integer, specifying the + strides of the pooling operation. + padding: A string. The padding method, either 'valid' or 'same'. + Case-insensitive. + data_format: A string, one of `channels_last` (default) or `channels_first`. + The ordering of the dimensions in the inputs. + `channels_last` corresponds to inputs with shape + `(batch, length, channels)` while `channels_first` corresponds to + inputs with shape `(batch, channels, length)`. + name: A string, the name of the layer. 
+ """ + + def __init__(self, pool_function, pool_size, strides, + padding='valid', data_format=None, + name=None, **kwargs): + super(Pooling1D, self).__init__(name=name, **kwargs) + if data_format is None: + data_format = backend.image_data_format() + if strides is None: + strides = pool_size + self.pool_function = pool_function + self.pool_size = conv_utils.normalize_tuple(pool_size, 1, 'pool_size') + self.strides = conv_utils.normalize_tuple(strides, 1, 'strides') + self.padding = conv_utils.normalize_padding(padding) + self.data_format = conv_utils.normalize_data_format(data_format) + self.input_spec = InputSpec(ndim=3) + + def call(self, inputs): + # There is no TF op for 1D pooling, hence we make the inputs 4D. + if self.data_format == 'channels_last': + # input is NWC, make it NHWC + inputs = array_ops.expand_dims(inputs, 1) + # pool on the W dim + pool_shape = (1, 1) + self.pool_size + (1,) + strides = (1, 1) + self.strides + (1,) + data_format = 'NHWC' + else: + # input is NCW, make it NCHW + inputs = array_ops.expand_dims(inputs, 2) + # pool on the W dim + pool_shape = (1, 1, 1) + self.pool_size + strides = (1, 1, 1) + self.strides + data_format = 'NCHW' + + outputs = self.pool_function( + inputs, + ksize=pool_shape, + strides=strides, + padding=self.padding.upper(), + data_format=data_format) + + if self.data_format == 'channels_last': + return array_ops.squeeze(outputs, 1) + else: + return array_ops.squeeze(outputs, 2) + + def compute_output_shape(self, input_shape): + input_shape = tensor_shape.TensorShape(input_shape).as_list() + length = conv_utils.conv_output_length(input_shape[1], self.pool_size[0], + self.padding, self.strides[0]) + return tensor_shape.TensorShape([input_shape[0], length, input_shape[2]]) + + def get_config(self): + config = { + 'strides': self.strides, + 'pool_size': self.pool_size, + 'padding': self.padding + } + base_config = super(Pooling1D, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + 
@tf_export('keras.layers.MaxPool1D', 'keras.layers.MaxPooling1D') -class MaxPooling1D(tf_pooling_layers.MaxPooling1D, Layer): +class MaxPooling1D(Pooling1D): """Max pooling operation for temporal data. Arguments: @@ -45,23 +127,20 @@ class MaxPooling1D(tf_pooling_layers.MaxPooling1D, Layer): 3D tensor with shape: `(batch_size, downsampled_steps, features)`. """ - def __init__(self, pool_size=2, strides=None, padding='valid', **kwargs): - if strides is None: - strides = pool_size - super(MaxPooling1D, self).__init__(pool_size, strides, padding, **kwargs) + def __init__(self, pool_size=2, strides=None, + padding='valid', data_format=None, **kwargs): - def get_config(self): - config = { - 'strides': self.strides, - 'pool_size': self.pool_size, - 'padding': self.padding - } - base_config = super(MaxPooling1D, self).get_config() - return dict(list(base_config.items()) + list(config.items())) + super(MaxPooling1D, self).__init__( + nn.max_pool, + pool_size=pool_size, + strides=strides, + padding=padding, + data_format=data_format, + **kwargs) @tf_export('keras.layers.AveragePooling1D', 'keras.layers.AvgPool1D') -class AveragePooling1D(tf_pooling_layers.AveragePooling1D, Layer): +class AveragePooling1D(Pooling1D): """Average pooling for temporal data. Arguments: @@ -78,24 +157,104 @@ class AveragePooling1D(tf_pooling_layers.AveragePooling1D, Layer): 3D tensor with shape: `(batch_size, downsampled_steps, features)`. """ - def __init__(self, pool_size=2, strides=None, padding='valid', **kwargs): + def __init__(self, pool_size=2, strides=None, + padding='valid', data_format=None, **kwargs): + super(AveragePooling1D, self).__init__( + nn.avg_pool, + pool_size=pool_size, + strides=strides, + padding=padding, + data_format=data_format, + **kwargs) + + +class Pooling2D(Layer): + """Pooling layer for arbitrary pooling functions, for 2D inputs (e.g. images). + + This class only exists for code reuse. It will never be an exposed API. 
+ + Arguments: + pool_function: The pooling function to apply, e.g. `tf.nn.max_pool`. + pool_size: An integer or tuple/list of 2 integers: (pool_height, pool_width) + specifying the size of the pooling window. + Can be a single integer to specify the same value for + all spatial dimensions. + strides: An integer or tuple/list of 2 integers, + specifying the strides of the pooling operation. + Can be a single integer to specify the same value for + all spatial dimensions. + padding: A string. The padding method, either 'valid' or 'same'. + Case-insensitive. + data_format: A string, one of `channels_last` (default) or `channels_first`. + The ordering of the dimensions in the inputs. + `channels_last` corresponds to inputs with shape + `(batch, height, width, channels)` while `channels_first` corresponds to + inputs with shape `(batch, channels, height, width)`. + name: A string, the name of the layer. + """ + + def __init__(self, pool_function, pool_size, strides, + padding='valid', data_format=None, + name=None, **kwargs): + super(Pooling2D, self).__init__(name=name, **kwargs) + if data_format is None: + data_format = backend.image_data_format() if strides is None: strides = pool_size - super(AveragePooling1D, self).__init__(pool_size, strides, padding, - **kwargs) + self.pool_function = pool_function + self.pool_size = conv_utils.normalize_tuple(pool_size, 2, 'pool_size') + self.strides = conv_utils.normalize_tuple(strides, 2, 'strides') + self.padding = conv_utils.normalize_padding(padding) + self.data_format = conv_utils.normalize_data_format(data_format) + self.input_spec = InputSpec(ndim=4) + + def call(self, inputs): + if self.data_format == 'channels_last': + pool_shape = (1,) + self.pool_size + (1,) + strides = (1,) + self.strides + (1,) + else: + pool_shape = (1, 1) + self.pool_size + strides = (1, 1) + self.strides + outputs = self.pool_function( + inputs, + ksize=pool_shape, + strides=strides, + padding=self.padding.upper(), + 
data_format=conv_utils.convert_data_format(self.data_format, 4)) + return outputs + + def compute_output_shape(self, input_shape): + input_shape = tensor_shape.TensorShape(input_shape).as_list() + if self.data_format == 'channels_first': + rows = input_shape[2] + cols = input_shape[3] + else: + rows = input_shape[1] + cols = input_shape[2] + rows = conv_utils.conv_output_length(rows, self.pool_size[0], self.padding, + self.strides[0]) + cols = conv_utils.conv_output_length(cols, self.pool_size[1], self.padding, + self.strides[1]) + if self.data_format == 'channels_first': + return tensor_shape.TensorShape( + [input_shape[0], input_shape[1], rows, cols]) + else: + return tensor_shape.TensorShape( + [input_shape[0], rows, cols, input_shape[3]]) def get_config(self): config = { - 'strides': self.strides, 'pool_size': self.pool_size, - 'padding': self.padding + 'padding': self.padding, + 'strides': self.strides, + 'data_format': self.data_format } - base_config = super(AveragePooling1D, self).get_config() + base_config = super(Pooling2D, self).get_config() return dict(list(base_config.items()) + list(config.items())) @tf_export('keras.layers.MaxPool2D', 'keras.layers.MaxPooling2D') -class MaxPooling2D(tf_pooling_layers.MaxPooling2D, Layer): +class MaxPooling2D(Pooling2D): """Max pooling operation for spatial data. 
Arguments: @@ -142,26 +301,14 @@ class MaxPooling2D(tf_pooling_layers.MaxPooling2D, Layer): padding='valid', data_format=None, **kwargs): - if data_format is None: - data_format = K.image_data_format() - if strides is None: - strides = pool_size - super(MaxPooling2D, self).__init__(pool_size, strides, padding, data_format, - **kwargs) - - def get_config(self): - config = { - 'pool_size': self.pool_size, - 'padding': self.padding, - 'strides': self.strides, - 'data_format': self.data_format - } - base_config = super(MaxPooling2D, self).get_config() - return dict(list(base_config.items()) + list(config.items())) + super(MaxPooling2D, self).__init__( + nn.max_pool, + pool_size=pool_size, strides=strides, + padding=padding, data_format=data_format, **kwargs) @tf_export('keras.layers.AveragePooling2D', 'keras.layers.AvgPool2D') -class AveragePooling2D(tf_pooling_layers.AveragePooling2D, Layer): +class AveragePooling2D(Pooling2D): """Average pooling operation for spatial data. Arguments: @@ -208,12 +355,96 @@ class AveragePooling2D(tf_pooling_layers.AveragePooling2D, Layer): padding='valid', data_format=None, **kwargs): + super(AveragePooling2D, self).__init__( + nn.avg_pool, + pool_size=pool_size, strides=strides, + padding=padding, data_format=data_format, **kwargs) + + +class Pooling3D(Layer): + """Pooling layer for arbitrary pooling functions, for 3D inputs. + + This class only exists for code reuse. It will never be an exposed API. + + Arguments: + pool_function: The pooling function to apply, e.g. `tf.nn.max_pool`. + pool_size: An integer or tuple/list of 3 integers: + (pool_depth, pool_height, pool_width) + specifying the size of the pooling window. + Can be a single integer to specify the same value for + all spatial dimensions. + strides: An integer or tuple/list of 3 integers, + specifying the strides of the pooling operation. + Can be a single integer to specify the same value for + all spatial dimensions. + padding: A string. 
The padding method, either 'valid' or 'same'. + Case-insensitive. + data_format: A string, one of `channels_last` (default) or `channels_first`. + The ordering of the dimensions in the inputs. + `channels_last` corresponds to inputs with shape + `(batch, depth, height, width, channels)` + while `channels_first` corresponds to + inputs with shape `(batch, channels, depth, height, width)`. + name: A string, the name of the layer. + """ + + def __init__(self, pool_function, pool_size, strides, + padding='valid', data_format='channels_last', + name=None, **kwargs): + super(Pooling3D, self).__init__(name=name, **kwargs) if data_format is None: - data_format = K.image_data_format() + data_format = backend.image_data_format() if strides is None: strides = pool_size - super(AveragePooling2D, self).__init__(pool_size, strides, padding, - data_format, **kwargs) + self.pool_function = pool_function + self.pool_size = conv_utils.normalize_tuple(pool_size, 3, 'pool_size') + self.strides = conv_utils.normalize_tuple(strides, 3, 'strides') + self.padding = conv_utils.normalize_padding(padding) + self.data_format = conv_utils.normalize_data_format(data_format) + self.input_spec = InputSpec(ndim=5) + + def call(self, inputs): + pool_shape = (1,) + self.pool_size + (1,) + strides = (1,) + self.strides + (1,) + + if self.data_format == 'channels_first': + # TF does not support `channels_first` with 3D pooling operations, + # so we must handle this case manually. + # TODO(fchollet): remove this when TF pooling is feature-complete. 
+ inputs = array_ops.transpose(inputs, (0, 2, 3, 4, 1)) + + outputs = self.pool_function( + inputs, + ksize=pool_shape, + strides=strides, + padding=self.padding.upper()) + + if self.data_format == 'channels_first': + outputs = array_ops.transpose(outputs, (0, 4, 1, 2, 3)) + return outputs + + def compute_output_shape(self, input_shape): + input_shape = tensor_shape.TensorShape(input_shape).as_list() + if self.data_format == 'channels_first': + len_dim1 = input_shape[2] + len_dim2 = input_shape[3] + len_dim3 = input_shape[4] + else: + len_dim1 = input_shape[1] + len_dim2 = input_shape[2] + len_dim3 = input_shape[3] + len_dim1 = conv_utils.conv_output_length(len_dim1, self.pool_size[0], + self.padding, self.strides[0]) + len_dim2 = conv_utils.conv_output_length(len_dim2, self.pool_size[1], + self.padding, self.strides[1]) + len_dim3 = conv_utils.conv_output_length(len_dim3, self.pool_size[2], + self.padding, self.strides[2]) + if self.data_format == 'channels_first': + return tensor_shape.TensorShape( + [input_shape[0], input_shape[1], len_dim1, len_dim2, len_dim3]) + else: + return tensor_shape.TensorShape( + [input_shape[0], len_dim1, len_dim2, len_dim3, input_shape[4]]) def get_config(self): config = { @@ -222,12 +453,12 @@ class AveragePooling2D(tf_pooling_layers.AveragePooling2D, Layer): 'strides': self.strides, 'data_format': self.data_format } - base_config = super(AveragePooling2D, self).get_config() + base_config = super(Pooling3D, self).get_config() return dict(list(base_config.items()) + list(config.items())) @tf_export('keras.layers.MaxPool3D', 'keras.layers.MaxPooling3D') -class MaxPooling3D(tf_pooling_layers.MaxPooling3D, Layer): +class MaxPooling3D(Pooling3D): """Max pooling operation for 3D data (spatial or spatio-temporal). 
Arguments: @@ -270,26 +501,14 @@ class MaxPooling3D(tf_pooling_layers.MaxPooling3D, Layer): padding='valid', data_format=None, **kwargs): - if data_format is None: - data_format = K.image_data_format() - if strides is None: - strides = pool_size - super(MaxPooling3D, self).__init__(pool_size, strides, padding, data_format, - **kwargs) - - def get_config(self): - config = { - 'pool_size': self.pool_size, - 'padding': self.padding, - 'strides': self.strides, - 'data_format': self.data_format - } - base_config = super(MaxPooling3D, self).get_config() - return dict(list(base_config.items()) + list(config.items())) + super(MaxPooling3D, self).__init__( + nn.max_pool3d, + pool_size=pool_size, strides=strides, + padding=padding, data_format=data_format, **kwargs) @tf_export('keras.layers.AveragePooling3D', 'keras.layers.AvgPool3D') -class AveragePooling3D(tf_pooling_layers.AveragePooling3D, Layer): +class AveragePooling3D(Pooling3D): """Average pooling operation for 3D data (spatial or spatio-temporal). Arguments: @@ -332,30 +551,18 @@ class AveragePooling3D(tf_pooling_layers.AveragePooling3D, Layer): padding='valid', data_format=None, **kwargs): - if data_format is None: - data_format = K.image_data_format() - if strides is None: - strides = pool_size - super(AveragePooling3D, self).__init__(pool_size, strides, padding, - data_format, **kwargs) - - def get_config(self): - config = { - 'pool_size': self.pool_size, - 'padding': self.padding, - 'strides': self.strides, - 'data_format': self.data_format - } - base_config = super(AveragePooling3D, self).get_config() - return dict(list(base_config.items()) + list(config.items())) + super(AveragePooling3D, self).__init__( + nn.avg_pool3d, + pool_size=pool_size, strides=strides, + padding=padding, data_format=data_format, **kwargs) -class _GlobalPooling1D(Layer): +class GlobalPooling1D(Layer): """Abstract class for different global pooling 1D layers. 
""" def __init__(self, **kwargs): - super(_GlobalPooling1D, self).__init__(**kwargs) + super(GlobalPooling1D, self).__init__(**kwargs) self.input_spec = InputSpec(ndim=3) def compute_output_shape(self, input_shape): @@ -368,7 +575,7 @@ class _GlobalPooling1D(Layer): @tf_export('keras.layers.GlobalAveragePooling1D', 'keras.layers.GlobalAvgPool1D') -class GlobalAveragePooling1D(_GlobalPooling1D): +class GlobalAveragePooling1D(GlobalPooling1D): """Global average pooling operation for temporal data. Input shape: @@ -380,11 +587,11 @@ class GlobalAveragePooling1D(_GlobalPooling1D): """ def call(self, inputs): - return K.mean(inputs, axis=1) + return backend.mean(inputs, axis=1) @tf_export('keras.layers.GlobalMaxPool1D', 'keras.layers.GlobalMaxPooling1D') -class GlobalMaxPooling1D(_GlobalPooling1D): +class GlobalMaxPooling1D(GlobalPooling1D): """Global max pooling operation for temporal data. Input shape: @@ -396,15 +603,15 @@ class GlobalMaxPooling1D(_GlobalPooling1D): """ def call(self, inputs): - return K.max(inputs, axis=1) + return backend.max(inputs, axis=1) -class _GlobalPooling2D(Layer): +class GlobalPooling2D(Layer): """Abstract class for different global pooling 2D layers. """ def __init__(self, data_format=None, **kwargs): - super(_GlobalPooling2D, self).__init__(**kwargs) + super(GlobalPooling2D, self).__init__(**kwargs) self.data_format = conv_utils.normalize_data_format(data_format) self.input_spec = InputSpec(ndim=4) @@ -420,13 +627,13 @@ class _GlobalPooling2D(Layer): def get_config(self): config = {'data_format': self.data_format} - base_config = super(_GlobalPooling2D, self).get_config() + base_config = super(GlobalPooling2D, self).get_config() return dict(list(base_config.items()) + list(config.items())) @tf_export('keras.layers.GlobalAveragePooling2D', 'keras.layers.GlobalAvgPool2D') -class GlobalAveragePooling2D(_GlobalPooling2D): +class GlobalAveragePooling2D(GlobalPooling2D): """Global average pooling operation for spatial data. 
Arguments: @@ -456,13 +663,13 @@ class GlobalAveragePooling2D(_GlobalPooling2D): def call(self, inputs): if self.data_format == 'channels_last': - return K.mean(inputs, axis=[1, 2]) + return backend.mean(inputs, axis=[1, 2]) else: - return K.mean(inputs, axis=[2, 3]) + return backend.mean(inputs, axis=[2, 3]) @tf_export('keras.layers.GlobalMaxPool2D', 'keras.layers.GlobalMaxPooling2D') -class GlobalMaxPooling2D(_GlobalPooling2D): +class GlobalMaxPooling2D(GlobalPooling2D): """Global max pooling operation for spatial data. Arguments: @@ -492,17 +699,17 @@ class GlobalMaxPooling2D(_GlobalPooling2D): def call(self, inputs): if self.data_format == 'channels_last': - return K.max(inputs, axis=[1, 2]) + return backend.max(inputs, axis=[1, 2]) else: - return K.max(inputs, axis=[2, 3]) + return backend.max(inputs, axis=[2, 3]) -class _GlobalPooling3D(Layer): +class GlobalPooling3D(Layer): """Abstract class for different global pooling 3D layers. """ def __init__(self, data_format=None, **kwargs): - super(_GlobalPooling3D, self).__init__(**kwargs) + super(GlobalPooling3D, self).__init__(**kwargs) self.data_format = conv_utils.normalize_data_format(data_format) self.input_spec = InputSpec(ndim=5) @@ -518,13 +725,13 @@ class _GlobalPooling3D(Layer): def get_config(self): config = {'data_format': self.data_format} - base_config = super(_GlobalPooling3D, self).get_config() + base_config = super(GlobalPooling3D, self).get_config() return dict(list(base_config.items()) + list(config.items())) @tf_export('keras.layers.GlobalAveragePooling3D', 'keras.layers.GlobalAvgPool3D') -class GlobalAveragePooling3D(_GlobalPooling3D): +class GlobalAveragePooling3D(GlobalPooling3D): """Global Average pooling operation for 3D data. 
Arguments: @@ -554,13 +761,13 @@ class GlobalAveragePooling3D(_GlobalPooling3D): def call(self, inputs): if self.data_format == 'channels_last': - return K.mean(inputs, axis=[1, 2, 3]) + return backend.mean(inputs, axis=[1, 2, 3]) else: - return K.mean(inputs, axis=[2, 3, 4]) + return backend.mean(inputs, axis=[2, 3, 4]) @tf_export('keras.layers.GlobalMaxPool3D', 'keras.layers.GlobalMaxPooling3D') -class GlobalMaxPooling3D(_GlobalPooling3D): +class GlobalMaxPooling3D(GlobalPooling3D): """Global Max pooling operation for 3D data. Arguments: @@ -590,9 +797,9 @@ class GlobalMaxPooling3D(_GlobalPooling3D): def call(self, inputs): if self.data_format == 'channels_last': - return K.max(inputs, axis=[1, 2, 3]) + return backend.max(inputs, axis=[1, 2, 3]) else: - return K.max(inputs, axis=[2, 3, 4]) + return backend.max(inputs, axis=[2, 3, 4]) # Aliases diff --git a/tensorflow/python/keras/_impl/keras/layers/pooling_test.py b/tensorflow/python/keras/_impl/keras/layers/pooling_test.py index bb003c1dddf80e2a745c1268a3a7d045f4e8b036..2c08b647ea0fafb7519240b0c81e8fa77f034f7f 100644 --- a/tensorflow/python/keras/_impl/keras/layers/pooling_test.py +++ b/tensorflow/python/keras/_impl/keras/layers/pooling_test.py @@ -27,14 +27,14 @@ from tensorflow.python.platform import test class GlobalPoolingTest(test.TestCase): - @tf_test_util.run_in_graph_and_eager_modes(use_gpu=True) + @tf_test_util.run_in_graph_and_eager_modes() def test_globalpooling_1d(self): testing_utils.layer_test(keras.layers.pooling.GlobalMaxPooling1D, input_shape=(3, 4, 5)) testing_utils.layer_test( keras.layers.pooling.GlobalAveragePooling1D, input_shape=(3, 4, 5)) - @tf_test_util.run_in_graph_and_eager_modes(use_gpu=True) + @tf_test_util.run_in_graph_and_eager_modes() def test_globalpooling_2d(self): testing_utils.layer_test( keras.layers.pooling.GlobalMaxPooling2D, @@ -53,7 +53,7 @@ class GlobalPoolingTest(test.TestCase): kwargs={'data_format': 'channels_last'}, input_shape=(3, 5, 6, 4)) - 
@tf_test_util.run_in_graph_and_eager_modes(use_gpu=True) + @tf_test_util.run_in_graph_and_eager_modes() def test_globalpooling_3d(self): testing_utils.layer_test( keras.layers.pooling.GlobalMaxPooling3D, @@ -75,7 +75,7 @@ class GlobalPoolingTest(test.TestCase): class Pooling2DTest(test.TestCase): - @tf_test_util.run_in_graph_and_eager_modes(use_gpu=True) + @tf_test_util.run_in_graph_and_eager_modes() def test_maxpooling_2d(self): pool_size = (3, 3) for strides in [(1, 1), (2, 2)]: @@ -88,7 +88,7 @@ class Pooling2DTest(test.TestCase): }, input_shape=(3, 5, 6, 4)) - @tf_test_util.run_in_graph_and_eager_modes(use_gpu=True) + @tf_test_util.run_in_graph_and_eager_modes() def test_averagepooling_2d(self): testing_utils.layer_test( keras.layers.AveragePooling2D, @@ -122,7 +122,7 @@ class Pooling2DTest(test.TestCase): class Pooling3DTest(test.TestCase): - @tf_test_util.run_in_graph_and_eager_modes(use_gpu=True) + @tf_test_util.run_in_graph_and_eager_modes() def test_maxpooling_3d(self): pool_size = (3, 3, 3) testing_utils.layer_test( @@ -141,7 +141,7 @@ class Pooling3DTest(test.TestCase): }, input_shape=(3, 4, 11, 12, 10)) - @tf_test_util.run_in_graph_and_eager_modes(use_gpu=True) + @tf_test_util.run_in_graph_and_eager_modes() def test_averagepooling_3d(self): pool_size = (3, 3, 3) testing_utils.layer_test( @@ -163,7 +163,7 @@ class Pooling3DTest(test.TestCase): class Pooling1DTest(test.TestCase): - @tf_test_util.run_in_graph_and_eager_modes(use_gpu=True) + @tf_test_util.run_in_graph_and_eager_modes() def test_maxpooling_1d(self): for padding in ['valid', 'same']: for stride in [1, 2]: @@ -173,7 +173,7 @@ class Pooling1DTest(test.TestCase): 'padding': padding}, input_shape=(3, 5, 4)) - @tf_test_util.run_in_graph_and_eager_modes(use_gpu=True) + @tf_test_util.run_in_graph_and_eager_modes() def test_averagepooling_1d(self): for padding in ['valid', 'same']: for stride in [1, 2]: diff --git a/tensorflow/python/keras/_impl/keras/layers/recurrent.py 
b/tensorflow/python/keras/_impl/keras/layers/recurrent.py index 791f9b311300ed05591083d551c040eb25ac8e22..f53db987ff379a70d3fd43bbc3442646635e3bd6 100644 --- a/tensorflow/python/keras/_impl/keras/layers/recurrent.py +++ b/tensorflow/python/keras/_impl/keras/layers/recurrent.py @@ -33,6 +33,9 @@ from tensorflow.python.keras._impl.keras.engine import InputSpec from tensorflow.python.keras._impl.keras.engine import Layer from tensorflow.python.keras._impl.keras.engine.base_layer import shape_type_conversion from tensorflow.python.keras._impl.keras.utils.generic_utils import has_arg +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import state_ops from tensorflow.python.platform import tf_logging as logging from tensorflow.python.util.tf_export import tf_export @@ -248,7 +251,7 @@ class RNN(Layer): It is also possible for `cell` to be a list of RNN cell instances, in which cases the cells get stacked on after the other in the RNN, implementing an efficient stacked RNN. - return_sequences: Boolean. Whether to return the last output. + return_sequences: Boolean. Whether to return the last output in the output sequence, or the full sequence. return_state: Boolean. Whether to return the last state in addition to the output. 
@@ -503,9 +506,12 @@ class RNN(Layer): def get_initial_state(self, inputs): # build an all-zero tensor of shape (samples, output_dim) - initial_state = K.zeros_like(inputs) # (samples, timesteps, input_dim) - initial_state = K.sum(initial_state, axis=(1, 2)) # (samples,) - initial_state = K.expand_dims(initial_state) # (samples, 1) + initial_state = array_ops.zeros_like(inputs) + # shape of initial_state = (samples, timesteps, input_dim) + initial_state = math_ops.reduce_sum(initial_state, axis=(1, 2)) + # shape of initial_state = (samples,) + initial_state = array_ops.expand_dims(initial_state, axis=-1) + # shape of initial_state = (samples, 1) if hasattr(self.cell.state_size, '__len__'): return [K.tile(initial_state, [1, dim]) for dim in self.cell.state_size] else: @@ -631,7 +637,7 @@ class RNN(Layer): if self.stateful: updates = [] for i in range(len(states)): - updates.append(K.update(self.states[i], states[i])) + updates.append(state_ops.assign(self.states[i], states[i])) self.add_update(updates, inputs) if self.return_sequences: @@ -791,10 +797,10 @@ class RNN(Layer): @property def losses(self): - losses = [] + layer_losses = super(RNN, self).losses if isinstance(self.cell, Layer): - losses += self.cell.losses - return losses + self._losses + return self.cell.losses + layer_losses + return layer_losses @property def updates(self): @@ -907,8 +913,7 @@ class SimpleRNNCell(Layer): prev_output = states[0] if 0 < self.dropout < 1 and self._dropout_mask is None: self._dropout_mask = _generate_dropout_mask( - _generate_dropout_ones(inputs, - K.shape(inputs)[-1]), + _generate_dropout_ones(inputs, array_ops.shape(inputs)[-1]), self.dropout, training=training) if (0 < self.recurrent_dropout < 1 and @@ -1012,7 +1017,7 @@ class SimpleRNN(RNN): recurrent_dropout: Float between 0 and 1. Fraction of the units to drop for the linear transformation of the recurrent state. - return_sequences: Boolean. Whether to return the last output. + return_sequences: Boolean. 
Whether to return the last output in the output sequence, or the full sequence. return_state: Boolean. Whether to return the last state in addition to the output. @@ -1232,6 +1237,9 @@ class GRUCell(Layer): batch them into fewer, larger operations. These modes will have different performance profiles on different hardware and for different applications. + reset_after: GRU convention (whether to apply reset gate after or + before matrix multiplication). False = "before" (default), + True = "after" (CuDNN compatible). """ def __init__(self, @@ -1251,6 +1259,7 @@ class GRUCell(Layer): dropout=0., recurrent_dropout=0., implementation=1, + reset_after=False, **kwargs): super(GRUCell, self).__init__(**kwargs) self.units = units @@ -1273,6 +1282,7 @@ class GRUCell(Layer): self.dropout = min(1., max(0., dropout)) self.recurrent_dropout = min(1., max(0., recurrent_dropout)) self.implementation = implementation + self.reset_after = reset_after self.state_size = self.units self._dropout_mask = None self._recurrent_dropout_mask = None @@ -1294,12 +1304,25 @@ class GRUCell(Layer): constraint=self.recurrent_constraint) if self.use_bias: - self.bias = self.add_weight( - shape=(self.units * 3,), - name='bias', - initializer=self.bias_initializer, - regularizer=self.bias_regularizer, - constraint=self.bias_constraint) + if not self.reset_after: + bias_shape = (3 * self.units,) + else: + # separate biases for input and recurrent kernels + # Note: the shape is intentionally different from CuDNNGRU biases + # `(2 * 3 * self.units,)`, so that we can distinguish the classes + # when loading and converting saved weights. 
+ bias_shape = (2, 3 * self.units) + self.bias = self.add_weight(shape=bias_shape, + name='bias', + initializer=self.bias_initializer, + regularizer=self.bias_regularizer, + constraint=self.bias_constraint) + if not self.reset_after: + self.input_bias, self.recurrent_bias = self.bias, None + else: + self.input_bias = K.flatten(self.bias[0]) + self.recurrent_bias = K.flatten(self.bias[1]) + else: self.bias = None self.built = True @@ -1309,8 +1332,7 @@ class GRUCell(Layer): if 0 < self.dropout < 1 and self._dropout_mask is None: self._dropout_mask = _generate_dropout_mask( - _generate_dropout_ones(inputs, - K.shape(inputs)[-1]), + _generate_dropout_ones(inputs, array_ops.shape(inputs)[-1]), self.dropout, training=training, count=3) @@ -1336,13 +1358,15 @@ class GRUCell(Layer): inputs_z = inputs inputs_r = inputs inputs_h = inputs + x_z = K.dot(inputs_z, self.kernel[:, :self.units]) x_r = K.dot(inputs_r, self.kernel[:, self.units:self.units * 2]) x_h = K.dot(inputs_h, self.kernel[:, self.units * 2:]) + if self.use_bias: - x_z = K.bias_add(x_z, self.bias[:self.units]) - x_r = K.bias_add(x_r, self.bias[self.units:self.units * 2]) - x_h = K.bias_add(x_h, self.bias[self.units * 2:]) + x_z = K.bias_add(x_z, self.input_bias[:self.units]) + x_r = K.bias_add(x_r, self.input_bias[self.units: self.units * 2]) + x_h = K.bias_add(x_h, self.input_bias[self.units * 2:]) if 0. 
< self.recurrent_dropout < 1.: h_tm1_z = h_tm1 * rec_dp_mask[0] @@ -1352,42 +1376,70 @@ class GRUCell(Layer): h_tm1_z = h_tm1 h_tm1_r = h_tm1 h_tm1_h = h_tm1 - z = self.recurrent_activation( - x_z + K.dot(h_tm1_z, self.recurrent_kernel[:, :self.units])) - r = self.recurrent_activation( - x_r + K.dot(h_tm1_r, self.recurrent_kernel[:, self.units: - self.units * 2])) - - hh = self.activation(x_h + K.dot(r * h_tm1_h, - self.recurrent_kernel[:, - self.units * 2:])) + + recurrent_z = K.dot(h_tm1_z, self.recurrent_kernel[:, :self.units]) + recurrent_r = K.dot(h_tm1_r, + self.recurrent_kernel[:, self.units:self.units * 2]) + if self.reset_after and self.use_bias: + recurrent_z = K.bias_add(recurrent_z, self.recurrent_bias[:self.units]) + recurrent_r = K.bias_add(recurrent_r, + self.recurrent_bias[self.units: + self.units * 2]) + + z = self.recurrent_activation(x_z + recurrent_z) + r = self.recurrent_activation(x_r + recurrent_r) + + # reset gate applied after/before matrix multiplication + if self.reset_after: + recurrent_h = K.dot(h_tm1_h, self.recurrent_kernel[:, self.units * 2:]) + if self.use_bias: + recurrent_h = K.bias_add(recurrent_h, + self.recurrent_bias[self.units * 2:]) + recurrent_h = r * recurrent_h + else: + recurrent_h = K.dot(r * h_tm1_h, + self.recurrent_kernel[:, self.units * 2:]) + + hh = self.activation(x_h + recurrent_h) else: if 0. < self.dropout < 1.: inputs *= dp_mask[0] + + # inputs projected by all gate matrices at once matrix_x = K.dot(inputs, self.kernel) if self.use_bias: - matrix_x = K.bias_add(matrix_x, self.bias) + # biases: bias_z_i, bias_r_i, bias_h_i + matrix_x = K.bias_add(matrix_x, self.input_bias) + + x_z = matrix_x[:, :self.units] + x_r = matrix_x[:, self.units: 2 * self.units] + x_h = matrix_x[:, 2 * self.units:] + if 0. 
< self.recurrent_dropout < 1.: h_tm1 *= rec_dp_mask[0] matrix_inner = K.dot(h_tm1, self.recurrent_kernel[:, :2 * self.units]) - x_z = matrix_x[:, :self.units] - x_r = matrix_x[:, self.units:2 * self.units] recurrent_z = matrix_inner[:, :self.units] recurrent_r = matrix_inner[:, self.units:2 * self.units] z = self.recurrent_activation(x_z + recurrent_z) r = self.recurrent_activation(x_r + recurrent_r) - x_h = matrix_x[:, 2 * self.units:] - recurrent_h = K.dot(r * h_tm1, self.recurrent_kernel[:, 2 * self.units:]) + if self.reset_after: + recurrent_h = r * matrix_inner[:, 2 * self.units:] + else: + recurrent_h = K.dot(r * h_tm1, + self.recurrent_kernel[:, 2 * self.units:]) + hh = self.activation(x_h + recurrent_h) + # previous and candidate state mixed by update gate h = z * h_tm1 + (1 - z) * hh if 0 < self.dropout + self.recurrent_dropout: if training is None and not context.executing_eagerly(): # This would be harmless to set in eager mode, but eager tensors # disallow setting arbitrary attributes. h._uses_learning_phase = True + return h, [h] def get_config(self): @@ -1411,7 +1463,8 @@ class GRUCell(Layer): 'bias_constraint': constraints.serialize(self.bias_constraint), 'dropout': self.dropout, 'recurrent_dropout': self.recurrent_dropout, - 'implementation': self.implementation + 'implementation': self.implementation, + 'reset_after': self.reset_after } base_config = super(GRUCell, self).get_config() return dict(list(base_config.items()) + list(config.items())) @@ -1419,9 +1472,16 @@ class GRUCell(Layer): @tf_export('keras.layers.GRU') class GRU(RNN): - """Gated Recurrent Unit - Cho et al. + """Gated Recurrent Unit - Cho et al. 2014. - 2014. + There are two variants. The default one is based on 1406.1078v3 and + has reset gate applied to hidden state before matrix multiplication. The + other one is based on original 1406.1078v1 and has the order reversed. + + The second variant is compatible with CuDNNGRU (GPU-only) and allows + inference on CPU. 
Thus it has separate biases for `kernel` and + `recurrent_kernel`. Use `'reset_after'=True` and + `recurrent_activation='sigmoid'`. Arguments: units: Positive integer, dimensionality of the output space. @@ -1465,7 +1525,7 @@ class GRU(RNN): batch them into fewer, larger operations. These modes will have different performance profiles on different hardware and for different applications. - return_sequences: Boolean. Whether to return the last output. + return_sequences: Boolean. Whether to return the last output in the output sequence, or the full sequence. return_state: Boolean. Whether to return the last state in addition to the output. @@ -1481,6 +1541,9 @@ class GRU(RNN): Unrolling can speed-up a RNN, although it tends to be more memory-intensive. Unrolling is only suitable for short sequences. + reset_after: GRU convention (whether to apply reset gate after or + before matrix multiplication). False = "before" (default), + True = "after" (CuDNN compatible). """ @@ -1507,6 +1570,7 @@ class GRU(RNN): go_backwards=False, stateful=False, unroll=False, + reset_after=False, **kwargs): if implementation == 0: logging.warning('`implementation=0` has been deprecated, ' @@ -1528,7 +1592,8 @@ class GRU(RNN): bias_constraint=bias_constraint, dropout=dropout, recurrent_dropout=recurrent_dropout, - implementation=implementation) + implementation=implementation, + reset_after=reset_after) super(GRU, self).__init__( cell, return_sequences=return_sequences, @@ -1609,6 +1674,10 @@ class GRU(RNN): def implementation(self): return self.cell.implementation + @property + def reset_after(self): + return self.cell.reset_after + def get_config(self): config = { 'units': @@ -1644,7 +1713,9 @@ class GRU(RNN): 'recurrent_dropout': self.recurrent_dropout, 'implementation': - self.implementation + self.implementation, + 'reset_after': + self.reset_after } base_config = super(GRU, self).get_config() del base_config['cell'] @@ -1793,8 +1864,7 @@ class LSTMCell(Layer): def call(self, inputs, 
states, training=None): if 0 < self.dropout < 1 and self._dropout_mask is None: self._dropout_mask = _generate_dropout_mask( - _generate_dropout_ones(inputs, - K.shape(inputs)[-1]), + _generate_dropout_ones(inputs, array_ops.shape(inputs)[-1]), self.dropout, training=training, count=4) @@ -1926,7 +1996,7 @@ class LSTMCell(Layer): @tf_export('keras.layers.LSTM') class LSTM(RNN): - """Long-Short Term Memory layer - Hochreiter 1997. + """Long Short-Term Memory layer - Hochreiter 1997. Arguments: units: Positive integer, dimensionality of the output space. @@ -2176,7 +2246,7 @@ class LSTM(RNN): def _generate_dropout_ones(inputs, dims): - return K.ones((K.shape(inputs)[0], dims)) + return K.ones((array_ops.shape(inputs)[0], dims)) def _generate_dropout_mask(ones, rate, training=None, count=1): @@ -2351,9 +2421,12 @@ class Recurrent(Layer): def get_initial_state(self, inputs): # build an all-zero tensor of shape (samples, output_dim) - initial_state = K.zeros_like(inputs) # (samples, timesteps, input_dim) - initial_state = K.sum(initial_state, axis=(1, 2)) # (samples,) - initial_state = K.expand_dims(initial_state) # (samples, 1) + initial_state = array_ops.zeros_like(inputs) + # shape of initial_state = (samples, timesteps, input_dim) + initial_state = math_ops.reduce_sum(initial_state, axis=(1, 2)) + # shape of initial_state = (samples,) + initial_state = array_ops.expand_dims(initial_state, axis=-1) + # shape of initial_state = (samples, 1) initial_state = K.tile(initial_state, [1, self.units]) # (samples, output_dim) initial_state = [initial_state for _ in range(len(self.states))] @@ -2456,7 +2529,7 @@ class Recurrent(Layer): if self.stateful: updates = [] for i in range(len(states)): - updates.append(K.update(self.states[i], states[i])) + updates.append(state_ops.assign(self.states[i], states[i])) self.add_update(updates, inputs) # Properly set learning phase diff --git a/tensorflow/python/keras/_impl/keras/layers/recurrent_test.py 
b/tensorflow/python/keras/_impl/keras/layers/recurrent_test.py index de022153f6f07240a0dff70e5faeed5b6d4a8c5f..4c68c18825a47d87806a7a09d4054f974d569e00 100644 --- a/tensorflow/python/keras/_impl/keras/layers/recurrent_test.py +++ b/tensorflow/python/keras/_impl/keras/layers/recurrent_test.py @@ -24,6 +24,9 @@ from __future__ import print_function import numpy as np from tensorflow.python.keras._impl import keras +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import state_ops from tensorflow.python.platform import test @@ -229,6 +232,7 @@ class RNNTest(test.TestCase): cell = RNNCellWithConstants(32) layer = keras.layers.RNN(cell) y = layer(x, constants=c) + model = keras.models.Model([x, c], y) model.compile(optimizer='rmsprop', loss='mse') model.train_on_batch( @@ -276,6 +280,20 @@ class RNNTest(test.TestCase): np.zeros((6, 32)) ) + with self.test_session(): + # Test GRUCell reset_after property. + x = keras.Input((None, 5)) + c = keras.Input((3,)) + cells = [keras.layers.recurrent.GRUCell(32, reset_after=True)] + layer = keras.layers.recurrent.RNN(cells) + y = layer(x, constants=c) + model = keras.models.Model([x, c], y) + model.compile(optimizer='rmsprop', loss='mse') + model.train_on_batch( + [np.zeros((6, 5, 5)), np.zeros((6, 3))], + np.zeros((6, 32)) + ) + with self.test_session(): # Test stacked RNN serialization x_np = np.random.random((6, 5, 5)) @@ -395,8 +413,8 @@ class RNNTest(test.TestCase): # Test `get_losses_for` and `losses` x = keras.Input((None, 1)) - loss_1 = keras.backend.sum(x) - loss_2 = keras.backend.sum(cells[0].kernel) + loss_1 = math_ops.reduce_sum(x) + loss_2 = math_ops.reduce_sum(cells[0].kernel) cells[0].add_loss(loss_1, inputs=x) cells[0].add_loss(loss_2) self.assertEqual(len(layer.losses), 2) @@ -410,15 +428,15 @@ class RNNTest(test.TestCase): layer.build((None, None, 1)) x = keras.Input((None, 1)) - update_1 = keras.backend.update_add( - cells[0].kernel, x[0, 0, 0] * 
cells[0].kernel) - update_2 = keras.backend.update_add( - cells[0].kernel, keras.backend.ones_like(cells[0].kernel)) + update_1 = state_ops.assign_add(cells[0].kernel, + x[0, 0, 0] * cells[0].kernel) + update_2 = state_ops.assign_add(cells[0].kernel, + array_ops.ones_like(cells[0].kernel)) cells[0].add_update(update_1, inputs=x) cells[0].add_update(update_2) self.assertEqual(len(layer.updates), 2) - self.assertEqual(layer.get_updates_for(None), [update_2]) - self.assertEqual(layer.get_updates_for(x), [update_1]) + self.assertEqual(len(layer.get_updates_for(None)), 1) + self.assertEqual(len(layer.get_updates_for(x)), 1) def test_rnn_dynamic_trainability(self): layer_class = keras.layers.SimpleRNN @@ -538,6 +556,5 @@ class RNNTest(test.TestCase): [tuple(o.as_list()) for o in output_shape], expected_output_shape) - if __name__ == '__main__': test.main() diff --git a/tensorflow/python/keras/_impl/keras/layers/wrappers.py b/tensorflow/python/keras/_impl/keras/layers/wrappers.py index 76ddd9299dd669da35d89a6fe8fc521ce4c26337..9aee5f03b6d79f0b363f79d2b7a18c0b20a2883f 100644 --- a/tensorflow/python/keras/_impl/keras/layers/wrappers.py +++ b/tensorflow/python/keras/_impl/keras/layers/wrappers.py @@ -23,11 +23,12 @@ import copy from tensorflow.python.framework import tensor_shape from tensorflow.python.keras._impl.keras import backend as K +from tensorflow.python.keras._impl.keras.engine import base_layer from tensorflow.python.keras._impl.keras.engine import InputSpec from tensorflow.python.keras._impl.keras.engine import Layer from tensorflow.python.keras._impl.keras.engine.base_layer import shape_type_conversion from tensorflow.python.keras._impl.keras.utils.generic_utils import has_arg -from tensorflow.python.layers import utils as tf_layers_util +from tensorflow.python.ops import array_ops from tensorflow.python.util.tf_export import tf_export @@ -209,11 +210,11 @@ class TimeDistributed(Wrapper): # We can go with reshape-based implementation for performance. 
input_length = input_shape[1] if not input_length: - input_length = K.shape(inputs)[1] + input_length = array_ops.shape(inputs)[1] # Shape: (num_samples * timesteps, ...). And track the # transformation in self._input_map. - input_uid = tf_layers_util.object_list_uid(inputs) - inputs = K.reshape(inputs, (-1,) + input_shape[2:]) + input_uid = base_layer.object_list_uid(inputs) + inputs = array_ops.reshape(inputs, (-1,) + input_shape[2:]) self._input_map[input_uid] = inputs # (num_samples * timesteps, ...) y = self.layer.call(inputs, **kwargs) @@ -221,7 +222,7 @@ class TimeDistributed(Wrapper): uses_learning_phase = y._uses_learning_phase # Shape: (num_samples, timesteps, ...) output_shape = self.compute_output_shape(input_shape).as_list() - y = K.reshape(y, (-1, input_length) + tuple(output_shape[2:])) + y = array_ops.reshape(y, (-1, input_length) + tuple(output_shape[2:])) # Apply activity regularizer if any: if (hasattr(self.layer, 'activity_regularizer') and diff --git a/tensorflow/python/keras/_impl/keras/losses.py b/tensorflow/python/keras/_impl/keras/losses.py index 1576ed7b999f65992f46b357c8ebeda8935c68d0..1d634d38013164659f7360fce45704c19083f475 100644 --- a/tensorflow/python/keras/_impl/keras/losses.py +++ b/tensorflow/python/keras/_impl/keras/losses.py @@ -24,51 +24,55 @@ import six from tensorflow.python.keras._impl.keras import backend as K from tensorflow.python.keras._impl.keras.utils.generic_utils import deserialize_keras_object from tensorflow.python.keras._impl.keras.utils.generic_utils import serialize_keras_object +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import nn from tensorflow.python.util.tf_export import tf_export @tf_export('keras.metrics.mean_squared_error', 'keras.losses.mean_squared_error') def mean_squared_error(y_true, y_pred): - return K.mean(K.square(y_pred - y_true), axis=-1) + return K.mean(math_ops.square(y_pred - y_true), axis=-1) @tf_export('keras.metrics.mean_absolute_error', 
'keras.losses.mean_absolute_error') def mean_absolute_error(y_true, y_pred): - return K.mean(K.abs(y_pred - y_true), axis=-1) + return K.mean(math_ops.abs(y_pred - y_true), axis=-1) @tf_export('keras.metrics.mean_absolute_percentage_error', 'keras.losses.mean_absolute_percentage_error') def mean_absolute_percentage_error(y_true, y_pred): - diff = K.abs((y_true - y_pred) / K.clip(K.abs(y_true), K.epsilon(), None)) + diff = math_ops.abs( + (y_true - y_pred) / K.clip(math_ops.abs(y_true), K.epsilon(), None)) return 100. * K.mean(diff, axis=-1) @tf_export('keras.metrics.mean_squared_logarithmic_error', 'keras.losses.mean_squared_logarithmic_error') def mean_squared_logarithmic_error(y_true, y_pred): - first_log = K.log(K.clip(y_pred, K.epsilon(), None) + 1.) - second_log = K.log(K.clip(y_true, K.epsilon(), None) + 1.) - return K.mean(K.square(first_log - second_log), axis=-1) + first_log = math_ops.log(K.clip(y_pred, K.epsilon(), None) + 1.) + second_log = math_ops.log(K.clip(y_true, K.epsilon(), None) + 1.) + return K.mean(math_ops.square(first_log - second_log), axis=-1) @tf_export('keras.metrics.squared_hinge', 'keras.losses.squared_hinge') def squared_hinge(y_true, y_pred): - return K.mean(K.square(K.maximum(1. - y_true * y_pred, 0.)), axis=-1) + return K.mean( + math_ops.square(math_ops.maximum(1. - y_true * y_pred, 0.)), axis=-1) @tf_export('keras.metrics.hinge', 'keras.losses.hinge') def hinge(y_true, y_pred): - return K.mean(K.maximum(1. - y_true * y_pred, 0.), axis=-1) + return K.mean(math_ops.maximum(1. - y_true * y_pred, 0.), axis=-1) @tf_export('keras.losses.categorical_hinge') def categorical_hinge(y_true, y_pred): - pos = K.sum(y_true * y_pred, axis=-1) - neg = K.max((1. - y_true) * y_pred, axis=-1) - return K.maximum(0., neg - pos + 1.) + pos = math_ops.reduce_sum(y_true * y_pred, axis=-1) + neg = math_ops.reduce_max((1. - y_true) * y_pred, axis=-1) + return math_ops.maximum(0., neg - pos + 1.) 
@tf_export('keras.losses.logcosh') @@ -89,7 +93,7 @@ def logcosh(y_true, y_pred): """ def _logcosh(x): - return x + K.softplus(-2. * x) - K.log(2.) + return x + nn.softplus(-2. * x) - math_ops.log(2.) return K.mean(_logcosh(y_pred - y_true), axis=-1) @@ -117,19 +121,19 @@ def binary_crossentropy(y_true, y_pred): def kullback_leibler_divergence(y_true, y_pred): y_true = K.clip(y_true, K.epsilon(), 1) y_pred = K.clip(y_pred, K.epsilon(), 1) - return K.sum(y_true * K.log(y_true / y_pred), axis=-1) + return math_ops.reduce_sum(y_true * math_ops.log(y_true / y_pred), axis=-1) @tf_export('keras.metrics.poisson', 'keras.losses.poisson') def poisson(y_true, y_pred): - return K.mean(y_pred - y_true * K.log(y_pred + K.epsilon()), axis=-1) + return K.mean(y_pred - y_true * math_ops.log(y_pred + K.epsilon()), axis=-1) @tf_export('keras.metrics.cosine_proximity', 'keras.losses.cosine_proximity') def cosine_proximity(y_true, y_pred): - y_true = K.l2_normalize(y_true, axis=-1) - y_pred = K.l2_normalize(y_pred, axis=-1) - return -K.sum(y_true * y_pred, axis=-1) + y_true = nn.l2_normalize(y_true, axis=-1) + y_pred = nn.l2_normalize(y_pred, axis=-1) + return -math_ops.reduce_sum(y_true * y_pred, axis=-1) # Aliases. 
diff --git a/tensorflow/python/keras/_impl/keras/metrics.py b/tensorflow/python/keras/_impl/keras/metrics.py index 82778a3dc4fbdc13bb6682d01e28ff68882b6dd9..747c3e65157ded6b0d227c6d6667b9092d0eed44 100644 --- a/tensorflow/python/keras/_impl/keras/metrics.py +++ b/tensorflow/python/keras/_impl/keras/metrics.py @@ -37,37 +37,45 @@ from tensorflow.python.keras._impl.keras.losses import sparse_categorical_crosse from tensorflow.python.keras._impl.keras.losses import squared_hinge from tensorflow.python.keras._impl.keras.utils.generic_utils import deserialize_keras_object from tensorflow.python.keras._impl.keras.utils.generic_utils import serialize_keras_object +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import nn from tensorflow.python.util.tf_export import tf_export @tf_export('keras.metrics.binary_accuracy') def binary_accuracy(y_true, y_pred): - return K.mean(K.equal(y_true, K.round(y_pred)), axis=-1) + return K.mean(math_ops.equal(y_true, math_ops.round(y_pred)), axis=-1) @tf_export('keras.metrics.categorical_accuracy') def categorical_accuracy(y_true, y_pred): - return K.cast( - K.equal(K.argmax(y_true, axis=-1), K.argmax(y_pred, axis=-1)), K.floatx()) + return math_ops.cast( + math_ops.equal( + math_ops.argmax(y_true, axis=-1), math_ops.argmax(y_pred, axis=-1)), + K.floatx()) def sparse_categorical_accuracy(y_true, y_pred): - return K.cast( - K.equal( - K.max(y_true, axis=-1), K.cast(K.argmax(y_pred, axis=-1), - K.floatx())), K.floatx()) + return math_ops.cast( + math_ops.equal( + math_ops.reduce_max(y_true, axis=-1), + math_ops.cast(math_ops.argmax(y_pred, axis=-1), K.floatx())), + K.floatx()) @tf_export('keras.metrics.top_k_categorical_accuracy') def top_k_categorical_accuracy(y_true, y_pred, k=5): - return K.mean(K.in_top_k(y_pred, K.argmax(y_true, axis=-1), k), axis=-1) + return K.mean( + nn.in_top_k(y_pred, math_ops.argmax(y_true, axis=-1), k), axis=-1) @tf_export('keras.metrics.sparse_top_k_categorical_accuracy') def 
sparse_top_k_categorical_accuracy(y_true, y_pred, k=5): return K.mean( - K.in_top_k(y_pred, K.cast(K.max(y_true, axis=-1), 'int32'), k), axis=-1) - + nn.in_top_k(y_pred, + math_ops.cast(math_ops.reduce_max(y_true, axis=-1), 'int32'), + k), + axis=-1) # Aliases diff --git a/tensorflow/python/keras/_impl/keras/metrics_test.py b/tensorflow/python/keras/_impl/keras/metrics_test.py index 44289ea02abf5ae5f8befbe515552aea3d4b231e..9deaab0c056e4b71205422e56cc79202a8e73593 100644 --- a/tensorflow/python/keras/_impl/keras/metrics_test.py +++ b/tensorflow/python/keras/_impl/keras/metrics_test.py @@ -21,6 +21,8 @@ from __future__ import print_function import numpy as np from tensorflow.python.keras._impl import keras +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import state_ops from tensorflow.python.platform import test @@ -104,16 +106,15 @@ class KerasMetricsTest(test.TestCase): The total number of true positives seen this epoch at the completion of the batch. """ - y_true = keras.backend.cast(y_true, 'int32') - y_pred = keras.backend.cast(keras.backend.round(y_pred), 'int32') - correct_preds = keras.backend.cast( - keras.backend.equal(y_pred, y_true), 'int32') - true_pos = keras.backend.cast( - keras.backend.sum(correct_preds * y_true), 'int32') + y_true = math_ops.cast(y_true, 'int32') + y_pred = math_ops.cast(math_ops.round(y_pred), 'int32') + correct_preds = math_ops.cast(math_ops.equal(y_pred, y_true), 'int32') + true_pos = math_ops.cast( + math_ops.reduce_sum(correct_preds * y_true), 'int32') current_true_pos = self.true_positives * 1 - self.add_update(keras.backend.update_add(self.true_positives, - true_pos), - inputs=[y_true, y_pred]) + self.add_update( + state_ops.assign_add(self.true_positives, true_pos), + inputs=[y_true, y_pred]) return current_true_pos + true_pos metric_fn = BinaryTruePositives() diff --git a/tensorflow/python/keras/_impl/keras/model_subclassing_test.py b/tensorflow/python/keras/_impl/keras/model_subclassing_test.py 
index 444590033042d915b12645fb0239833b666a02f7..bc8698f235aac0f5fb0c3303cc4c70aa1efa08bc 100644 --- a/tensorflow/python/keras/_impl/keras/model_subclassing_test.py +++ b/tensorflow/python/keras/_impl/keras/model_subclassing_test.py @@ -607,12 +607,6 @@ class CustomCallSignatureTests(test.TestCase): self.assertAllClose(10. * expected_output, self.evaluate(output)) output = model(first, second=second, training=False) self.assertAllClose(expected_output, self.evaluate(output)) - if not context.executing_eagerly(): - six.assertCountEqual(self, [first, second], model.inputs) - with self.assertRaises(TypeError): - # tf.layers.Layer expects an "inputs" argument, so all-keywords doesn't - # work at the moment. - model(first=first, second=second, fiddle_with_output='yes') @test_util.run_in_graph_and_eager_modes() def test_inputs_in_signature(self): @@ -622,10 +616,14 @@ class CustomCallSignatureTests(test.TestCase): def call(self, inputs, some_other_arg, training=False): return inputs + def compute_output_shape(self, input_shape): + return input_shape + model = HasInputsAndOtherPositional() with self.assertRaisesRegexp( TypeError, 'everything else as a keyword argument'): - model(array_ops.ones([]), array_ops.ones([])) + x1, x2 = keras.Input((1, 1)), keras.Input((1, 1)) + model(x1, x2) @test_util.run_in_graph_and_eager_modes() def test_kwargs_in_signature(self): @@ -649,13 +647,14 @@ class CustomCallSignatureTests(test.TestCase): def call(self, x, *args, **kwargs): return [x] + list(args) + def compute_output_shape(self, input_shape): + return input_shape + model = HasArgs() - arg1 = array_ops.ones([]) - arg2 = array_ops.ones([]) - arg3 = array_ops.ones([]) - model(arg1, arg2, arg3, a=3) + x1, x2, x3 = keras.Input((1, 1)), keras.Input((1, 1)), keras.Input((1, 1)) + model(x1, x2, x3, a=3) if not context.executing_eagerly(): - six.assertCountEqual(self, [arg1, arg2, arg3], model.inputs) + six.assertCountEqual(self, [x1, x2, x3], model.inputs) def 
test_args_and_keywords_in_signature(self): @@ -666,11 +665,9 @@ class CustomCallSignatureTests(test.TestCase): with context.graph_mode(): model = HasArgs() - arg1 = array_ops.ones([]) - arg2 = array_ops.ones([]) - arg3 = array_ops.ones([]) + x1, x2, x3 = keras.Input((1, 1)), keras.Input((1, 1)), keras.Input((1, 1)) with self.assertRaisesRegexp(TypeError, 'args and arguments with'): - model(arg1, arg2, arg3, a=3) + model(x1, x2, x3, a=3) def test_training_no_default(self): @@ -694,11 +691,9 @@ class CustomCallSignatureTests(test.TestCase): with context.graph_mode(): model = TrainingNoDefaultWithPositional() - arg1 = array_ops.ones([]) - arg2 = array_ops.ones([]) - arg3 = array_ops.ones([]) + x1, x2, x3 = keras.Input((1, 1)), keras.Input((1, 1)), keras.Input((1, 1)) with self.assertRaisesRegexp(TypeError, 'after a non-input'): - model(arg1, arg2, arg3) + model(x1, x2, x3) if __name__ == '__main__': test.main() diff --git a/tensorflow/python/keras/_impl/keras/optimizers.py b/tensorflow/python/keras/_impl/keras/optimizers.py index acbb9091d3449d8c694a0b7274c1a67264113626..9f383deb725ac69bf2f17f3627010c4e1f567ef0 100644 --- a/tensorflow/python/keras/_impl/keras/optimizers.py +++ b/tensorflow/python/keras/_impl/keras/optimizers.py @@ -31,6 +31,7 @@ from tensorflow.python.keras._impl.keras.utils.generic_utils import deserialize_ from tensorflow.python.keras._impl.keras.utils.generic_utils import serialize_keras_object from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops +from tensorflow.python.ops import state_ops from tensorflow.python.training import distribute as distribute_lib from tensorflow.python.training import optimizer as tf_optimizer_module from tensorflow.python.training import training_util @@ -118,7 +119,8 @@ class Optimizer(object): 'Common ops without gradient: ' 'K.argmax, K.round, K.eval.') if hasattr(self, 'clipnorm') and self.clipnorm > 0: - norm = K.sqrt(sum([K.sum(K.square(g)) for g in grads])) + norm = 
K.sqrt( + sum([math_ops.reduce_sum(math_ops.square(g)) for g in grads])) grads = [clip_norm(g, self.clipnorm, norm) for g in grads] if hasattr(self, 'clipvalue') and self.clipvalue > 0: grads = [K.clip(g, -self.clipvalue, self.clipvalue) for g in grads] @@ -204,20 +206,20 @@ class SGD(Optimizer): def get_updates(self, loss, params): grads = self.get_gradients(loss, params) - self.updates = [K.update_add(self.iterations, 1)] + self.updates = [state_ops.assign_add(self.iterations, 1)] lr = self.lr if self.initial_decay > 0: - lr = lr * (1. / # pylint: disable=g-no-augmented-assignment - (1. + self.decay * K.cast(self.iterations, - K.dtype(self.decay)))) + lr = lr * ( # pylint: disable=g-no-augmented-assignment + 1. / (1. + self.decay * math_ops.cast(self.iterations, + K.dtype(self.decay)))) # momentum shapes = [K.int_shape(p) for p in params] moments = [K.zeros(shape) for shape in shapes] self.weights = [self.iterations] + moments for p, g, m in zip(params, grads, moments): v = self.momentum * m - lr * g # velocity - self.updates.append(K.update(m, v)) + self.updates.append(state_ops.assign(m, v)) if self.nesterov: new_p = p + self.momentum * v - lr * g @@ -228,7 +230,7 @@ class SGD(Optimizer): if getattr(p, 'constraint', None) is not None: new_p = p.constraint(new_p) - self.updates.append(K.update(p, new_p)) + self.updates.append(state_ops.assign(p, new_p)) return self.updates def get_config(self): @@ -277,25 +279,25 @@ class RMSprop(Optimizer): grads = self.get_gradients(loss, params) accumulators = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params] self.weights = accumulators - self.updates = [K.update_add(self.iterations, 1)] + self.updates = [state_ops.assign_add(self.iterations, 1)] lr = self.lr if self.initial_decay > 0: - lr = lr * (1. / # pylint: disable=g-no-augmented-assignment - (1. + self.decay * K.cast(self.iterations, - K.dtype(self.decay)))) + lr = lr * ( # pylint: disable=g-no-augmented-assignment + 1. / (1. 
+ self.decay * math_ops.cast(self.iterations, + K.dtype(self.decay)))) for p, g, a in zip(params, grads, accumulators): # update accumulator - new_a = self.rho * a + (1. - self.rho) * K.square(g) - self.updates.append(K.update(a, new_a)) + new_a = self.rho * a + (1. - self.rho) * math_ops.square(g) + self.updates.append(state_ops.assign(a, new_a)) new_p = p - lr * g / (K.sqrt(new_a) + self.epsilon) # Apply constraints. if getattr(p, 'constraint', None) is not None: new_p = p.constraint(new_p) - self.updates.append(K.update(p, new_p)) + self.updates.append(state_ops.assign(p, new_p)) return self.updates def get_config(self): @@ -339,24 +341,24 @@ class Adagrad(Optimizer): shapes = [K.int_shape(p) for p in params] accumulators = [K.zeros(shape) for shape in shapes] self.weights = accumulators - self.updates = [K.update_add(self.iterations, 1)] + self.updates = [state_ops.assign_add(self.iterations, 1)] lr = self.lr if self.initial_decay > 0: - lr = lr * (1. / # pylint: disable=g-no-augmented-assignment - (1. + self.decay * K.cast(self.iterations, - K.dtype(self.decay)))) + lr = lr * ( # pylint: disable=g-no-augmented-assignment + 1. / (1. + self.decay * math_ops.cast(self.iterations, + K.dtype(self.decay)))) for p, g, a in zip(params, grads, accumulators): - new_a = a + K.square(g) # update accumulator - self.updates.append(K.update(a, new_a)) + new_a = a + math_ops.square(g) # update accumulator + self.updates.append(state_ops.assign(a, new_a)) new_p = p - lr * g / (K.sqrt(new_a) + self.epsilon) # Apply constraints. 
if getattr(p, 'constraint', None) is not None: new_p = p.constraint(new_p) - self.updates.append(K.update(p, new_p)) + self.updates.append(state_ops.assign(p, new_p)) return self.updates def get_config(self): @@ -403,18 +405,18 @@ class Adadelta(Optimizer): accumulators = [K.zeros(shape) for shape in shapes] delta_accumulators = [K.zeros(shape) for shape in shapes] self.weights = accumulators + delta_accumulators - self.updates = [K.update_add(self.iterations, 1)] + self.updates = [state_ops.assign_add(self.iterations, 1)] lr = self.lr if self.initial_decay > 0: - lr = lr * (1. / # pylint: disable=g-no-augmented-assignment - (1. + self.decay * K.cast(self.iterations, - K.dtype(self.decay)))) + lr = lr * ( # pylint: disable=g-no-augmented-assignment + 1. / (1. + self.decay * math_ops.cast(self.iterations, + K.dtype(self.decay)))) for p, g, a, d_a in zip(params, grads, accumulators, delta_accumulators): # update accumulator - new_a = self.rho * a + (1. - self.rho) * K.square(g) - self.updates.append(K.update(a, new_a)) + new_a = self.rho * a + (1. 
- self.rho) * math_ops.square(g) + self.updates.append(state_ops.assign(a, new_a)) # use the new accumulator and the *old* delta_accumulator update = g * K.sqrt(d_a + self.epsilon) / K.sqrt(new_a + self.epsilon) @@ -424,11 +426,11 @@ class Adadelta(Optimizer): if getattr(p, 'constraint', None) is not None: new_p = p.constraint(new_p) - self.updates.append(K.update(p, new_p)) + self.updates.append(state_ops.assign(p, new_p)) # update delta_accumulator - new_d_a = self.rho * d_a + (1 - self.rho) * K.square(update) - self.updates.append(K.update(d_a, new_d_a)) + new_d_a = self.rho * d_a + (1 - self.rho) * math_ops.square(update) + self.updates.append(state_ops.assign(d_a, new_d_a)) return self.updates def get_config(self): @@ -483,17 +485,18 @@ class Adam(Optimizer): def get_updates(self, loss, params): grads = self.get_gradients(loss, params) - self.updates = [K.update_add(self.iterations, 1)] + self.updates = [state_ops.assign_add(self.iterations, 1)] lr = self.lr if self.initial_decay > 0: - lr = lr * (1. / # pylint: disable=g-no-augmented-assignment - (1. + self.decay * K.cast(self.iterations, - K.dtype(self.decay)))) + lr = lr * ( # pylint: disable=g-no-augmented-assignment + 1. / (1. + self.decay * math_ops.cast(self.iterations, + K.dtype(self.decay)))) - t = K.cast(self.iterations, K.floatx()) + 1 + t = math_ops.cast(self.iterations, K.floatx()) + 1 lr_t = lr * ( - K.sqrt(1. - K.pow(self.beta_2, t)) / (1. - K.pow(self.beta_1, t))) + K.sqrt(1. - math_ops.pow(self.beta_2, t)) / + (1. - math_ops.pow(self.beta_1, t))) ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params] vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params] @@ -505,23 +508,23 @@ class Adam(Optimizer): for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats): m_t = (self.beta_1 * m) + (1. - self.beta_1) * g - v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g) + v_t = (self.beta_2 * v) + (1. 
- self.beta_2) * math_ops.square(g) if self.amsgrad: - vhat_t = K.maximum(vhat, v_t) + vhat_t = math_ops.maximum(vhat, v_t) p_t = p - lr_t * m_t / (K.sqrt(vhat_t) + self.epsilon) - self.updates.append(K.update(vhat, vhat_t)) + self.updates.append(state_ops.assign(vhat, vhat_t)) else: p_t = p - lr_t * m_t / (K.sqrt(v_t) + self.epsilon) - self.updates.append(K.update(m, m_t)) - self.updates.append(K.update(v, v_t)) + self.updates.append(state_ops.assign(m, m_t)) + self.updates.append(state_ops.assign(v, v_t)) new_p = p_t # Apply constraints. if getattr(p, 'constraint', None) is not None: new_p = p.constraint(new_p) - self.updates.append(K.update(p, new_p)) + self.updates.append(state_ops.assign(p, new_p)) return self.updates def get_config(self): @@ -573,16 +576,16 @@ class Adamax(Optimizer): def get_updates(self, loss, params): grads = self.get_gradients(loss, params) - self.updates = [K.update_add(self.iterations, 1)] + self.updates = [state_ops.assign_add(self.iterations, 1)] lr = self.lr if self.initial_decay > 0: - lr = lr * (1. / # pylint: disable=g-no-augmented-assignment - (1. + self.decay * K.cast(self.iterations, - K.dtype(self.decay)))) + lr = lr * ( # pylint: disable=g-no-augmented-assignment + 1. / (1. + self.decay * math_ops.cast(self.iterations, + K.dtype(self.decay)))) - t = K.cast(self.iterations, K.floatx()) + 1 - lr_t = lr / (1. - K.pow(self.beta_1, t)) + t = math_ops.cast(self.iterations, K.floatx()) + 1 + lr_t = lr / (1. - math_ops.pow(self.beta_1, t)) shapes = [K.int_shape(p) for p in params] # zero init of 1st moment @@ -594,18 +597,18 @@ class Adamax(Optimizer): for p, g, m, u in zip(params, grads, ms, us): m_t = (self.beta_1 * m) + (1. 
- self.beta_1) * g - u_t = K.maximum(self.beta_2 * u, K.abs(g)) + u_t = math_ops.maximum(self.beta_2 * u, math_ops.abs(g)) p_t = p - lr_t * m_t / (u_t + self.epsilon) - self.updates.append(K.update(m, m_t)) - self.updates.append(K.update(u, u_t)) + self.updates.append(state_ops.assign(m, m_t)) + self.updates.append(state_ops.assign(u, u_t)) new_p = p_t # Apply constraints. if getattr(p, 'constraint', None) is not None: new_p = p.constraint(new_p) - self.updates.append(K.update(p, new_p)) + self.updates.append(state_ops.assign(p, new_p)) return self.updates def get_config(self): @@ -659,16 +662,17 @@ class Nadam(Optimizer): def get_updates(self, loss, params): grads = self.get_gradients(loss, params) - self.updates = [K.update_add(self.iterations, 1)] + self.updates = [state_ops.assign_add(self.iterations, 1)] - t = K.cast(self.iterations, K.floatx()) + 1 + t = math_ops.cast(self.iterations, K.floatx()) + 1 # Due to the recommendations in [2], i.e. warming momentum schedule momentum_cache_t = self.beta_1 * ( - 1. - 0.5 * (K.pow(K.cast_to_floatx(0.96), t * self.schedule_decay))) + 1. - 0.5 * + (math_ops.pow(K.cast_to_floatx(0.96), t * self.schedule_decay))) momentum_cache_t_1 = self.beta_1 * ( 1. - 0.5 * - (K.pow(K.cast_to_floatx(0.96), (t + 1) * self.schedule_decay))) + (math_ops.pow(K.cast_to_floatx(0.96), (t + 1) * self.schedule_decay))) m_schedule_new = self.m_schedule * momentum_cache_t m_schedule_next = self.m_schedule * momentum_cache_t * momentum_cache_t_1 self.updates.append((self.m_schedule, m_schedule_new)) @@ -684,13 +688,13 @@ class Nadam(Optimizer): g_prime = g / (1. - m_schedule_new) m_t = self.beta_1 * m + (1. - self.beta_1) * g m_t_prime = m_t / (1. - m_schedule_next) - v_t = self.beta_2 * v + (1. - self.beta_2) * K.square(g) - v_t_prime = v_t / (1. - K.pow(self.beta_2, t)) + v_t = self.beta_2 * v + (1. - self.beta_2) * math_ops.square(g) + v_t_prime = v_t / (1. - math_ops.pow(self.beta_2, t)) m_t_bar = ( 1. 
- momentum_cache_t) * g_prime + momentum_cache_t_1 * m_t_prime - self.updates.append(K.update(m, m_t)) - self.updates.append(K.update(v, v_t)) + self.updates.append(state_ops.assign(m, m_t)) + self.updates.append(state_ops.assign(v, v_t)) p_t = p - self.lr * m_t_bar / (K.sqrt(v_t_prime) + self.epsilon) new_p = p_t @@ -699,7 +703,7 @@ class Nadam(Optimizer): if getattr(p, 'constraint', None) is not None: new_p = p.constraint(new_p) - self.updates.append(K.update(p, new_p)) + self.updates.append(state_ops.assign(p, new_p)) return self.updates def get_config(self): @@ -743,7 +747,7 @@ class TFOptimizer(Optimizer): global_step = training_util.get_global_step() opt_update = self.optimizer.apply_gradients(grads, global_step) else: - self.updates = [K.update_add(self.iterations, 1)] + self.updates = [state_ops.assign_add(self.iterations, 1)] if not params: return self.updates diff --git a/tensorflow/python/keras/_impl/keras/regularizers.py b/tensorflow/python/keras/_impl/keras/regularizers.py index 2c30844647acdb78d1ca31d052ec7e5ecc6dcc2a..74c37d370ea630ca3c3e5e0945828f63928572e1 100644 --- a/tensorflow/python/keras/_impl/keras/regularizers.py +++ b/tensorflow/python/keras/_impl/keras/regularizers.py @@ -23,6 +23,7 @@ import six from tensorflow.python.keras._impl.keras import backend as K from tensorflow.python.keras._impl.keras.utils.generic_utils import deserialize_keras_object from tensorflow.python.keras._impl.keras.utils.generic_utils import serialize_keras_object +from tensorflow.python.ops import math_ops from tensorflow.python.util.tf_export import tf_export @@ -55,9 +56,9 @@ class L1L2(Regularizer): def __call__(self, x): regularization = 0. 
if self.l1: - regularization += K.sum(self.l1 * K.abs(x)) + regularization += math_ops.reduce_sum(self.l1 * math_ops.abs(x)) if self.l2: - regularization += K.sum(self.l2 * K.square(x)) + regularization += math_ops.reduce_sum(self.l2 * math_ops.square(x)) return regularization def get_config(self): diff --git a/tensorflow/python/keras/_impl/keras/utils/conv_utils.py b/tensorflow/python/keras/_impl/keras/utils/conv_utils.py index 583079d9626361eb594f16a57af86f103e5ee74d..8882a3a46bcb9de7283a67f001e67ed8644a0cf7 100644 --- a/tensorflow/python/keras/_impl/keras/utils/conv_utils.py +++ b/tensorflow/python/keras/_impl/keras/utils/conv_utils.py @@ -21,17 +21,146 @@ from __future__ import print_function import numpy as np from six.moves import range # pylint: disable=redefined-builtin -# pylint: disable=unused-import -from tensorflow.python.keras._impl.keras import backend as K -from tensorflow.python.layers.utils import conv_input_length -from tensorflow.python.layers.utils import conv_output_length -from tensorflow.python.layers.utils import deconv_output_length as deconv_length -from tensorflow.python.layers.utils import normalize_tuple +from tensorflow.python.keras._impl.keras import backend + + +def convert_data_format(data_format, ndim): + if data_format == 'channels_last': + if ndim == 3: + return 'NWC' + elif ndim == 4: + return 'NHWC' + elif ndim == 5: + return 'NDHWC' + else: + raise ValueError('Input rank not supported:', ndim) + elif data_format == 'channels_first': + if ndim == 3: + return 'NCW' + elif ndim == 4: + return 'NCHW' + elif ndim == 5: + return 'NCDHW' + else: + raise ValueError('Input rank not supported:', ndim) + else: + raise ValueError('Invalid data_format:', data_format) + + +def normalize_tuple(value, n, name): + """Transforms a single integer or iterable of integers into an integer tuple. + + Arguments: + value: The value to validate and convert. Could an int, or any iterable + of ints. + n: The size of the tuple to be returned. 
+ name: The name of the argument being validated, e.g. "strides" or + "kernel_size". This is only used to format error messages. + + Returns: + A tuple of n integers. + + Raises: + ValueError: If something else than an int/long or iterable thereof was + passed. + """ + if isinstance(value, int): + return (value,) * n + else: + try: + value_tuple = tuple(value) + except TypeError: + raise ValueError('The `' + name + '` argument must be a tuple of ' + + str(n) + ' integers. Received: ' + str(value)) + if len(value_tuple) != n: + raise ValueError('The `' + name + '` argument must be a tuple of ' + + str(n) + ' integers. Received: ' + str(value)) + for single_value in value_tuple: + try: + int(single_value) + except (ValueError, TypeError): + raise ValueError('The `' + name + '` argument must be a tuple of ' + + str(n) + ' integers. Received: ' + str(value) + ' ' + 'including element ' + str(single_value) + ' of type' + + ' ' + str(type(single_value))) + return value_tuple + + +def conv_output_length(input_length, filter_size, padding, stride, dilation=1): + """Determines output length of a convolution given input length. + + Arguments: + input_length: integer. + filter_size: integer. + padding: one of "same", "valid", "full". + stride: integer. + dilation: dilation rate, integer. + + Returns: + The output length (integer). + """ + if input_length is None: + return None + assert padding in {'same', 'valid', 'full'} + dilated_filter_size = filter_size + (filter_size - 1) * (dilation - 1) + if padding == 'same': + output_length = input_length + elif padding == 'valid': + output_length = input_length - dilated_filter_size + 1 + elif padding == 'full': + output_length = input_length + dilated_filter_size - 1 + return (output_length + stride - 1) // stride + + +def conv_input_length(output_length, filter_size, padding, stride): + """Determines input length of a convolution given output length. + + Arguments: + output_length: integer. + filter_size: integer. 
+ padding: one of "same", "valid", "full". + stride: integer. + + Returns: + The input length (integer). + """ + if output_length is None: + return None + assert padding in {'same', 'valid', 'full'} + if padding == 'same': + pad = filter_size // 2 + elif padding == 'valid': + pad = 0 + elif padding == 'full': + pad = filter_size - 1 + return (output_length - 1) * stride - 2 * pad + filter_size + + +def deconv_output_length(input_length, filter_size, padding, stride): + """Determines output length of a transposed convolution given input length. + + Arguments: + input_length: integer. + filter_size: integer. + padding: one of "same", "valid", "full". + stride: integer. + + Returns: + The output length (integer). + """ + if input_length is None: + return None + input_length *= stride + if padding == 'valid': + input_length += max(filter_size - stride, 0) + elif padding == 'full': + input_length -= (stride + filter_size - 2) + return input_length def normalize_data_format(value): if value is None: - value = K.image_data_format() + value = backend.image_data_format() data_format = value.lower() if data_format not in {'channels_first', 'channels_last'}: raise ValueError('The `data_format` argument must be one of ' diff --git a/tensorflow/python/keras/_impl/keras/utils/io_utils.py b/tensorflow/python/keras/_impl/keras/utils/io_utils.py index bbf1d2a3d9c3948271780ec3fad3316b4e6d53c3..f82e3277de70a631c93f0ef3c240f41ddb3390a7 100644 --- a/tensorflow/python/keras/_impl/keras/utils/io_utils.py +++ b/tensorflow/python/keras/_impl/keras/utils/io_utils.py @@ -19,9 +19,9 @@ from __future__ import division from __future__ import print_function from collections import defaultdict -import sys import numpy as np +import six from tensorflow.python.util.tf_export import tf_export @@ -160,13 +160,11 @@ def ask_to_proceed_with_overwrite(filepath): Returns: True if we can proceed with overwrite, False otherwise. 
""" - get_input = input - if sys.version_info[:2] <= (2, 7): - get_input = raw_input - overwrite = get_input('[WARNING] %s already exists - overwrite? ' - '[y/n]' % (filepath)) - while overwrite not in ['y', 'n']: - overwrite = get_input('Enter "y" (overwrite) or "n" (cancel).') + overwrite = six.moves.input('[WARNING] %s already exists - overwrite? ' + '[y/n]' % (filepath)).strip().lower() + while overwrite not in ('y', 'n'): + overwrite = six.moves.input('Enter "y" (overwrite) or "n" ' + '(cancel).').strip().lower() if overwrite == 'n': return False print('[TIP] Next time specify overwrite=True!') diff --git a/tensorflow/python/keras/_impl/keras/utils/layer_utils.py b/tensorflow/python/keras/_impl/keras/utils/layer_utils.py index 4c8009dfd80e1aec457fa03687f2840c7fe4607b..902972ecbb8fd69a9252b7e19e32bee5e33e4f97 100644 --- a/tensorflow/python/keras/_impl/keras/utils/layer_utils.py +++ b/tensorflow/python/keras/_impl/keras/utils/layer_utils.py @@ -35,7 +35,7 @@ def count_params(weights): Returns: The total number of scalars composing the weights """ - return int(np.sum([K.count_params(p) for p in set(weights)])) + return int(np.sum([np.prod(p.get_shape().as_list()) for p in set(weights)])) def print_summary(model, line_length=None, positions=None, print_fn=None): @@ -193,8 +193,7 @@ def print_summary(model, line_length=None, positions=None, print_fn=None): else: trainable_count = count_params(model.trainable_weights) - non_trainable_count = int( - np.sum([K.count_params(p) for p in set(model.non_trainable_weights)])) + non_trainable_count = count_params(model.non_trainable_weights) print_fn('Total params: {:,}'.format(trainable_count + non_trainable_count)) print_fn('Trainable params: {:,}'.format(trainable_count)) diff --git a/tensorflow/python/keras/_impl/keras/utils/tf_utils.py b/tensorflow/python/keras/_impl/keras/utils/tf_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..8da5f7777733767f31fad205a23c2f08f9ffbb1c --- /dev/null +++ 
b/tensorflow/python/keras/_impl/keras/utils/tf_utils.py @@ -0,0 +1,74 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""TensorFlow-related utilities.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.framework import smart_cond as smart_module +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import variables + + +def smart_cond(pred, true_fn=None, false_fn=None, name=None): + """Return either `true_fn()` if predicate `pred` is true else `false_fn()`. + + If `pred` is a bool or has a constant value, we return either `true_fn()` + or `false_fn()`, otherwise we use `tf.cond` to dynamically route to both. + + Arguments: + pred: A scalar determining whether to return the result of `true_fn` or + `false_fn`. + true_fn: The callable to be performed if pred is true. + false_fn: The callable to be performed if pred is false. + name: Optional name prefix when using `tf.cond`. + + Returns: + Tensors returned by the call to either `true_fn` or `false_fn`. + + Raises: + TypeError: If `true_fn` or `false_fn` is not callable. 
+ """ + if isinstance(pred, variables.Variable): + return control_flow_ops.cond( + pred, true_fn=true_fn, false_fn=false_fn, name=name) + return smart_module.smart_cond( + pred, true_fn=true_fn, false_fn=false_fn, name=name) + + +def constant_value(pred): + """Return the bool value for `pred`, or None if `pred` had a dynamic value. + + Arguments: + pred: A scalar, either a Python bool or a TensorFlow boolean variable + or tensor, or the Python integer 1 or 0. + + Returns: + True or False if `pred` has a constant boolean value, None otherwise. + + Raises: + TypeError: If `pred` is not a Variable, Tensor or bool, or Python + integer 1 or 0. + """ + # Allow integer booleans. + if isinstance(pred, int): + if pred == 1: + pred = True + elif pred == 0: + pred = False + + if isinstance(pred, variables.Variable): + return None + return smart_module.smart_constant_value(pred) diff --git a/tensorflow/python/keras/layers/__init__.py b/tensorflow/python/keras/layers/__init__.py index 84ee5040dcd7b118a5c63b6532135913fe238797..b45cafed3186a096333bcdc5cbcec13556918872 100644 --- a/tensorflow/python/keras/layers/__init__.py +++ b/tensorflow/python/keras/layers/__init__.py @@ -49,6 +49,7 @@ from tensorflow.python.keras._impl.keras.layers.convolutional import Convolution from tensorflow.python.keras._impl.keras.layers.convolutional import Convolution3DTranspose from tensorflow.python.keras._impl.keras.layers.convolutional import SeparableConvolution1D from tensorflow.python.keras._impl.keras.layers.convolutional import SeparableConvolution2D +from tensorflow.python.keras._impl.keras.layers.convolutional import DepthwiseConv2D # Image processing layers. 
from tensorflow.python.keras._impl.keras.layers.convolutional import UpSampling1D diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index ea210346c18a39b1c2822d9ff4cb5e8322b95150..ba8f1fd3caf4def5f6648298d882b75165861359 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -295,7 +295,6 @@ tf_py_test( "//tensorflow/python:nn_grad", ], data = ["//tensorflow/core:image_testdata"], - tags = ["no_windows"], ) tf_py_test( @@ -593,7 +592,7 @@ cuda_py_test( cuda_py_test( name = "matrix_solve_op_test", - size = "small", + size = "medium", srcs = ["matrix_solve_op_test.py"], additional_deps = [ "//third_party/py/numpy", @@ -918,6 +917,20 @@ tf_py_test( ], ) +tf_py_test( + name = "string_strip_op_test", + size = "small", + srcs = ["string_strip_op_test.py"], + additional_deps = [ + "//third_party/py/numpy", + "//tensorflow/python:array_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:errors", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:string_ops", + ], +) + tf_py_test( name = "substr_op_test", size = "small", @@ -1030,12 +1043,14 @@ tf_py_test( "//tensorflow/python:framework_for_generated_wrappers", "//tensorflow/python:framework_test_lib", "//tensorflow/python:init_ops", + "//tensorflow/python:layers", "//tensorflow/python:math_ops", "//tensorflow/python:variable_scope", "//tensorflow/python:resource_variable_ops", "//tensorflow/python:state_ops", "//tensorflow/python:variables", "//tensorflow/python/eager:context", + "//tensorflow/python/eager:function", ], tags = ["no_windows"], ) @@ -1142,7 +1157,6 @@ tf_py_test( "//tensorflow/python:variables", ], data = ["//tensorflow/core:lmdb_testdata"], - tags = ["no_windows"], ) cuda_py_test( @@ -1190,6 +1204,37 @@ cuda_py_test( "//tensorflow/python/eager:context", ], shard_count = 10, + tags = [ + "noasan", # times out + ], +) + +cuda_py_test( + name = "broadcast_to_ops_test", + size = 
"small", + srcs = ["broadcast_to_ops_test.py"], + additional_deps = [ + "//third_party/py/numpy", + "//tensorflow/python:array_ops", + "//tensorflow/python:client", + "//tensorflow/python:client_testlib", + ], +) + +cuda_py_test( + name = "inplace_ops_test", + size = "small", + srcs = ["inplace_ops_test.py"], + additional_deps = [ + "//third_party/py/numpy", + "//tensorflow/python:array_ops", + "//tensorflow/python:errors", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:math_ops", + ], + shard_count = 10, ) cuda_py_test( @@ -1587,11 +1632,16 @@ cuda_py_test( "//tensorflow/python:variable_scope", "//tensorflow/python:variables", ], + shard_count = 4, + tags = [ + "noasan", + "notap", + ], ) cuda_py_test( name = "linalg_ops_test", - size = "small", + size = "medium", srcs = ["linalg_ops_test.py"], additional_deps = [ "//third_party/py/numpy", @@ -2332,7 +2382,6 @@ cuda_py_test( "//tensorflow/python:variables", ], shard_count = 4, - tags = ["no_windows"], ) cuda_py_test( @@ -2463,7 +2512,6 @@ cuda_py_test( "//tensorflow/python/eager:context", ], shard_count = 10, - tags = ["no_windows"], ) cuda_py_test( @@ -2523,7 +2571,10 @@ cuda_py_test( "//tensorflow/python:sparse_ops", ], shard_count = 5, - tags = ["noasan"], + tags = [ + "noasan", + "optonly", # b/77589990 + ], ) cuda_py_test( @@ -2652,10 +2703,6 @@ cuda_py_test( "//tensorflow/python:variables", ], shard_count = 50, - tags = [ - "manual", - "notap", # b/30226163 - ], ) cuda_py_test( @@ -2726,6 +2773,7 @@ cuda_py_test( ], data = ["//tensorflow/python/kernel_tests/testdata:self_adjoint_eig_op_test_files"], shard_count = 20, + tags = ["no_windows"], ) cuda_py_test( @@ -2804,7 +2852,7 @@ sycl_py_test( tf_py_test( name = "sets_test", - size = "small", + size = "medium", srcs = ["sets_test.py"], additional_deps = [ "//third_party/py/numpy", @@ -2856,7 +2904,10 @@ tf_py_test( "//tensorflow/python:variables", ], shard_count = 
10, - tags = ["no_windows_gpu"], + tags = [ + "no_windows_gpu", + "noasan", + ], ) tf_py_test( diff --git a/tensorflow/python/kernel_tests/array_ops_test.py b/tensorflow/python/kernel_tests/array_ops_test.py index 64c1760d5e72c8dd2b0b8adb09cc3612f85228b0..5a20eebbc559cf6a3cad97adb8aa056cb88719cb 100644 --- a/tensorflow/python/kernel_tests/array_ops_test.py +++ b/tensorflow/python/kernel_tests/array_ops_test.py @@ -780,6 +780,14 @@ class StridedSliceGradTest(test_util.TensorFlowTestCase): grad = GradSliceChecker(self, sess, var, np.array(8)) _ = grad[tuple()] + def testInt64Indices(self): + with self.test_session(use_gpu=True) as sess: + a = math_ops.range(3) + index = constant_op.constant(1, dtype=dtypes.int64) + b = 2 * a[index] + grad, = gradients_impl.gradients(b, a) + self.assertAllEqual(sess.run(grad), [0, 2, 0]) + class StridedSliceGradTypeTest(test_util.TensorFlowTestCase): """Test varied index types and host located memory.""" @@ -999,30 +1007,38 @@ class SliceAssignTest(test_util.TensorFlowTestCase): class ShapeSizeRankTest(test_util.TensorFlowTestCase): + @test_util.run_in_graph_and_eager_modes() def testDenseShape(self): - with self.test_session(): - t_value = [[0, 42], [24, 0]] - self.assertAllEqual((2, 2), array_ops.shape(t_value).eval()) - self.assertEqual(4, array_ops.size(t_value).eval()) - self.assertEqual(2, array_ops.rank(t_value).eval()) + t_value = [[0, 42], [24, 0]] + self.assertAllEqual((2, 2), self.evaluate(array_ops.shape(t_value))) + self.assertEqual(4, self.evaluate(array_ops.size(t_value))) + self.assertEqual(2, self.evaluate(array_ops.rank(t_value))) - t = constant_op.constant(t_value) - self.assertAllEqual((2, 2), array_ops.shape(t).eval()) - self.assertEqual(4, array_ops.size(t).eval()) - self.assertEqual(2, array_ops.rank(t).eval()) + t = constant_op.constant(t_value) + self.assertAllEqual((2, 2), self.evaluate(array_ops.shape(t))) + self.assertEqual(4, self.evaluate(array_ops.size(t))) + self.assertEqual(2, 
self.evaluate(array_ops.rank(t))) + @test_util.run_in_graph_and_eager_modes() def testSparseShape(self): - with self.test_session(): - sp_value = sparse_tensor.SparseTensorValue( - indices=((0, 1), (1, 0)), values=(42, 24), dense_shape=(2, 2)) - self.assertAllEqual((2, 2), array_ops.shape(sp_value).eval()) - self.assertEqual(4, array_ops.size(sp_value).eval()) - self.assertEqual(2, array_ops.rank(sp_value).eval()) - - sp = sparse_tensor.SparseTensor.from_value(sp_value) - self.assertAllEqual((2, 2), array_ops.shape(sp).eval()) - self.assertEqual(4, array_ops.size(sp).eval()) - self.assertEqual(2, array_ops.rank(sp).eval()) + sp_value = sparse_tensor.SparseTensorValue( + indices=((0, 1), (1, 0)), values=(42, 24), dense_shape=(2, 2)) + self.assertAllEqual((2, 2), self.evaluate(array_ops.shape(sp_value))) + self.assertEqual(4, self.evaluate(array_ops.size(sp_value))) + self.assertEqual(2, self.evaluate(array_ops.rank(sp_value))) + + sp = sparse_tensor.SparseTensor.from_value(sp_value) + self.assertAllEqual((2, 2), self.evaluate(array_ops.shape(sp))) + self.assertEqual(4, self.evaluate(array_ops.size(sp))) + self.assertEqual(2, self.evaluate(array_ops.rank(sp))) + + @test_util.run_in_graph_and_eager_modes() + def testSizeDtype(self): + tensor = [1] + self.assertEqual(dtypes.int32, self.evaluate(array_ops.size(tensor)).dtype) + self.assertEqual( + dtypes.int64, + self.evaluate(array_ops.size(tensor, out_type=dtypes.int64)).dtype) @test_util.with_c_api diff --git a/tensorflow/python/kernel_tests/boosted_trees/prediction_ops_test.py b/tensorflow/python/kernel_tests/boosted_trees/prediction_ops_test.py index d132f15e51dbc6cd8e706e36b889352c20792cdf..54f33f336015cc9cb50658941b8e157cc1b94df9 100644 --- a/tensorflow/python/kernel_tests/boosted_trees/prediction_ops_test.py +++ b/tensorflow/python/kernel_tests/boosted_trees/prediction_ops_test.py @@ -49,7 +49,6 @@ class TrainingPredictionOpsTest(test_util.TensorFlowTestCase): # Grow tree ensemble. 
predict_op = boosted_trees_ops.training_predict( tree_ensemble_handle, - max_depth=2, cached_tree_ids=cached_tree_ids, cached_node_ids=cached_node_ids, bucketized_features=[feature_0_values, feature_1_values], @@ -116,7 +115,6 @@ class TrainingPredictionOpsTest(test_util.TensorFlowTestCase): # Grow tree ensemble. predict_op = boosted_trees_ops.training_predict( tree_ensemble_handle, - max_depth=2, cached_tree_ids=cached_tree_ids, cached_node_ids=cached_node_ids, bucketized_features=[feature_0_values], @@ -189,7 +187,6 @@ class TrainingPredictionOpsTest(test_util.TensorFlowTestCase): # Grow tree ensemble. predict_op = boosted_trees_ops.training_predict( tree_ensemble_handle, - max_depth=4, cached_tree_ids=cached_tree_ids, cached_node_ids=cached_node_ids, bucketized_features=[feature_0_values, feature_1_values], @@ -299,7 +296,6 @@ class TrainingPredictionOpsTest(test_util.TensorFlowTestCase): # Grow tree ensemble. predict_op = boosted_trees_ops.training_predict( tree_ensemble_handle, - max_depth=4, cached_tree_ids=cached_tree_ids, cached_node_ids=cached_node_ids, bucketized_features=[feature_0_values, feature_1_values], @@ -429,7 +425,6 @@ class TrainingPredictionOpsTest(test_util.TensorFlowTestCase): # Grow tree ensemble. predict_op = boosted_trees_ops.training_predict( tree_ensemble_handle, - max_depth=2, cached_tree_ids=cached_tree_ids, cached_node_ids=cached_node_ids, bucketized_features=[feature_0_values, feature_1_values], @@ -562,7 +557,6 @@ class TrainingPredictionOpsTest(test_util.TensorFlowTestCase): # Grow tree ensemble. predict_op = boosted_trees_ops.training_predict( tree_ensemble_handle, - max_depth=3, cached_tree_ids=cached_tree_ids, cached_node_ids=cached_node_ids, bucketized_features=[feature_0_values, feature_1_values], @@ -705,7 +699,6 @@ class TrainingPredictionOpsTest(test_util.TensorFlowTestCase): # Grow tree ensemble. 
predict_op = boosted_trees_ops.training_predict( tree_ensemble_handle, - max_depth=3, cached_tree_ids=cached_tree_ids, cached_node_ids=cached_node_ids, bucketized_features=[feature_0_values, feature_1_values], @@ -782,7 +775,6 @@ class TrainingPredictionOpsTest(test_util.TensorFlowTestCase): # Grow tree ensemble. predict_op = boosted_trees_ops.training_predict( tree_ensemble_handle, - max_depth=1, cached_tree_ids=cached_tree_ids, cached_node_ids=cached_node_ids, bucketized_features=[feature_0_values, feature_1_values], @@ -905,8 +897,7 @@ class PredictionOpsTest(test_util.TensorFlowTestCase): predict_op = boosted_trees_ops.predict( tree_ensemble_handle, bucketized_features=[feature_0_values, feature_1_values], - logits_dimension=1, - max_depth=2) + logits_dimension=1) logits = session.run(predict_op) self.assertAllClose(expected_logits, logits) @@ -915,8 +906,7 @@ class PredictionOpsTest(test_util.TensorFlowTestCase): predict_op = boosted_trees_ops.predict( tree_ensemble_handle, bucketized_features=[feature_0_values, feature_1_values], - logits_dimension=1, - max_depth=2) + logits_dimension=1) logits = session.run(predict_op) self.assertAllClose(expected_logits, logits) diff --git a/tensorflow/python/kernel_tests/boosted_trees/resource_ops_test.py b/tensorflow/python/kernel_tests/boosted_trees/resource_ops_test.py index a223241e893d6838faec9a48cb4ca9cb3c24a211..d5f0c22d6e042a28f54fea2d4505208a3f7258c0 100644 --- a/tensorflow/python/kernel_tests/boosted_trees/resource_ops_test.py +++ b/tensorflow/python/kernel_tests/boosted_trees/resource_ops_test.py @@ -36,16 +36,18 @@ class ResourceOpsTest(test_util.TensorFlowTestCase): resources.initialize_resources(resources.shared_resources()).run() stamp_token = ensemble.get_stamp_token() self.assertEqual(0, stamp_token.eval()) - (_, num_trees, num_finalized_trees, - num_attempted_layers) = ensemble.get_states() + (_, num_trees, num_finalized_trees, num_attempted_layers, + nodes_range) = ensemble.get_states() 
self.assertEqual(0, num_trees.eval()) self.assertEqual(0, num_finalized_trees.eval()) self.assertEqual(0, num_attempted_layers.eval()) + self.assertAllEqual([0, 1], nodes_range.eval()) def testCreateWithProto(self): with self.test_session(): ensemble_proto = boosted_trees_pb2.TreeEnsemble() - text_format.Merge(""" + text_format.Merge( + """ trees { nodes { bucketized_split { @@ -141,6 +143,8 @@ class ResourceOpsTest(test_util.TensorFlowTestCase): growing_metadata { num_trees_attempted: 2 num_layers_attempted: 6 + last_layer_node_start: 16 + last_layer_node_end: 19 } """, ensemble_proto) ensemble = boosted_trees_ops.TreeEnsemble( @@ -148,28 +152,31 @@ class ResourceOpsTest(test_util.TensorFlowTestCase): stamp_token=7, serialized_proto=ensemble_proto.SerializeToString()) resources.initialize_resources(resources.shared_resources()).run() - (stamp_token, num_trees, num_finalized_trees, - num_attempted_layers) = ensemble.get_states() + (stamp_token, num_trees, num_finalized_trees, num_attempted_layers, + nodes_range) = ensemble.get_states() self.assertEqual(7, stamp_token.eval()) self.assertEqual(2, num_trees.eval()) self.assertEqual(1, num_finalized_trees.eval()) self.assertEqual(6, num_attempted_layers.eval()) + self.assertAllEqual([16, 19], nodes_range.eval()) def testSerializeDeserialize(self): with self.test_session(): # Initialize. ensemble = boosted_trees_ops.TreeEnsemble('ensemble', stamp_token=5) resources.initialize_resources(resources.shared_resources()).run() - (stamp_token, num_trees, num_finalized_trees, - num_attempted_layers) = ensemble.get_states() + (stamp_token, num_trees, num_finalized_trees, num_attempted_layers, + nodes_range) = ensemble.get_states() self.assertEqual(5, stamp_token.eval()) self.assertEqual(0, num_trees.eval()) self.assertEqual(0, num_finalized_trees.eval()) self.assertEqual(0, num_attempted_layers.eval()) + self.assertAllEqual([0, 1], nodes_range.eval()) # Deserialize. 
ensemble_proto = boosted_trees_pb2.TreeEnsemble() - text_format.Merge(""" + text_format.Merge( + """ trees { nodes { bucketized_split { @@ -201,6 +208,8 @@ class ResourceOpsTest(test_util.TensorFlowTestCase): growing_metadata { num_trees_attempted: 1 num_layers_attempted: 5 + last_layer_node_start: 3 + last_layer_node_end: 7 } """, ensemble_proto) with ops.control_dependencies([ @@ -208,13 +217,15 @@ class ResourceOpsTest(test_util.TensorFlowTestCase): stamp_token=3, serialized_proto=ensemble_proto.SerializeToString()) ]): - (stamp_token, num_trees, num_finalized_trees, - num_attempted_layers) = ensemble.get_states() + (stamp_token, num_trees, num_finalized_trees, num_attempted_layers, + nodes_range) = ensemble.get_states() self.assertEqual(3, stamp_token.eval()) self.assertEqual(1, num_trees.eval()) # This reads from metadata, not really counting the layers. self.assertEqual(5, num_attempted_layers.eval()) self.assertEqual(0, num_finalized_trees.eval()) + self.assertAllEqual([3, 7], nodes_range.eval()) + # Serialize. new_ensemble_proto = boosted_trees_pb2.TreeEnsemble() diff --git a/tensorflow/python/kernel_tests/boosted_trees/stats_ops_test.py b/tensorflow/python/kernel_tests/boosted_trees/stats_ops_test.py index a54cc43517f4513b88b94ceb9b401b84b5ca053f..f0bb84e69a5ae29843123718574f919a9a8553e0 100644 --- a/tensorflow/python/kernel_tests/boosted_trees/stats_ops_test.py +++ b/tensorflow/python/kernel_tests/boosted_trees/stats_ops_test.py @@ -29,7 +29,7 @@ class StatsOpsTest(test_util.TensorFlowTestCase): """Testing Gain calculation without any regularization.""" with self.test_session() as sess: max_splits = 7 - node_id_range = [1, 2] # node 1 through 2 will be processed. + node_id_range = [1, 3] # node 1 through 2 will be processed. 
stats_summary_list = [ [ [[0., 0.], [.08, .09], [0., 0.], [0., 0.]], # node 0; ignored @@ -59,6 +59,7 @@ class StatsOpsTest(test_util.TensorFlowTestCase): l1=0.0, l2=0.0, tree_complexity=0.0, + min_node_weight=0, max_splits=max_splits) self.assertAllEqual([[1, 2], [1, 2]], sess.run(node_ids_list)) @@ -76,7 +77,7 @@ class StatsOpsTest(test_util.TensorFlowTestCase): """Testing Gain calculation with L2.""" with self.test_session() as sess: max_splits = 7 - node_id_range = [1, 2] # node 1 through 2 will be processed. + node_id_range = [1, 3] # node 1 through 2 will be processed. stats_summary_list = [ [ [[0., 0.], [.08, .09], [0., 0.], [0., 0.]], # node 0; ignored @@ -106,6 +107,7 @@ class StatsOpsTest(test_util.TensorFlowTestCase): l1=0.0, l2=0.1, tree_complexity=0.0, + min_node_weight=0, max_splits=max_splits) self.assertAllEqual([[1, 2], [1, 2]], sess.run(node_ids_list)) @@ -123,7 +125,7 @@ class StatsOpsTest(test_util.TensorFlowTestCase): """Testing Gain calculation with L1.""" with self.test_session() as sess: max_splits = 7 - node_id_range = [1, 2] # node 1 through 2 will be processed. + node_id_range = [1, 3] # node 1 through 2 will be processed. stats_summary_list = [ [ [[0., 0.], [.08, .09], [0., 0.], [0., 0.]], # node 0; ignored @@ -154,6 +156,7 @@ class StatsOpsTest(test_util.TensorFlowTestCase): l1=l1, l2=0.0, tree_complexity=0.0, + min_node_weight=0, max_splits=max_splits) self.assertAllEqual([[0, 1], [1, 1]], sess.run(thresholds_list)) @@ -173,7 +176,7 @@ class StatsOpsTest(test_util.TensorFlowTestCase): """Testing Gain calculation with L2.""" with self.test_session() as sess: max_splits = 7 - node_id_range = [1, 2] # node 1 through 2 will be processed. + node_id_range = [1, 3] # node 1 through 2 will be processed. 
stats_summary_list = [ [ [[0., 0.], [.08, .09], [0., 0.], [0., 0.]], # node 0; ignored @@ -205,6 +208,7 @@ class StatsOpsTest(test_util.TensorFlowTestCase): l1=0.0, l2=l2, tree_complexity=tree_complexity, + min_node_weight=0, max_splits=max_splits) self.assertAllEqual([[1, 2], [1, 2]], sess.run(node_ids_list)) @@ -220,6 +224,53 @@ class StatsOpsTest(test_util.TensorFlowTestCase): self.assertAllClose([[[-.424658], [-.6]], [[-.043478], [.485294]]], sess.run(right_node_contribs_list)) + def testCalculateBestGainsWithMinNodeWEight(self): + """Testing Gain calculation without any regularization.""" + with self.test_session() as sess: + max_splits = 7 + node_id_range = [1, 3] # node 1 through 2 will be processed. + stats_summary_list = [ + [ + [[0., 0.], [.08, .09], [0., 0.], [0., 0.]], # node 0; ignored + [[0., 0.], [.15, .036], [.06, .07], [.1, .2]], # node 1 + [[0., 0.], [-.33, .68], [0., 0.], [.3, .4]], # node 2 + [[0., 0.], [0., 0.], [0., 0.], [0., 0.]], # node 3; ignored + [[0., 0.], [0., 0.], [0., 0.], [0., 0.]], # node 4; ignored + [[0., 0.], [0., 0.], [0., 0.], [0., 0.]], # node 5; ignored + [[0., 0.], [0., 0.], [0., 0.], [0., 0.]], # node 6; ignored + ], # feature 0 + [ + [[0., 0.], [0., 0.], [.08, .09], [0., 0.]], # node 0; ignored + [[0., 0.], [.3, .5], [-.05, .6], [.06, .07]], # node 1 + [[.1, .1], [.2, .03], [-.4, .05], [.07, .08]], # node 2 + [[0., 0.], [0., 0.], [0., 0.], [0., 0.]], # node 3; ignored + [[0., 0.], [0., 0.], [0., 0.], [0., 0.]], # node 4; ignored + [[0., 0.], [0., 0.], [0., 0.], [0., 0.]], # node 5; ignored + [[0., 0.], [0., 0.], [0., 0.], [0., 0.]], # node 6; ignored + ], # feature 1 + ] # num_features * shape=[max_splits, num_buckets, 2] + + (node_ids_list, gains_list, thresholds_list, left_node_contribs_list, + right_node_contribs_list + ) = boosted_trees_ops.calculate_best_gains_per_feature( + node_id_range, + stats_summary_list, + l1=0.0, + l2=0.0, + tree_complexity=0.0, + min_node_weight=1, + max_splits=max_splits) + + # We can't 
split node 1 on feature 1 and node 2 on feature 2 because of + # the min node weight. + self.assertAllEqual([[2], [1]], sess.run(node_ids_list)) + self.assertAllClose([[0.384314], [0.098013]], sess.run(gains_list)) + self.assertAllEqual([[1], [1]], sess.run(thresholds_list)) + self.assertAllClose([[[0.4852941]], [[-.6]]], + sess.run(left_node_contribs_list)) + self.assertAllClose([[[-0.75]], [[-0.014925]]], + sess.run(right_node_contribs_list)) + def testMakeStatsSummarySimple(self): """Simple test for MakeStatsSummary.""" with self.test_session(): diff --git a/tensorflow/python/kernel_tests/boosted_trees/training_ops_test.py b/tensorflow/python/kernel_tests/boosted_trees/training_ops_test.py index 4226ff75c2327d09c0d89b29950605b610672603..d6c004774746dd28a7b376eb2e0564e5b71e5b40 100644 --- a/tensorflow/python/kernel_tests/boosted_trees/training_ops_test.py +++ b/tensorflow/python/kernel_tests/boosted_trees/training_ops_test.py @@ -132,6 +132,8 @@ class UpdateTreeEnsembleOpTest(test_util.TensorFlowTestCase): growing_metadata { num_trees_attempted: 1 num_layers_attempted: 1 + last_layer_node_start: 0 + last_layer_node_end: 1 } """ self.assertEqual(new_stamp, 1) @@ -314,6 +316,8 @@ class UpdateTreeEnsembleOpTest(test_util.TensorFlowTestCase): growing_metadata { num_trees_attempted: 1 num_layers_attempted: 2 + last_layer_node_start: 0 + last_layer_node_end: 1 } """ self.assertEqual(new_stamp, 1) @@ -461,6 +465,8 @@ class UpdateTreeEnsembleOpTest(test_util.TensorFlowTestCase): growing_metadata { num_trees_attempted: 2 num_layers_attempted: 2 + last_layer_node_start: 1 + last_layer_node_end: 3 } """ self.assertEqual(new_stamp, 1) @@ -615,6 +621,8 @@ class UpdateTreeEnsembleOpTest(test_util.TensorFlowTestCase): growing_metadata { num_trees_attempted: 1 num_layers_attempted: 2 + last_layer_node_start: 3 + last_layer_node_end: 5 } """ self.assertEqual(new_stamp, 1) @@ -624,7 +632,8 @@ class UpdateTreeEnsembleOpTest(test_util.TensorFlowTestCase): """Test that the metadata 
is updated even though we can't split.""" with self.test_session() as session: tree_ensemble_config = boosted_trees_pb2.TreeEnsemble() - text_format.Merge(""" + text_format.Merge( + """ trees { nodes { bucketized_split { @@ -655,6 +664,9 @@ class UpdateTreeEnsembleOpTest(test_util.TensorFlowTestCase): growing_metadata { num_trees_attempted: 1 num_layers_attempted: 1 + last_layer_node_start: 1 + last_layer_node_end: 3 + } """, tree_ensemble_config) @@ -685,7 +697,7 @@ class UpdateTreeEnsembleOpTest(test_util.TensorFlowTestCase): # Expect no new splits created, but attempted (global) stats updated. Meta # data for this tree should not be updated (we didn't succeed building a - # layer. + # layer. Node ranges don't change. new_stamp, serialized = session.run(tree_ensemble.serialize()) tree_ensemble = boosted_trees_pb2.TreeEnsemble() tree_ensemble.ParseFromString(serialized) @@ -721,6 +733,8 @@ class UpdateTreeEnsembleOpTest(test_util.TensorFlowTestCase): growing_metadata { num_trees_attempted: 1 num_layers_attempted: 2 + last_layer_node_start: 1 + last_layer_node_end: 3 } """ self.assertEqual(new_stamp, 1) @@ -730,7 +744,8 @@ class UpdateTreeEnsembleOpTest(test_util.TensorFlowTestCase): """Test metadata is updated correctly when no split due to prepruning.""" with self.test_session() as session: tree_ensemble_config = boosted_trees_pb2.TreeEnsemble() - text_format.Merge(""" + text_format.Merge( + """ trees { nodes { bucketized_split { @@ -761,6 +776,8 @@ class UpdateTreeEnsembleOpTest(test_util.TensorFlowTestCase): growing_metadata { num_trees_attempted: 1 num_layers_attempted: 1 + last_layer_node_start: 1 + last_layer_node_end: 3 } """, tree_ensemble_config) @@ -851,6 +868,8 @@ class UpdateTreeEnsembleOpTest(test_util.TensorFlowTestCase): growing_metadata { num_trees_attempted: 1 num_layers_attempted: 2 + last_layer_node_start: 1 + last_layer_node_end: 3 } """ self.assertEqual(new_stamp, 1) @@ -941,6 +960,8 @@ class 
UpdateTreeEnsembleOpTest(test_util.TensorFlowTestCase): growing_metadata { num_trees_attempted: 1 num_layers_attempted: 1 + last_layer_node_start: 1 + last_layer_node_end: 3 } """ self.assertEqual(new_stamp, 1) @@ -1046,6 +1067,8 @@ class UpdateTreeEnsembleOpTest(test_util.TensorFlowTestCase): growing_metadata { num_trees_attempted: 1 num_layers_attempted: 2 + last_layer_node_start: 3 + last_layer_node_end: 7 } """ self.assertEqual(new_stamp, 2) @@ -1179,6 +1202,8 @@ class UpdateTreeEnsembleOpTest(test_util.TensorFlowTestCase): growing_metadata { num_trees_attempted: 1 num_layers_attempted: 3 + last_layer_node_start: 0 + last_layer_node_end: 1 } """ self.assertEqual(new_stamp, 3) @@ -1268,6 +1293,8 @@ class UpdateTreeEnsembleOpTest(test_util.TensorFlowTestCase): growing_metadata { num_trees_attempted: 1 num_layers_attempted: 1 + last_layer_node_start: 1 + last_layer_node_end: 3 } """ self.assertEqual(new_stamp, 1) @@ -1307,7 +1334,8 @@ class UpdateTreeEnsembleOpTest(test_util.TensorFlowTestCase): # Expect the ensemble to be empty as post-pruning will prune # the entire finalized tree. 
self.assertEqual(new_stamp, 2) - self.assertProtoEquals(""" + self.assertProtoEquals( + """ trees { nodes { leaf { @@ -1359,6 +1387,8 @@ class UpdateTreeEnsembleOpTest(test_util.TensorFlowTestCase): growing_metadata { num_trees_attempted: 1 num_layers_attempted: 2 + last_layer_node_start: 0 + last_layer_node_end: 1 } """, res_ensemble) @@ -1455,6 +1485,8 @@ class UpdateTreeEnsembleOpTest(test_util.TensorFlowTestCase): growing_metadata { num_trees_attempted: 1 num_layers_attempted: 1 + last_layer_node_start: 0 + last_layer_node_end: 1 } """ self.assertEqual(new_stamp, 1) diff --git a/tensorflow/python/kernel_tests/broadcast_to_ops_test.py b/tensorflow/python/kernel_tests/broadcast_to_ops_test.py new file mode 100644 index 0000000000000000000000000000000000000000..6a1bd958ba89080ff38e461646b07edbc6daec21 --- /dev/null +++ b/tensorflow/python/kernel_tests/broadcast_to_ops_test.py @@ -0,0 +1,85 @@ +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for broadcast_to ops.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import test_util +from tensorflow.python.ops import array_ops +from tensorflow.python.platform import test as test_lib + + +class BroadcastToTest(test_util.TensorFlowTestCase): + + def testBroadcastToBasic(self): + for dtype in [np.uint8, np.uint16, np.int8, np.int16, np.int32, np.int64]: + with self.test_session(use_gpu=True): + x = np.array([1, 2, 3], dtype=dtype) + v_tf = array_ops.broadcast_to(constant_op.constant(x), [3, 3]) + v_np = np.broadcast_to(x, [3, 3]) + self.assertAllEqual(v_tf.eval(), v_np) + + def testBroadcastToString(self): + with self.test_session(use_gpu=True): + x = np.array([b"1", b"2", b"3"]) + v_tf = array_ops.broadcast_to(constant_op.constant(x), [3, 3]) + v_np = np.broadcast_to(x, [3, 3]) + self.assertAllEqual(v_tf.eval(), v_np) + + def testBroadcastToBool(self): + with self.test_session(use_gpu=True): + x = np.array([True, False, True], dtype=np.bool) + v_tf = array_ops.broadcast_to(constant_op.constant(x), [3, 3]) + v_np = np.broadcast_to(x, [3, 3]) + self.assertAllEqual(v_tf.eval(), v_np) + + def testBroadcastToShape(self): + for input_dim in range(1, 6): + for output_dim in range(input_dim, 6): + with self.test_session(use_gpu=True): + input_shape = [2] * input_dim + output_shape = [2] * output_dim + x = np.array(np.random.randint(5, size=input_shape), dtype=np.int32) + v_tf = array_ops.broadcast_to(constant_op.constant(x), output_shape) + v_np = np.broadcast_to(x, output_shape) + self.assertAllEqual(v_tf.eval(), v_np) + + def testBroadcastToScalar(self): + with self.test_session(use_gpu=True): + x = np.array(1, dtype=np.int32) + v_tf = 
array_ops.broadcast_to(constant_op.constant(x), [3, 3]) + v_np = np.broadcast_to(x, [3, 3]) + self.assertAllEqual(v_tf.eval(), v_np) + + def testBroadcastToShapeTypeAndInference(self): + for dtype in [dtypes.int32, dtypes.int64]: + with self.test_session(use_gpu=True): + x = np.array([1, 2, 3]) + v_tf = array_ops.broadcast_to( + constant_op.constant(x), + constant_op.constant([3, 3], dtype=dtype)) + shape = v_tf.get_shape().as_list() + v_np = np.broadcast_to(x, [3, 3]) + self.assertAllEqual(v_tf.eval(), v_np) + # check shape inference when shape input is constant + self.assertAllEqual(shape, v_np.shape) + +if __name__ == "__main__": + test_lib.main() diff --git a/tensorflow/python/kernel_tests/clip_ops_test.py b/tensorflow/python/kernel_tests/clip_ops_test.py index 5c8b71da174b8c38a797f8bf97c432d732d9978f..e08123b0417912c479476d8147d832d1715b8882 100644 --- a/tensorflow/python/kernel_tests/clip_ops_test.py +++ b/tensorflow/python/kernel_tests/clip_ops_test.py @@ -19,16 +19,33 @@ from __future__ import division from __future__ import print_function from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.ops import clip_ops +from tensorflow.python.ops import gradient_checker from tensorflow.python.platform import test class ClipTest(test.TestCase): + def DISABLED_testClipByValueGradient(self): + inputs = constant_op.constant([1.0, 2.0, 3.0, 4.0], dtype=dtypes.float32) + outputs_1 = clip_ops.clip_by_value(inputs, 0.5, 3.5) + min_val = constant_op.constant([0.5, 0.5, 0.5, 0.5], dtype=dtypes.float32) + max_val = constant_op.constant([3.5, 3.5, 3.5, 3.5], dtype=dtypes.float32) + outputs_2 = clip_ops.clip_by_value(inputs, min_val, max_val) + with self.test_session(): + error_1 = gradient_checker.compute_gradient_error(inputs, [4], outputs_1, + [4]) + self.assertLess(error_1, 1e-4) + + error_2 = gradient_checker.compute_gradient_error(inputs, [4], outputs_2, + 
[4]) + self.assertLess(error_2, 1e-4) + # ClipByValue test def testClipByValue(self): - with self.test_session(): + with self.test_session(use_gpu=True): x = constant_op.constant([-5.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[2, 3]) np_ans = [[-4.4, 2.0, 3.0], [4.0, 4.4, 4.4]] clip_value = 4.4 @@ -37,8 +54,76 @@ class ClipTest(test.TestCase): self.assertAllClose(np_ans, tf_ans) + # [Tensor, Scalar, Scalar] + def DISABLED_testClipByValue0Type(self): + for dtype in [ + dtypes.float16, dtypes.float32, dtypes.float64, dtypes.int8, + dtypes.int16, dtypes.int32, dtypes.int64, dtypes.uint8, dtypes.uint16 + ]: + with self.test_session(use_gpu=True): + x = constant_op.constant([1, 2, 3, 4, 5, 6], shape=[2, 3], dtype=dtype) + np_ans = [[2, 2, 3], [4, 4, 4]] + clip_value_min = 2 + clip_value_max = 4 + ans = clip_ops.clip_by_value(x, clip_value_min, clip_value_max) + tf_ans = ans.eval() + + self.assertAllClose(np_ans, tf_ans) + + # [Tensor, Tensor, Scalar] + def DISABLED_testClipByValue1Type(self): + for dtype in [ + dtypes.float16, dtypes.float32, dtypes.float64, dtypes.int8, + dtypes.int16, dtypes.int32, dtypes.int64, dtypes.uint8, dtypes.uint16 + ]: + with self.test_session(use_gpu=True): + x = constant_op.constant([1, 2, 3, 4, 5, 6], shape=[2, 3], dtype=dtype) + np_ans = [[2, 2, 3], [4, 4, 4]] + clip_value_min = constant_op.constant( + [2, 2, 2, 3, 3, 3], shape=[2, 3], dtype=dtype) + clip_value_max = 4 + ans = clip_ops.clip_by_value(x, clip_value_min, clip_value_max) + tf_ans = ans.eval() + + self.assertAllClose(np_ans, tf_ans) + + # [Tensor, Scalar, Tensor] + def DISABLED_testClipByValue2Type(self): + for dtype in [ + dtypes.float16, dtypes.float32, dtypes.float64, dtypes.int8, + dtypes.int16, dtypes.int32, dtypes.int64, dtypes.uint8, dtypes.uint16 + ]: + with self.test_session(use_gpu=True): + x = constant_op.constant([1, 2, 3, 4, 5, 6], shape=[2, 3], dtype=dtype) + np_ans = [[4, 4, 4], [4, 5, 6]] + clip_value_min = 4 + clip_value_max = constant_op.constant( + [6, 6, 6, 6, 6, 
6], shape=[2, 3], dtype=dtype) + ans = clip_ops.clip_by_value(x, clip_value_min, clip_value_max) + tf_ans = ans.eval() + + self.assertAllClose(np_ans, tf_ans) + + # [Tensor, Tensor, Tensor] + def DISABLED_testClipByValue3Type(self): + for dtype in [ + dtypes.float16, dtypes.float32, dtypes.float64, dtypes.int8, + dtypes.int16, dtypes.int32, dtypes.int64, dtypes.uint8, dtypes.uint16 + ]: + with self.test_session(use_gpu=True): + x = constant_op.constant([1, 2, 3, 4, 5, 6], shape=[2, 3], dtype=dtype) + np_ans = [[2, 2, 3], [5, 5, 6]] + clip_value_min = constant_op.constant( + [2, 2, 2, 5, 5, 5], shape=[2, 3], dtype=dtype) + clip_value_max = constant_op.constant( + [5, 5, 5, 7, 7, 7], shape=[2, 3], dtype=dtype) + ans = clip_ops.clip_by_value(x, clip_value_min, clip_value_max) + tf_ans = ans.eval() + + self.assertAllClose(np_ans, tf_ans) + def testClipByValueBadShape(self): - with self.test_session(): + with self.test_session(use_gpu=True): x = constant_op.constant([-5.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[2, 3, 1]) # Use a nonsensical shape. clip = constant_op.constant([1.0, 2.0]) @@ -48,6 +133,7 @@ class ClipTest(test.TestCase): _ = clip_ops.clip_by_value(x, 1.0, clip) def testClipByValueNonFinite(self): + # TODO(b/78016351): Enable test on GPU once the bug is fixed. 
with self.test_session(): x = constant_op.constant([float('NaN'), float('Inf'), -float('Inf')]) np_ans = [float('NaN'), 4.0, -4.0] @@ -60,7 +146,7 @@ class ClipTest(test.TestCase): # ClipByNorm tests def testClipByNormClipped(self): # Norm clipping when clip_norm < 5 - with self.test_session(): + with self.test_session(use_gpu=True): x = constant_op.constant([-3.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3]) # Norm of x = sqrt(3^2 + 4^2) = 5 np_ans = [[-2.4, 0.0, 0.0], [3.2, 0.0, 0.0]] @@ -76,7 +162,7 @@ class ClipTest(test.TestCase): self.assertAllClose(np_ans, tf_ans_tensor) def testClipByNormBadShape(self): - with self.test_session(): + with self.test_session(use_gpu=True): x = constant_op.constant([-3.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3, 1]) # Use a nonsensical shape. clip = constant_op.constant([1.0, 2.0]) @@ -85,7 +171,7 @@ class ClipTest(test.TestCase): def testClipByNormNotClipped(self): # No norm clipping when clip_norm >= 5 - with self.test_session(): + with self.test_session(use_gpu=True): x = constant_op.constant([-3.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3]) # Norm of x = sqrt(3^2 + 4^2) = 5 np_ans = [[-3.0, 0.0, 0.0], [4.0, 0.0, 0.0]] @@ -97,7 +183,7 @@ class ClipTest(test.TestCase): def testClipByNormZero(self): # No norm clipping when norm = 0 - with self.test_session(): + with self.test_session(use_gpu=True): x = constant_op.constant([0.0, 0.0, 0.0, 0.0, 0.0, 0.0], shape=[2, 3]) # Norm = 0, no changes np_ans = [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]] @@ -109,7 +195,7 @@ class ClipTest(test.TestCase): def testClipByNormClippedWithDim0(self): # Norm clipping when clip_norm < 5 - with self.test_session(): + with self.test_session(use_gpu=True): x = constant_op.constant([-3.0, 0.0, 0.0, 4.0, 0.0, 3.0], shape=[2, 3]) # Norm of x[:, 0] = sqrt(3^2 + 4^2) = 5, x[:, 2] = 3 np_ans = [[-2.4, 0.0, 0.0], [3.2, 0.0, 3.0]] @@ -121,7 +207,7 @@ class ClipTest(test.TestCase): def testClipByNormClippedWithDim1(self): # Norm clipping when clip_norm < 5 - with 
self.test_session(): + with self.test_session(use_gpu=True): x = constant_op.constant([-3.0, 0.0, 0.0, 4.0, 0.0, 3.0], shape=[2, 3]) # Norm of x[0, :] = 3, x[1, :] = sqrt(3^2 + 4^2) = 5 np_ans = [[-3.0, 0.0, 0.0], [3.2, 0.0, 2.4]] @@ -133,7 +219,7 @@ class ClipTest(test.TestCase): def testClipByNormNotClippedWithAxes(self): # No norm clipping when clip_norm >= 5 - with self.test_session(): + with self.test_session(use_gpu=True): x = constant_op.constant([-3.0, 0.0, 0.0, 4.0, 0.0, 3.0], shape=[2, 3]) # Norm of x[0, :] = 3, x[1, :] = sqrt(3^2 + 4^2) = 5 np_ans = [[-3.0, 0.0, 0.0], [4.0, 0.0, 3.0]] @@ -146,7 +232,7 @@ class ClipTest(test.TestCase): # ClipByGlobalNorm tests def testClipByGlobalNormClipped(self): # Norm clipping when clip_norm < 5 - with self.test_session(): + with self.test_session(use_gpu=True): x0 = constant_op.constant([-2.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3]) x1 = constant_op.constant([1.0, -2.0]) # Global norm of x0 and x1 = sqrt(1 + 4^2 + 2^2 + 2^2) = 5 @@ -167,7 +253,7 @@ class ClipTest(test.TestCase): def testClipByGlobalNormClippedTensor(self): # Norm clipping when clip_norm < 5 - with self.test_session(): + with self.test_session(use_gpu=True): x0 = constant_op.constant([-2.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3]) x1 = constant_op.constant([1.0, -2.0]) # Global norm of x0 and x1 = sqrt(1 + 4^2 + 2^2 + 2^2) = 5 @@ -188,7 +274,7 @@ class ClipTest(test.TestCase): def testClipByGlobalNormSupportsNone(self): # Norm clipping when clip_norm < 5 - with self.test_session(): + with self.test_session(use_gpu=True): x0 = constant_op.constant([-2.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3]) x1 = constant_op.constant([1.0, -2.0]) # Global norm of x0 and x1 = sqrt(1 + 4^2 + 2^2 + 2^2) = 5 @@ -211,7 +297,7 @@ class ClipTest(test.TestCase): def testClipByGlobalNormWithIndexedSlicesClipped(self): # Norm clipping when clip_norm < 5 - with self.test_session(): + with self.test_session(use_gpu=True): x0 = constant_op.constant([-2.0, 0.0, 0.0, 4.0, 0.0, 0.0], 
shape=[2, 3]) x1 = ops.IndexedSlices( constant_op.constant([1.0, -2.0]), constant_op.constant([3, 4])) @@ -244,7 +330,7 @@ class ClipTest(test.TestCase): def testClipByGlobalNormNotClipped(self): # No norm clipping when clip_norm >= 5 - with self.test_session(): + with self.test_session(use_gpu=True): x0 = constant_op.constant([-2.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3]) x1 = constant_op.constant([1.0, -2.0]) # Global norm of x0 and x1 = sqrt(1 + 4^2 + 2^2 + 2^2) = 5 @@ -263,7 +349,7 @@ class ClipTest(test.TestCase): def testClipByGlobalNormZero(self): # No norm clipping when norm = 0 - with self.test_session(): + with self.test_session(use_gpu=True): x0 = constant_op.constant([0.0, 0.0, 0.0, 0.0, 0.0, 0.0], shape=[2, 3]) x1 = constant_op.constant([0.0, 0.0]) # Norm = 0, no changes @@ -282,7 +368,7 @@ class ClipTest(test.TestCase): def testClipByAverageNormClipped(self): # Norm clipping when average clip_norm < 0.83333333 - with self.test_session(): + with self.test_session(use_gpu=True): x = constant_op.constant([-3.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3]) # Average norm of x = sqrt(3^2 + 4^2) / 6 = 0.83333333 np_ans = [[-2.88, 0.0, 0.0], [3.84, 0.0, 0.0]] @@ -294,7 +380,7 @@ class ClipTest(test.TestCase): def testClipByAverageNormClippedTensor(self): # Norm clipping when average clip_norm < 0.83333333 - with self.test_session(): + with self.test_session(use_gpu=True): x = constant_op.constant([-3.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3]) # Average norm of x = sqrt(3^2 + 4^2) / 6 = 0.83333333 np_ans = [[-2.88, 0.0, 0.0], [3.84, 0.0, 0.0]] @@ -306,7 +392,7 @@ class ClipTest(test.TestCase): def testClipByAverageNormNotClipped(self): # No norm clipping when average clip_norm >= 0.83333333 - with self.test_session(): + with self.test_session(use_gpu=True): x = constant_op.constant([-3.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3]) # Average norm of x = sqrt(3^2 + 4^2) / 6 = 0.83333333 np_ans = [[-3.0, 0.0, 0.0], [4.0, 0.0, 0.0]] @@ -318,7 +404,7 @@ class 
ClipTest(test.TestCase): def testClipByAverageNormZero(self): # No norm clipping when average clip_norm = 0 - with self.test_session(): + with self.test_session(use_gpu=True): x = constant_op.constant([0.0, 0.0, 0.0, 0.0, 0.0, 0.0], shape=[2, 3]) # Average norm = 0, no changes np_ans = [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]] diff --git a/tensorflow/python/kernel_tests/confusion_matrix_test.py b/tensorflow/python/kernel_tests/confusion_matrix_test.py index 670a625f0f1dd84c523de8acb17f9a410d184ad5..79e419867d70071280b7c88b6bfa820b935b24cd 100644 --- a/tensorflow/python/kernel_tests/confusion_matrix_test.py +++ b/tensorflow/python/kernel_tests/confusion_matrix_test.py @@ -19,6 +19,7 @@ from __future__ import division from __future__ import print_function import numpy as np +from six.moves import xrange # pylint: disable=redefined-builtin from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes @@ -104,11 +105,7 @@ class ConfusionMatrixTest(test.TestCase): d, l, cm_out = sess.run([data, lab, cm], {m_neg: 0.0, m_pos: 1.0, s: 1.0}) truth = np.zeros([2, 2], dtype=np_dtype) - try: - range_builder = xrange - except NameError: # In Python 3. 
- range_builder = range - for i in range_builder(len(d)): + for i in xrange(len(d)): truth[l[i], d[i]] += 1 self.assertEqual(cm_out.dtype, np_dtype) diff --git a/tensorflow/python/kernel_tests/constant_op_test.py b/tensorflow/python/kernel_tests/constant_op_test.py index 18796f709566f022258806ce46cc706e8fe34354..107ee37fabbae56c5bf715e1e7953b62ac3c526b 100644 --- a/tensorflow/python/kernel_tests/constant_op_test.py +++ b/tensorflow/python/kernel_tests/constant_op_test.py @@ -65,6 +65,11 @@ class ConstantTest(test.TestCase): self._testCpu(x) self._testGpu(x) + def testInvalidDType(self): + # Test case for GitHub issue 18474 + with self.assertRaises(TypeError): + constant_op.constant(dtypes_lib.string, "[,]") + def testBFloat16(self): bfloat16 = dtypes_lib.bfloat16.as_numpy_dtype self._testAll(np.arange(-15, 15).reshape([2, 3, 5]).astype(bfloat16)) @@ -653,12 +658,12 @@ class FillTest(test.TestCase): self._compareAll([2, 3], np_ans[0][0], np_ans) def testFillComplex64(self): - np_ans = np.array([[0.15] * 3] * 2).astype(np.complex64) - self._compare([2, 3], np_ans[0][0], np_ans, use_gpu=False) + np_ans = np.array([[0.15 + 0.3j] * 3] * 2).astype(np.complex64) + self._compareAll([2, 3], np_ans[0][0], np_ans) def testFillComplex128(self): - np_ans = np.array([[0.15] * 3] * 2).astype(np.complex128) - self._compare([2, 3], np_ans[0][0], np_ans, use_gpu=False) + np_ans = np.array([[0.15 + 0.3j] * 3] * 2).astype(np.complex128) + self._compareAll([2, 3], np_ans[0][0], np_ans) def testFillString(self): np_ans = np.array([[b"yolo"] * 3] * 2) diff --git a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py index 75f8644f694c4cebb7dbdac4599244dda427bc05..e27eb00818a9f4b8dd8b8c9caf5bd48ee7400928 100644 --- a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py +++ b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py @@ -664,6 +664,23 @@ class ControlFlowTest(test.TestCase): self.assertAllEqual(42.0, 
grad.eval(feed_dict={c: 1})) self.assertAllEqual(3.0, grad.eval(feed_dict={c: 3})) + def testCondGrad_3(self): + with self.test_session(): + c = array_ops.placeholder(dtypes.int32, shape=[]) + ox = constant_op.constant(10.0) + pred = math_ops.less(c, 2) + + def fn1(x): + m = x * x + return gradients_impl.gradients(m, [ox])[0] + + fn2 = lambda: math_ops.multiply(ox, 3.0) + y = math_ops.multiply(7.0, ox) + r = control_flow_ops.cond(pred, lambda: fn1(y), fn2) + + self.assertAllEqual(980.0, r.eval(feed_dict={c: 1})) + self.assertAllEqual(30.0, r.eval(feed_dict={c: 3})) + def testNestedCond_Simple(self): with self.test_session(): x = constant_op.constant(0., name="X") diff --git a/tensorflow/python/kernel_tests/conv3d_transpose_test.py b/tensorflow/python/kernel_tests/conv3d_transpose_test.py index a8b3af509622a871f0afe78c510f9ce994078866..8973a450fa246e3c924f4352d72a1bbd4f7851ea 100644 --- a/tensorflow/python/kernel_tests/conv3d_transpose_test.py +++ b/tensorflow/python/kernel_tests/conv3d_transpose_test.py @@ -119,6 +119,18 @@ class Conv3DTransposeTest(test.TestCase): target = 3.0 self.assertAllClose(target, value[n, d, h, w, k]) + def testConv3DTransposeShapeMismatch(self): + # Test case for GitHub issue 18460 + x_shape = [2, 2, 3, 4, 3] + f_shape = [3, 3, 3, 2, 2] + y_shape = [2, 2, 6, 8, 6] + strides = [1, 1, 2, 2, 2] + np.random.seed(1) + x_value = np.random.random_sample(x_shape).astype(np.float64) + f_value = np.random.random_sample(f_shape).astype(np.float64) + nn_ops.conv3d_transpose( + x_value, f_value, y_shape, strides, data_format='NCDHW') + def testConv3DTransposeValid(self): with self.test_session(): strides = [1, 2, 2, 2, 1] diff --git a/tensorflow/python/kernel_tests/conv_ops_3d_test.py b/tensorflow/python/kernel_tests/conv_ops_3d_test.py index ec8ac74163d093c57e6e4ffbab6977ce732cc3ef..0b531125f36c6d01268081aefe3815d38720a23f 100644 --- a/tensorflow/python/kernel_tests/conv_ops_3d_test.py +++ b/tensorflow/python/kernel_tests/conv_ops_3d_test.py @@ -25,8 
+25,10 @@ import numpy as np from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import test_util from tensorflow.python.ops import gradient_checker +from tensorflow.python.ops import gradients_impl from tensorflow.python.ops import nn_ops import tensorflow.python.ops.nn_grad # pylint: disable=unused-import from tensorflow.python.platform import test @@ -60,18 +62,18 @@ class Conv3DTest(test.TestCase): def _SetupValuesForDevice(self, tensor_in_sizes, filter_in_sizes, stride, padding, data_format, dtype, use_gpu): - total_size_1 = 1 - total_size_2 = 1 + total_size_tensor = 1 + total_size_filter = 1 for s in tensor_in_sizes: - total_size_1 *= s + total_size_tensor *= s for s in filter_in_sizes: - total_size_2 *= s + total_size_filter *= s # Initializes the input tensor with array containing numbers from 0 to 1. # We keep the input tensor values fairly small to avoid overflowing float16 # during the conv3d. 
- x1 = [f * 1.0 / total_size_1 for f in range(1, total_size_1 + 1)] - x2 = [f * 1.0 / total_size_2 for f in range(1, total_size_2 + 1)] + x1 = [f * 1.0 / total_size_tensor for f in range(1, total_size_tensor + 1)] + x2 = [f * 1.0 / total_size_filter for f in range(1, total_size_filter + 1)] with self.test_session(use_gpu=use_gpu): t1 = constant_op.constant(x1, shape=tensor_in_sizes, dtype=dtype) t2 = constant_op.constant(x2, shape=filter_in_sizes, dtype=dtype) @@ -117,6 +119,79 @@ class Conv3DTest(test.TestCase): self.assertAllClose(expected, value.flatten(), atol=tol, rtol=tol) + def _ComputeReferenceDilatedConv(self, tensor_in_sizes, filter_in_sizes, + stride, dilation, padding, data_format, + use_gpu): + total_size_tensor = 1 + total_size_filter = 1 + for s in tensor_in_sizes: + total_size_tensor *= s + for s in filter_in_sizes: + total_size_filter *= s + + # Initializes the input tensor with array containing incrementing + # numbers from 1. + x1 = [f * 1.0 for f in range(1, total_size_tensor + 1)] + x2 = [f * 1.0 for f in range(1, total_size_filter + 1)] + with self.test_session(use_gpu=use_gpu): + t1 = constant_op.constant(x1, shape=tensor_in_sizes) + t2 = constant_op.constant(x2, shape=filter_in_sizes) + if isinstance(stride, collections.Iterable): + strides = list(stride) + else: + strides = [stride, stride, stride] + if data_format == "NCDHW": + t1 = test_util.NHWCToNCHW(t1) + full_strides = [1, 1] + strides + full_dilation = [1, 1] + dilation + else: + full_strides = [1] + strides + [1] + full_dilation = [1] + dilation + [1] + expected = nn_ops.convolution( + t1, + t2, + padding=padding, + strides=strides, + dilation_rate=dilation, + data_format=data_format) + computed = nn_ops.conv3d( + t1, + t2, + strides=full_strides, + dilations=full_dilation, + padding=padding, + data_format=data_format) + if data_format == "NCDHW": + expected = test_util.NCHWToNHWC(expected) + computed = test_util.NCHWToNHWC(computed) + return expected, computed + + def 
_VerifyDilatedConvValues(self, tensor_in_sizes, filter_in_sizes, stride, + padding, dilations): + expected_results = [] + computed_results = [] + default_dilations = ( + dilations[0] == 1 and dilations[1] == 1 and dilations[2] == 1) + for data_format, use_gpu in GetTestConfigs(): + # If any dilation rate is larger than 1, only do test on the GPU + # because we currently do not have a CPU implementation for arbitrary + # dilation rates. + if default_dilations or use_gpu: + expected, computed = self._ComputeReferenceDilatedConv( + tensor_in_sizes, filter_in_sizes, stride, dilations, padding, + data_format, use_gpu) + expected_results.append(expected) + computed_results.append(computed) + tolerance = 1e-2 if use_gpu else 1e-5 + with self.test_session() as sess: + expected_values = sess.run(expected_results) + computed_values = sess.run(computed_results) + for e_value, c_value in zip(expected_values, computed_values): + print("expected = ", e_value) + print("actual = ", c_value) + self.assertAllClose( + e_value.flatten(), c_value.flatten(), atol=tolerance, rtol=1e-6) + def testConv3D1x1x1Filter(self): expected_output = [ 0.18518519, 0.22222222, 0.25925926, 0.40740741, 0.5, 0.59259259, @@ -144,6 +219,15 @@ class Conv3DTest(test.TestCase): padding="VALID", expected=expected_output) + def testConv3D1x1x1Filter2x1x1Dilation(self): + if test.is_gpu_available(cuda_only=True): + self._VerifyDilatedConvValues( + tensor_in_sizes=[1, 3, 6, 1, 1], + filter_in_sizes=[1, 1, 1, 1, 1], + stride=1, + padding="VALID", + dilations=[2, 1, 1]) + # Expected values computed using scipy's correlate function. 
def testConv3D2x2x2Filter(self): expected_output = [ @@ -160,6 +244,15 @@ class Conv3DTest(test.TestCase): padding="VALID", expected=expected_output) + def testConv3D2x2x2Filter1x2x1Dilation(self): + if test.is_gpu_available(cuda_only=True): + self._VerifyDilatedConvValues( + tensor_in_sizes=[1, 4, 6, 3, 1], + filter_in_sizes=[2, 2, 2, 1, 1], + stride=1, + padding="VALID", + dilations=[1, 2, 1]) + def testConv3DStrides(self): expected_output = [ 0.06071429, 0.08988095, 0.10238095, 0.11488095, 0.12738095, 0.13988095, @@ -344,6 +437,8 @@ class Conv3DTest(test.TestCase): if data_format == "NCDHW": conv = test_util.NCHWToNHWC(conv) + self.assertEqual(conv.shape, tensor_shape.TensorShape(output_shape)) + if test_input: jacob_t, jacob_n = gradient_checker.compute_gradient( orig_input_tensor, input_shape, conv, output_shape) @@ -543,6 +638,98 @@ class Conv3DTest(test.TestCase): padding="SAME", test_input=False) + # Testing for backprops + def _RunAndVerifyBackprop(self, input_sizes, filter_sizes, output_sizes, + strides, dilations, padding, data_format, use_gpu, + err, mode): + total_input_size = 1 + total_filter_size = 1 + for s in input_sizes: + total_input_size *= s + for s in filter_sizes: + total_filter_size *= s + # Initializes the input tensor with array containing incrementing + # numbers from 1. + x1 = [f * 1.0 for f in range(1, total_input_size + 1)] + x2 = [f * 1.0 for f in range(1, total_filter_size + 1)] + default_dilations = ( + dilations[0] == 1 and dilations[1] == 1 and dilations[2] == 1) + + # If any dilation rate is larger than 1, only do test on the GPU + # because we currently do not have a CPU implementation for arbitrary + # dilation rates. 
+ if default_dilations or use_gpu: + with self.test_session(use_gpu=use_gpu) as sess: + if data_format == "NCDHW": + input_sizes = test_util.NHWCToNCHW(input_sizes) + t1 = constant_op.constant(x1, shape=input_sizes) + t2 = constant_op.constant(x2, shape=filter_sizes) + full_strides = [1] + strides + [1] + full_dilations = [1] + dilations + [1] + if data_format == "NCDHW": + full_strides = test_util.NHWCToNCHW(full_strides) + full_dilations = test_util.NHWCToNCHW(full_dilations) + actual = nn_ops.conv3d( + t1, + t2, + strides=full_strides, + dilations=full_dilations, + padding=padding, + data_format=data_format) + expected = nn_ops.convolution( + t1, + t2, + padding=padding, + strides=strides, + dilation_rate=dilations, + data_format=data_format) + if data_format == "NCDHW": + actual = test_util.NCHWToNHWC(actual) + expected = test_util.NCHWToNHWC(expected) + actual_grad = gradients_impl.gradients(actual, t1 + if mode == "input" else t2)[0] + expected_grad = gradients_impl.gradients(expected, t1 + if mode == "input" else t2)[0] + # "values" consists of two tensors for two backprops + actual_value = sess.run(actual_grad) + expected_value = sess.run(expected_grad) + self.assertShapeEqual(actual_value, actual_grad) + self.assertShapeEqual(expected_value, expected_grad) + print("expected = ", expected_value) + print("actual = ", actual_value) + self.assertArrayNear(expected_value.flatten(), actual_value.flatten(), + err) + + def testConv3D2x2Depth3ValidBackpropFilterStride1x1Dilation2x1(self): + if test.is_gpu_available(cuda_only=True): + for (data_format, use_gpu) in GetTestConfigs(): + self._RunAndVerifyBackprop( + input_sizes=[1, 3, 6, 1, 1], + filter_sizes=[2, 2, 1, 1, 1], + output_sizes=[1, 1, 5, 1, 1], + strides=[1, 1, 1], + dilations=[2, 1, 1], + padding="VALID", + data_format=data_format, + use_gpu=use_gpu, + err=1e-5, + mode="filter") + + def testConv3D2x2Depth3ValidBackpropInputStride1x1Dilation2x1(self): + if test.is_gpu_available(cuda_only=True): + for 
(data_format, use_gpu) in GetTestConfigs(): + self._RunAndVerifyBackprop( + input_sizes=[1, 3, 6, 1, 1], + filter_sizes=[2, 2, 1, 1, 1], + output_sizes=[1, 1, 5, 1, 1], + strides=[1, 1, 1], + dilations=[2, 1, 1], + padding="VALID", + data_format=data_format, + use_gpu=use_gpu, + err=1e-5, + mode="input") + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/kernel_tests/cwise_ops_test.py b/tensorflow/python/kernel_tests/cwise_ops_test.py index 8db0bb6f0dc495e7be2cd717787acf87156f42af..87da89831c8ded9b8382c7bb251948b6d202300e 100644 --- a/tensorflow/python/kernel_tests/cwise_ops_test.py +++ b/tensorflow/python/kernel_tests/cwise_ops_test.py @@ -398,14 +398,17 @@ class UnaryOpTest(test.TestCase): self._compareCpu(x, np.abs, _ABS) self._compareCpu(x, np.negative, math_ops.negative) self._compareCpu(x, np.negative, _NEG) - self._compareCpu(x, np.square, math_ops.square) self._compareCpu(x, np.sign, math_ops.sign) self._compareBothSparse(x, np.abs, math_ops.abs) self._compareBothSparse(x, np.negative, math_ops.negative) - self._compareBothSparse(x, np.square, math_ops.square) self._compareBothSparse(x, np.sign, math_ops.sign) + def testInt64Square(self): + x = np.arange(-6 << 20, 6 << 20, 2 << 20).reshape(1, 3, 2).astype(np.int64) + self._compareCpu(x, np.square, math_ops.square) + self._compareBothSparse(x, np.square, math_ops.square) + def testComplex64Basic(self): x = np.complex(1, 1) * np.arange(-3, 3).reshape(1, 3, 2).astype( np.complex64) @@ -2165,5 +2168,47 @@ class AccumulateTest(test.TestCase): math_ops.accumulate_n([a], tensor_dtype=np.int32) +class PolyvalTest(test.TestCase): + + def _runtest(self, dtype, degree): + x = np.random.rand(2, 2).astype(dtype) + coeffs = [np.random.rand(2, 2).astype(dtype) for _ in range(degree + 1)] + np_val = np.polyval(coeffs, x) + with self.test_session(): + tf_val = math_ops.polyval(coeffs, x) + self.assertAllClose(np_val, tf_val.eval()) + + def testSimple(self): + for dtype in [ + np.int32, np.float32, 
np.float64, np.complex64, np.complex128 + ]: + for degree in range(5): + self._runtest(dtype, degree) + + def testBroadcast(self): + dtype = np.float32 + degree = 3 + shapes = [(1,), (2, 1), (1, 2), (2, 2)] + for x_shape in shapes: + for coeff_shape in shapes: + x = np.random.rand(*x_shape).astype(dtype) + coeffs = [ + np.random.rand(*coeff_shape).astype(dtype) + for _ in range(degree + 1) + ] + np_val = np.polyval(coeffs, x) + with self.test_session(): + tf_val = math_ops.polyval(coeffs, x) + self.assertAllClose(np_val, tf_val.eval()) + + def testEmpty(self): + x = np.random.rand(2, 2).astype(np.float32) + coeffs = [] + np_val = np.polyval(coeffs, x) + with self.test_session(): + tf_val = math_ops.polyval(coeffs, x) + self.assertAllClose(np_val, tf_val.eval()) + + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/kernel_tests/decode_csv_op_test.py b/tensorflow/python/kernel_tests/decode_csv_op_test.py index fec52fa9cc7bcab1da67e797c2e121edac8c9345..4f49d726765e6019715a9b40f531b82df7f33126 100644 --- a/tensorflow/python/kernel_tests/decode_csv_op_test.py +++ b/tensorflow/python/kernel_tests/decode_csv_op_test.py @@ -78,9 +78,11 @@ class DecodeCSVOpTest(test.TestCase): self._test(args, expected_out) def test2DNoQuoteDelimiter(self): - args = {"records": [["1", "2"], ['""', '"']], - "record_defaults": [[""]], - "use_quote_delim": False} + args = { + "records": [["1", "2"], ['""', '"']], + "record_defaults": [[""]], + "use_quote_delim": False + } expected_out = [[[b"1", b"2"], [b'""', b'"']]] self._test(args, expected_out) @@ -88,8 +90,7 @@ class DecodeCSVOpTest(test.TestCase): def testDouble(self): args = { "records": ["1.0", "-1.79e+308", '"1.79e+308"'], - "record_defaults": [np.array( - [], dtype=np.double)], + "record_defaults": [np.array([], dtype=np.double)], } expected_out = [[1.0, -1.79e+308, 1.79e+308]] @@ -99,8 +100,7 @@ class DecodeCSVOpTest(test.TestCase): def testInt64(self): args = { "records": ["1", "2", '"2147483648"'], - 
"record_defaults": [np.array( - [], dtype=np.int64)], + "record_defaults": [np.array([], dtype=np.int64)], } expected_out = [[1, 2, 2147483648]] @@ -173,8 +173,7 @@ class DecodeCSVOpTest(test.TestCase): def testWithoutDefaultsError(self): args = { "records": [",1", "0.2,3", "3.0,"], - "record_defaults": [[1.0], np.array( - [], dtype=np.int32)] + "record_defaults": [[1.0], np.array([], dtype=np.int32)] } self._test( @@ -183,8 +182,7 @@ class DecodeCSVOpTest(test.TestCase): def testWrongFieldIntError(self): args = { "records": [",1", "0.2,234a", "3.0,2"], - "record_defaults": [[1.0], np.array( - [], dtype=np.int32)] + "record_defaults": [[1.0], np.array([], dtype=np.int32)] } self._test( @@ -202,8 +200,7 @@ class DecodeCSVOpTest(test.TestCase): def testWrongFieldFloatError(self): args = { "records": [",1", "0.2,2", "3.0adf,3"], - "record_defaults": [[1.0], np.array( - [], dtype=np.int32)] + "record_defaults": [[1.0], np.array([], dtype=np.int32)] } self._test( @@ -229,6 +226,73 @@ class DecodeCSVOpTest(test.TestCase): self._test( args, expected_err_re="Quoted field has to end with quote followed.*") + def testSelectCols(self): + args = { + "records": [",,", "4,5,6"], + "record_defaults": [[1], [2]], + "select_cols": [0, 1] + } + expected_out = [[1, 4], [2, 5]] + self._test(args, expected_out) + + def testSelectColsInclLast(self): + # The last col is a edge-casey; add test for that + args = { + "records": [",,", "4,5,6"], + "record_defaults": [[0], [1], [2]], + "select_cols": [0, 1, 2] + } + expected_out = [[0, 4], [1, 5], [2, 6]] + self._test(args, expected_out) + + def testWrongSelectColsInclLast(self): + # The last col is a edge-casey; add test for that + args = { + "records": [",,", "4,5,6"], + "record_defaults": [[0], [1], [2]], + "select_cols": [0, 1, 3] + } + self._test(args, expected_err_re="Expect 3 fields but have 2 in record 0") + + def testWrongSelectColsLen(self): + args = { + "records": ["1,2,3", "4,5,6"], + "record_defaults": [[0], [0], [0]], + 
"select_cols": [0] + } + with self.assertRaisesWithPredicateMatch( + ValueError, "Length of select_cols and record_defaults do not match."): + self._test(args) + + def testWrongSelectColsSorting(self): + args = { + "records": ["1,2,3"], + "record_defaults": [[0], [1]], + "select_cols": [1, 0] + } + with self.assertRaisesWithPredicateMatch( + ValueError, "select_cols is not strictly increasing."): + self._test(args) + + def testWrongSelectColsIndicesNegative(self): + args = { + "records": ["1,2,3"], + "record_defaults": [[0], [1]], + "select_cols": [-1, 0] # -1 is not a valid index + } + with self.assertRaisesWithPredicateMatch( + ValueError, "select_cols contains negative values."): + self._test(args) + + def testWrongSelectColsIndicesTooHigh(self): + args = { + "records": ["1,2,3"], + "record_defaults": [[0], [1]], + "select_cols": [0, 3] # 3 is not a valid index + } + # Only successfully parses one of the columns + self._test(args, expected_err_re="Expect 2 fields but have 1 in record 0") + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/kernel_tests/distributions/bijector_test.py b/tensorflow/python/kernel_tests/distributions/bijector_test.py index 9f9fb5c0bb4c0e9d68ddf6034a8649ad5a6bd8e9..18582241e2fb69dffc0b66aa361aa77fbb97944f 100644 --- a/tensorflow/python/kernel_tests/distributions/bijector_test.py +++ b/tensorflow/python/kernel_tests/distributions/bijector_test.py @@ -20,6 +20,7 @@ from __future__ import print_function import abc +import numpy as np import six from tensorflow.python.framework import constant_op @@ -43,11 +44,10 @@ class BaseBijectorTest(test.TestCase): """Minimal specification of a `Bijector`.""" def __init__(self): - super(_BareBonesBijector, self).__init__() + super(_BareBonesBijector, self).__init__(forward_min_event_ndims=0) with self.test_session() as sess: bij = _BareBonesBijector() - self.assertEqual(None, bij.event_ndims) self.assertEqual([], bij.graph_parents) self.assertEqual(False, bij.is_constant_jacobian) 
self.assertEqual(False, bij.validate_args) @@ -67,13 +67,21 @@ class BaseBijectorTest(test.TestCase): self.assertAllEqual(shape, inverse_event_shape_) self.assertAllEqual(shape, bij.inverse_event_shape(shape)) - for fn in ["forward", - "inverse", - "inverse_log_det_jacobian", - "forward_log_det_jacobian"]: - with self.assertRaisesRegexp( - NotImplementedError, fn + " not implemented"): - getattr(bij, fn)(0) + with self.assertRaisesRegexp( + NotImplementedError, "inverse not implemented"): + bij.inverse(0) + + with self.assertRaisesRegexp( + NotImplementedError, "forward not implemented"): + bij.forward(0) + + with self.assertRaisesRegexp( + NotImplementedError, "inverse_log_det_jacobian not implemented"): + bij.inverse_log_det_jacobian(0, event_ndims=0) + + with self.assertRaisesRegexp( + NotImplementedError, "forward_log_det_jacobian not implemented"): + bij.forward_log_det_jacobian(0, event_ndims=0) class IntentionallyMissingError(Exception): @@ -85,7 +93,7 @@ class BrokenBijector(bijector.Bijector): def __init__(self, forward_missing=False, inverse_missing=False): super(BrokenBijector, self).__init__( - event_ndims=0, validate_args=False, name="broken") + validate_args=False, forward_min_event_ndims=0, name="broken") self._forward_missing = forward_missing self._inverse_missing = inverse_missing @@ -120,35 +128,42 @@ class BijectorCachingTestBase(object): def testCachingOfForwardResults(self): broken_bijector = self.broken_bijector_cls(inverse_missing=True) - with self.test_session(): - x = constant_op.constant(1.1) + x = constant_op.constant(1.1) + + # Call forward and forward_log_det_jacobian one-by-one (not together). + y = broken_bijector.forward(x) + _ = broken_bijector.forward_log_det_jacobian(x, event_ndims=0) - # Call forward and forward_log_det_jacobian one-by-one (not together). - y = broken_bijector.forward(x) - _ = broken_bijector.forward_log_det_jacobian(x) + # Now, everything should be cached if the argument is y. 
+ broken_bijector.inverse_log_det_jacobian(y, event_ndims=0) + try: + broken_bijector.inverse(y) + broken_bijector.inverse_log_det_jacobian(y, event_ndims=0) + except IntentionallyMissingError: + raise AssertionError("Tests failed! Cached values not used.") - # Now, everything should be cached if the argument is y. - try: - broken_bijector.inverse(y) - broken_bijector.inverse_log_det_jacobian(y) - except IntentionallyMissingError: - raise AssertionError("Tests failed! Cached values not used.") + # Different event_ndims should not be cached. + with self.assertRaises(IntentionallyMissingError): + broken_bijector.inverse_log_det_jacobian(y, event_ndims=1) def testCachingOfInverseResults(self): broken_bijector = self.broken_bijector_cls(forward_missing=True) - with self.test_session(): - y = constant_op.constant(1.1) + y = constant_op.constant(1.1) - # Call inverse and inverse_log_det_jacobian one-by-one (not together). - x = broken_bijector.inverse(y) - _ = broken_bijector.inverse_log_det_jacobian(y) + # Call inverse and inverse_log_det_jacobian one-by-one (not together). + x = broken_bijector.inverse(y) + _ = broken_bijector.inverse_log_det_jacobian(y, event_ndims=0) - # Now, everything should be cached if the argument is x. - try: - broken_bijector.forward(x) - broken_bijector.forward_log_det_jacobian(x) - except IntentionallyMissingError: - raise AssertionError("Tests failed! Cached values not used.") + # Now, everything should be cached if the argument is x. + try: + broken_bijector.forward(x) + broken_bijector.forward_log_det_jacobian(x, event_ndims=0) + except IntentionallyMissingError: + raise AssertionError("Tests failed! Cached values not used.") + + # Different event_ndims should not be cached. 
+ with self.assertRaises(IntentionallyMissingError): + broken_bijector.forward_log_det_jacobian(x, event_ndims=1) class BijectorCachingTest(BijectorCachingTestBase, test.TestCase): @@ -159,5 +174,107 @@ class BijectorCachingTest(BijectorCachingTestBase, test.TestCase): return BrokenBijector +class ExpOnlyJacobian(bijector.Bijector): + """Only used for jacobian calculations.""" + + def __init__(self, forward_min_event_ndims=0): + super(ExpOnlyJacobian, self).__init__( + validate_args=False, + is_constant_jacobian=False, + forward_min_event_ndims=forward_min_event_ndims, + name="exp") + + def _inverse_log_det_jacobian(self, y): + return -math_ops.log(y) + + def _forward_log_det_jacobian(self, x): + return math_ops.log(x) + + +class ConstantJacobian(bijector.Bijector): + """Only used for jacobian calculations.""" + + def __init__(self, forward_min_event_ndims=0): + super(ConstantJacobian, self).__init__( + validate_args=False, + is_constant_jacobian=True, + forward_min_event_ndims=forward_min_event_ndims, + name="c") + + def _inverse_log_det_jacobian(self, y): + return constant_op.constant(2., y.dtype) + + def _forward_log_det_jacobian(self, x): + return constant_op.constant(-2., x.dtype) + + +class BijectorReduceEventDimsTest(test.TestCase): + """Test caching with BrokenBijector.""" + + def testReduceEventNdimsForward(self): + x = [[[1., 2.], [3., 4.]]] + bij = ExpOnlyJacobian() + self.assertAllClose( + np.log(x), + self.evaluate(bij.forward_log_det_jacobian(x, event_ndims=0))) + self.assertAllClose( + np.sum(np.log(x), axis=-1), + self.evaluate(bij.forward_log_det_jacobian(x, event_ndims=1))) + self.assertAllClose( + np.sum(np.log(x), axis=(-1, -2)), + self.evaluate(bij.forward_log_det_jacobian(x, event_ndims=2))) + + def testReduceEventNdimsForwardRaiseError(self): + x = [[[1., 2.], [3., 4.]]] + bij = ExpOnlyJacobian(forward_min_event_ndims=1) + with self.assertRaisesRegexp(ValueError, "must be larger than"): + bij.forward_log_det_jacobian(x, event_ndims=0) + + def 
testReduceEventNdimsInverse(self): + x = [[[1., 2.], [3., 4.]]] + bij = ExpOnlyJacobian() + self.assertAllClose( + -np.log(x), + self.evaluate(bij.inverse_log_det_jacobian(x, event_ndims=0))) + self.assertAllClose( + np.sum(-np.log(x), axis=-1), + self.evaluate(bij.inverse_log_det_jacobian(x, event_ndims=1))) + self.assertAllClose( + np.sum(-np.log(x), axis=(-1, -2)), + self.evaluate(bij.inverse_log_det_jacobian(x, event_ndims=2))) + + def testReduceEventNdimsInverseRaiseError(self): + x = [[[1., 2.], [3., 4.]]] + bij = ExpOnlyJacobian(forward_min_event_ndims=1) + with self.assertRaisesRegexp(ValueError, "must be larger than"): + bij.inverse_log_det_jacobian(x, event_ndims=0) + + def testReduceEventNdimsForwardConstJacobian(self): + x = [[[1., 2.], [3., 4.]]] + bij = ConstantJacobian() + self.assertAllClose( + -2., + self.evaluate(bij.forward_log_det_jacobian(x, event_ndims=0))) + self.assertAllClose( + -4., + self.evaluate(bij.forward_log_det_jacobian(x, event_ndims=1))) + self.assertAllClose( + -8., + self.evaluate(bij.forward_log_det_jacobian(x, event_ndims=2))) + + def testReduceEventNdimsInverseConstJacobian(self): + x = [[[1., 2.], [3., 4.]]] + bij = ConstantJacobian() + self.assertAllClose( + 2., + self.evaluate(bij.inverse_log_det_jacobian(x, event_ndims=0))) + self.assertAllClose( + 4., + self.evaluate(bij.inverse_log_det_jacobian(x, event_ndims=1))) + self.assertAllClose( + 8., + self.evaluate(bij.inverse_log_det_jacobian(x, event_ndims=2))) + + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/kernel_tests/distributions/identity_bijector_test.py b/tensorflow/python/kernel_tests/distributions/identity_bijector_test.py index e8f9d0b728d8f831becc82cdba0ae2bf3d5da52a..b347c20db25df6dc0f278d9b34b4588277104850 100644 --- a/tensorflow/python/kernel_tests/distributions/identity_bijector_test.py +++ b/tensorflow/python/kernel_tests/distributions/identity_bijector_test.py @@ -27,14 +27,19 @@ class IdentityBijectorTest(test.TestCase): """Tests 
correctness of the Y = g(X) = X transformation.""" def testBijector(self): - with self.test_session(): - bijector = identity_bijector.Identity() - self.assertEqual("identity", bijector.name) - x = [[[0.], [1.]]] - self.assertAllEqual(x, bijector.forward(x).eval()) - self.assertAllEqual(x, bijector.inverse(x).eval()) - self.assertAllEqual(0., bijector.inverse_log_det_jacobian(x).eval()) - self.assertAllEqual(0., bijector.forward_log_det_jacobian(x).eval()) + bijector = identity_bijector.Identity(validate_args=True) + self.assertEqual("identity", bijector.name) + x = [[[0.], [1.]]] + self.assertAllEqual(x, self.evaluate(bijector.forward(x))) + self.assertAllEqual(x, self.evaluate(bijector.inverse(x))) + self.assertAllEqual( + 0., + self.evaluate( + bijector.inverse_log_det_jacobian(x, event_ndims=3))) + self.assertAllEqual( + 0., + self.evaluate( + bijector.forward_log_det_jacobian(x, event_ndims=3))) def testScalarCongruency(self): with self.test_session(): diff --git a/tensorflow/python/kernel_tests/distributions/uniform_test.py b/tensorflow/python/kernel_tests/distributions/uniform_test.py index df99a0ed257da20179909eb44eacf7d44528dad2..a8def95b147b6dd4825675769187733b8493b374 100644 --- a/tensorflow/python/kernel_tests/distributions/uniform_test.py +++ b/tensorflow/python/kernel_tests/distributions/uniform_test.py @@ -281,6 +281,22 @@ class UniformTest(test.TestCase): expected_pdf = [1.0, 0.1] self.assertAllClose(expected_pdf, pdf.eval()) + def testUniformFloat64(self): + uniform = uniform_lib.Uniform( + low=np.float64(0.), high=np.float64(1.)) + + self.assertAllClose( + [1., 1.], + self.evaluate(uniform.prob(np.array([0.5, 0.6], dtype=np.float64)))) + + self.assertAllClose( + [0.5, 0.6], + self.evaluate(uniform.cdf(np.array([0.5, 0.6], dtype=np.float64)))) + + self.assertAllClose(0.5, self.evaluate(uniform.mean())) + self.assertAllClose(1 / 12., self.evaluate(uniform.variance())) + self.assertAllClose(0., self.evaluate(uniform.entropy())) + if __name__ == 
"__main__": test.main() diff --git a/tensorflow/python/kernel_tests/functional_ops_test.py b/tensorflow/python/kernel_tests/functional_ops_test.py index 1301ef9d19deca5ed10c61944118847802653e9f..34fb655035d6cadab583c1f66dbeae3f7a0b65b0 100644 --- a/tensorflow/python/kernel_tests/functional_ops_test.py +++ b/tensorflow/python/kernel_tests/functional_ops_test.py @@ -24,6 +24,7 @@ from tensorflow.core.protobuf import config_pb2 from tensorflow.python.client import session from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors from tensorflow.python.framework import function from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor @@ -39,6 +40,7 @@ import tensorflow.python.ops.tensor_array_grad # pylint: disable=unused-import from tensorflow.python.platform import test +# pylint: disable=invalid-name def simple_scoped_fn(a, x): """Simple function: (a, x) -> 2(x+a), but with "2" as a variable in scope.""" with variable_scope.variable_scope("body"): @@ -158,6 +160,13 @@ class FunctionalOpsTest(test.TestCase): values=constant_op.constant([0, 1, 2]), dense_shape=[2, 2])) + @test_util.run_in_graph_and_eager_modes() + def testMapOverScalarErrors(self): + with self.assertRaisesRegexp(ValueError, "not scalars"): + functional_ops.map_fn(lambda x: x, [1, 2]) + with self.assertRaisesRegexp(ValueError, "not a scalar"): + functional_ops.map_fn(lambda x: x, 1) + def testMap_Scoped(self): with self.test_session() as sess: @@ -607,6 +616,276 @@ class FunctionalOpsTest(test.TestCase): mul = sess.run(remote_op) self.assertEqual(mul, 9) + def testIf(self): + + @function.Defun(dtypes.float32) + def Twice(x): + return x * 2 + + @function.Defun(dtypes.float32) + def Thrice(x): + return x * 3 + 1 + + with self.test_session(use_gpu=False) as sess: + + def Run(x): + return sess.run( + functional_ops.If(math_ops.greater(x, 0), [x], Twice, Thrice))[0] + + 
self.assertAllEqual(Run(9.), 18.) + self.assertAllEqual(Run(-8.), -23.) + self.assertAllEqual(Run(0.), 1.) + + def testWhile(self): + + @function.Defun(*[dtypes.float32] * 2) + def Cond(n, unused_x): + return n > 0 + + @function.Defun(*[dtypes.float32] * 2) + def Body(n, x): + return n - 1, x + n + + # TODO(b/65752372): Set `use_gpu=False` because + # `functional_ops.While()` does not reliably work on GPU (apparently + # because the result of evaluating the condition may be in device + # memory, but it is read on the host). + with self.test_session(use_gpu=False) as sess: + + def Run(n): + return sess.run(functional_ops.While([n, 0.], Cond, Body))[1] + + self.assertAllEqual(Run(20.), 210.) + self.assertAllEqual(Run(100.), 5050.) + + def testWhileError(self): + + @function.Defun(*[dtypes.float32] * 2) + def Cond(n, unused_x): + return n > 0 + + @function.Defun(*[dtypes.float32] * 2) + def CondReturnsTooManyArgs(n, x): + return n > 0, x + + @function.Defun(*[dtypes.float32] * 2) + def Body(n, x): + return n - 1, x + n + + @function.Defun(*[dtypes.float32] * 2) + def BodyReturnsTooManyArgs(n, x): + return n - 1, x + n, x + + # TODO(b/65752372): Set `use_gpu=False` because + # `functional_ops.While()` does not reliably work on GPU (apparently + # because the result of evaluating the condition may be in device + # memory, but it is read on the host). + with self.test_session(use_gpu=False): + with self.assertRaisesRegexp(errors.InvalidArgumentError, + "Expected a single scalar.*got 2 tensors."): + functional_ops.While([5., 0.], CondReturnsTooManyArgs, Body)[0].eval() + with self.assertRaisesRegexp( + errors.InvalidArgumentError, + "While loop body returned 3 arguments. 
Expected: 2"): + functional_ops.While([5., 0.], Cond, BodyReturnsTooManyArgs)[0].eval() + + def testWhileInMultipleSubgraphs(self): + + @function.Defun(* [dtypes.float32] * 2) + def Cond(n, x): # pylint: disable=unused-argument + return n > 0 + + @function.Defun(* [dtypes.float32] * 2) + def Body(n, x): + return n - 1, x + n + + # TODO(b/65752372): Set `use_gpu=False` because + # `functional_ops.While()` does not reliably work on GPU (apparently + # because the result of evaluating the condition may be in device + # memory, but it is read on the host). + with self.test_session(use_gpu=False) as sess: + n = array_ops.placeholder(dtypes.float32) + _, result = functional_ops.While([n, 0.], Cond, Body) + c = constant_op.constant(37.) + + self.assertAllEqual(210., sess.run(result, feed_dict={n: 20.})) + self.assertAllEqual(5050., sess.run(result, feed_dict={n: 100.})) + # Test that the result is the same when we run a different subgraph. + self.assertAllEqual(5050., sess.run([result, c], feed_dict={n: 100.})[0]) + + def _tfSum(self, rewrite_with_while): + # On GPU, don't rewrite using a while loop. + use_gpu = not rewrite_with_while + with self.test_session(use_gpu=use_gpu) as sess: + + @function.Defun(dtypes.int32, dtypes.float32) + def Body(n, x): + return x + math_ops.to_float(n) + + xs = [ + # 1 + 2 + ... + 20 + functional_ops.For( + 1, 21, 1, [0.], Body, rewrite_with_while=rewrite_with_while)[0], + # 100 + 99 + ... 
+ 1 + functional_ops.For( + 100, 0, -1, [0.], Body, rewrite_with_while=rewrite_with_while)[0], + ] + xvals = sess.run(xs) + self.assertAllEqual(210, xvals[0]) + self.assertAllEqual(5050, xvals[1]) + + def testFor(self): + self._tfSum(False) + + def testForWithWhile(self): + self._tfSum(True) + + def testForWithWhileNaming(self): + g = ops.Graph() + with g.as_default(): + + @function.Defun(dtypes.int32, dtypes.float32, func_name="TestBody") + def TestBody(n, x): + return x + math_ops.to_float(n) + + _ = functional_ops.For( + 1, 21, 1, [0.], TestBody, rewrite_with_while=True)[0] + + names = [] + for func in g.as_graph_def().library.function: + names.append(func.signature.name) + self.assertTrue("TestBody" in names) + self.assertTrue("TestBody_Cond" in names) + self.assertTrue("TestBody_Body" in names) + + def testForCapturedInputs(self): + v = variables.Variable(1.0) + + @function.Defun(dtypes.int32) + def TestNullary(n): + v + math_ops.to_float(n) # pylint: disable=expression-not-assigned + + @function.Defun(dtypes.int32, dtypes.float32) + def TestUnary(n, x): + return x + math_ops.to_float(n) + v + + @function.Defun(dtypes.int32, dtypes.float32, dtypes.float32) + def TestBinary(n, x, x2): + return x + math_ops.to_float(n) + v, x2 + v + + for rewrite_with_while in (True, False): + # TODO(b/65752372): Set `use_gpu=False` because + # `functional_ops.While()` does not reliably work on GPU (apparently + # because the result of evaluating the condition may be in device + # memory, but it is read on the host). 
+ use_gpu = not rewrite_with_while + with self.test_session(use_gpu=use_gpu) as sess: + result_nullary = functional_ops.For( + 1, 10, 1, [], TestNullary, + rewrite_with_while=rewrite_with_while) + result_unary = functional_ops.For( + 1, 10, 1, [0.], TestUnary, + rewrite_with_while=rewrite_with_while) + result_binary = functional_ops.For( + 1, 10, 1, [0., 0.], TestBinary, + rewrite_with_while=rewrite_with_while) + sess.run(variables.global_variables_initializer()) + assert not result_nullary + # The nullary variant doesn't return anything so we can't easily run it. + # As a total hack, fetch the operation by name and run it. + sess.run(ops.get_default_graph().get_operation_by_name( + "While" if rewrite_with_while else "For")) + assert len(result_unary) == 1 + self.assertEqual([54.0], sess.run(result_unary)) + assert len(result_binary) == 2 + self.assertEqual([54.0, 9.0], sess.run(result_binary)) + + def _tfMLP(self, xval, wsval, bsval, rewrite_with_while): + # On GPU, don't rewrite using a while loop. + use_gpu = not rewrite_with_while + with self.test_session(use_gpu=use_gpu): + + @function.Defun(dtypes.int32, *[dtypes.float64] * 3) + def MLP(i, a, ws, bs): + a = math_ops.tanh(math_ops.matmul(a, ws[i, :]) + bs[i, :]) + return a, ws, bs + + ret = functional_ops.For( + 0, + wsval.shape[0], + 1, [xval, wsval, bsval], + MLP, + rewrite_with_while=rewrite_with_while)[0] + + return ret.eval() + + def _npMLP(self, xval, wsval, bsval): + for i in range(wsval.shape[0]): + xval = np.tanh(np.dot(xval, wsval[i, :]) + bsval[i, :]) + return xval + + def _testForMLP(self, rewrite_with_while): + # We construct a 5-layer Multi-Layer Perceptron network here. + # Each layer have the same number of hidden unites (3), and the + # activation function is tanh(). We feed the input (xval) with + # batch size 2. 
+ xval = np.random.normal(size=(2, 3)) + wsval = np.random.normal(size=(5, 3, 3)) + bsval = np.random.normal(size=(5, 3)) + np_ans = self._npMLP(xval, wsval, bsval) + tf_for_ans = self._tfMLP(xval, wsval, bsval, rewrite_with_while) + self.assertAllClose(np_ans, tf_for_ans) + + def testForMLP(self): + self._testForMLP(False) + + def testForMLPWhile(self): + self._testForMLP(True) + + def testForError(self): + + @function.Defun(dtypes.int32, dtypes.float32) + def Foo(i, v): + return math_ops.to_float(i) + v + + @function.Defun(dtypes.int32, dtypes.float32) + def ReturnsTooManyArgs(unused_i, v): + return v, v + + with self.test_session(use_gpu=True): + with self.assertRaisesRegexp(errors.InvalidArgumentError, + "must be a scalar"): + functional_ops.For([0], 10, 1, [0.0], Foo)[0].eval() + with self.assertRaisesRegexp(errors.InvalidArgumentError, + "Invalid start/limit/delta"): + functional_ops.For(0, 10, -1, [0.0], Foo)[0].eval() + with self.assertRaisesRegexp( + errors.InvalidArgumentError, + "For loop body returned 2 arguments. Expected: 1"): + functional_ops.For(0, 10, 1, [0.0], ReturnsTooManyArgs)[0].eval() + + def testGradient(self): + + @function.Defun(dtypes.float32) + def Poly(x): + # y = 2x^3+3x^2+4x+8 + return 2 * x * x * x + 3 * x * x + 4 * x + 8 + + @function.Defun(dtypes.float32) + def Grad(x): + # dy/dx = dy/dy * dy/dx = 1.0 * (6x^2+6x+4) + return functional_ops.Gradient([x, 1.0], Poly)[0] + + with self.test_session(use_gpu=False) as sess: + a = constant_op.constant(0.) + avals = [Poly(a), Grad(a)] + b = constant_op.constant(1.) 
+ bvals = [Poly(b), Grad(b)] + self.assertAllEqual(sess.run(avals), [8., 4.]) + self.assertAllEqual(sess.run(bvals), [17., 16.]) + if __name__ == "__main__": test.main() + +# pylint: enable=invalid-name diff --git a/tensorflow/python/kernel_tests/gather_op_test.py b/tensorflow/python/kernel_tests/gather_op_test.py index 9a946925693370912613f4dde33bbbda176060e4..a2fcd751dfa94605d271587640815fae6ac1c360 100644 --- a/tensorflow/python/kernel_tests/gather_op_test.py +++ b/tensorflow/python/kernel_tests/gather_op_test.py @@ -149,6 +149,15 @@ class GatherTest(test.TestCase): self.assertAllEqual([b"asdf", b"qwer"], array_ops.gather(params, 0, axis=1).eval()) + def testUInt32AndUInt64(self): + for unsigned_type in (dtypes.uint32, dtypes.uint64): + params = self._buildParams( + np.array([[1, 2, 3], [7, 8, 9]]), unsigned_type) + with self.test_session(): + self.assertAllEqual([7, 8, 9], + array_ops.gather(params, 1, axis=0).eval()) + self.assertAllEqual([1, 7], array_ops.gather(params, 0, axis=1).eval()) + def testUnknownIndices(self): params = constant_op.constant([[0, 1, 2]]) indices = array_ops.placeholder(dtypes.int32) diff --git a/tensorflow/python/kernel_tests/init_ops_test.py b/tensorflow/python/kernel_tests/init_ops_test.py index 1e5c118cbc3573af0a2ce95239f499a5e52a0c86..a9b55854f1b4a3dfc49f05397ca32bc7b2ccb88e 100644 --- a/tensorflow/python/kernel_tests/init_ops_test.py +++ b/tensorflow/python/kernel_tests/init_ops_test.py @@ -551,7 +551,6 @@ class OrthogonalInitializerTest(test.TestCase): init2 = init_ops.orthogonal_initializer(gain=3.14, seed=1, dtype=dtype) with self.test_session(graph=ops.Graph(), use_gpu=True): t1 = init1(shape).eval() - with self.test_session(graph=ops.Graph(), use_gpu=True): t2 = init2(shape).eval() return np.allclose(t1, t2 / 3.14, rtol=1e-15, atol=1e-15) @@ -610,15 +609,16 @@ class ConvolutionDeltaOrthogonalInitializerTest(test.TestCase): seed=1, dtype=dtype) with self.test_session(graph=ops.Graph(), use_gpu=True): t1 = init1(shape).eval() 
- with self.test_session(graph=ops.Graph(), use_gpu=True): t2 = init2(shape).eval() return np.allclose(t1, t2 / 3.14, rtol=1e-15, atol=1e-15) def testShapesValues(self): + gain = 3.14 for dtype in [dtypes.float32]: for kernel_size in [[3], [8], [3, 5], [2, 4], [3, 3, 3], [2, 2, 2]]: tol = 1e-2 - # Check orthogonality by computing the 2-norms of the inputs and outputs. + # Check orthogonality by computing ratio between + # the 2-norms of the inputs and outputs. if len(kernel_size) == 1: shape = [4, 32, 64] convolution = convolutional.conv1d @@ -634,9 +634,10 @@ class ConvolutionDeltaOrthogonalInitializerTest(test.TestCase): inputs, padding="same", filters=128, kernel_size=kernel_size, use_bias=False, kernel_initializer=init_ops.convolutional_delta_orthogonal( - gain=3.14)) + gain=gain)) outputs_shape = shape[0:-1] + [128] outputs_2norm = linalg_ops.norm(outputs) + ratio = outputs_2norm / inputs_2norm my_ops = variables.global_variables_initializer() with self.test_session(use_gpu=True) as sess: sess.run(my_ops) @@ -644,10 +645,8 @@ class ConvolutionDeltaOrthogonalInitializerTest(test.TestCase): t = outputs.eval() self.assertAllEqual(t.shape, outputs_shape) # Check isometry of the delta-orthogonal kernel. 
- self.assertAllClose( - sess.run(inputs_2norm)/np.sqrt(np.prod(shape)), - sess.run(outputs_2norm)/(np.sqrt(np.prod(shape))*np.sqrt(3.14)), - rtol=tol, atol=tol) + self.assertAllClose(sess.run(ratio), np.sqrt(gain), + rtol=tol, atol=tol) def testNonuniformity(self): value = 0 @@ -655,7 +654,7 @@ class ConvolutionDeltaOrthogonalInitializerTest(test.TestCase): shape = [3, 3, 10, 10] count = 70 tol = 1e-5 - with self.test_session(use_gpu=True): # as sess: + with self.test_session(use_gpu=True): for i in range(count): x = variable_scope.get_variable("{}".format(i), shape=shape, initializer= @@ -674,6 +673,340 @@ class ConvolutionDeltaOrthogonalInitializerTest(test.TestCase): self.assertAllClose(abs_value, count, rtol=tol, atol=tol) +class ConvolutionOrthogonal1dInitializerTest(test.TestCase): + + def testInitializerIdentical(self): + for dtype in [dtypes.float32, dtypes.float64]: + init1 = init_ops.convolutional_orthogonal_1d(seed=1, dtype=dtype) + init2 = init_ops.convolutional_orthogonal_1d(seed=1, dtype=dtype) + self.assertTrue(identicaltest(self, init1, init2, (3, 10, 10))) + + def testInitializerDifferent(self): + for dtype in [dtypes.float32, dtypes.float64]: + init1 = init_ops.convolutional_orthogonal_1d(seed=1, dtype=dtype) + init2 = init_ops.convolutional_orthogonal_1d(seed=2, dtype=dtype) + self.assertFalse(identicaltest(self, init1, init2, (3, 10, 10))) + + def testDuplicatedInitializer(self): + init = init_ops.convolutional_orthogonal_1d() + self.assertFalse(duplicated_initializer(self, init, 1, (3, 10, 10))) + + def testInvalidDataType(self): + self.assertRaises( + ValueError, init_ops.convolutional_orthogonal_1d, + dtype=dtypes.string) + + def testInvalidShape(self): + init1 = init_ops.convolutional_orthogonal_1d() + with self.test_session(graph=ops.Graph(), use_gpu=True): + self.assertRaises(ValueError, init1, shape=[3, 6, 5]) + + def testGain(self): + shape = (3, 10, 10) + for dtype in [dtypes.float32, dtypes.float64]: + init1 = 
init_ops.convolutional_orthogonal_1d(seed=1, dtype=dtype) + init2 = init_ops.convolutional_orthogonal_1d(gain=3.14, + seed=1, dtype=dtype) + with self.test_session(graph=ops.Graph(), use_gpu=True): + t1 = init1(shape).eval() + t2 = init2(shape).eval() + return np.allclose(t1, t2 / 3.14, rtol=1e-15, atol=1e-15) + + def testNonuniformity(self): + value = 0 + abs_value = 0 + shape = [3, 10, 10] + count = 70 + tol = 1e-5 + with self.test_session(use_gpu=True): + for i in range(count): + x = variable_scope.get_variable("{}".format(i), shape=shape, + initializer= + init_ops.convolutional_orthogonal_1d) + x.initializer.run() + y = np.sum(x.eval(), axis=0) + determinant = np.linalg.det(y) + value += determinant + abs_value += np.abs(determinant) + + # Check there is some variation in the signs of the determinants. + self.assertLess(value, count - tol) + self.assertLess(-count + tol, value) + # Check all determinants have absolute value 1 + # Compute the sum of the absolute values of 'count' determinants + self.assertAllClose(abs_value, count, rtol=tol, atol=tol) + + def testShapesValues(self): + def circular_pad(input_, width, kernel_size): + """Pad input_ for computing (circular) convolution. + + Args: + input_: the input tensor + width: the width of the tensor. + kernel_size: the kernel size of the filter. + Returns: + a tensor whose width is (width + kernel_size - 1). + """ + + beginning = kernel_size // 2 + end = kernel_size - 1 - beginning + + tmp_up = array_ops.slice(input_, [0, width - beginning, 0], + [-1, beginning, -1]) + tmp_down = array_ops.slice(input_, [0, 0, 0], [-1, end, -1]) + tmp = array_ops.concat([tmp_up, input_, tmp_down], 1) + + return tmp + + cout = 64 + shape = [10, 20, 32] + outputs_shape = shape[0:-1] + [cout] + dtype = dtypes.float32 + tol = 1e-3 + gain = 3.14 + # Check orthogonality/isometry by computing the ratio between + # the 2-norms of the inputs and ouputs. 
+ for kernel_size in [[1], [2], [3], [4], [5], [6]]: + convolution = convolutional.conv1d + inputs = random_ops.random_normal(shape, dtype=dtype) + inputs_2norm = linalg_ops.norm(inputs) + input_with_circular_pad = circular_pad(inputs, shape[1], kernel_size[0]) + outputs = convolution( + input_with_circular_pad, padding="valid", filters=cout, + kernel_size=kernel_size[0], use_bias=False, + kernel_initializer=init_ops.convolutional_orthogonal_1d(gain=gain)) + outputs_2norm = linalg_ops.norm(outputs) + ratio = outputs_2norm / inputs_2norm + my_ops = variables.global_variables_initializer() + with self.test_session(use_gpu=True) as sess: + sess.run(my_ops) + # Check the shape of the outputs + t = outputs.eval() + self.assertAllEqual(t.shape, outputs_shape) + # Check isometry of the orthogonal kernel. + self.assertAllClose(sess.run(ratio), np.sqrt(gain), rtol=tol, atol=tol) + + +class ConvolutionOrthogonal2dInitializerTest(test.TestCase): + + def testInitializerIdentical(self): + for dtype in [dtypes.float32, dtypes.float64]: + init1 = init_ops.convolutional_orthogonal_2d(seed=1, dtype=dtype) + init2 = init_ops.convolutional_orthogonal_2d(seed=1, dtype=dtype) + self.assertTrue(identicaltest(self, init1, init2, (3, 3, 10, 10))) + + def testInitializerDifferent(self): + for dtype in [dtypes.float32, dtypes.float64]: + init1 = init_ops.convolutional_orthogonal_2d(seed=1, dtype=dtype) + init2 = init_ops.convolutional_orthogonal_2d(seed=2, dtype=dtype) + self.assertFalse(identicaltest(self, init1, init2, (3, 3, 10, 10))) + + def testDuplicatedInitializer(self): + init = init_ops.convolutional_orthogonal_2d() + self.assertFalse(duplicated_initializer(self, init, 1, (3, 3, 10, 10))) + + def testInvalidDataType(self): + self.assertRaises( + ValueError, init_ops.convolutional_orthogonal_2d, + dtype=dtypes.string) + + def testInvalidShape(self): + init1 = init_ops.convolutional_orthogonal_2d() + with self.test_session(graph=ops.Graph(), use_gpu=True): + 
self.assertRaises(ValueError, init1, shape=[3, 3, 6, 5]) + + def testGain(self): + shape = (3, 3, 10, 10) + for dtype in [dtypes.float32, dtypes.float64]: + init1 = init_ops.convolutional_orthogonal_2d(seed=1, dtype=dtype) + init2 = init_ops.convolutional_orthogonal_2d(gain=3.14, + seed=1, dtype=dtype) + with self.test_session(graph=ops.Graph(), use_gpu=True): + t1 = init1(shape).eval() + t2 = init2(shape).eval() + return np.allclose(t1, t2 / 3.14, rtol=1e-15, atol=1e-15) + + def testShapesValues(self): + def circular_pad(input_, width, kernel_size): + """Pad input_ for computing (circular) convolution. + + Args: + input_: the input tensor + width: the width of the tensor. + kernel_size: the kernel size of the filter. + Returns: + a tensor whose width is (width + kernel_size - 1). + """ + beginning = kernel_size // 2 + end = kernel_size - 1 - beginning + + tmp_up = array_ops.slice(input_, [0, width - beginning, 0, 0], + [-1, beginning, width, -1]) + tmp_down = array_ops.slice(input_, [0, 0, 0, 0], [-1, end, width, -1]) + tmp = array_ops.concat([tmp_up, input_, tmp_down], 1) + + new_width = width + kernel_size - 1 + tmp_left = array_ops.slice(tmp, [0, 0, width - beginning, 0], + [-1, new_width, beginning, -1]) + tmp_right = array_ops.slice(tmp, [0, 0, 0, 0], [-1, new_width, end, -1]) + + final = array_ops.concat([tmp_left, tmp, tmp_right], 2) + return final + + cout = 45 + shape = [64, 28, 28, 32] + outputs_shape = shape[0:-1] + [cout] + dtype = dtypes.float32 + tol = 1e-3 + gain = 3.14 + # Check orthogonality/isometry by computing the ratio between + # the 2-norms of the inputs and ouputs. 
+ for kernel_size in [[1, 1], [2, 2], [3, 3], [4, 4], [5, 5]]: + convolution = convolutional.conv2d + inputs = random_ops.random_normal(shape, dtype=dtype) + inputs_2norm = linalg_ops.norm(inputs) + input_with_circular_pad = circular_pad(inputs, shape[1], kernel_size[0]) + outputs = convolution( + input_with_circular_pad, padding="valid", filters=cout, + kernel_size=kernel_size, use_bias=False, + kernel_initializer=init_ops.convolutional_orthogonal_2d(gain=gain)) + outputs_2norm = linalg_ops.norm(outputs) + ratio = outputs_2norm / inputs_2norm + my_ops = variables.global_variables_initializer() + with self.test_session(use_gpu=True) as sess: + sess.run(my_ops) + # Check the shape of the outputs + t = outputs.eval() + self.assertAllEqual(t.shape, outputs_shape) + # Check isometry of the orthogonal kernel. + self.assertAllClose(sess.run(ratio), np.sqrt(gain), rtol=tol, atol=tol) + + +class ConvolutionOrthogonal3dInitializerTest(test.TestCase): + + def testInitializerIdentical(self): + for dtype in [dtypes.float32, dtypes.float64]: + init1 = init_ops.convolutional_orthogonal_3d(seed=1, dtype=dtype) + init2 = init_ops.convolutional_orthogonal_3d(seed=1, dtype=dtype) + self.assertTrue(identicaltest(self, init1, init2, (3, 3, 3, 10, 10))) + + def testInitializerDifferent(self): + for dtype in [dtypes.float32, dtypes.float64]: + init1 = init_ops.convolutional_orthogonal_3d(seed=1, dtype=dtype) + init2 = init_ops.convolutional_orthogonal_3d(seed=2, dtype=dtype) + self.assertFalse(identicaltest(self, init1, init2, (3, 3, 3, 10, 10))) + + def testDuplicatedInitializer(self): + init = init_ops.convolutional_orthogonal_3d() + self.assertFalse(duplicated_initializer(self, init, 1, (3, 3, 3, 10, 10))) + + def testInvalidDataType(self): + self.assertRaises( + ValueError, init_ops.convolutional_orthogonal_3d, + dtype=dtypes.string) + + def testInvalidShape(self): + init1 = init_ops.convolutional_orthogonal_3d() + with self.test_session(graph=ops.Graph(), use_gpu=True): + 
self.assertRaises(ValueError, init1, shape=[3, 3, 3, 6, 5]) + + def testGain(self): + shape = (3, 3, 3, 10, 10) + for dtype in [dtypes.float32, dtypes.float64]: + init1 = init_ops.convolutional_orthogonal_3d(seed=1, dtype=dtype) + init2 = init_ops.convolutional_orthogonal_3d(gain=3.14, + seed=1, dtype=dtype) + with self.test_session(graph=ops.Graph(), use_gpu=True): + t1 = init1(shape).eval() + t2 = init2(shape).eval() + return np.allclose(t1, t2 / 3.14, rtol=1e-15, atol=1e-15) + + def testNonuniformity(self): + value = 0 + abs_value = 0 + shape = [3, 3, 3, 5, 5] + count = 20 + tol = 1e-5 + with self.test_session(use_gpu=True): + for i in range(count): + x = variable_scope.get_variable("{}".format(i), shape=shape, + initializer= + init_ops.convolutional_orthogonal_3d) + x.initializer.run() + y = np.sum(x.eval(), axis=(0, 1, 2)) + determinant = np.linalg.det(y) + value += determinant + abs_value += np.abs(determinant) + + # Check there is some variation in the signs of the determinants + self.assertLess(value, count - tol) + self.assertLess(-count + tol, value) + # Check all determinants have absolute value 1 + # Compute the sum of the absolute values of 'count' determinants + self.assertAllClose(abs_value, count, rtol=tol, atol=tol) + + def testShapesValues(self): + def circular_pad(input_, width, kernel_size): + """Padding input_ for computing circular convolution. + + Args: + input_: the input tensor + width: the width of the tensor. + kernel_size: the kernel size of the filter. + + Returns: + a tensor whose width is (width + kernel_size - 1). 
+ """ + + beginning = kernel_size // 2 + end = kernel_size - 1 - beginning + + tmp_up = array_ops.slice(input_, [0, width - beginning, 0, 0, 0], + [-1, beginning, -1, -1, -1]) + tmp_down = array_ops.slice(input_, [0, 0, 0, 0, 0], + [-1, end, -1, -1, -1]) + tmp = array_ops.concat([tmp_up, input_, tmp_down], 1) + + tmp_left = array_ops.slice(tmp, [0, 0, width - beginning, 0, 0], + [-1, -1, beginning, -1, -1]) + tmp_right = array_ops.slice(tmp, [0, 0, 0, 0, 0], + [-1, -1, end, -1, -1]) + tmp = array_ops.concat([tmp_left, tmp, tmp_right], 2) + + tmp_front = array_ops.slice(tmp, [0, 0, 0, width - beginning, 0], + [-1, -1, -1, beginning, -1]) + tmp_back = array_ops.slice(tmp, [0, 0, 0, 0, 0], [-1, -1, -1, end, -1]) + return array_ops.concat([tmp_front, tmp, tmp_back], 3) + + cout = 32 + shape = [1, 7, 7, 7, 16] + outputs_shape = shape[0:-1] + [cout] + dtype = dtypes.float32 + tol = 1e-3 + gain = 3.14 + # Check orthogonality/isometry by computing the ratio between + # the 2-norms of the inputs and ouputs. + for kernel_size in [[1, 1, 1], [2, 2, 2], [3, 3, 3]]: + convolution = convolutional.conv3d + inputs = random_ops.random_normal(shape, dtype=dtype) + inputs_2norm = linalg_ops.norm(inputs) + input_with_circular_pad = circular_pad(inputs, shape[1], kernel_size[0]) + outputs = convolution( + input_with_circular_pad, padding="valid", filters=cout, + kernel_size=kernel_size[0], use_bias=False, + kernel_initializer=init_ops.convolutional_orthogonal_3d(gain=gain)) + outputs_2norm = linalg_ops.norm(outputs) + ratio = outputs_2norm / inputs_2norm + my_ops = variables.global_variables_initializer() + with self.test_session(use_gpu=True) as sess: + sess.run(my_ops) + # Check the shape of the outputs + t = outputs.eval() + self.assertAllEqual(t.shape, outputs_shape) + # Check isometry of the orthogonal kernel. 
+ self.assertAllClose(sess.run(ratio), np.sqrt(gain), rtol=tol, atol=tol) + + class IdentityInitializerTest(test.TestCase): def testInvalidDataType(self): diff --git a/tensorflow/python/kernel_tests/inplace_ops_test.py b/tensorflow/python/kernel_tests/inplace_ops_test.py new file mode 100644 index 0000000000000000000000000000000000000000..0f95e13187fcd5cc199d871ea5efdca363b37cd0 --- /dev/null +++ b/tensorflow/python/kernel_tests/inplace_ops_test.py @@ -0,0 +1,198 @@ +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for inplace_ops.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np +from six.moves import xrange # pylint: disable=redefined-builtin + +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors +from tensorflow.python.framework import ops +from tensorflow.python.framework import test_util +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import inplace_ops +from tensorflow.python.platform import test as test_lib + + +class InplaceOpsTest(test_util.TensorFlowTestCase): + + def testBasicUpdate(self): + for dtype in [dtypes.float32, dtypes.int32, dtypes.int64]: + with self.test_session(use_gpu=True): + x = array_ops.ones([7, 3], dtype) + y = np.ones([7, 3], dtype.as_numpy_dtype) + self.assertAllClose(x.eval(), y) + x = inplace_ops.inplace_update(x, [3], array_ops.ones([1, 3], dtype)) + y[3, :] = 1 + self.assertAllClose(x.eval(), y) + x = inplace_ops.inplace_update(x, [-1], + array_ops.ones([1, 3], dtype) * 2) + y[-1, :] = 2 + self.assertAllClose(x.eval(), y) + x = inplace_ops.inplace_update(x, 5, array_ops.ones([3], dtype) * 7) + y[5, :] = 7 + self.assertAllClose(x.eval(), y) + + def testBasicUpdateBool(self): + with self.test_session(use_gpu=True): + x = array_ops.ones([7, 3], dtypes.bool) + y = np.ones([7, 3], dtypes.bool.as_numpy_dtype) + self.assertAllClose(x.eval(), y) + x = inplace_ops.inplace_update(x, [3], array_ops.ones([1, 3], + dtypes.bool)) + y[3, :] = True + self.assertAllClose(x.eval(), y) + x = inplace_ops.inplace_update(x, [-1], + array_ops.zeros([1, 3], dtypes.bool)) + y[-1, :] = False + self.assertAllClose(x.eval(), y) + x = inplace_ops.inplace_update(x, 5, array_ops.zeros([3], dtypes.bool)) + y[5, :] = False + self.assertAllClose(x.eval(), y) + + def testBasicAdd(self): + for dtype in [dtypes.float32, dtypes.int32, 
dtypes.int64]: + with self.test_session(use_gpu=True): + x = array_ops.ones([7, 3], dtype) + y = np.ones([7, 3], dtype.as_numpy_dtype) + self.assertAllClose(x.eval(), y) + x = array_ops.inplace_add(x, [3], array_ops.ones([1, 3], dtype)) + y[3, :] += 1 + self.assertAllClose(x.eval(), y) + x = inplace_ops.inplace_add(x, [-1], array_ops.ones([1, 3], dtype) * 2) + y[-1, :] += 2 + self.assertAllClose(x.eval(), y) + x = inplace_ops.inplace_add(x, 5, array_ops.ones([3], dtype) * 7) + y[5, :] += 7 + self.assertAllClose(x.eval(), y) + x = inplace_ops.inplace_add(x, None, array_ops.ones([7, 3], dtype) * 99) + y[:, :] += 99 + self.assertAllClose(x.eval(), y) + + def testBasicSub(self): + for dtype in [dtypes.float32, dtypes.int32, dtypes.int64]: + with self.test_session(use_gpu=True): + x = array_ops.ones([7, 3], dtype) + y = np.ones([7, 3], dtype.as_numpy_dtype) + self.assertAllClose(x.eval(), y) + x = inplace_ops.inplace_sub(x, [3], array_ops.ones([1, 3], dtype)) + y[3, :] -= 1 + self.assertAllClose(x.eval(), y) + x = inplace_ops.inplace_sub(x, [-1], array_ops.ones([1, 3], dtype) * 2) + y[-1, :] -= 2 + self.assertAllClose(x.eval(), y) + x = inplace_ops.inplace_sub(x, 5, array_ops.ones([3], dtype) * 7) + y[5, :] -= 7 + self.assertAllClose(x.eval(), y) + x = inplace_ops.inplace_sub(x, None, array_ops.ones([7, 3], dtype) * 99) + y[:, :] -= 99 + self.assertAllClose(x.eval(), y) + + def testRandom(self): + with self.test_session(use_gpu=True): + d0, d1, d2 = 100, 3, 5 + x = array_ops.zeros([d0, d1, d2]) + y = np.zeros([d0, d1, d2]) + for _ in xrange(20): + idx = np.random.choice(d0, d0 // 10, replace=False) + val = np.random.randint(10, size=(d0 // 10, d1, d2)) + op = np.random.randint(3) + if op == 0: + x = inplace_ops.inplace_update(x, idx, val) + y[idx, :] = val + elif op == 1: + x = inplace_ops.inplace_add(x, idx, val) + y[idx, :] += val + elif op == 2: + x = inplace_ops.inplace_sub(x, idx, val) + y[idx, :] -= val + self.assertAllClose(x.eval(), y) + + def 
testRandom1D(self): + with self.test_session(use_gpu=True): + d0 = 100 + x = array_ops.zeros([d0]) + y = np.zeros([d0]) + for _ in xrange(20): + idx = np.random.choice(d0, d0 // 10, replace=False) + val = np.random.randint(10, size=(d0 // 10)) + op = np.random.randint(3) + if op == 0: + x = inplace_ops.inplace_update(x, idx, val) + y[idx] = val + elif op == 1: + x = inplace_ops.inplace_add(x, idx, val) + y[idx] += val + elif op == 2: + x = inplace_ops.inplace_sub(x, idx, val) + y[idx] -= val + self.assertAllClose(x.eval(), y) + + def testAlias(self): + with self.test_session(use_gpu=True) as sess: + x = array_ops.ones([2, 3]) + y = inplace_ops.alias_inplace_add(x, [0], [[1, 2, 3]]) + with ops.control_dependencies([y]): + z = array_ops.identity(x) + _, vy, vz = sess.run([x, y, z]) + self.assertAllClose(vy, vz) + + def testError(self): + with self.test_session(): + with self.assertRaisesRegexp(errors.InvalidArgumentError, + "must be a vector"): + _ = inplace_ops.inplace_update([[1.]], [[0]], [[10]]).eval() + with self.assertRaisesRegexp(errors.InvalidArgumentError, + "x and v shape doesn't match"): + _ = inplace_ops.inplace_update([[1.]], [0], [10]).eval() + with self.assertRaisesRegexp(errors.InvalidArgumentError, + "i and x shape doesn't match"): + _ = inplace_ops.inplace_update([[1.]], [0, 1], [[10]]).eval() + + def testEmpty(self): + for dtype in [ + dtypes.float32, dtypes.float64, dtypes.int32, dtypes.int64, dtypes.bool + ]: + with self.test_session(use_gpu=True): + test_shapes = [(), (1,), (2, 3), (0, 2), (2, 3, 5), (2, 0, 5)] + for shape in test_shapes: + val = inplace_ops.empty(shape, dtype).eval() + self.assertEqual(val.shape, shape) + self.assertEqual(val.dtype, dtype.as_numpy_dtype) + val = inplace_ops.empty(shape, dtype, init=True).eval() + self.assertEqual(val.shape, shape) + self.assertEqual(val.dtype, dtype.as_numpy_dtype) + self.assertAllEqual(val, np.zeros(shape, dtype.as_numpy_dtype)) + val = inplace_ops.empty_like(array_ops.zeros(shape, 
dtype)).eval() + self.assertEqual(val.shape, shape) + self.assertEqual(val.dtype, dtype.as_numpy_dtype) + val = inplace_ops.empty_like( + array_ops.zeros(shape, dtype), init=True).eval() + self.assertEqual(val.shape, shape) + self.assertEqual(val.dtype, dtype.as_numpy_dtype) + self.assertAllEqual(val, np.zeros(shape, dtype.as_numpy_dtype)) + + val = inplace_ops.empty((1, 2), dtypes.string, init=True).eval() + self.assertEqual(val.tolist(), [[b"", b""]]) + + val = inplace_ops.empty((1, 2), dtypes.string, init=False).eval() + self.assertEqual(val.tolist(), [[b"", b""]]) + + +if __name__ == "__main__": + test_lib.main() diff --git a/tensorflow/python/kernel_tests/large_concat_op_test.py b/tensorflow/python/kernel_tests/large_concat_op_test.py index 66afb6ec014991ca32efd5b0895ff695d3d1015f..184d1dde2aa5e8d786cb85141f8dfb90c0bdad63 100644 --- a/tensorflow/python/kernel_tests/large_concat_op_test.py +++ b/tensorflow/python/kernel_tests/large_concat_op_test.py @@ -19,10 +19,12 @@ from __future__ import print_function from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops +from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.platform import test +@test_util.with_c_api class LargeConcatOpTest(test.TestCase): """Tests that belong in concat_op_test.py, but run over large tensors.""" diff --git a/tensorflow/python/kernel_tests/linalg/BUILD b/tensorflow/python/kernel_tests/linalg/BUILD index 9555e510997a6aa07797dffa1a6e4810b0b4e5d2..7ffa48b65308b50d57a51d06fea1f88928cb2554 100644 --- a/tensorflow/python/kernel_tests/linalg/BUILD +++ b/tensorflow/python/kernel_tests/linalg/BUILD @@ -123,6 +123,10 @@ cuda_py_test( "//tensorflow/python:platform_test", ], shard_count = 5, + tags = [ + "noasan", # times out + "optonly", + ], ) cuda_py_test( diff --git a/tensorflow/python/kernel_tests/linalg/linear_operator_util_test.py 
b/tensorflow/python/kernel_tests/linalg/linear_operator_util_test.py index e1edffc3d9afec618f8dfcf74bae1b0f1bde2772..7b291e29de41d2fe37257bb42222ac23fc8e1d3f 100644 --- a/tensorflow/python/kernel_tests/linalg/linear_operator_util_test.py +++ b/tensorflow/python/kernel_tests/linalg/linear_operator_util_test.py @@ -23,6 +23,7 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import random_seed from tensorflow.python.ops import array_ops +from tensorflow.python.ops import linalg_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops.linalg import linear_operator_util from tensorflow.python.platform import test @@ -94,8 +95,8 @@ class AssertNoEntriesWithModulusZeroTest(test.TestCase): class BroadcastMatrixBatchDimsTest(test.TestCase): def test_zero_batch_matrices_returned_as_empty_list(self): - self.assertAllEqual( - [], linear_operator_util.broadcast_matrix_batch_dims([])) + self.assertAllEqual([], + linear_operator_util.broadcast_matrix_batch_dims([])) def test_one_batch_matrix_returned_after_tensor_conversion(self): arr = rng.rand(2, 3, 4) @@ -194,6 +195,44 @@ class BroadcastMatrixBatchDimsTest(test.TestCase): linear_operator_util.broadcast_matrix_batch_dims([y, x]) +class CholeskySolveWithBroadcastTest(test.TestCase): + + def test_static_dims_broadcast(self): + # batch_shape = [2] + chol = rng.rand(3, 3) + rhs = rng.rand(2, 3, 7) + chol_broadcast = chol + np.zeros((2, 1, 1)) + + with self.test_session(): + result = linear_operator_util.cholesky_solve_with_broadcast(chol, rhs) + self.assertAllEqual((2, 3, 7), result.get_shape()) + expected = linalg_ops.cholesky_solve(chol_broadcast, rhs) + self.assertAllEqual(expected.eval(), result.eval()) + + def test_dynamic_dims_broadcast_64bit(self): + # batch_shape = [2, 2] + chol = rng.rand(2, 3, 3) + rhs = rng.rand(2, 1, 3, 7) + chol_broadcast = chol + np.zeros((2, 2, 1, 1)) + rhs_broadcast = rhs + np.zeros((2, 2, 1, 1)) + + 
chol_ph = array_ops.placeholder(dtypes.float64) + rhs_ph = array_ops.placeholder(dtypes.float64) + + with self.test_session() as sess: + result, expected = sess.run( + [ + linear_operator_util.cholesky_solve_with_broadcast( + chol_ph, rhs_ph), + linalg_ops.cholesky_solve(chol_broadcast, rhs_broadcast) + ], + feed_dict={ + chol_ph: chol, + rhs_ph: rhs, + }) + self.assertAllEqual(expected, result) + + class MatmulWithBroadcastTest(test.TestCase): def test_static_dims_broadcast(self): @@ -209,7 +248,7 @@ class MatmulWithBroadcastTest(test.TestCase): expected = math_ops.matmul(x, y_broadcast) self.assertAllEqual(expected.eval(), result.eval()) - def test_dynamic_dims_broadcast_32bit(self): + def test_dynamic_dims_broadcast_64bit(self): # batch_shape = [2] # for each batch member, we have a 1x3 matrix times a 3x7 matrix ==> 1x7 x = rng.rand(2, 1, 3) @@ -221,9 +260,90 @@ class MatmulWithBroadcastTest(test.TestCase): with self.test_session() as sess: result, expected = sess.run( - [linear_operator_util.matmul_with_broadcast(x_ph, y_ph), - math_ops.matmul(x, y_broadcast)], - feed_dict={x_ph: x, y_ph: y}) + [ + linear_operator_util.matmul_with_broadcast(x_ph, y_ph), + math_ops.matmul(x, y_broadcast) + ], + feed_dict={ + x_ph: x, + y_ph: y + }) + self.assertAllEqual(expected, result) + + +class MatrixSolveWithBroadcastTest(test.TestCase): + + def test_static_dims_broadcast(self): + # batch_shape = [2] + matrix = rng.rand(3, 3) + rhs = rng.rand(2, 3, 7) + matrix_broadcast = matrix + np.zeros((2, 1, 1)) + + with self.test_session(): + result = linear_operator_util.matrix_solve_with_broadcast(matrix, rhs) + self.assertAllEqual((2, 3, 7), result.get_shape()) + expected = linalg_ops.matrix_solve(matrix_broadcast, rhs) + self.assertAllEqual(expected.eval(), result.eval()) + + def test_dynamic_dims_broadcast_64bit(self): + # batch_shape = [2, 2] + matrix = rng.rand(2, 3, 3) + rhs = rng.rand(2, 1, 3, 7) + matrix_broadcast = matrix + np.zeros((2, 2, 1, 1)) + rhs_broadcast = rhs + 
np.zeros((2, 2, 1, 1)) + + matrix_ph = array_ops.placeholder(dtypes.float64) + rhs_ph = array_ops.placeholder(dtypes.float64) + + with self.test_session() as sess: + result, expected = sess.run( + [ + linear_operator_util.matrix_solve_with_broadcast( + matrix_ph, rhs_ph), + linalg_ops.matrix_solve(matrix_broadcast, rhs_broadcast) + ], + feed_dict={ + matrix_ph: matrix, + rhs_ph: rhs, + }) + self.assertAllEqual(expected, result) + + +class MatrixTriangularSolveWithBroadcastTest(test.TestCase): + + def test_static_dims_broadcast(self): + # batch_shape = [2] + matrix = rng.rand(2, 3, 3) + rhs = rng.rand(3, 7) + rhs_broadcast = rhs + np.zeros((2, 1, 1)) + + with self.test_session(): + result = linear_operator_util.matrix_triangular_solve_with_broadcast( + matrix, rhs) + self.assertAllEqual((2, 3, 7), result.get_shape()) + expected = linalg_ops.matrix_triangular_solve(matrix, rhs_broadcast) + self.assertAllEqual(expected.eval(), result.eval()) + + def test_dynamic_dims_broadcast_64bit(self): + # batch_shape = [2] + matrix = rng.rand(2, 3, 3) + rhs = rng.rand(3, 7) + rhs_broadcast = rhs + np.zeros((2, 1, 1)) + + matrix_ph = array_ops.placeholder(dtypes.float64) + rhs_ph = array_ops.placeholder(dtypes.float64) + + with self.test_session() as sess: + result, expected = sess.run( + [ + linear_operator_util.matrix_triangular_solve_with_broadcast( + matrix_ph, rhs_ph), + linalg_ops.matrix_triangular_solve(matrix, rhs_broadcast) + ], + feed_dict={ + matrix_ph: matrix, + rhs_ph: rhs, + }) self.assertAllEqual(expected, result) @@ -244,7 +364,7 @@ class AssertCompatibleMatrixDimensionsTest(test.TestCase): operator = DomainDimensionStubOperator(3) # Should not raise linear_operator_util.assert_compatible_matrix_dimensions( - operator, x).run() + operator, x).run() # pyformat: disable def test_incompatible_dimensions_raise(self): with self.test_session(): @@ -252,7 +372,7 @@ class AssertCompatibleMatrixDimensionsTest(test.TestCase): operator = DomainDimensionStubOperator(3) with 
self.assertRaisesOpError("Incompatible matrix dimensions"): linear_operator_util.assert_compatible_matrix_dimensions( - operator, x).run() + operator, x).run() # pyformat: disable if __name__ == "__main__": diff --git a/tensorflow/python/kernel_tests/list_ops_test.py b/tensorflow/python/kernel_tests/list_ops_test.py index dbbed39c727f01ed1fae271375575c690958c7d8..098f9724a2a65f544005e1799f6329a29bb02abe 100644 --- a/tensorflow/python/kernel_tests/list_ops_test.py +++ b/tensorflow/python/kernel_tests/list_ops_test.py @@ -33,6 +33,8 @@ from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import list_ops from tensorflow.python.ops import math_ops +from tensorflow.python.ops import state_ops +from tensorflow.python.ops import variable_scope as vs from tensorflow.python.platform import test from tensorflow.python.training import server_lib @@ -43,71 +45,83 @@ def scalar_shape(): class ListOpsTest(test_util.TensorFlowTestCase): + @test_util.run_in_graph_and_eager_modes() def testPushPop(self): l = list_ops.empty_tensor_list(element_dtype=dtypes.float32, element_shape=scalar_shape()) l = list_ops.tensor_list_push_back(l, constant_op.constant(1.0)) l, e = list_ops.tensor_list_pop_back(l, element_dtype=dtypes.float32) - self.assertAllEqual(e, 1.0) + self.assertAllEqual(self.evaluate(e), 1.0) + @test_util.run_in_graph_and_eager_modes() def testPushPopGPU(self): if not context.num_gpus(): return with context.device("gpu:0"): self.testPushPop() + @test_util.run_in_graph_and_eager_modes() def testStack(self): l = list_ops.empty_tensor_list(element_dtype=dtypes.float32, element_shape=scalar_shape()) l = list_ops.tensor_list_push_back(l, constant_op.constant(1.0)) l = list_ops.tensor_list_push_back(l, constant_op.constant(2.0)) t = list_ops.tensor_list_stack(l, element_dtype=dtypes.float32) - self.assertAllEqual(t, [1.0, 2.0]) + self.assertAllEqual(self.evaluate(t), [1.0, 2.0]) + 
@test_util.run_in_graph_and_eager_modes() def testStackGPU(self): if not context.num_gpus(): return with context.device("gpu:0"): self.testStack() + @test_util.run_in_graph_and_eager_modes() def testTensorListFromTensor(self): t = constant_op.constant([1.0, 2.0]) l = list_ops.tensor_list_from_tensor(t, element_shape=scalar_shape()) l, e = list_ops.tensor_list_pop_back(l, element_dtype=dtypes.float32) - self.assertAllEqual(e, 2.0) + self.assertAllEqual(self.evaluate(e), 2.0) l, e = list_ops.tensor_list_pop_back(l, element_dtype=dtypes.float32) - self.assertAllEqual(e, 1.0) - self.assertAllEqual(list_ops.tensor_list_length(l), 0) + self.assertAllEqual(self.evaluate(e), 1.0) + self.assertAllEqual(self.evaluate(list_ops.tensor_list_length(l)), 0) + @test_util.run_in_graph_and_eager_modes() def testFromTensorGPU(self): if not context.num_gpus(): return with context.device("gpu:0"): self.testTensorListFromTensor() + @test_util.run_in_graph_and_eager_modes() def testGetSetItem(self): t = constant_op.constant([1.0, 2.0]) l = list_ops.tensor_list_from_tensor(t, element_shape=scalar_shape()) e0 = list_ops.tensor_list_get_item(l, 0, element_dtype=dtypes.float32) - self.assertAllEqual(e0, 1.0) + self.assertAllEqual(self.evaluate(e0), 1.0) l = list_ops.tensor_list_set_item(l, 0, 3.0) t = list_ops.tensor_list_stack(l, element_dtype=dtypes.float32) - self.assertAllEqual(t, [3.0, 2.0]) + self.assertAllEqual(self.evaluate(t), [3.0, 2.0]) + @test_util.run_in_graph_and_eager_modes() def testGetSetGPU(self): if not context.num_gpus(): return with context.device("gpu:0"): self.testGetSetItem() + @test_util.run_in_graph_and_eager_modes() def testUnknownShape(self): - l = list_ops.empty_tensor_list(element_dtype=dtypes.float32, - element_shape=-1) + l = list_ops.empty_tensor_list( + element_dtype=dtypes.float32, element_shape=-1) l = list_ops.tensor_list_push_back(l, constant_op.constant(1.0)) l = list_ops.tensor_list_push_back(l, constant_op.constant([1.0, 2.0])) - _, e = 
list_ops.tensor_list_pop_back(l, element_dtype=dtypes.float32) - self.assertAllEqual(e, [1.0, 2.0]) + l, e = list_ops.tensor_list_pop_back(l, element_dtype=dtypes.float32) + self.assertAllEqual(self.evaluate(e), [1.0, 2.0]) + l, e = list_ops.tensor_list_pop_back(l, element_dtype=dtypes.float32) + self.assertAllEqual(self.evaluate(e), 1.0) + @test_util.run_in_graph_and_eager_modes() def testCPUGPUCopy(self): if not context.num_gpus(): return @@ -116,15 +130,16 @@ class ListOpsTest(test_util.TensorFlowTestCase): with context.device("gpu:0"): l_gpu = array_ops.identity(l) self.assertAllEqual( - list_ops.tensor_list_pop_back( - l_gpu, element_dtype=dtypes.float32)[1], - 2.0) + self.evaluate( + list_ops.tensor_list_pop_back( + l_gpu, element_dtype=dtypes.float32)[1]), 2.0) l_cpu = array_ops.identity(l_gpu) self.assertAllEqual( - list_ops.tensor_list_pop_back( - l_cpu, element_dtype=dtypes.float32)[1], - 2.0) + self.evaluate( + list_ops.tensor_list_pop_back( + l_cpu, element_dtype=dtypes.float32)[1]), 2.0) + @test_util.run_in_graph_and_eager_modes() def testGraphStack(self): with context.graph_mode(), self.test_session(): tl = list_ops.empty_tensor_list( @@ -132,9 +147,11 @@ class ListOpsTest(test_util.TensorFlowTestCase): element_dtype=dtypes.int32) tl = list_ops.tensor_list_push_back(tl, [1]) self.assertAllEqual( - list_ops.tensor_list_stack(tl, element_dtype=dtypes.int32).eval(), + self.evaluate( + list_ops.tensor_list_stack(tl, element_dtype=dtypes.int32)), [[1]]) + @test_util.run_in_graph_and_eager_modes() def testGraphStackInLoop(self): with context.graph_mode(), self.test_session(): t1 = list_ops.empty_tensor_list( @@ -149,9 +166,10 @@ class ListOpsTest(test_util.TensorFlowTestCase): i, t1 = control_flow_ops.while_loop(lambda i, t1: math_ops.less(i, 4), body, [i, t1]) - s1 = list_ops.tensor_list_stack(t1, element_dtype=dtypes.int32).eval() - self.assertAllEqual(s1, [0, 1, 2, 3]) + s1 = list_ops.tensor_list_stack(t1, element_dtype=dtypes.int32) + 
self.assertAllEqual(self.evaluate(s1), [0, 1, 2, 3]) + @test_util.run_in_graph_and_eager_modes() def testGraphStackSwitchDtype(self): with context.graph_mode(), self.test_session(): list_ = list_ops.empty_tensor_list( @@ -169,11 +187,11 @@ class ListOpsTest(test_util.TensorFlowTestCase): for _ in range(2): list_, m = body(list_, m) - s1 = list_ops.tensor_list_stack( - list_, element_dtype=dtypes.float32).eval() + s1 = list_ops.tensor_list_stack(list_, element_dtype=dtypes.float32) np_s1 = np.array([[1, 2, 3], [1, 2, 3]], dtype=np.float32) - self.assertAllEqual(s1, np_s1) + self.assertAllEqual(self.evaluate(s1), np_s1) + @test_util.run_in_graph_and_eager_modes() def testGraphStackInLoopSwitchDtype(self): with context.graph_mode(), self.test_session(): t1 = list_ops.empty_tensor_list( @@ -193,10 +211,11 @@ class ListOpsTest(test_util.TensorFlowTestCase): i, m, t1 = control_flow_ops.while_loop( lambda i, m, t1: math_ops.less(i, 4), body, [i, m, t1]) - s1 = list_ops.tensor_list_stack(t1, element_dtype=dtypes.float32).eval() + s1 = list_ops.tensor_list_stack(t1, element_dtype=dtypes.float32) np_s1 = np.vstack([np.arange(1, 4) * i for i in range(4)]) - self.assertAllEqual(s1, np_s1) + self.assertAllEqual(self.evaluate(s1), np_s1) + @test_util.run_in_graph_and_eager_modes() def testSerialize(self): # pylint: disable=g-import-not-at-top try: @@ -226,8 +245,9 @@ class ListOpsTest(test_util.TensorFlowTestCase): l_ps, element_dtype=dtypes.float32) with ops.device("/job:worker"): worker_e = array_ops.identity(e) - self.assertAllEqual(worker_e.eval(), [2.0]) + self.assertAllEqual(self.evaluate(worker_e), [2.0]) + @test_util.run_in_graph_and_eager_modes() def testPushPopGradients(self): with backprop.GradientTape() as tape: l = list_ops.empty_tensor_list(element_dtype=dtypes.float32, @@ -237,18 +257,21 @@ class ListOpsTest(test_util.TensorFlowTestCase): l = list_ops.tensor_list_push_back(l, c) l, e = list_ops.tensor_list_pop_back(l, element_dtype=dtypes.float32) e = 2 * e - 
self.assertAllEqual(tape.gradient(e, [c])[0], 2.0) + self.assertAllEqual(self.evaluate(tape.gradient(e, [c])[0]), 2.0) + @test_util.run_in_graph_and_eager_modes() def testStackFromTensorGradients(self): with backprop.GradientTape() as tape: c = constant_op.constant([1.0, 2.0]) tape.watch(c) l = list_ops.tensor_list_from_tensor(c, element_shape=scalar_shape()) c2 = list_ops.tensor_list_stack( - l, element_dtype=dtypes.float32) + l, element_dtype=dtypes.float32, num_elements=2) result = c2 * 2.0 - self.assertAllEqual(tape.gradient(result, [c])[0], [2.0, 2.0]) + grad = tape.gradient(result, [c])[0] + self.assertAllEqual(self.evaluate(grad), [2.0, 2.0]) + @test_util.run_in_graph_and_eager_modes() def testGetSetGradients(self): with backprop.GradientTape() as tape: c = constant_op.constant([1.0, 2.0]) @@ -261,16 +284,142 @@ class ListOpsTest(test_util.TensorFlowTestCase): ee = list_ops.tensor_list_get_item(l, 1, element_dtype=dtypes.float32) y = e * e + ee * ee grad_c, grad_c2 = tape.gradient(y, [c, c2]) - self.assertAllEqual(grad_c, [0.0, 4.0]) - self.assertAllEqual(grad_c2, 6.0) + self.assertAllEqual(self.evaluate(grad_c), [0.0, 4.0]) + self.assertAllEqual(self.evaluate(grad_c2), 6.0) + @test_util.run_in_graph_and_eager_modes() def testSetOutOfBounds(self): c = constant_op.constant([1.0, 2.0]) l = list_ops.tensor_list_from_tensor(c, element_shape=scalar_shape()) with self.assertRaises(errors.InvalidArgumentError): - list_ops.tensor_list_set_item(l, 20, 3.0) + self.evaluate(list_ops.tensor_list_set_item(l, 20, 3.0)) + + @test_util.run_in_graph_and_eager_modes() + def testResourceVariableScatterGather(self): + c = constant_op.constant([1.0, 2.0], dtype=dtypes.float32) + l = list_ops.tensor_list_from_tensor(c, element_shape=scalar_shape()) + v = vs.get_variable("var", initializer=[l] * 10, use_resource=True) + v_r_0_stacked = list_ops.tensor_list_stack(v[0], dtypes.float32) + self.evaluate(v.initializer) + self.assertAllEqual([1.0, 2.0], self.evaluate(v_r_0_stacked)) + 
v_r_sparse_stacked = list_ops.tensor_list_stack( + v.sparse_read(0), dtypes.float32) + self.assertAllEqual([1.0, 2.0], self.evaluate(v_r_sparse_stacked)) + l_new_0 = list_ops.tensor_list_from_tensor( + [3.0, 4.0], element_shape=scalar_shape()) + l_new_1 = list_ops.tensor_list_from_tensor( + [5.0, 6.0], element_shape=scalar_shape()) + updated_v = state_ops.scatter_update(v, [3, 5], [l_new_0, l_new_1]) + updated_v_elems = array_ops.unstack(updated_v) + updated_v_stacked = [ + list_ops.tensor_list_stack(el, dtypes.float32) for el in updated_v_elems + ] + expected = ([[1.0, 2.0]] * 3 + [[3.0, 4.0], [1.0, 2.0], [5.0, 6.0]] + + [[1.0, 2.0]] * 4) + self.assertAllEqual(self.evaluate(updated_v_stacked), expected) + + @test_util.run_in_graph_and_eager_modes() + def testConcat(self): + c = constant_op.constant([1.0, 2.0], dtype=dtypes.float32) + l0 = list_ops.tensor_list_from_tensor(c, element_shape=scalar_shape()) + l1 = list_ops.tensor_list_from_tensor([-1.0], element_shape=scalar_shape()) + l_batch_0 = array_ops.stack([l0, l1]) + l_batch_1 = array_ops.stack([l1, l0]) + + l_concat_01 = list_ops.tensor_list_concat_lists( + l_batch_0, l_batch_1, element_dtype=dtypes.float32) + l_concat_10 = list_ops.tensor_list_concat_lists( + l_batch_1, l_batch_0, element_dtype=dtypes.float32) + l_concat_00 = list_ops.tensor_list_concat_lists( + l_batch_0, l_batch_0, element_dtype=dtypes.float32) + l_concat_11 = list_ops.tensor_list_concat_lists( + l_batch_1, l_batch_1, element_dtype=dtypes.float32) + + expected_00 = [[1.0, 2.0, 1.0, 2.0], [-1.0, -1.0]] + expected_01 = [[1.0, 2.0, -1.0], [-1.0, 1.0, 2.0]] + expected_10 = [[-1.0, 1.0, 2.0], [1.0, 2.0, -1.0]] + expected_11 = [[-1.0, -1.0], [1.0, 2.0, 1.0, 2.0]] + + for i, (concat, expected) in enumerate(zip( + [l_concat_00, l_concat_01, l_concat_10, l_concat_11], + [expected_00, expected_01, expected_10, expected_11])): + splitted = array_ops.unstack(concat) + splitted_stacked_ret = self.evaluate( + (list_ops.tensor_list_stack(splitted[0], 
dtypes.float32), + list_ops.tensor_list_stack(splitted[1], dtypes.float32))) + print("Test concat %d: %s, %s, %s, %s" + % (i, expected[0], splitted_stacked_ret[0], + expected[1], splitted_stacked_ret[1])) + self.assertAllClose(expected[0], splitted_stacked_ret[0]) + self.assertAllClose(expected[1], splitted_stacked_ret[1]) + + # Concatenating mismatched shapes fails. + with self.assertRaises((errors.InvalidArgumentError, ValueError)): + self.evaluate( + list_ops.tensor_list_concat_lists( + l_batch_0, + list_ops.empty_tensor_list(scalar_shape(), dtypes.float32), + element_dtype=dtypes.float32)) + + with self.assertRaisesRegexp(errors.InvalidArgumentError, + "element shapes are not identical at index 0"): + l_batch_of_vec_tls = array_ops.stack( + [list_ops.tensor_list_from_tensor([[1.0]], element_shape=[1])] * 2) + self.evaluate( + list_ops.tensor_list_concat_lists(l_batch_0, l_batch_of_vec_tls, + element_dtype=dtypes.float32)) + + with self.assertRaisesRegexp(errors.InvalidArgumentError, + r"input_b\[0\].dtype != element_dtype."): + l_batch_of_int_tls = array_ops.stack( + [list_ops.tensor_list_from_tensor([1], element_shape=scalar_shape())] + * 2) + self.evaluate( + list_ops.tensor_list_concat_lists(l_batch_0, l_batch_of_int_tls, + element_dtype=dtypes.float32)) + + @test_util.run_in_graph_and_eager_modes() + def testPushBackBatch(self): + c = constant_op.constant([1.0, 2.0], dtype=dtypes.float32) + l0 = list_ops.tensor_list_from_tensor(c, element_shape=scalar_shape()) + l1 = list_ops.tensor_list_from_tensor([-1.0], element_shape=scalar_shape()) + l_batch = array_ops.stack([l0, l1]) + l_push = list_ops.tensor_list_push_back_batch(l_batch, [3.0, 4.0]) + l_unstack = array_ops.unstack(l_push) + l0_ret = list_ops.tensor_list_stack(l_unstack[0], dtypes.float32) + l1_ret = list_ops.tensor_list_stack(l_unstack[1], dtypes.float32) + self.assertAllClose([1.0, 2.0, 3.0], self.evaluate(l0_ret)) + self.assertAllClose([-1.0, 4.0], self.evaluate(l1_ret)) + + with 
ops.control_dependencies([l_push]): + l_unstack_orig = array_ops.unstack(l_batch) + l0_orig_ret = list_ops.tensor_list_stack(l_unstack_orig[0], + dtypes.float32) + l1_orig_ret = list_ops.tensor_list_stack(l_unstack_orig[1], + dtypes.float32) + + # Check that without aliasing, push_back_batch still works; and + # that it doesn't modify the input. + l0_r_v, l1_r_v, l0_orig_v, l1_orig_v = self.evaluate( + (l0_ret, l1_ret, l0_orig_ret, l1_orig_ret)) + self.assertAllClose([1.0, 2.0, 3.0], l0_r_v) + self.assertAllClose([-1.0, 4.0], l1_r_v) + self.assertAllClose([1.0, 2.0], l0_orig_v) + self.assertAllClose([-1.0], l1_orig_v) + + # Pushing back mismatched shapes fails. + with self.assertRaises((errors.InvalidArgumentError, ValueError)): + self.evaluate(list_ops.tensor_list_push_back_batch(l_batch, [])) + + with self.assertRaisesRegexp(errors.InvalidArgumentError, + "incompatible shape to a list at index 0"): + self.evaluate( + list_ops.tensor_list_push_back_batch(l_batch, [[3.0], [4.0]])) + + with self.assertRaisesRegexp(errors.InvalidArgumentError, + "Invalid data type at index 0"): + self.evaluate(list_ops.tensor_list_push_back_batch(l_batch, [3, 4])) if __name__ == "__main__": - ops.enable_eager_execution() test.main() diff --git a/tensorflow/python/kernel_tests/manip_ops_test.py b/tensorflow/python/kernel_tests/manip_ops_test.py index b8200ac0cb1e4315a56181779c70da1126d8fc15..f31426713c49ba03b978ae7820e22c5c17319139 100644 --- a/tensorflow/python/kernel_tests/manip_ops_test.py +++ b/tensorflow/python/kernel_tests/manip_ops_test.py @@ -20,8 +20,10 @@ from __future__ import print_function import numpy as np from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors_impl from tensorflow.python.framework import test_util +from tensorflow.python.ops import array_ops from tensorflow.python.ops import gradient_checker from tensorflow.python.ops import manip_ops from 
tensorflow.python.platform import test as test_lib @@ -88,41 +90,78 @@ class RollTest(test_util.TensorFlowTestCase): x = np.random.rand(3, 2, 1, 1).astype(t) self._testAll(x + 1j * x, [2, 1, 1, 0], [0, 3, 1, 2]) + def testNegativeAxis(self): + self._testAll(np.random.randint(-100, 100, (5)).astype(np.int32), 3, -1) + self._testAll(np.random.randint(-100, 100, (4, 4)).astype(np.int32), 3, -2) + # Make sure negative axis shoudl be 0 <= axis + dims < dims + with self.test_session(): + with self.assertRaisesRegexp(errors_impl.InvalidArgumentError, + "is out of range"): + manip_ops.roll(np.random.randint(-100, 100, (4, 4)).astype(np.int32), + 3, -10).eval() + + def testInvalidInputShape(self): + # The input should be 1-D or higher, checked in shape function. + with self.assertRaisesRegexp( + ValueError, "Shape must be at least rank 1 but is rank 0"): + manip_ops.roll(7, 1, 0) + def testRollInputMustVectorHigherRaises(self): - tensor = 7 + # The input should be 1-D or higher, checked in kernel. + tensor = array_ops.placeholder(dtype=dtypes.int32) shift = 1 axis = 0 with self.test_session(): with self.assertRaisesRegexp(errors_impl.InvalidArgumentError, "input must be 1-D or higher"): - manip_ops.roll(tensor, shift, axis).eval() + manip_ops.roll(tensor, shift, axis).eval(feed_dict={tensor: 7}) + + def testInvalidAxisShape(self): + # The axis should be a scalar or 1-D, checked in shape function. + with self.assertRaisesRegexp( + ValueError, "Shape must be at most rank 1 but is rank 2"): + manip_ops.roll([[1, 2], [3, 4]], 1, [[0, 1]]) def testRollAxisMustBeScalarOrVectorRaises(self): + # The axis should be a scalar or 1-D, checked in kernel. 
tensor = [[1, 2], [3, 4]] shift = 1 - axis = [[0, 1]] + axis = array_ops.placeholder(dtype=dtypes.int32) with self.test_session(): with self.assertRaisesRegexp(errors_impl.InvalidArgumentError, "axis must be a scalar or a 1-D vector"): - manip_ops.roll(tensor, shift, axis).eval() + manip_ops.roll(tensor, shift, axis).eval(feed_dict={axis: [[0, 1]]}) + + def testInvalidShiftShape(self): + # The shift should be a scalar or 1-D, checked in shape function. + with self.assertRaisesRegexp( + ValueError, "Shape must be at most rank 1 but is rank 2"): + manip_ops.roll([[1, 2], [3, 4]], [[0, 1]], 1) def testRollShiftMustBeScalarOrVectorRaises(self): + # The shift should be a scalar or 1-D, checked in kernel. tensor = [[1, 2], [3, 4]] - shift = [[0, 1]] + shift = array_ops.placeholder(dtype=dtypes.int32) axis = 1 with self.test_session(): with self.assertRaisesRegexp(errors_impl.InvalidArgumentError, "shift must be a scalar or a 1-D vector"): - manip_ops.roll(tensor, shift, axis).eval() + manip_ops.roll(tensor, shift, axis).eval(feed_dict={shift: [[0, 1]]}) + + def testInvalidShiftAndAxisNotEqualShape(self): + # The shift and axis must be same size, checked in shape function. + with self.assertRaisesRegexp(ValueError, "both shapes must be equal"): + manip_ops.roll([[1, 2], [3, 4]], [1], [0, 1]) def testRollShiftAndAxisMustBeSameSizeRaises(self): + # The shift and axis must be same size, checked in kernel. 
tensor = [[1, 2], [3, 4]] - shift = [1] + shift = array_ops.placeholder(dtype=dtypes.int32) axis = [0, 1] with self.test_session(): with self.assertRaisesRegexp(errors_impl.InvalidArgumentError, "shift and axis must have the same size"): - manip_ops.roll(tensor, shift, axis).eval() + manip_ops.roll(tensor, shift, axis).eval(feed_dict={shift: [1]}) def testRollAxisOutOfRangeRaises(self): tensor = [1, 2] diff --git a/tensorflow/python/kernel_tests/metrics_test.py b/tensorflow/python/kernel_tests/metrics_test.py index ad802f7e1f72f6cbc3dda1ca98e46e6da4e5110a..55653489aff0a745c5731db4d31864aede97e954 100644 --- a/tensorflow/python/kernel_tests/metrics_test.py +++ b/tensorflow/python/kernel_tests/metrics_test.py @@ -1124,40 +1124,91 @@ class AUCTest(test.TestCase): self.assertAlmostEqual(0.7, auc.eval(), 5) - def testAUCPRSpecialCase(self): + # Regarding the AUC-PR tests: note that the preferred method when + # calculating AUC-PR is summation_method='careful_interpolation'. + def testCorrectAUCPRSpecialCase(self): with self.test_session() as sess: predictions = constant_op.constant( [0.1, 0.4, 0.35, 0.8], shape=(1, 4), dtype=dtypes_lib.float32) labels = constant_op.constant([0, 0, 1, 1], shape=(1, 4)) - auc, update_op = metrics.auc(labels, predictions, curve='PR') + auc, update_op = metrics.auc(labels, predictions, curve='PR', + summation_method='careful_interpolation') + + sess.run(variables.local_variables_initializer()) + # expected ~= 0.79726744594 + expected = 1 - math.log(1.5) / 2 + self.assertAlmostEqual(expected, sess.run(update_op), delta=1e-3) + self.assertAlmostEqual(expected, auc.eval(), delta=1e-3) + + def testCorrectAnotherAUCPRSpecialCase(self): + with self.test_session() as sess: + predictions = constant_op.constant( + [0.1, 0.4, 0.35, 0.8, 0.1, 0.135, 0.81], + shape=(1, 7), + dtype=dtypes_lib.float32) + labels = constant_op.constant([0, 0, 1, 0, 1, 0, 1], shape=(1, 7)) + auc, update_op = metrics.auc(labels, predictions, curve='PR', + 
summation_method='careful_interpolation') + + sess.run(variables.local_variables_initializer()) + # expected ~= 0.61350593198 + expected = (2.5 - 2 * math.log(4./3) - 0.25 * math.log(7./5)) / 3 + self.assertAlmostEqual(expected, sess.run(update_op), delta=1e-3) + self.assertAlmostEqual(expected, auc.eval(), delta=1e-3) + + def testThirdCorrectAUCPRSpecialCase(self): + with self.test_session() as sess: + predictions = constant_op.constant( + [0.0, 0.1, 0.2, 0.33, 0.3, 0.4, 0.5], + shape=(1, 7), + dtype=dtypes_lib.float32) + labels = constant_op.constant([0, 0, 0, 0, 1, 1, 1], shape=(1, 7)) + auc, update_op = metrics.auc(labels, predictions, curve='PR', + summation_method='careful_interpolation') + + sess.run(variables.local_variables_initializer()) + # expected ~= 0.90410597584 + expected = 1 - math.log(4./3) / 3 + self.assertAlmostEqual(expected, sess.run(update_op), delta=1e-3) + self.assertAlmostEqual(expected, auc.eval(), delta=1e-3) + + def testIncorrectAUCPRSpecialCase(self): + with self.test_session() as sess: + predictions = constant_op.constant( + [0.1, 0.4, 0.35, 0.8], shape=(1, 4), dtype=dtypes_lib.float32) + labels = constant_op.constant([0, 0, 1, 1], shape=(1, 4)) + auc, update_op = metrics.auc(labels, predictions, curve='PR', + summation_method='trapezoidal') sess.run(variables.local_variables_initializer()) self.assertAlmostEqual(0.79166, sess.run(update_op), delta=1e-3) self.assertAlmostEqual(0.79166, auc.eval(), delta=1e-3) - def testAnotherAUCPRSpecialCase(self): + def testAnotherIncorrectAUCPRSpecialCase(self): with self.test_session() as sess: predictions = constant_op.constant( [0.1, 0.4, 0.35, 0.8, 0.1, 0.135, 0.81], shape=(1, 7), dtype=dtypes_lib.float32) labels = constant_op.constant([0, 0, 1, 0, 1, 0, 1], shape=(1, 7)) - auc, update_op = metrics.auc(labels, predictions, curve='PR') + auc, update_op = metrics.auc(labels, predictions, curve='PR', + summation_method='trapezoidal') sess.run(variables.local_variables_initializer()) 
self.assertAlmostEqual(0.610317, sess.run(update_op), delta=1e-3) self.assertAlmostEqual(0.610317, auc.eval(), delta=1e-3) - def testThirdAUCPRSpecialCase(self): + def testThirdIncorrectAUCPRSpecialCase(self): with self.test_session() as sess: predictions = constant_op.constant( [0.0, 0.1, 0.2, 0.33, 0.3, 0.4, 0.5], shape=(1, 7), dtype=dtypes_lib.float32) labels = constant_op.constant([0, 0, 0, 0, 1, 1, 1], shape=(1, 7)) - auc, update_op = metrics.auc(labels, predictions, curve='PR') + auc, update_op = metrics.auc(labels, predictions, curve='PR', + summation_method='trapezoidal') sess.run(variables.local_variables_initializer()) self.assertAlmostEqual(0.90277, sess.run(update_op), delta=1e-3) diff --git a/tensorflow/python/kernel_tests/norm_op_test.py b/tensorflow/python/kernel_tests/norm_op_test.py index d85512fae69801a0c14789f2ed2122559815f80d..3f71b326a2fcb8accfd3182ce5d42f30aa2c74b4 100644 --- a/tensorflow/python/kernel_tests/norm_op_test.py +++ b/tensorflow/python/kernel_tests/norm_op_test.py @@ -37,17 +37,17 @@ class NormOpTest(test_lib.TestCase): def testBadOrder(self): matrix = [[0., 1.], [2., 3.]] - for ord_ in "foo", -7, -1.1, 0: + for ord_ in "fro", -7, -1.1, 0: with self.assertRaisesRegexp(ValueError, "'ord' must be a supported vector norm"): - linalg_ops.norm(matrix, ord="fro") + linalg_ops.norm(matrix, ord=ord_) - for ord_ in "foo", -7, -1.1, 0: + for ord_ in "fro", -7, -1.1, 0: with self.assertRaisesRegexp(ValueError, "'ord' must be a supported vector norm"): linalg_ops.norm(matrix, ord=ord_, axis=-1) - for ord_ in 1.1, 2: + for ord_ in "foo", -7, -1.1, 1.1: with self.assertRaisesRegexp(ValueError, "'ord' must be a supported matrix norm"): linalg_ops.norm(matrix, ord=ord_, axis=[-2, -1]) @@ -69,14 +69,14 @@ def _GetNormOpTest(dtype_, shape_, ord_, axis_, keep_dims_, use_static_shape_): if use_static_shape_: tf_matrix = constant_op.constant(matrix) tf_norm = linalg_ops.norm( - tf_matrix, ord=ord_, axis=axis_, keep_dims=keep_dims_) + tf_matrix, 
ord=ord_, axis=axis_, keepdims=keep_dims_) tf_norm_val = sess.run(tf_norm) else: tf_matrix = array_ops.placeholder(dtype_) tf_norm = linalg_ops.norm( - tf_matrix, ord=ord_, axis=axis_, keep_dims=keep_dims_) + tf_matrix, ord=ord_, axis=axis_, keepdims=keep_dims_) tf_norm_val = sess.run(tf_norm, feed_dict={tf_matrix: matrix}) - self.assertAllClose(np_norm, tf_norm_val) + self.assertAllClose(np_norm, tf_norm_val, rtol=1e-5, atol=1e-5) def Test(self): is_matrix_norm = (isinstance(axis_, tuple) or @@ -85,8 +85,6 @@ def _GetNormOpTest(dtype_, shape_, ord_, axis_, keep_dims_, use_static_shape_): if ((not is_matrix_norm and ord_ == "fro") or (is_matrix_norm and is_fancy_p_norm)): self.skipTest("Not supported by neither numpy.linalg.norm nor tf.norm") - if is_matrix_norm and ord_ == 2: - self.skipTest("Not supported by tf.norm") if ord_ == 'euclidean' or (axis_ is None and len(shape) > 2): self.skipTest("Not supported by numpy.linalg.norm") matrix = np.random.randn(*shape_).astype(dtype_) diff --git a/tensorflow/python/kernel_tests/pooling_ops_test.py b/tensorflow/python/kernel_tests/pooling_ops_test.py index ed44a1a4d16a94d3aa75a50bf059e33326757c4d..a0c372db7d0a4e76c37c01e1ce24cd8fc9123f7a 100644 --- a/tensorflow/python/kernel_tests/pooling_ops_test.py +++ b/tensorflow/python/kernel_tests/pooling_ops_test.py @@ -817,9 +817,6 @@ class PoolingTest(test.TestCase): cpu_val, gpu_val, half_rtol=0.01, half_atol=0.01) def testMaxPoolingWithArgmax(self): - # MaxPoolWithArgMax is implemented only on CUDA. - if not test.is_gpu_available(cuda_only=True): - return tensor_input = [1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0] with self.test_session(use_gpu=True) as sess: t = constant_op.constant(tensor_input, shape=[1, 3, 3, 1]) @@ -836,9 +833,6 @@ class PoolingTest(test.TestCase): self.assertAllEqual(argmax.ravel(), [0, 1, 3, 5]) def testMaxPoolingGradWithArgmax(self): - # MaxPoolWithArgMax is implemented only on CUDA. 
- if not test.is_gpu_available(cuda_only=True): - return orig_input = [1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0] tensor_input = [11.0, 12.0, 13.0, 14.0] tensor_argmax = list(np.array([0, 1, 3, 5], dtype=np.int64)) diff --git a/tensorflow/python/kernel_tests/py_func_test.py b/tensorflow/python/kernel_tests/py_func_test.py index 5b508b7c0e72180194fa1a4c95bc4282d4694605..b9f44d728a1d9843df1e836594f9caa7010d8a94 100644 --- a/tensorflow/python/kernel_tests/py_func_test.py +++ b/tensorflow/python/kernel_tests/py_func_test.py @@ -52,6 +52,38 @@ class PyFuncTest(test.TestCase): """Encapsulates tests for py_func and eager_py_func.""" # ----- Tests for py_func ----- + def testRealDataTypes(self): + def sum_func(x, y): + return x + y + for dtype in [dtypes.float16, dtypes.float32, dtypes.float64, + dtypes.uint8, dtypes.int8, dtypes.uint16, dtypes.int16, + dtypes.int32, dtypes.int64]: + with self.test_session(): + x = constant_op.constant(1, dtype=dtype) + y = constant_op.constant(2, dtype=dtype) + z = self.evaluate(script_ops.py_func(sum_func, [x, y], dtype)) + self.assertEqual(z, 3) + + def testComplexDataTypes(self): + def sub_func(x, y): + return x - y + for dtype in [dtypes.complex64, dtypes.complex128]: + with self.test_session(): + x = constant_op.constant(1 + 1j, dtype=dtype) + y = constant_op.constant(2 - 2j, dtype=dtype) + z = self.evaluate(script_ops.py_func(sub_func, [x, y], dtype)) + self.assertEqual(z, -1 + 3j) + + def testBoolDataTypes(self): + def and_func(x, y): + return x and y + dtype = dtypes.bool + with self.test_session(): + x = constant_op.constant(True, dtype=dtype) + y = constant_op.constant(False, dtype=dtype) + z = self.evaluate(script_ops.py_func(and_func, [x, y], dtype)) + self.assertEqual(z, False) + def testSingleType(self): with self.test_session(): x = constant_op.constant(1.0, dtypes.float32) diff --git a/tensorflow/python/kernel_tests/random/multinomial_op_test.py b/tensorflow/python/kernel_tests/random/multinomial_op_test.py index 
a9dc7b7de000024f23b88406bf0c1c2f32ac4fac..051c7d86bf2342f15b587fc350bfbede7fae2285 100644 --- a/tensorflow/python/kernel_tests/random/multinomial_op_test.py +++ b/tensorflow/python/kernel_tests/random/multinomial_op_test.py @@ -46,7 +46,7 @@ def composed_sampler(logits, num_samples): logits = array_ops.expand_dims(logits, -1) # [batch size, num samples] - return math_ops.argmax(logits + noise, dimension=1) + return math_ops.argmax(logits + noise, axis=1) native_sampler = random_ops.multinomial diff --git a/tensorflow/python/kernel_tests/random/random_ops_test.py b/tensorflow/python/kernel_tests/random/random_ops_test.py index df37dd98ece57ae7c3835ab63b720b29fc19c975..e4b5c3832a2252aedc8820a650b022cd30b7f285 100644 --- a/tensorflow/python/kernel_tests/random/random_ops_test.py +++ b/tensorflow/python/kernel_tests/random/random_ops_test.py @@ -228,6 +228,17 @@ class RandomUniformTest(test.TestCase): print("count = ", count) self.assertTrue(count < count_limit) + def testUniformIntsWithInvalidShape(self): + for dtype in dtypes.int32, dtypes.int64: + with self.assertRaisesRegexp( + ValueError, "Shape must be rank 0 but is rank 1"): + random_ops.random_uniform( + [1000], minval=[1, 2], maxval=3, dtype=dtype) + with self.assertRaisesRegexp( + ValueError, "Shape must be rank 0 but is rank 1"): + random_ops.random_uniform( + [1000], minval=1, maxval=[2, 3], dtype=dtype) + # Check that uniform ints actually follow a uniform distribution. 
def testUniformInts(self): minv = -2 diff --git a/tensorflow/python/kernel_tests/reduce_join_op_test.py b/tensorflow/python/kernel_tests/reduce_join_op_test.py index 7f3049b9f841c55759e78c8bc66301f325d2a7c6..fb9e5cc2a3727b70f731ca8ae946b6d5ef325833 100644 --- a/tensorflow/python/kernel_tests/reduce_join_op_test.py +++ b/tensorflow/python/kernel_tests/reduce_join_op_test.py @@ -160,7 +160,7 @@ class ReduceJoinTest(UnicodeTestCase): separator=separator) if not reduction_indices: truth = constant_op.constant(truth) - truth_squeezed = array_ops.squeeze(truth, squeeze_dims=reduction_indices) + truth_squeezed = array_ops.squeeze(truth, axis=reduction_indices) output_array = output.eval() output_keep_dims_array = output_keep_dims.eval() truth_array = truth.eval() diff --git a/tensorflow/python/kernel_tests/reduction_ops_test.py b/tensorflow/python/kernel_tests/reduction_ops_test.py index 589ea54973c10902c461f552d5c54b6fad6ecf67..ea78b58d88f7ff04a0a5d2272d2c94e1c97009da 100644 --- a/tensorflow/python/kernel_tests/reduction_ops_test.py +++ b/tensorflow/python/kernel_tests/reduction_ops_test.py @@ -889,9 +889,9 @@ class AnyReductionTest(test.TestCase): class CountNonzeroReductionTest(test.TestCase): - def _compare(self, x, reduction_axes, keepdims, use_gpu=False, + def _compare(self, x, reduction_axes, keepdims, use_gpu=False, zero=0, feed_dict=None): - np_ans = (x != 0).astype(np.int32) + np_ans = (x != zero).astype(np.int32) if reduction_axes is None: np_ans = np.sum(np_ans, keepdims=keepdims) else: @@ -958,6 +958,37 @@ class CountNonzeroReductionTest(test.TestCase): y = math_ops.count_nonzero(x, [0]) self.assertAllEqual(y.eval(), np.zeros(9938)) + def testStringReduce(self): + # Test case for GitHub issue 18712 + with self.test_session() as sess: + v = math_ops.count_nonzero(constant_op.constant(["test"])) + self.assertAllClose(sess.run(v), 1) + + def testStringReduce1D(self): + # Create a 1D array of strings + x = np.asarray(["", "", "a", "", "", "b"]) + self._compare(x, 
None, keepdims=False, zero=np.str("")) + self._compare(x, [], keepdims=False, zero=np.str("")) + self._compare(x, [0], keepdims=False, zero=np.str("")) + self._compare(x, None, keepdims=True, zero=np.str("")) + self._compare(x, [], keepdims=True, zero=np.str("")) + self._compare(x, [0], keepdims=True, zero=np.str("")) + + def testStringReduce2D(self): + # Create a 2D array of strings + x = np.asarray([["", "", "a", "", "", "b"], + ["", "c", "", "d", "", ""], + ["e", "", "f", "", "", ""]]) + self._compare(x, None, keepdims=False, zero=np.str("")) + self._compare(x, [], keepdims=False, zero=np.str("")) + self._compare(x, [0], keepdims=False, zero=np.str("")) + self._compare(x, [1], keepdims=False, zero=np.str("")) + self._compare(x, [0, 1], keepdims=False, zero=np.str("")) + self._compare(x, None, keepdims=True, zero=np.str("")) + self._compare(x, [], keepdims=True, zero=np.str("")) + self._compare(x, [0], keepdims=True, zero=np.str("")) + self._compare(x, [0, 1], keepdims=True, zero=np.str("")) + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/kernel_tests/resource_variable_ops_test.py b/tensorflow/python/kernel_tests/resource_variable_ops_test.py index c31d5a1f91856e1be4e32c22719e15fbc55c984c..984192258c9724dd9d73105c65177786def98e83 100644 --- a/tensorflow/python/kernel_tests/resource_variable_ops_test.py +++ b/tensorflow/python/kernel_tests/resource_variable_ops_test.py @@ -36,6 +36,9 @@ from tensorflow.python.ops import state_ops from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables from tensorflow.python.platform import test +from tensorflow.python.training import momentum +from tensorflow.python.training import saver +from tensorflow.python.training import training_util from tensorflow.python.util import compat @@ -174,73 +177,59 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase): resource_variable_ops.read_variable_op(handle, dtype=dtypes.int32)) self.assertEqual(read, 2) - 
@test_util.run_in_graph_and_eager_modes(use_gpu=True) + @test_util.run_in_graph_and_eager_modes() def testScatterAdd(self): - with ops.device("cpu:0"): - handle = resource_variable_ops.var_handle_op( - dtype=dtypes.int32, shape=[1, 1]) - self.evaluate(resource_variable_ops.assign_variable_op( - handle, constant_op.constant([[1]], dtype=dtypes.int32))) - self.evaluate(resource_variable_ops.resource_scatter_add( - handle, [0], constant_op.constant([[2]], dtype=dtypes.int32))) - read = resource_variable_ops.read_variable_op(handle, dtype=dtypes.int32) - self.assertEqual(self.evaluate(read), [[3]]) + handle = resource_variable_ops.var_handle_op( + dtype=dtypes.int32, shape=[1, 1]) + self.evaluate( + resource_variable_ops.assign_variable_op( + handle, constant_op.constant([[1]], dtype=dtypes.int32))) + self.evaluate( + resource_variable_ops.resource_scatter_add( + handle, [0], constant_op.constant([[2]], dtype=dtypes.int32))) + read = resource_variable_ops.read_variable_op(handle, dtype=dtypes.int32) + self.assertEqual(self.evaluate(read), [[3]]) - @test_util.run_in_graph_and_eager_modes(use_gpu=True) + @test_util.run_in_graph_and_eager_modes() def testScatterSub(self): - with ops.device("cpu:0"): - handle = resource_variable_ops.var_handle_op( - dtype=dtypes.int32, shape=[1, 1]) - self.evaluate( - resource_variable_ops.assign_variable_op(handle, - constant_op.constant( - [[1]], - dtype=dtypes.int32))) - self.evaluate( - resource_variable_ops.resource_scatter_sub(handle, [0], - constant_op.constant( - [[2]], - dtype=dtypes.int32))) - read = resource_variable_ops.read_variable_op(handle, dtype=dtypes.int32) - self.assertEqual(self.evaluate(read), [[-1]]) + handle = resource_variable_ops.var_handle_op( + dtype=dtypes.int32, shape=[1, 1]) + self.evaluate( + resource_variable_ops.assign_variable_op( + handle, constant_op.constant([[1]], dtype=dtypes.int32))) + self.evaluate( + resource_variable_ops.resource_scatter_sub( + handle, [0], constant_op.constant([[2]], 
dtype=dtypes.int32))) + read = resource_variable_ops.read_variable_op(handle, dtype=dtypes.int32) + self.assertEqual(self.evaluate(read), [[-1]]) - @test_util.run_in_graph_and_eager_modes(use_gpu=True) + @test_util.run_in_graph_and_eager_modes() def testScatterMul(self): - with ops.device("cpu:0"): - handle = resource_variable_ops.var_handle_op( - dtype=dtypes.int32, shape=[1, 1]) - self.evaluate( - resource_variable_ops.assign_variable_op(handle, - constant_op.constant( - [[1]], - dtype=dtypes.int32))) - self.evaluate( - resource_variable_ops.resource_scatter_mul(handle, [0], - constant_op.constant( - [[5]], - dtype=dtypes.int32))) - read = resource_variable_ops.read_variable_op(handle, dtype=dtypes.int32) - self.assertEqual(self.evaluate(read), [[5]]) + handle = resource_variable_ops.var_handle_op( + dtype=dtypes.int32, shape=[1, 1]) + self.evaluate( + resource_variable_ops.assign_variable_op( + handle, constant_op.constant([[1]], dtype=dtypes.int32))) + self.evaluate( + resource_variable_ops.resource_scatter_mul( + handle, [0], constant_op.constant([[5]], dtype=dtypes.int32))) + read = resource_variable_ops.read_variable_op(handle, dtype=dtypes.int32) + self.assertEqual(self.evaluate(read), [[5]]) - @test_util.run_in_graph_and_eager_modes(use_gpu=True) + @test_util.run_in_graph_and_eager_modes() def testScatterDiv(self): - with ops.device("cpu:0"): - handle = resource_variable_ops.var_handle_op( - dtype=dtypes.int32, shape=[1, 1]) - self.evaluate( - resource_variable_ops.assign_variable_op(handle, - constant_op.constant( - [[6]], - dtype=dtypes.int32))) - self.evaluate( - resource_variable_ops.resource_scatter_div(handle, [0], - constant_op.constant( - [[3]], - dtype=dtypes.int32))) - read = resource_variable_ops.read_variable_op(handle, dtype=dtypes.int32) - self.assertEqual(self.evaluate(read), [[2]]) + handle = resource_variable_ops.var_handle_op( + dtype=dtypes.int32, shape=[1, 1]) + self.evaluate( + resource_variable_ops.assign_variable_op( + handle, 
constant_op.constant([[6]], dtype=dtypes.int32))) + self.evaluate( + resource_variable_ops.resource_scatter_div( + handle, [0], constant_op.constant([[3]], dtype=dtypes.int32))) + read = resource_variable_ops.read_variable_op(handle, dtype=dtypes.int32) + self.assertEqual(self.evaluate(read), [[2]]) - @test_util.run_in_graph_and_eager_modes(use_gpu=True) + @test_util.run_in_graph_and_eager_modes() def testScatterMin(self): with ops.device("cpu:0"): handle = resource_variable_ops.var_handle_op( @@ -258,131 +247,115 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase): read = resource_variable_ops.read_variable_op(handle, dtype=dtypes.int32) self.assertEqual(self.evaluate(read), [[3]]) - @test_util.run_in_graph_and_eager_modes(use_gpu=True) + def testMetagraph(self): + with ops.Graph().as_default(): + with variable_scope.variable_scope("foo", use_resource=True): + a = variable_scope.get_variable("a", initializer=10.0) + + momentum.MomentumOptimizer( + learning_rate=0.001, momentum=0.1).minimize( + a, + colocate_gradients_with_ops=True, + global_step=training_util.get_or_create_global_step()) + + graph = ops.get_default_graph() + meta_graph_def = saver.export_meta_graph(graph=graph) + + with ops.Graph().as_default(): + saver.import_meta_graph(meta_graph_def, import_scope="") + meta_graph_two = saver.export_meta_graph(graph=graph) + self.assertEqual(meta_graph_def, meta_graph_two) + + @test_util.run_in_graph_and_eager_modes() def testScatterMax(self): - with ops.device("cpu:0"): - handle = resource_variable_ops.var_handle_op( - dtype=dtypes.int32, shape=[1, 1]) - self.evaluate( - resource_variable_ops.assign_variable_op(handle, - constant_op.constant( - [[6]], - dtype=dtypes.int32))) - self.evaluate( - resource_variable_ops.resource_scatter_max(handle, [0], - constant_op.constant( - [[3]], - dtype=dtypes.int32))) - read = resource_variable_ops.read_variable_op(handle, dtype=dtypes.int32) - self.assertEqual(self.evaluate(read), [[6]]) + handle = 
resource_variable_ops.var_handle_op( + dtype=dtypes.int32, shape=[1, 1]) + self.evaluate( + resource_variable_ops.assign_variable_op( + handle, constant_op.constant([[6]], dtype=dtypes.int32))) + self.evaluate( + resource_variable_ops.resource_scatter_max( + handle, [0], constant_op.constant([[3]], dtype=dtypes.int32))) + read = resource_variable_ops.read_variable_op(handle, dtype=dtypes.int32) + self.assertEqual(self.evaluate(read), [[6]]) - @test_util.run_in_graph_and_eager_modes(use_gpu=True) + @test_util.run_in_graph_and_eager_modes() def testScatterAddScalar(self): - with ops.device("cpu:0"): - handle = resource_variable_ops.var_handle_op( - dtype=dtypes.int32, shape=[1, 1]) - self.evaluate( - resource_variable_ops.assign_variable_op(handle, - constant_op.constant( - [[1]], - dtype=dtypes.int32))) - self.evaluate( - resource_variable_ops.resource_scatter_add(handle, [0], - constant_op.constant( - 2, - dtype=dtypes.int32))) - read = resource_variable_ops.read_variable_op(handle, dtype=dtypes.int32) - self.assertEqual(self.evaluate(read), [[3]]) + handle = resource_variable_ops.var_handle_op( + dtype=dtypes.int32, shape=[1, 1]) + self.evaluate( + resource_variable_ops.assign_variable_op( + handle, constant_op.constant([[1]], dtype=dtypes.int32))) + self.evaluate( + resource_variable_ops.resource_scatter_add( + handle, [0], constant_op.constant(2, dtype=dtypes.int32))) + read = resource_variable_ops.read_variable_op(handle, dtype=dtypes.int32) + self.assertEqual(self.evaluate(read), [[3]]) - @test_util.run_in_graph_and_eager_modes(use_gpu=True) + @test_util.run_in_graph_and_eager_modes() def testScatterSubScalar(self): - with ops.device("cpu:0"): - handle = resource_variable_ops.var_handle_op( - dtype=dtypes.int32, shape=[1, 1]) - self.evaluate( - resource_variable_ops.assign_variable_op(handle, - constant_op.constant( - [[1]], - dtype=dtypes.int32))) - self.evaluate( - resource_variable_ops.resource_scatter_sub(handle, [0], - constant_op.constant( - 2, - 
dtype=dtypes.int32))) - read = resource_variable_ops.read_variable_op(handle, dtype=dtypes.int32) - self.assertEqual(self.evaluate(read), [[-1]]) + handle = resource_variable_ops.var_handle_op( + dtype=dtypes.int32, shape=[1, 1]) + self.evaluate( + resource_variable_ops.assign_variable_op( + handle, constant_op.constant([[1]], dtype=dtypes.int32))) + self.evaluate( + resource_variable_ops.resource_scatter_sub( + handle, [0], constant_op.constant(2, dtype=dtypes.int32))) + read = resource_variable_ops.read_variable_op(handle, dtype=dtypes.int32) + self.assertEqual(self.evaluate(read), [[-1]]) - @test_util.run_in_graph_and_eager_modes(use_gpu=True) + @test_util.run_in_graph_and_eager_modes() def testScatterMulScalar(self): - with ops.device("cpu:0"): - handle = resource_variable_ops.var_handle_op( - dtype=dtypes.int32, shape=[1, 1]) - self.evaluate( - resource_variable_ops.assign_variable_op(handle, - constant_op.constant( - [[1]], - dtype=dtypes.int32))) - self.evaluate( - resource_variable_ops.resource_scatter_mul(handle, [0], - constant_op.constant( - 5, - dtype=dtypes.int32))) - read = resource_variable_ops.read_variable_op(handle, dtype=dtypes.int32) - self.assertEqual(self.evaluate(read), [[5]]) + handle = resource_variable_ops.var_handle_op( + dtype=dtypes.int32, shape=[1, 1]) + self.evaluate( + resource_variable_ops.assign_variable_op( + handle, constant_op.constant([[1]], dtype=dtypes.int32))) + self.evaluate( + resource_variable_ops.resource_scatter_mul( + handle, [0], constant_op.constant(5, dtype=dtypes.int32))) + read = resource_variable_ops.read_variable_op(handle, dtype=dtypes.int32) + self.assertEqual(self.evaluate(read), [[5]]) - @test_util.run_in_graph_and_eager_modes(use_gpu=True) + @test_util.run_in_graph_and_eager_modes() def testScatterDivScalar(self): - with ops.device("cpu:0"): - handle = resource_variable_ops.var_handle_op( - dtype=dtypes.int32, shape=[1, 1]) - self.evaluate( - resource_variable_ops.assign_variable_op(handle, - 
constant_op.constant( - [[6]], - dtype=dtypes.int32))) - self.evaluate( - resource_variable_ops.resource_scatter_div(handle, [0], - constant_op.constant( - 3, - dtype=dtypes.int32))) - read = resource_variable_ops.read_variable_op(handle, dtype=dtypes.int32) - self.assertEqual(self.evaluate(read), [[2]]) + handle = resource_variable_ops.var_handle_op( + dtype=dtypes.int32, shape=[1, 1]) + self.evaluate( + resource_variable_ops.assign_variable_op( + handle, constant_op.constant([[6]], dtype=dtypes.int32))) + self.evaluate( + resource_variable_ops.resource_scatter_div( + handle, [0], constant_op.constant(3, dtype=dtypes.int32))) + read = resource_variable_ops.read_variable_op(handle, dtype=dtypes.int32) + self.assertEqual(self.evaluate(read), [[2]]) - @test_util.run_in_graph_and_eager_modes(use_gpu=True) + @test_util.run_in_graph_and_eager_modes() def testScatterMinScalar(self): - with ops.device("cpu:0"): - handle = resource_variable_ops.var_handle_op( - dtype=dtypes.int32, shape=[1, 1]) - self.evaluate( - resource_variable_ops.assign_variable_op(handle, - constant_op.constant( - [[6]], - dtype=dtypes.int32))) - self.evaluate( - resource_variable_ops.resource_scatter_min(handle, [0], - constant_op.constant( - 3, - dtype=dtypes.int32))) - read = resource_variable_ops.read_variable_op(handle, dtype=dtypes.int32) - self.assertEqual(self.evaluate(read), [[3]]) + handle = resource_variable_ops.var_handle_op( + dtype=dtypes.int32, shape=[1, 1]) + self.evaluate( + resource_variable_ops.assign_variable_op( + handle, constant_op.constant([[6]], dtype=dtypes.int32))) + self.evaluate( + resource_variable_ops.resource_scatter_min( + handle, [0], constant_op.constant(3, dtype=dtypes.int32))) + read = resource_variable_ops.read_variable_op(handle, dtype=dtypes.int32) + self.assertEqual(self.evaluate(read), [[3]]) - @test_util.run_in_graph_and_eager_modes(use_gpu=True) + @test_util.run_in_graph_and_eager_modes() def testScatterMaxScalar(self): - with ops.device("cpu:0"): - handle 
= resource_variable_ops.var_handle_op( - dtype=dtypes.int32, shape=[1, 1]) - self.evaluate( - resource_variable_ops.assign_variable_op(handle, - constant_op.constant( - [[6]], - dtype=dtypes.int32))) - self.evaluate( - resource_variable_ops.resource_scatter_max(handle, [0], - constant_op.constant( - 3, - dtype=dtypes.int32))) - read = resource_variable_ops.read_variable_op(handle, dtype=dtypes.int32) - self.assertEqual(self.evaluate(read), [[6]]) + handle = resource_variable_ops.var_handle_op( + dtype=dtypes.int32, shape=[1, 1]) + self.evaluate( + resource_variable_ops.assign_variable_op( + handle, constant_op.constant([[6]], dtype=dtypes.int32))) + self.evaluate( + resource_variable_ops.resource_scatter_max( + handle, [0], constant_op.constant(3, dtype=dtypes.int32))) + read = resource_variable_ops.read_variable_op(handle, dtype=dtypes.int32) + self.assertEqual(self.evaluate(read), [[6]]) def testScatterUpdateString(self): handle = resource_variable_ops.var_handle_op( @@ -802,6 +775,12 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase): state_ops.scatter_update(v, [1], [3.0]) self.assertAllEqual([1.0, 3.0], v.numpy()) + def testScatterAddStateOps(self): + with context.eager_mode(): + v = resource_variable_ops.ResourceVariable([1.0, 2.0], name="add") + state_ops.scatter_add(v, [1], [3]) + self.assertAllEqual([1.0, 5.0], v.numpy()) + def testScatterUpdateCast(self): with context.eager_mode(): v = resource_variable_ops.ResourceVariable([1.0, 2.0], name="update") diff --git a/tensorflow/python/kernel_tests/rnn_test.py b/tensorflow/python/kernel_tests/rnn_test.py index 9a0409c796ab60da3d47cf7d46ef6fbd5bd82394..fe5ad84c104502f0e09d3a963b406f49d6b97b71 100644 --- a/tensorflow/python/kernel_tests/rnn_test.py +++ b/tensorflow/python/kernel_tests/rnn_test.py @@ -206,6 +206,28 @@ class RNNTest(test.TestCase): self.assertAllEqual(4, state[0]) self.assertAllEqual([[[1]], [[2]], [[3]], [[4]]], state[1]) + def _assert_cell_builds(self, cell_class, dtype, batch_size, 
in_size, + out_size): + cell = cell_class(out_size, dtype=dtype) + in_shape = tensor_shape.TensorShape((batch_size, in_size)) + cell.build(in_shape) + state_output = cell.zero_state(batch_size, dtype) + cell_output, _ = cell(array_ops.zeros(in_shape, dtype), state_output) + self.assertAllEqual([batch_size, out_size], cell_output.shape.as_list()) + + @test_util.run_in_graph_and_eager_modes() + def testCellsBuild(self): + f32 = dtypes.float32 + f64 = dtypes.float64 + self._assert_cell_builds(rnn_cell_impl.BasicRNNCell, f32, 5, 7, 3) + self._assert_cell_builds(rnn_cell_impl.BasicRNNCell, f64, 5, 7, 3) + self._assert_cell_builds(rnn_cell_impl.BasicLSTMCell, f32, 5, 7, 3) + self._assert_cell_builds(rnn_cell_impl.BasicLSTMCell, f64, 5, 7, 3) + self._assert_cell_builds(rnn_cell_impl.GRUCell, f32, 5, 7, 3) + self._assert_cell_builds(rnn_cell_impl.GRUCell, f64, 5, 7, 3) + self._assert_cell_builds(rnn_cell_impl.LSTMCell, f32, 5, 7, 3) + self._assert_cell_builds(rnn_cell_impl.LSTMCell, f64, 5, 7, 3) + ######### Benchmarking RNN code diff --git a/tensorflow/python/kernel_tests/scatter_nd_ops_test.py b/tensorflow/python/kernel_tests/scatter_nd_ops_test.py index 9f5794951524b2689daa5fc4eefb19703262b8f0..b7477a768ab718c5a57293cc6f774298e1c9f891 100644 --- a/tensorflow/python/kernel_tests/scatter_nd_ops_test.py +++ b/tensorflow/python/kernel_tests/scatter_nd_ops_test.py @@ -364,6 +364,42 @@ class ScatterNdTest(test.TestCase): del input_ # input_ is not used in scatter_nd return array_ops.scatter_nd(indices, updates, shape) + def testString(self): + indices = constant_op.constant([[4], [3], [1], [7]], + dtype=dtypes.int32) + updates = constant_op.constant(["four", "three", "one", "seven"], + dtype=dtypes.string) + expected = np.array([b"", b"one", b"", b"three", b"four", + b"", b"", b"seven"]) + scatter = self.scatter_nd(indices, updates, shape=(8,)) + with self.test_session() as sess: + result = sess.run(scatter) + self.assertAllEqual(expected, result) + + # Same indice is updated 
twice by same value. + indices = constant_op.constant([[4], [3], [3], [7]], + dtype=dtypes.int32) + updates = constant_op.constant(["a", "b", "b", "c"], + dtype=dtypes.string) + expected = np.array([b"", b"", b"", b"bb", b"a", b"", b"", b"c"]) + scatter = self.scatter_nd(indices, updates, shape=(8,)) + with self.test_session() as sess: + result = sess.run(scatter) + self.assertAllEqual(expected, result) + + # Same indice is updated twice by different value. + indices = constant_op.constant([[4], [3], [3], [7]], + dtype=dtypes.int32) + updates = constant_op.constant(["a", "b", "c", "d"], + dtype=dtypes.string) + expected = [np.array([b"", b"", b"", b"bc", b"a", b"", b"", b"d"]), + np.array([b"", b"", b"", b"cb", b"a", b"", b"", b"d"])] + scatter = self.scatter_nd(indices, updates, shape=(8,)) + with self.test_session() as sess: + result = sess.run(scatter) + self.assertTrue(np.array_equal(result, expected[0]) or + np.array_equal(result, expected[1])) + def testRank3ValidShape(self): indices = array_ops.zeros([2, 2, 2], dtypes.int32) updates = array_ops.zeros([2, 2, 2], dtypes.int32) @@ -584,6 +620,10 @@ class ScatterNdNonAliasingAddTest(ScatterNdTest): shape, dtype=updates.dtype)) return array_ops.scatter_nd_non_aliasing_add(input_, indices, updates) + def testString(self): + # Not supported yet. + pass + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/kernel_tests/softmax_op_test.py b/tensorflow/python/kernel_tests/softmax_op_test.py index 981f96b74d3058aa79a1ea10e1254e572d0e8b85..dc4d4dbeabf3c53be6e30cb357f4a1d2d7e69064 100644 --- a/tensorflow/python/kernel_tests/softmax_op_test.py +++ b/tensorflow/python/kernel_tests/softmax_op_test.py @@ -39,6 +39,10 @@ class SoftmaxTest(test.TestCase): dim = len(features.shape) - 1 one_only_on_dim = list(features.shape) one_only_on_dim[dim] = 1 + is_fp16 = features.dtype == np.float16 + if is_fp16: + # Do the compute in fp32 and cast the input back to fp32. 
+ features = features.astype(np.float32) e = np.exp(features - np.reshape( np.amax( features, axis=dim), one_only_on_dim)) @@ -47,6 +51,8 @@ class SoftmaxTest(test.TestCase): res = np.log(softmax) else: res = softmax + if is_fp16: + res = res.astype(np.float16) return res def _testSoftmax(self, np_features, dim=-1, log=False, use_gpu=False): @@ -125,8 +131,8 @@ class SoftmaxTest(test.TestCase): "Test only applicable when running on GPUs") def testFloatGPU(self): if test.is_gpu_available(cuda_only=True): - rows = [2**x + np.random.randint(0, 1024) for x in range(1, 10)] - cols = [2**x + np.random.randint(0, 1024) for x in range(1, 10)] + rows = [2**x + np.random.randint(0, 16) for x in range(1, 4)] + cols = [2**x + np.random.randint(0, 16) for x in range(1, 4)] for row, col in zip(rows, cols): logging.info("Testing softmax float dtype in shape [%d, %d]", row, col) data = np.random.rand(row, col) @@ -140,8 +146,8 @@ class SoftmaxTest(test.TestCase): "Test only applicable when running on GPUs") def testHalfGPU(self): if test.is_gpu_available(cuda_only=True): - rows = [2**x + np.random.randint(0, 1024) for x in range(1, 8)] - cols = [2**x + np.random.randint(0, 1024) for x in range(1, 8)] + rows = [2**x + np.random.randint(0, 16) for x in range(1, 4)] + cols = [2**x + np.random.randint(0, 16) for x in range(1, 4)] for row, col in zip(rows, cols): logging.info("Testing softmax half dtype in shape [%d, %d]", row, col) data = np.random.rand(row, col) diff --git a/tensorflow/python/kernel_tests/string_strip_op_test.py b/tensorflow/python/kernel_tests/string_strip_op_test.py new file mode 100644 index 0000000000000000000000000000000000000000..30fd477ff42cf1e6c96b80936226df7dc15997d4 --- /dev/null +++ b/tensorflow/python/kernel_tests/string_strip_op_test.py @@ -0,0 +1,56 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for string_strip_op.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.ops import string_ops +from tensorflow.python.platform import test + + +class StringStripOpTest(test.TestCase): + """ Test cases for tf.string_strip.""" + + def test_string_strip(self): + strings = ["pigs on the wing", "animals"] + + with self.test_session() as sess: + output = string_ops.string_strip(strings) + output = sess.run(output) + self.assertAllEqual(output, [b"pigs on the wing", b"animals"]) + + def test_string_strip_2d(self): + strings = [["pigs on the wing", "animals"], + [" hello ", "\n\tworld \r \n"]] + + with self.test_session() as sess: + output = string_ops.string_strip(strings) + output = sess.run(output) + self.assertAllEqual(output, [[b"pigs on the wing", b"animals"], + [b"hello", b"world"]]) + + def test_string_strip_with_empty_strings(self): + strings = [" hello ", "", "world ", " \t \r \n "] + + with self.test_session() as sess: + output = string_ops.string_strip(strings) + output = sess.run(output) + self.assertAllEqual(output, [b"hello", b"", b"world", b""]) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/python/kernel_tests/variable_scope_test.py b/tensorflow/python/kernel_tests/variable_scope_test.py index 86ab9fbb70b5efcf06cc064617df14deb18c1f98..51aa671098905e840b7c96cd5a984887d347adf9 100644 --- a/tensorflow/python/kernel_tests/variable_scope_test.py +++ 
b/tensorflow/python/kernel_tests/variable_scope_test.py @@ -24,11 +24,13 @@ import threading import numpy from tensorflow.python.eager import context +from tensorflow.python.eager import function from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import test_util +from tensorflow.python.layers import core as core_layers from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import init_ops @@ -118,6 +120,16 @@ class VariableScopeTest(test.TestCase): w = variable_scope.get_variable("w", []) self.assertEqual(w.dtype.base_dtype, dtypes.float16) + def testGetVariableInGraphNestedUnderEagerContext(self): + with context.eager_mode(): + + @function.defun + def f(): + v = variable_scope.get_variable("should_be_resource", []) + self.assertEqual(type(v), resource_variable_ops.ResourceVariable) + + f() + def testEagerVariableStore(self): with context.eager_mode(): store = variable_scope.EagerVariableStore() @@ -156,6 +168,28 @@ class VariableScopeTest(test.TestCase): for v in new_store.variables(): self.assertEqual(v.numpy(), 1) + def testEagerVariableStoreWithEagerDefun(self): + with context.eager_mode(): + + @function.defun + def f(): + x = constant_op.constant([[2.0]]) + d1 = core_layers.Dense( + 1, name="my_dense", kernel_initializer=init_ops.ones_initializer()) + _ = d1(x) # create variables + self.assertEqual(len(d1.variables), 2) + v1, v2 = d1.variables + d2 = core_layers.Dense( + 1, + name="my_dense", + kernel_initializer=init_ops.ones_initializer(), + _reuse=True) + _ = d2(x) + self.assertEqual(len(d2.variables), 2) + v3, v4 = d2.variables + self.assertAllEqual([v1, v2], [v3, v4]) + f() + @test_util.run_in_graph_and_eager_modes() def testInitFromNonTensorValue(self): v = variable_scope.get_variable("v4", initializer=4, 
dtype=dtypes.int32) @@ -209,15 +243,15 @@ class VariableScopeTest(test.TestCase): with variable_scope.variable_scope("not_cached", caching_device=""): v2_not_cached = variable_scope.get_variable("v", []) - self.assertFalse(v2_not_cached.value().device.startswith( - caching_device)) + self.assertFalse( + v2_not_cached.value().device.startswith(caching_device)) with variable_scope.variable_scope( "not_cached_identity_device", caching_device=lambda op: op.device): v2_identity_device = variable_scope.get_variable("v", []) - self.assertFalse(v2_identity_device.value().device.startswith( - caching_device)) + self.assertFalse( + v2_identity_device.value().device.startswith(caching_device)) with variable_scope.variable_scope("we_will_do_it_live") as vs_live: vs_live.set_caching_device("/job:live") @@ -484,15 +518,19 @@ class VariableScopeTest(test.TestCase): def testVarScopeGetOrCreateReuse(self): with self.test_session(): + def test_value(value): x = constant_op.constant(value) - with variable_scope.variable_scope("testVarScopeGetOrCreateReuse_bar", - reuse=variable_scope.AUTO_REUSE): + with variable_scope.variable_scope( + "testVarScopeGetOrCreateReuse_bar", + reuse=variable_scope.AUTO_REUSE): _ = state_ops.assign(variable_scope.get_variable("var", []), x) - with variable_scope.variable_scope("testVarScopeGetOrCreateReuse_bar", - reuse=variable_scope.AUTO_REUSE): + with variable_scope.variable_scope( + "testVarScopeGetOrCreateReuse_bar", + reuse=variable_scope.AUTO_REUSE): _ = variable_scope.get_variable("var", []) self.assertEqual(value, x.eval()) + test_value(42.) # Variable is created. test_value(13.) # Variable is reused hereafter. test_value(17.) 
@@ -551,19 +589,16 @@ class VariableScopeTest(test.TestCase): with variable_scope.variable_scope("default") as default: with variable_scope.variable_scope(None, "layer"): self.assertEqual( - variable_scope.get_variable("w", []).name, - "default/layer/w:0") + variable_scope.get_variable("w", []).name, "default/layer/w:0") with variable_scope.variable_scope(None, "layer"): self.assertEqual( - variable_scope.get_variable("w", []).name, - "default/layer_1/w:0") + variable_scope.get_variable("w", []).name, "default/layer_1/w:0") with variable_scope.variable_scope(default): pass # No matter the jump in the middle, unique numbering continues. with variable_scope.variable_scope(None, "layer"): self.assertEqual( - variable_scope.get_variable("w", []).name, - "default/layer_2/w:0") + variable_scope.get_variable("w", []).name, "default/layer_2/w:0") def testVarOpScopeReuse(self): with self.test_session(): @@ -935,12 +970,12 @@ class VariableScopeTest(test.TestCase): def testGetCollection(self): with self.test_session(): _ = variable_scope.get_variable("testGetCollection_a", []) - _ = variable_scope.get_variable("testGetCollection_b", [], - trainable=False) + _ = variable_scope.get_variable( + "testGetCollection_b", [], trainable=False) with variable_scope.variable_scope("testGetCollection_foo_") as scope1: _ = variable_scope.get_variable("testGetCollection_a", []) - _ = variable_scope.get_variable("testGetCollection_b", [], - trainable=False) + _ = variable_scope.get_variable( + "testGetCollection_b", [], trainable=False) self.assertEqual([ v.name for v in scope1.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES) @@ -954,8 +989,8 @@ class VariableScopeTest(test.TestCase): ]) with variable_scope.variable_scope("testGetCollection_foo") as scope2: _ = variable_scope.get_variable("testGetCollection_a", []) - _ = variable_scope.get_variable("testGetCollection_b", [], - trainable=False) + _ = variable_scope.get_variable( + "testGetCollection_b", [], trainable=False) 
self.assertEqual([ v.name for v in scope2.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES) @@ -992,22 +1027,22 @@ class VariableScopeTest(test.TestCase): with variable_scope.variable_scope( "testGetTrainableVariables_foo") as scope: _ = variable_scope.get_variable("testGetTrainableVariables_b", []) - _ = variable_scope.get_variable("testGetTrainableVariables_c", [], - trainable=False) - self.assertEqual([v.name - for v in scope.trainable_variables()], - ["testGetTrainableVariables_foo/" - "testGetTrainableVariables_b:0"]) + _ = variable_scope.get_variable( + "testGetTrainableVariables_c", [], trainable=False) + self.assertEqual( + [v.name for v in scope.trainable_variables()], + ["testGetTrainableVariables_foo/" + "testGetTrainableVariables_b:0"]) def testGetGlobalVariables(self): with self.test_session(): _ = variable_scope.get_variable("testGetGlobalVariables_a", []) with variable_scope.variable_scope("testGetGlobalVariables_foo") as scope: _ = variable_scope.get_variable("testGetGlobalVariables_b", []) - self.assertEqual([v.name - for v in scope.global_variables()], - ["testGetGlobalVariables_foo/" - "testGetGlobalVariables_b:0"]) + self.assertEqual( + [v.name for v in scope.global_variables()], + ["testGetGlobalVariables_foo/" + "testGetGlobalVariables_b:0"]) def testGetLocalVariables(self): with self.test_session(): @@ -1016,10 +1051,8 @@ class VariableScopeTest(test.TestCase): with variable_scope.variable_scope("foo") as scope: _ = variable_scope.get_variable( "b", [], collections=[ops.GraphKeys.LOCAL_VARIABLES]) - _ = variable_scope.get_variable( - "c", []) - self.assertEqual([v.name - for v in scope.local_variables()], ["foo/b:0"]) + _ = variable_scope.get_variable("c", []) + self.assertEqual([v.name for v in scope.local_variables()], ["foo/b:0"]) def testGetVariableWithRefDtype(self): v = variable_scope.get_variable("v", shape=[3, 4], dtype=dtypes.float32) @@ -1242,10 +1275,8 @@ class VariableScopeWithCustomGetterTest(test.TestCase): with 
ops.name_scope("prod_getter"): return g_0 * g_1 - with variable_scope.variable_scope( - "prod_scope", custom_getter=prod_getter): - with variable_scope.variable_scope( - "sum_scope", custom_getter=sum_getter): + with variable_scope.variable_scope("prod_scope", custom_getter=prod_getter): + with variable_scope.variable_scope("sum_scope", custom_getter=sum_getter): with variable_scope.variable_scope( "inner_sum_scope", custom_getter=sum_getter): # take sums of sums of products @@ -1270,9 +1301,8 @@ class VariableScopeWithCustomGetterTest(test.TestCase): np_vars, np_v = sess.run([true_vars, v]) # take products of sums of products self.assertAllClose( - np_v, - (((np_vars[0] * np_vars[1]) + (np_vars[2] * np_vars[3])) - + ((np_vars[4] * np_vars[5]) + (np_vars[6] * np_vars[7])))) + np_v, (((np_vars[0] * np_vars[1]) + (np_vars[2] * np_vars[3])) + ( + (np_vars[4] * np_vars[5]) + (np_vars[6] * np_vars[7])))) def testVariableCreator(self): @@ -1368,7 +1398,11 @@ class VariableScopeMultithreadedTest(test.TestCase): graph = ops.get_default_graph() threads = [ - threading.Thread(target=thread_fn, args=(i, graph,)) for i in range(2)] + threading.Thread(target=thread_fn, args=( + i, + graph, + )) for i in range(2) + ] threads[0].start() # Allow thread 0 to finish before starting thread 1. diff --git a/tensorflow/python/layers/base.py b/tensorflow/python/layers/base.py index 242cdff6f31a65277d38dcbdf789446be8c1c1fa..64db49c900c21d60ba2337f920d6fa2cb9ab7b5f 100644 --- a/tensorflow/python/layers/base.py +++ b/tensorflow/python/layers/base.py @@ -12,148 +12,91 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================= - -# pylint: disable=unused-import,g-bad-import-order """Contains the base Layer class, from which all layers inherit.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function -import collections import copy -import re -import weakref -import numpy as np from tensorflow.python.eager import context from tensorflow.python.estimator import util as estimator_util from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops -from tensorflow.python.framework import tensor_shape -from tensorflow.python.layers import utils as layers_util -from tensorflow.python.framework import tensor_util -from tensorflow.python.ops import array_ops +from tensorflow.python.keras._impl.keras.engine import base_layer from tensorflow.python.ops import variable_scope as vs from tensorflow.python.ops import variables as tf_variables -from tensorflow.python.platform import tf_logging as logging -from tensorflow.python.training import checkpointable from tensorflow.python.util import nest from tensorflow.python.util.tf_export import tf_export -@tf_export('layers.Layer') -class Layer(checkpointable.CheckpointableBase): - """Base layer class. +InputSpec = base_layer.InputSpec # pylint: disable=invalid-name - This is the class from which all layers inherit, implementing common - infrastructure functionality. - A layer is a class implementing common neural networks operations, such - as convolution, batch norm, etc. These operations require managing variables, - losses, and updates, as well as applying TensorFlow ops to input tensors. +@tf_export('layers.Layer') +class Layer(base_layer.Layer): + """Base layer class. - Users will just instantiate it and then treat it as a callable. + It is considered legacy, and we recommend the use of `tf.keras.layers.Layer` + instead. 
- We recommend that descendants of Layer implement the following methods: - * `__init__()`: Save configuration in member variables - * `build()`: Called once from `__call__`, when we know the shapes of inputs - and `dtype`. Should have the calls to `add_variable()`, and then - call the super's `build()` (which sets `self.built = True`, which is - nice in case the user wants to call `build()` manually before the - first `__call__`). - * `call()`: Called in `__call__` after making sure `build()` has been called - once. Should actually perform the logic of applying the layer to the - input tensors (which should be passed in as the first argument). + Arguments: + trainable: Boolean, whether the layer's variables should be trainable. + name: String name of the layer. + dtype: Default dtype of the layer's weights (default of `None` means use the + type of the first input). Read-only properties: - `name`: The name of the layer (string). - `dtype`: Default dtype of the layer (default of `None` means use the + name: The name of the layer (string). + dtype: Default dtype of the layer's weights (default of `None` means use the type of the first input). - `trainable_variables`: List of trainable variables. - `non_trainable_variables`: List of non-trainable variables. - `variables`: List of all variables of this layer, trainable and + trainable_variables: List of trainable variables. + non_trainable_variables: List of non-trainable variables. + variables: List of all variables of this layer, trainable and non-trainable. - `updates`: List of update ops of this layer. - `losses`: List of losses added by this layer. + updates: List of update ops of this layer. + losses: List of losses added by this layer. + trainable_weights: List of variables to be included in backprop. + non_trainable_weights: List of variables that should not be + included in backprop. + weights: The concatenation of the lists trainable_weights and + non_trainable_weights (in this order). 
Mutable properties: - `trainable`: Whether the layer should be trained (boolean). - `input_spec`: Optional (list of) `InputSpec` object(s) specifying the + trainable: Whether the layer should be trained (boolean). + input_spec: Optional (list of) `InputSpec` object(s) specifying the constraints on inputs that can be accepted by the layer. """ def __init__(self, trainable=True, name=None, dtype=None, - activity_regularizer=None, **kwargs): - # We use a kwargs dict here because these kwargs only exist - # for compatibility reasons. - # The list of kwargs is subject to changes in the future. - # We do not want to commit to it or to expose the list to users at all. - # Note this is exactly as safe as defining kwargs in the function signature, - # the only difference being that the list of valid kwargs is defined - # below rather rather in the signature, and default values are defined - # in calls to kwargs.get(). - allowed_kwargs = { - '_scope', - '_reuse', - 'input_shape', # For compatibility with Keras `Sequential` model. - 'batch_size', # For compatibility with Keras `Sequential` model. - } - for kwarg in kwargs: - if kwarg not in allowed_kwargs: - raise TypeError('Keyword argument not understood:', kwarg) - - # Mutable properties - # Indicates whether the layer's weights are updated during training - # and whether the layer's updates are run during training - self.trainable = trainable - # A stateful layer is a layer whose updates are run during inference too, - # for instance stateful RNNs. - self.stateful = False - # Indicates whether `build` needs to be called upon layer call, to create - # the layer's weights. - self.built = False - # Provides information about which inputs are compatible with the layer. - self.input_spec = None - - if activity_regularizer and context.executing_eagerly(): - raise ValueError( - ('Activity regularization is not supported when executing eagerly. 
' - 'Got activity_regularizer=%s') % (activity_regularizer,)) - self._activity_regularizer = activity_regularizer + **kwargs): + # For backwards compatibility, legacy layers do not use `ResourceVariable` + # by default. + self._use_resource_variables = False + scope = kwargs.pop('_scope', None) + self._reuse = kwargs.pop('_reuse', None) + + # Avoid an incorrect lint error self._trainable_weights = [] - self._non_trainable_weights = [] - self._updates = [] - # When executing eagerly, _losses is a list of zero-argument lambdas which - # return tensors. When using graph execution, _losses is a list of ops. - self._losses = [] - self._reuse = kwargs.get('_reuse') - self._graph = None # Will be set at build time. - self._dtype = None if dtype is None else dtypes.as_dtype(dtype).name - self._call_fn_args = estimator_util.fn_args(self.call) - self._compute_previous_mask = ('mask' in self._call_fn_args or - hasattr(self, 'compute_mask')) - self._call_has_scope_arg = 'scope' in self._call_fn_args - - # These lists will be filled via successive calls - # to self._add_inbound_node(). - self._inbound_nodes = [] - self._outbound_nodes = [] + self.built = False - self._init_set_name(name) + super(Layer, self).__init__(trainable=trainable, name=name, dtype=dtype, + **kwargs) - # Determine variable scope. - scope = kwargs.get('_scope') + self._graph = None + self._call_has_scope_arg = 'scope' in self._call_fn_args if scope: with vs.variable_scope(scope) as captured_scope: self._scope = captured_scope else: self._scope = None + self._current_scope = None - # Set `_batch_input_shape` attribute - # for compatibility with Keras `Sequential` model. 
- if 'input_shape' in kwargs: - batch_size = kwargs.get('batch_size') - self._batch_input_shape = (batch_size,) + tuple(kwargs['input_shape']) + @property + def graph(self): + if context.executing_eagerly(): + raise RuntimeError('Layer.graph not supported when executing eagerly.') + return self._graph def _init_set_name(self, name): # Determine layer name (non-unique). @@ -166,18 +109,15 @@ class Layer(checkpointable.CheckpointableBase): self._name, base_name = self._make_unique_name() self._base_name = base_name - @property - def dtype(self): - return self._dtype - - @property - def name(self): - return self._name - - @property - def activity_regularizer(self): - """Optional regularizer function for the output of this layer.""" - return self._activity_regularizer + def _make_unique_name(self, name_uid_map=None, avoid_names=None, + namespace='', zero_based=False): + base_name = base_layer.to_snake_case(self.__class__.__name__) + name = base_layer.unique_layer_name(base_name, + name_uid_map=name_uid_map, + avoid_names=avoid_names, + namespace=namespace, + zero_based=zero_based) + return (name, base_name) @property def scope_name(self): @@ -189,271 +129,16 @@ class Layer(checkpointable.CheckpointableBase): 'querying `scope_name`.') return self._scope.name - @property - def trainable_weights(self): - return self._trainable_weights if self.trainable else [] - - @property - def non_trainable_weights(self): - if self.trainable: - return self._non_trainable_weights - else: - return self._trainable_weights + self._non_trainable_weights - - @property - def trainable_variables(self): - return self.trainable_weights - - @property - def non_trainable_variables(self): - return self.non_trainable_weights - - @property - def weights(self): - """Returns the list of all layer variables/weights. - - Returns: - A list of variables. 
- """ - return self.trainable_weights + self.non_trainable_weights - - @property - def variables(self): - """Returns the list of all layer variables/weights. - - Returns: - A list of variables. - """ - return self.weights - - @property - def updates(self): - if context.executing_eagerly(): - raise RuntimeError('Layer.updates not supported in Eager mode.') - if not self.trainable and not self.stateful: - return [] - return self._updates - - def add_update(self, updates, inputs=None): - """Add update op(s), potentially dependent on layer inputs. - - Weight updates (for instance, the updates of the moving mean and variance - in a BatchNormalization layer) may be dependent on the inputs passed - when calling a layer. Hence, when reusing the same layer on - different inputs `a` and `b`, some entries in `layer.updates` may be - dependent on `a` and some on `b`. This method automatically keeps track - of dependencies. - - The `get_updates_for` method allows to retrieve the updates relevant to a - specific set of inputs. - - This call is ignored in Eager mode. - - Arguments: - updates: Update op, or list/tuple of update ops. - inputs: If anything other than None is passed, it signals the updates - are conditional on some of the layer's inputs, - and thus they should only be run where these inputs are available. - This is the case for BatchNormalization updates, for instance. - If None, the updates will be taken into account unconditionally, - and you are responsible for making sure that any dependency they might - have is available at runtime. - A step counter might fall into this category. - """ - if context.executing_eagerly(): - return # Updates already applied when in eager mode. 
- - updates = _to_list(updates) - updates = [x if isinstance(x, ops.Operation) - else ops.convert_to_tensor(x) for x in updates] - self._updates += updates - if inputs is None: - for u in updates: - u._unconditional_update = True # pylint: disable=protected-access - else: - for u in updates: - u._unconditional_update = False # pylint: disable=protected-access - - def get_updates_for(self, inputs): - """Retrieves updates relevant to a specific set of inputs. - - Arguments: - inputs: Input tensor or list/tuple of input tensors. - - Returns: - List of update ops of the layer that depend on `inputs`. - - Raises: - RuntimeError: If called in Eager mode. - """ - if context.executing_eagerly(): - raise RuntimeError('`get_updates_for()` not supported in Eager mode.') - - # Updates disabled if layer is not trainable and not explicitly stateful. - if not self.trainable and not self.stateful: - return [] - - if inputs is None: - # Requesting unconditional updates. - return [x for x in self.updates if x._unconditional_update] # pylint: disable=protected-access - - # Requesting input-conditional updates. - inputs = nest.flatten(inputs) - reachable = layers_util.get_reachable_from_inputs(inputs, self.updates) - updates = [] - for update in self.updates: - if update in reachable: - updates.append(update) - return updates - - @property - def losses(self): - """Losses which are associated with this `Layer`. - - Note that when executing eagerly, getting this property evaluates - regularizers. When using graph execution, variable regularization ops have - already been created and are simply returned here. - - Returns: - A list of tensors. - """ - if context.executing_eagerly(): - # _losses may only contain variable regularization losses when executing - # eagerly, and they have been saved as lambdas to be executed when - # requested. 
- return [regularizer() for regularizer in self._losses] - else: - return self._losses - def add_loss(self, losses, inputs=None): - """Add loss tensor(s), potentially dependent on layer inputs. - - Some losses (for instance, activity regularization losses) may be dependent - on the inputs passed when calling a layer. Hence, when reusing the same - layer on different inputs `a` and `b`, some entries in `layer.losses` may - be dependent on `a` and some on `b`. This method automatically keeps track - of dependencies. - - The `get_losses_for` method allows to retrieve the losses relevant to a - specific set of inputs. - - Note that `add_loss` is not supported when executing eagerly. Instead, - variable regularizers may be added through `add_variable`. Activity - regularization is not supported directly (but such losses may be returned - from `Layer.call()`). - - Arguments: - losses: Loss tensor, or list/tuple of tensors. - inputs: If anything other than None is passed, it signals the losses - are conditional on some of the layer's inputs, - and thus they should only be run where these inputs are available. - This is the case for activity regularization losses, for instance. - If `None` is passed, the losses are assumed - to be unconditional, and will apply across all dataflows of the layer - (e.g. weight regularization losses). - - Raises: - RuntimeError: If called in Eager mode. - """ - if context.executing_eagerly(): - # TODO(fchollet): it should be possible (and highly desirable) to support - # `add_loss` in eager mode. This allows great convenience and flexibility - # in defining custom losses on the fly (e.g. in VAEs). - # Simply appending the loss value to `self._losses` - # is the correct behavior. - # The only caveat is that we need to force the user to only call - # `add_loss` from inside a model or Layer's `call` method - # (otherwise the loss computation cannot be backproped through). 
- raise RuntimeError('Layer.add_loss not supported in Eager mode.') - - losses = _to_list(losses) - self._losses += losses - if inputs is None: - for loss in losses: - loss._unconditional_loss = True # pylint: disable=protected-access - else: - for loss in losses: - loss._unconditional_loss = False # pylint: disable=protected-access + previous_losses_length = len(self._losses) + super(Layer, self).add_loss(losses, inputs=inputs) # TODO(fchollet): deprecate collection below. - _add_elements_to_collection(losses, ops.GraphKeys.REGULARIZATION_LOSSES) - - def get_losses_for(self, inputs): - """Retrieves losses relevant to a specific set of inputs. - - Arguments: - inputs: Input tensor or list/tuple of input tensors. - - Returns: - List of loss tensors of the layer that depend on `inputs`. - - Raises: - RuntimeError: If called in Eager mode. - """ - if context.executing_eagerly(): - raise RuntimeError('Layer.get_losses_for not supported in Eager mode.') - - if inputs is None: - # Requesting unconditional losses. - return [x for x in self.losses if x._unconditional_loss] # pylint: disable=protected-access - - # Requesting input-conditional losses. - inputs = nest.flatten(inputs) - # Retrieve the set of tensors in the TF graph that depend on `inputs`. - # The losses we want to return will be part of this set. - # To avoid unnecessary work, we stop the search in case all of - # `self.losses` have been retrieved. - reachable = layers_util.get_reachable_from_inputs(inputs, self.losses) - losses = [] - for loss in self.losses: - if loss in reachable: - losses.append(loss) - return losses - - def build(self, _): - """Creates the variables of the layer.""" - self.built = True - - def call(self, inputs, **kwargs): # pylint: disable=unused-argument - """The logic of the layer lives here. + new_losses = self._losses[previous_losses_length:] + _add_elements_to_collection(new_losses, ops.GraphKeys.REGULARIZATION_LOSSES) - Arguments: - inputs: input tensor(s). 
- **kwargs: additional keyword arguments. - - Returns: - Output tensor(s). - """ - return inputs - - def _name_scope_name(self, current_variable_scope): + def _name_scope(self): """Determines op naming for the Layer.""" - return current_variable_scope.original_name_scope - - def compute_output_shape(self, input_shape): - """Computes the output shape of the layer given the input shape. - - Args: - input_shape: A (possibly nested tuple of) `TensorShape`. It need not - be fully defined (e.g. the batch size may be unknown). - - Returns: - A (possibly nested tuple of) `TensorShape`. - - Raises: - TypeError: if `input_shape` is not a (possibly nested tuple of) - `TensorShape`. - ValueError: if `input_shape` is incomplete or is incompatible with the - the layer. - """ - raise NotImplementedError - - def _make_unique_name(self, name_uid_map=None, avoid_names=None, - namespace='', zero_based=False): - base_name = _to_snake_case(self.__class__.__name__) - name = _unique_layer_name(base_name, name_uid_map=name_uid_map, - avoid_names=avoid_names, namespace=namespace, - zero_based=zero_based) - return (name, base_name) + return self._current_scope.original_name_scope def _set_scope(self, scope=None): if self._scope is None: @@ -467,10 +152,11 @@ class Layer(checkpointable.CheckpointableBase): scope, default_name=self._base_name) as captured_scope: self._scope = captured_scope - def add_variable(self, name, shape, dtype=None, - initializer=None, regularizer=None, - trainable=True, constraint=None, - partitioner=None): + def add_weight(self, name, shape, dtype=None, + initializer=None, regularizer=None, + trainable=True, constraint=None, + use_resource=None, + partitioner=None): """Adds a new variable to the layer, or gets an existing one; returns it. Arguments: @@ -486,6 +172,7 @@ class Layer(checkpointable.CheckpointableBase): then this parameter is ignored and any added variables are also marked as non-trainable. constraint: constraint instance (callable). 
+ use_resource: Whether to use `ResourceVariable`. partitioner: (optional) partitioner instance (callable). If provided, when the requested variable is created it will be split into multiple partitions according to `partitioner`. In this case, @@ -504,10 +191,6 @@ class Layer(checkpointable.CheckpointableBase): RuntimeError: If called with partioned variable regularization and eager execution is enabled. """ - - # `init_graph` should point to the graph in which variable initialization - # will occur; it should be None if and only if initialization will take - # place in the eager context. init_graph = None if not context.executing_eagerly(): default_graph = ops.get_default_graph() @@ -530,71 +213,43 @@ class Layer(checkpointable.CheckpointableBase): self._set_scope(None) reuse = self.built or self._reuse + prev_len_trainable = len(self._trainable_weights) with vs.variable_scope( self._scope, reuse=reuse, auxiliary_name_scope=False) as scope: - with ops.name_scope(self._name_scope_name(scope)): - variable = self._add_variable_with_custom_getter( - name=name, - shape=shape, - getter=vs.get_variable, - # Manage errors in Layer rather than Checkpointable. - overwrite=True, - initializer=initializer, + self._current_scope = scope + with ops.name_scope(self._name_scope()): + use_resource = (use_resource or + self._use_resource_variables or + scope.use_resource) + variable = super(Layer, self).add_weight( + name, + shape, dtype=dtypes.as_dtype(dtype), + initializer=initializer or scope.initializer, + trainable=trainable, constraint=constraint, - trainable=trainable and self.trainable, - partitioner=partitioner) - - if init_graph is not None: # pylint: disable=protected-access - # The variable was created and initialized in a graph. - - if variable in existing_variables: - # To match the behavior of tf.get_variable(), we only apply - # regularization if the variable is newly created. 
- return variable - + partitioner=partitioner, + use_resource=use_resource, + getter=vs.get_variable) + + if regularizer: + if context.executing_eagerly() or variable not in existing_variables: + self._handle_weight_regularization(name, variable, regularizer) + + if init_graph is not None: + # Handle edge case where a custom getter has overridden `trainable`. + # There is one known occurrence of this, in unit test + # testBasicRNNCellNotTrainable in + # contrib.rnn.python.kernel_tests.core_rnn_cell_test with init_graph.as_default(): trainable_variables = tf_variables.trainable_variables() if (trainable and self.trainable and variable not in trainable_variables): # A custom getter / variable scope overrode the trainable flag. - trainable = False - - if regularizer: - if isinstance(variable, tf_variables.PartitionedVariable): - for v in variable: - with ops.colocate_with(v.op): - with ops.name_scope(name + '/Regularizer'): - regularization = regularizer(v) - if regularization is not None: - self.add_loss(regularization) - else: - with ops.colocate_with(variable.op): - with ops.name_scope(name + '/Regularizer'): - regularization = regularizer(variable) - if regularization is not None: - self.add_loss(regularization) - elif regularizer: # and initialization took place in an eager context - if isinstance(variable, tf_variables.PartitionedVariable): - raise RuntimeError( - 'Partitioned variable regularization is not yet ' - 'supported when executing eagerly. File a feature request ' - 'if this is important to you.') - # Save a zero-argument lambda which runs the regularizer on the - # variable, to be executed when `Layer.losses` is requested. - # This makes losses responsive to variable updates when executing - # eagerly. 
- # - # TODO(akshayka): Do the same for graphs as well, so that losses - # collected in a while_loop can be run outside its control flow - # context and so that losses won't be swallowed up by graph functions - # (i.e., `.losses()` should always create regularizers). - self._losses.append(lambda: regularizer(variable)) - - if trainable: - self._trainable_weights.append(variable) - else: - self._non_trainable_weights.append(variable) + extra_trainable_vars = self._trainable_weights[prev_len_trainable:] + self._trainable_weights = self._trainable_weights[ + :prev_len_trainable] + self._non_trainable_weights += extra_trainable_vars return variable def __call__(self, inputs, *args, **kwargs): @@ -622,35 +277,14 @@ class Layer(checkpointable.CheckpointableBase): ValueError: if the layer's `call` method returns None (an invalid value). """ self._set_scope(kwargs.pop('scope', None)) - input_list = nest.flatten(inputs) - build_graph = not context.executing_eagerly() - # TODO(fchollet, allenl): Make deferred mode work with subclassed Models - # which don't use an "inputs" argument. - in_deferred_mode = isinstance(input_list[0], _DeferredTensor) - # Ensure the Layer, if being reused, is working with inputs from - # the same graph as where it was created. - if build_graph: + if not context.executing_eagerly(): try: # Set layer's "graph" at build time - self._graph = ops._get_graph_from_inputs(input_list, graph=self._graph) # pylint: disable=protected-access + self._graph = ops._get_graph_from_inputs(nest.flatten(inputs), # pylint: disable=protected-access + graph=self._graph) except ValueError as e: raise ValueError('Input graph and Layer graph are not the same: %s' % e) - if build_graph or in_deferred_mode: - user_kwargs = copy.copy(kwargs) - - # Handle Keras mask propagation from previous layer to current layer. 
- previous_mask = None - if (not hasattr(self, '_compute_previous_mask') or - self._compute_previous_mask): - previous_mask = _collect_previous_mask(inputs) - if not hasattr(self, '_call_fn_args'): - self._call_fn_args = estimator_util.fn_args(self.call) - if ('mask' in self._call_fn_args and 'mask' not in kwargs and - not _is_all_none(previous_mask)): - # The previous layer generated a mask, and mask was not explicitly pass - # to __call__, hence we set previous_mask as the default value. - kwargs['mask'] = previous_mask if self.built: try: @@ -667,133 +301,27 @@ class Layer(checkpointable.CheckpointableBase): else: scope_context_manager = vs.variable_scope( self._scope, reuse=self._reuse, auxiliary_name_scope=False) - input_shapes = None - with scope_context_manager as scope: - with ops.name_scope(self._name_scope_name(scope)): - if not self.built: - if not build_graph: - # Activity regularization is currently unsupported in Eager mode. - if self._activity_regularizer: - raise ValueError( - 'activity_regularizer currently unsupported with ' - 'eager execution enabled. Found an activity_regularizer in ' - '%s(%s).' % (self.__class__.__name__, self)) - if not build_graph and not in_deferred_mode: - # TODO(agarwal): support _keras_history in Eager mode. - for x in input_list: - if hasattr(x, '_keras_history'): - raise ValueError('_keras_history currently unsupported in ' - 'Eager mode. Found _keras_history in %s while ' - 'executing __call__ for %s(%s)' % - (x, self.__class_.__name__, self)) - - # Check input assumptions set before layer building, e.g. input rank. - self._assert_input_compatibility(inputs) - if input_list and self._dtype is None: - try: - self._dtype = input_list[0].dtype.base_dtype.name - except AttributeError: - pass - input_shapes = nest.map_structure(lambda x: x.get_shape(), inputs) - self.build(input_shapes) - try: - # Note: not all sub-classes of Layer call Layer.__init__ (especially - # the ones under tensorflow/python/keras). 
Hence we recompute this - # attribute here if it is not set. - # TODO(agarwal): Fix the sub-classes and avoid this complexity. - call_has_scope_arg = self._call_has_scope_arg - except AttributeError: - self._call_fn_args = estimator_util.fn_args(self.call) - self._call_has_scope_arg = 'scope' in self._call_fn_args - call_has_scope_arg = self._call_has_scope_arg - if call_has_scope_arg: - kwargs['scope'] = scope - # Check input assumptions set after layer building, e.g. input shape. - if build_graph or in_deferred_mode: - self._assert_input_compatibility(inputs) - - if not in_deferred_mode: - outputs = self.call(inputs, *args, **kwargs) - if outputs is None: - raise ValueError('A layer\'s `call` method should return a Tensor ' - 'or a list of Tensors, not None.') - else: - # Deferred mode behavior: use `compute_output_shape` to - # infer the number of outputs of the layer and their shapes. - if input_shapes is None: - input_shapes = nest.map_structure(lambda x: x.get_shape(), inputs) - - output_shapes = self.compute_output_shape(input_shapes) - output_shapes = nest.flatten(output_shapes) - outputs = [ - # TODO(fchollet): name the deferred tensors? - _DeferredTensor(shape=shape, dtype=self._dtype) - for shape in output_shapes - ] - if len(outputs) == 1: - outputs = outputs[0] - if build_graph: - # Apply activity regularization. - # Note that it should be applied every time the layer creates a new - # output, since it is output-specific. - if self._activity_regularizer: - output_list = nest.flatten(outputs) - for output in output_list: - with ops.name_scope('ActivityRegularizer'): - activity_regularization = self._activity_regularizer(output) - self.add_loss(activity_regularization, inputs=inputs) + with scope_context_manager as scope: + self._current_scope = scope - # TODO(fchollet): consider enabling masking for Eager mode. 
- if hasattr(self, 'compute_mask'): - output_mask = self.compute_mask(inputs, previous_mask) - if isinstance(outputs, (list, tuple)): - if output_mask is None: - output_mask = [None for _ in range(len(outputs))] - for x, m in zip(outputs, output_mask): - x._keras_mask = m # pylint: disable=protected-access - else: - outputs._keras_mask = output_mask # pylint: disable=protected-access + try: + call_has_scope_arg = self._call_has_scope_arg + except AttributeError: + self._call_fn_args = estimator_util.fn_args(self.call) + self._call_has_scope_arg = 'scope' in self._call_fn_args + call_has_scope_arg = self._call_has_scope_arg + if call_has_scope_arg: + kwargs['scope'] = scope - if build_graph: - # If all input tensors have history metadata, - # we update the output tensors - # with corresponding history metadata, thus eventually allowing to use - # these tensors to instantiate a Network. - if _have_all_keras_metadata(inputs): - # If the layer returns tensors from its inputs, unmodified, - # we copy them to avoid loss of tensor metadata. - output_ls = nest.flatten(outputs) - output_ls_copy = [] - for x in output_ls: - if x in input_list: - with ops.name_scope(scope.original_name_scope): - x = array_ops.identity(x) - output_ls_copy.append(x) - if len(output_ls_copy) == 1: - outputs = output_ls_copy[0] - else: - outputs = output_ls_copy + # Actually call layer + outputs = super(Layer, self).__call__(inputs, *args, **kwargs) + if not context.executing_eagerly(): # Update global default collections. _add_elements_to_collection(self.updates, ops.GraphKeys.UPDATE_OPS) - - if in_deferred_mode or build_graph: - if _have_all_keras_metadata(inputs): - # Add an inbound node to the layer, so it can keep track of this call. - # This updates the layer history of the output tensor(s). 
- self._add_inbound_node( - input_tensors=inputs, output_tensors=outputs, arguments=user_kwargs) - - self.built = True return outputs - @property - def graph(self): - if context.executing_eagerly(): - raise RuntimeError('Layer.graph not supported in Eager mode.') - return self._graph - def __deepcopy__(self, memo): no_copy = set(['_graph']) shallow_copy = set(['_scope', '_always_reuse_variable_scope']) @@ -805,658 +333,12 @@ class Layer(checkpointable.CheckpointableBase): setattr(result, k, v) elif k in shallow_copy: setattr(result, k, copy.copy(v)) - elif _is_tensor_or_tensor_list(v): + elif base_layer.is_tensor_or_tensor_list(v): setattr(result, k, v) else: setattr(result, k, copy.deepcopy(v, memo)) return result - def apply(self, inputs, *args, **kwargs): - """Apply the layer on a input. - - This simply wraps `self.__call__`. - - Arguments: - inputs: Input tensor(s). - *args: additional positional arguments to be passed to `self.call`. - **kwargs: additional keyword arguments to be passed to `self.call`. - - Returns: - Output tensor(s). - """ - return self.__call__(inputs, *args, **kwargs) - - def _add_inbound_node(self, - input_tensors, - output_tensors, - arguments=None): - """Internal method to create an inbound node for the layer. - - Arguments: - input_tensors: list of input tensors. - output_tensors: list of output tensors. - arguments: dictionary of keyword arguments that were passed to the - `call` method of the layer at the call that created the node. - """ - input_tensors = nest.flatten(input_tensors) - output_tensors = nest.flatten(output_tensors) - - # Collect input tensor(s) coordinates. 
- inbound_layers = [] - node_indices = [] - tensor_indices = [] - for x in input_tensors: - assert hasattr(x, '_keras_history') - inbound_layer, node_index, tensor_index = x._keras_history # pylint: disable=protected-access - inbound_layers.append(inbound_layer) - node_indices.append(node_index) - tensor_indices.append(tensor_index) - - # Create node, add it to inbound nodes. - Node( - self, - inbound_layers=inbound_layers, - node_indices=node_indices, - tensor_indices=tensor_indices, - input_tensors=input_tensors, - output_tensors=output_tensors, - arguments=arguments) - - # Update tensor history metadata. - for i in range(len(output_tensors)): - # The metadata attribute consists of 1) a layer instance - # 2) a node index for the layer, 3) a tensor index for the node. - # The allows layer reuse (multiple nodes per layer) and multi-output - # or multi-input layers (e.g. a layer can return multiple tensors, - # and each can be sent to a different layer). - output_tensors[i]._keras_history = (self, len(self._inbound_nodes) - 1, i) # pylint: disable=protected-access - - def _get_node_attribute_at_index(self, node_index, attr, attr_name): - """Private utility to retrieves an attribute (e.g. inputs) from a node. - - This is used to implement the methods: - - get_input_shape_at - - get_output_shape_at - - get_input_at - etc... - - Arguments: - node_index: Integer index of the node from which - to retrieve the attribute. - attr: Exact node attribute name. - attr_name: Human-readable attribute name, for error messages. - - Returns: - The layer's attribute `attr` at the node of index `node_index`. - - Raises: - RuntimeError: If the layer has no inbound nodes, or if called in Eager - mode. - ValueError: If the index provided does not match any node. 
- """ - if not self._inbound_nodes: - raise RuntimeError('The layer has never been called ' - 'and thus has no defined ' + attr_name + '.') - if not len(self._inbound_nodes) > node_index: - raise ValueError('Asked to get ' + attr_name + ' at node ' + - str(node_index) + ', but the layer has only ' + - str(len(self._inbound_nodes)) + ' inbound nodes.') - values = getattr(self._inbound_nodes[node_index], attr) - if len(values) == 1: - return values[0] - else: - return values - - def get_input_shape_at(self, node_index): - """Retrieves the input shape(s) of a layer at a given node. - - Arguments: - node_index: Integer, index of the node - from which to retrieve the attribute. - E.g. `node_index=0` will correspond to the - first time the layer was called. - - Returns: - A shape tuple - (or list of shape tuples if the layer has multiple inputs). - - Raises: - RuntimeError: If called in Eager mode. - """ - return self._get_node_attribute_at_index(node_index, 'input_shapes', - 'input shape') - - def get_output_shape_at(self, node_index): - """Retrieves the output shape(s) of a layer at a given node. - - Arguments: - node_index: Integer, index of the node - from which to retrieve the attribute. - E.g. `node_index=0` will correspond to the - first time the layer was called. - - Returns: - A shape tuple - (or list of shape tuples if the layer has multiple outputs). - - Raises: - RuntimeError: If called in Eager mode. - """ - if context.executing_eagerly(): - raise RuntimeError( - 'Layer.get_output_shape_at not supported in Eager mode.') - return self._get_node_attribute_at_index(node_index, 'output_shapes', - 'output shape') - - def get_input_at(self, node_index): - """Retrieves the input tensor(s) of a layer at a given node. - - Arguments: - node_index: Integer, index of the node - from which to retrieve the attribute. - E.g. `node_index=0` will correspond to the - first time the layer was called. 
- - Returns: - A tensor (or list of tensors if the layer has multiple inputs). - - Raises: - RuntimeError: If called in Eager mode. - """ - if context.executing_eagerly(): - raise RuntimeError('Layer.get_input_at not supported in Eager mode.') - return self._get_node_attribute_at_index(node_index, 'input_tensors', - 'input') - - def get_output_at(self, node_index): - """Retrieves the output tensor(s) of a layer at a given node. - - Arguments: - node_index: Integer, index of the node - from which to retrieve the attribute. - E.g. `node_index=0` will correspond to the - first time the layer was called. - - Returns: - A tensor (or list of tensors if the layer has multiple outputs). - - Raises: - RuntimeError: If called in Eager mode. - """ - return self._get_node_attribute_at_index(node_index, 'output_tensors', - 'output') - - @property - def input(self): - """Retrieves the input tensor(s) of a layer. - - Only applicable if the layer has exactly one input, - i.e. if it is connected to one incoming layer. - - Returns: - Input tensor or list of input tensors. - - Raises: - AttributeError: if the layer is connected to - more than one incoming layers. - - Raises: - RuntimeError: If called in Eager mode. - AttributeError: If no inbound nodes are found. - """ - if not self._inbound_nodes: - raise AttributeError('Layer ' + self.name + - ' is not connected, no input to return.') - return self._get_node_attribute_at_index(0, 'input_tensors', 'input') - - @property - def output(self): - """Retrieves the output tensor(s) of a layer. - - Only applicable if the layer has exactly one output, - i.e. if it is connected to one incoming layer. - - Returns: - Output tensor or list of output tensors. - - Raises: - AttributeError: if the layer is connected to more than one incoming - layers. - RuntimeError: if called in Eager mode. 
- """ - if not self._inbound_nodes: - raise AttributeError('Layer ' + self.name + ' has no inbound nodes.') - return self._get_node_attribute_at_index(0, 'output_tensors', 'output') - - @property - def input_shape(self): - """Retrieves the input shape(s) of a layer. - - Only applicable if the layer has exactly one input, - i.e. if it is connected to one incoming layer, or if all inputs - have the same shape. - - Returns: - Input shape, as an integer shape tuple - (or list of shape tuples, one tuple per input tensor). - - Raises: - AttributeError: if the layer has no defined input_shape. - RuntimeError: if called in Eager mode. - """ - if not self._inbound_nodes: - raise AttributeError('The layer has never been called ' - 'and thus has no defined input shape.') - all_input_shapes = set( - [str(node.input_shapes) for node in self._inbound_nodes]) - if len(all_input_shapes) == 1: - input_shapes = self._inbound_nodes[0].input_shapes - if len(input_shapes) == 1: - return tuple(tensor_shape.TensorShape(input_shapes[0]).as_list()) - else: - return [ - tuple(tensor_shape.TensorShape(shape).as_list()) - for shape in input_shapes - ] - else: - raise AttributeError('The layer "' + str(self.name) + - ' has multiple inbound nodes, ' - 'with different input shapes. Hence ' - 'the notion of "input shape" is ' - 'ill-defined for the layer. ' - 'Use `get_input_shape_at(node_index)` ' - 'instead.') - - def count_params(self): - """Count the total number of scalars composing the weights. - - Returns: - An integer count. - - Raises: - ValueError: if the layer isn't yet built - (in which case its weights aren't yet defined). - """ - if not self.built: - if self.__class__.__name__ == 'Sequential': - self.build() # pylint: disable=no-value-for-parameter - else: - raise ValueError('You tried to call `count_params` on ' + self.name + - ', but the layer isn\'t built. 
' - 'You can build it manually via: `' + self.name + - '.build(batch_input_shape)`.') - weight_shapes = [w.get_shape().as_list() for w in self.weights] - return int(sum([np.prod(w) for w in weight_shapes])) - - @property - def output_shape(self): - """Retrieves the output shape(s) of a layer. - - Only applicable if the layer has one output, - or if all outputs have the same shape. - - Returns: - Output shape, as an integer shape tuple - (or list of shape tuples, one tuple per output tensor). - - Raises: - AttributeError: if the layer has no defined output shape. - RuntimeError: if called in Eager mode. - """ - if not self._inbound_nodes: - raise AttributeError('The layer has never been called ' - 'and thus has no defined output shape.') - all_output_shapes = set( - [str(node.output_shapes) for node in self._inbound_nodes]) - if len(all_output_shapes) == 1: - output_shapes = self._inbound_nodes[0].output_shapes - if len(output_shapes) == 1: - return tuple(tensor_shape.TensorShape(output_shapes[0]).as_list()) - else: - return [ - tuple(tensor_shape.TensorShape(shape).as_list()) - for shape in output_shapes - ] - else: - raise AttributeError('The layer "%s"' - ' has multiple inbound nodes, ' - 'with different output shapes. Hence ' - 'the notion of "output shape" is ' - 'ill-defined for the layer. ' - 'Use `get_output_shape_at(node_index)` ' - 'instead.' % self.name) - - @property - def inbound_nodes(self): - """Deprecated, do NOT use! Only for compatibility with external Keras.""" - return self._inbound_nodes - - @property - def outbound_nodes(self): - """Deprecated, do NOT use! Only for compatibility with external Keras.""" - return self._outbound_nodes - - def _assert_input_compatibility(self, inputs): - """Checks compatibility between the layer and provided inputs. - - This checks that the tensor(s) `inputs` verify the input assumptions - of the layer (if any). If not, a clear and actional exception gets raised. 
- - Arguments: - inputs: input tensor or list of input tensors. - - Raises: - ValueError: in case of mismatch between - the provided inputs and the expectations of the layer. - """ - if not self.input_spec: - return - if not isinstance(self.input_spec, (list, tuple)): - input_spec = nest.flatten(self.input_spec) - else: - input_spec = self.input_spec - inputs = nest.flatten(inputs) - if len(inputs) != len(input_spec): - raise ValueError('Layer ' + self.name + ' expects ' + - str(len(input_spec)) + ' inputs, ' - 'but it received ' + str(len(inputs)) + - ' input tensors. Inputs received: ' + str(inputs)) - for input_index, (x, spec) in enumerate(zip(inputs, input_spec)): - if spec is None: - continue - - if (spec.ndim is not None or - spec.min_ndim is not None or - spec.max_ndim is not None): - if x.get_shape().ndims is None: - raise ValueError('Input ' + str(input_index) + ' of layer ' + - self.name + ' is incompatible with the layer: ' - 'its rank is undefined, but the layer requires a ' - 'defined rank.') - - # Check ndim. - if spec.ndim is not None: - ndim = x.get_shape().ndims - if ndim != spec.ndim: - raise ValueError('Input ' + str(input_index) + ' of layer ' + - self.name + ' is incompatible with the layer: ' - 'expected ndim=' + str(spec.ndim) + ', found ndim=' + - str(ndim) + '. Full shape received: ' + - str(x.get_shape().as_list())) - if spec.max_ndim is not None: - ndim = x.get_shape().ndims - if ndim is not None and ndim > spec.max_ndim: - raise ValueError('Input ' + str(input_index) + ' of layer ' + - self.name + ' is incompatible with the layer: ' - 'expected max_ndim=' + str(spec.max_ndim) + - ', found ndim=' + str(ndim)) - if spec.min_ndim is not None: - ndim = x.get_shape().ndims - if ndim is not None and ndim < spec.min_ndim: - raise ValueError('Input ' + str(input_index) + ' of layer ' + - self.name + ' is incompatible with the layer: ' - ': expected min_ndim=' + str(spec.min_ndim) + - ', found ndim=' + str(ndim) + - '. 
Full shape received: ' + - str(x.get_shape().as_list())) - # Check dtype. - if spec.dtype is not None: - if x.dtype != spec.dtype: - raise ValueError('Input ' + str(input_index) + ' of layer ' + - self.name + ' is incompatible with the layer: ' - 'expected dtype=' + str(spec.dtype) + - ', found dtype=' + str(x.dtype)) - # Check specific shape axes. - if spec.axes: - shape = x.get_shape().as_list() - if shape is not None: - for axis, value in spec.axes.items(): - if hasattr(value, 'value'): - value = value.value - if value is not None and shape[int(axis)] not in {value, None}: - raise ValueError( - 'Input ' + str(input_index) + ' of layer ' + self.name + ' is' - ' incompatible with the layer: expected axis ' + str(axis) + - ' of input shape to have value ' + str(value) + - ' but received input with shape ' + str(shape)) - # Check shape. - if spec.shape is not None: - shape = x.get_shape().as_list() - if shape is not None: - for spec_dim, dim in zip(spec.shape, shape): - if spec_dim is not None and dim is not None: - if spec_dim != dim: - raise ValueError('Input ' + str(input_index) + - ' is incompatible with layer ' + self.name + - ': expected shape=' + str(spec.shape) + - ', found shape=' + str(shape)) - - -@tf_export('keras.layers.InputSpec', 'layers.InputSpec') -class InputSpec(object): - """Specifies the ndim, dtype and shape of every input to a layer. - - Every layer should expose (if appropriate) an `input_spec` attribute: - a list of instances of InputSpec (one per input tensor). - - A None entry in a shape is compatible with any dimension, - a None shape is compatible with any shape. - - Arguments: - dtype: Expected DataType of the input. - shape: Shape tuple, expected shape of the input - (may include None for unchecked axes). - ndim: Integer, expected rank of the input. - max_ndim: Integer, maximum rank of the input. - min_ndim: Integer, minimum rank of the input. - axes: Dictionary mapping integer axes to - a specific dimension value. 
- """ - - def __init__(self, - dtype=None, - shape=None, - ndim=None, - max_ndim=None, - min_ndim=None, - axes=None): - self.dtype = dtype - self.shape = shape - if shape is not None: - self.ndim = len(shape) - else: - self.ndim = ndim - self.max_ndim = max_ndim - self.min_ndim = min_ndim - self.axes = axes or {} - - def __repr__(self): - spec = [('dtype=' + str(self.dtype)) if self.dtype else '', - ('shape=' + str(self.shape)) if self.shape else '', - ('ndim=' + str(self.ndim)) if self.ndim else '', - ('max_ndim=' + str(self.max_ndim)) if self.max_ndim else '', - ('min_ndim=' + str(self.min_ndim)) if self.min_ndim else '', - ('axes=' + str(self.axes)) if self.axes else ''] - return 'InputSpec(%s)' % ', '.join(x for x in spec if x) - - -class Node(object): - """A `Node` describes the connectivity between two layers. - - Each time a layer is connected to some new input, - a node is added to `layer._inbound_nodes`. - Each time the output of a layer is used by another layer, - a node is added to `layer._outbound_nodes`. - - Arguments: - outbound_layer: the layer that takes - `input_tensors` and turns them into `output_tensors` - (the node gets created when the `call` - method of the layer was called). - inbound_layers: a list of layers, the same length as `input_tensors`, - the layers from where `input_tensors` originate. - node_indices: a list of integers, the same length as `inbound_layers`. - `node_indices[i]` is the origin node of `input_tensors[i]` - (necessary since each inbound layer might have several nodes, - e.g. if the layer is being shared with a different data stream). - tensor_indices: a list of integers, - the same length as `inbound_layers`. - `tensor_indices[i]` is the index of `input_tensors[i]` within the - output of the inbound layer - (necessary since each inbound layer might - have multiple tensor outputs, with each one being - independently manipulable). - input_tensors: list of input tensors. - output_tensors: list of output tensors. 
- arguments: dictionary of keyword arguments that were passed to the - `call` method of the layer at the call that created the node. - - `node_indices` and `tensor_indices` are basically fine-grained coordinates - describing the origin of the `input_tensors`. - - A node from layer A to layer B is added to: - - A._outbound_nodes - - B._inbound_nodes - """ - - def __init__(self, - outbound_layer, - inbound_layers, - node_indices, - tensor_indices, - input_tensors, - output_tensors, - arguments=None): - # Layer instance (NOT a list). - if isinstance(outbound_layer, list): - raise ValueError( - '`outbound_layer` should be a layer instance, not a list.') - # this is the layer that takes a list of input tensors - # and turns them into a list of output tensors. - # the current node will be added to - # the inbound_nodes of outbound_layer. - self.outbound_layer = outbound_layer - - # The following 3 properties describe where - # the input tensors come from: which layers, - # and for each layer, which node and which - # tensor output of each node. - - # List of layer instances. - self.inbound_layers = inbound_layers - # List of integers, 1:1 mapping with inbound_layers. - self.node_indices = node_indices - # List of integers, 1:1 mapping with inbound_layers. - self.tensor_indices = tensor_indices - - # Following 2 properties: - # tensor inputs and outputs of outbound_layer. - - # List of tensors. 1:1 mapping with inbound_layers. - self.input_tensors = input_tensors - # List of tensors, created by outbound_layer.call(). - self.output_tensors = output_tensors - - # Following 2 properties: input and output shapes. - - # List of shape tuples, shapes of input_tensors. - self.input_shapes = [layers_util.static_shape(x) for x in input_tensors] - # List of shape tuples, shapes of output_tensors. - self.output_shapes = [layers_util.static_shape(x) for x in output_tensors] - - # Optional keyword arguments to layer's `call`. 
- self.arguments = arguments - - # Add nodes to all layers involved. - for layer in inbound_layers: - if layer is not None: - # For compatibility with external Keras, we use the deprecated - # accessor here. - layer.outbound_nodes.append(self) - # For compatibility with external Keras, we use the deprecated - # accessor here. - outbound_layer.inbound_nodes.append(self) - - def get_config(self): - inbound_names = [] - for layer in self.inbound_layers: - if layer: - inbound_names.append(layer.name) - else: - inbound_names.append(None) - return { - 'outbound_layer': self.outbound_layer.name, - 'inbound_layers': inbound_names, - 'node_indices': self.node_indices, - 'tensor_indices': self.tensor_indices - } - - -class _DeferredTensor(object): - """Tensor-like object used to build graphs of layers in Eager mode. - - When calling a layer on a DeferredTensor, the layer will not perform any - computation and will simply perfom shape inference to return new - DeferredTensors with appropriate shape information. Thus DeferredTensor - behaves like a graph-mode Tensor when manipulated by layers. 
- """ - - def __init__(self, shape, dtype, name=None): - self.shape = tensor_shape.TensorShape(shape) - if dtype is None: - self.dtype = dtypes.as_dtype(np.float32) - else: - self.dtype = dtypes.as_dtype(dtype) - self.name = name - - def get_shape(self): - return self.shape - - def __str__(self): - return "DeferredTensor('%s', shape=%s, dtype=%s)" % (self.name, - self.get_shape(), - self.dtype.name) - - def __repr__(self): - return "<_DeferredTensor '%s' shape=%s dtype=%s>" % (self.name, - self.get_shape(), - self.dtype.name) - - -def _is_tensor_or_tensor_list(v): - v = nest.flatten(v) - if v and isinstance(v[0], ops.Tensor): - return True - else: - return False - - -def _to_snake_case(name): - intermediate = re.sub('(.)([A-Z][a-z0-9]+)', r'\1_\2', name) - insecure = re.sub('([a-z])([A-Z])', r'\1_\2', intermediate).lower() - # If the class is private the name starts with "_" which is not secure - # for creating scopes. We prefix the name with "private" in this case. - if insecure[0] != '_': - return insecure - return 'private' + insecure - - -def _to_list(x): - """This normalizes a list/tuple or single element into a list. - - If a single element is passed, we return - a list of size 1 containing the element. - - Arguments: - x: list or tuple or single element. - - Returns: - A list. - """ - if isinstance(x, (list, tuple)): - return list(x) - return [x] - def _add_elements_to_collection(elements, collection_list): if context.executing_eagerly(): @@ -1472,105 +354,3 @@ def _add_elements_to_collection(elements, collection_list): if element not in collection_set: collection.append(element) - -def _is_all_none(iterable_or_element): - if not isinstance(iterable_or_element, (list, tuple)): - iterable = [iterable_or_element] - else: - iterable = iterable_or_element - # We cannot use Python's `any` because the iterable may return Tensors. 
- for element in iterable: - if element is not None: - return False - return True - - -def _have_all_keras_metadata(iterable_or_element): - if not isinstance(iterable_or_element, (list, tuple)): - iterable = [iterable_or_element] - else: - iterable = iterable_or_element - return all([hasattr(x, '_keras_history') for x in iterable]) - - -def _collect_previous_mask(input_tensors): - """Retrieves the output mask(s) of the previous node. - - Arguments: - input_tensors: A tensor or list of tensors. - - Returns: - A mask tensor or list of mask tensors. - """ - input_tensors = nest.flatten(input_tensors) - masks = [] - for x in input_tensors: - if hasattr(x, '_keras_mask'): - mask = x._keras_mask # pylint: disable=protected-access - masks.append(mask) - else: - masks.append(None) - if len(masks) == 1: - return masks[0] - return masks - - -# A global dictionary mapping graph objects to an index of counters used -# for various layer names in each graph. -# Allows to give unique autogenerated names to layers, in a graph-specific way. -PER_GRAPH_LAYER_NAME_UIDS = weakref.WeakKeyDictionary() - - -def _get_default_graph_uid_map(): - graph = ops.get_default_graph() - name_uid_map = PER_GRAPH_LAYER_NAME_UIDS.get(graph, None) - if name_uid_map is None: - name_uid_map = collections.defaultdict(int) - PER_GRAPH_LAYER_NAME_UIDS[graph] = name_uid_map - return name_uid_map - - -def _unique_layer_name(name, name_uid_map=None, avoid_names=None, namespace='', - zero_based=False): - """Makes a layer name (or arbitrary string) unique within a TensorFlow graph. - - Arguments: - name: String name to make unique. - name_uid_map: An optional defaultdict(int) to use when creating unique - names. If None (default), uses a per-Graph dictionary. - avoid_names: An optional set or dict with names which should not be used. If - None (default) does not avoid any names. - namespace: Gets a name which is unique within the (graph, namespace). 
Layers - which are not Networks use a blank namespace and so get graph-global - names. - zero_based: If True, name sequences start with no suffix (e.g. "dense", - "dense_1"). If False, naming is one-based ("dense_1", "dense_2"). - - Returns: - Unique string name. - - Example: - - ```python - _unique_layer_name('dense') # dense_1 - _unique_layer_name('dense') # dense_2 - ``` - """ - if name_uid_map is None: - name_uid_map = _get_default_graph_uid_map() - if avoid_names is None: - avoid_names = set() - proposed_name = None - while proposed_name is None or proposed_name in avoid_names: - name_key = (namespace, name) - if zero_based: - number = name_uid_map[name_key] - if number: - proposed_name = name + '_' + str(number) - else: - proposed_name = name - name_uid_map[name_key] += 1 - else: - name_uid_map[name_key] += 1 - proposed_name = name + '_' + str(name_uid_map[name_key]) - return proposed_name diff --git a/tensorflow/python/layers/base_test.py b/tensorflow/python/layers/base_test.py index 9ed4afeaba931c47d2a1e65f08489773f0b9eb1b..f08b552840f5ff9144edae1cb0f90a1bc3db0f8c 100644 --- a/tensorflow/python/layers/base_test.py +++ b/tensorflow/python/layers/base_test.py @@ -52,6 +52,12 @@ class BaseLayerTest(test.TestCase): layer = base_layers.Layer(name='my_layer', trainable=False) self.assertEqual(layer.trainable, False) + @test_util.run_in_graph_and_eager_modes() + def testInt64Layer(self): + layer = base_layers.Layer(name='my_layer', dtype='int64') + layer.add_variable('my_var', [2, 2]) + self.assertEqual(layer.name, 'my_layer') + @test_util.run_in_graph_and_eager_modes() def testAddWeight(self): layer = base_layers.Layer(name='my_layer') @@ -94,61 +100,6 @@ class BaseLayerTest(test.TestCase): with self.assertRaisesRegexp(ValueError, 'activity_regularizer'): core_layers.Dense(1, activity_regularizer=lambda *args, **kwargs: 0.) 
- def testGetVariable(self): - with self.test_session(): - - class MyLayer(base_layers.Layer): - - def build(self, input_shape): - self.my_var = self.add_variable( - 'my_var', [2, 2], initializer=init_ops.zeros_initializer()) - - def call(self, inputs): - return inputs * 2 - - layer = MyLayer(name='my_layer') - inputs = random_ops.random_uniform((5,), seed=1) - layer.apply(inputs) - layer.apply(inputs) - self.assertEqual([v.name for v in layer.variables], - ['my_layer/my_var:0']) - - # Creating a layer with no scope leads to lazy construction of - # the scope at apply() time. It uses scope "/base_name" - lazy_layer = MyLayer(_reuse=True) - with variable_scope.variable_scope('new_scope'): - with variable_scope.variable_scope('my_layer'): - variable_scope.get_variable('my_var', [2, 2]) - - # Smoke test: it runs. - lazy_layer.apply(inputs) - # The variables were created outside of the Layer, and - # reuse=True, so the Layer does not own them and they are not - # stored in its collection. - self.assertEqual(lazy_layer.variables, []) - self.assertEqual(lazy_layer._scope.name, 'new_scope/my_layer') - - # Creating a layer with no scope leads to lazy construction of - # the scope at apply() time. If 'scope' argument is passed to - # apply(), it uses that scope when accessing variables. - lazy_layer = MyLayer(_reuse=True) - with variable_scope.variable_scope('new_scope') as new_scope: - variable_scope.get_variable('my_var', [2, 2]) - - # Smoke test: it runs. - lazy_layer.apply(inputs, scope=new_scope) - # The variables were created outside of the Layer, and - # reuse=True, so the Layer does not own them and they are not - # stored in its collection. - self.assertEqual(lazy_layer.variables, []) - self.assertEqual(lazy_layer._scope.name, 'new_scope') - - # Checking for graph equality is only done in GRAPH mode. 
- with ops.Graph().as_default(): - inputs_ng = random_ops.random_uniform((5,), seed=1) - with self.assertRaisesRegexp(ValueError, r'graph are not the same'): - layer.apply(inputs_ng) - @test_util.run_in_graph_and_eager_modes() def testCall(self): @@ -165,38 +116,6 @@ class BaseLayerTest(test.TestCase): # op is only supported in GRAPH mode self.assertEqual(outputs.op.name, 'my_layer/Square') - def testFirstCallCanCreateVariablesButSecondCanNotWhenBuildEmpty(self): - # Note that this test is only run in Graph mode since with EAGER mode we can - # still create a new variable on second call. - - class MyLayer(base_layers.Layer): - - def build(self, _): - # Do not mark the layer as built. - pass - - def call(self, inputs): - self.my_var = self.add_variable('my_var', [2, 2]) - if self.built: - # Skip creating on the first call; try to create after it's - # built. This is expected to fail. - self.add_variable('this_will_break_on_second_call', [2, 2]) - return inputs + math_ops.square(self.my_var) - - layer = MyLayer(name='my_layer') - inputs = random_ops.random_uniform((2,), seed=1) - outputs = layer.apply(inputs) - self.assertEqual(layer.built, True) - self.assertEqual(outputs.op.name, 'my_layer/add') - self.assertEqual([v.name - for v in layer.variables], ['my_layer/my_var:0']) - with self.assertRaisesRegexp(ValueError, - 'my_layer/this_will_break_on_second_call'): - layer.apply(inputs) - # The list of variables hasn't changed. 
- self.assertEqual([v.name - for v in layer.variables], ['my_layer/my_var:0']) - @test_util.run_in_graph_and_eager_modes() def testDeepCopy(self): @@ -645,13 +564,14 @@ class BaseLayerTest(test.TestCase): def testLayerGraphSetInFirstApply(self): with ops.Graph().as_default(): - layer = core_layers.Dense(1) # Graph at construction time is ignored + # Graph at construction time is ignored + layer = core_layers.Dense(1) with ops.Graph().as_default(): - layer.apply(constant_op.constant([[1]])) + layer.apply(constant_op.constant([[1.]])) # layer is now bound to second Graph with ops.Graph().as_default(), self.assertRaisesRegexp( ValueError, 'Input graph and Layer graph are not the same'): - layer.apply(constant_op.constant([[1]])) + layer.apply(constant_op.constant([[1.]])) if __name__ == '__main__': diff --git a/tensorflow/python/layers/convolutional.py b/tensorflow/python/layers/convolutional.py index 2d99b1688f1b2736c0660ba2ac914018b21bf9ed..34a1487e748e41eebae8b87b17c34d0deda8597f 100644 --- a/tensorflow/python/layers/convolutional.py +++ b/tensorflow/python/layers/convolutional.py @@ -23,6 +23,7 @@ from __future__ import print_function from tensorflow.python.eager import context from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape +from tensorflow.python.keras._impl.keras import layers as keras_layers from tensorflow.python.layers import base from tensorflow.python.layers import utils from tensorflow.python.ops import array_ops @@ -32,201 +33,8 @@ from tensorflow.python.ops import nn_ops from tensorflow.python.util.tf_export import tf_export -class _Conv(base.Layer): - """Abstract nD convolution layer (private, used as implementation base). - - This layer creates a convolution kernel that is convolved - (actually cross-correlated) with the layer input to produce a tensor of - outputs. If `use_bias` is True (and a `bias_initializer` is provided), - a bias vector is created and added to the outputs. 
Finally, if - `activation` is not `None`, it is applied to the outputs as well. - - Arguments: - rank: An integer, the rank of the convolution, e.g. "2" for 2D convolution. - filters: Integer, the dimensionality of the output space (i.e. the number - of filters in the convolution). - kernel_size: An integer or tuple/list of n integers, specifying the - length of the convolution window. - strides: An integer or tuple/list of n integers, - specifying the stride length of the convolution. - Specifying any stride value != 1 is incompatible with specifying - any `dilation_rate` value != 1. - padding: One of `"valid"` or `"same"` (case-insensitive). - data_format: A string, one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape - `(batch, ..., channels)` while `channels_first` corresponds to - inputs with shape `(batch, channels, ...)`. - dilation_rate: An integer or tuple/list of n integers, specifying - the dilation rate to use for dilated convolution. - Currently, specifying any `dilation_rate` value != 1 is - incompatible with specifying any `strides` value != 1. - activation: Activation function. Set it to None to maintain a - linear activation. - use_bias: Boolean, whether the layer uses a bias. - kernel_initializer: An initializer for the convolution kernel. - bias_initializer: An initializer for the bias vector. If None, the default - initializer will be used. - kernel_regularizer: Optional regularizer for the convolution kernel. - bias_regularizer: Optional regularizer for the bias vector. - activity_regularizer: Optional regularizer function for the output. - kernel_constraint: Optional projection function to be applied to the - kernel after being updated by an `Optimizer` (e.g. used to implement - norm constraints or value constraints for layer weights). 
The function - must take as input the unprojected variable and must return the - projected variable (which must have the same shape). Constraints are - not safe to use when doing asynchronous distributed training. - bias_constraint: Optional projection function to be applied to the - bias after being updated by an `Optimizer`. - trainable: Boolean, if `True` also add variables to the graph collection - `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`). - name: A string, the name of the layer. - """ - - def __init__(self, rank, - filters, - kernel_size, - strides=1, - padding='valid', - data_format='channels_last', - dilation_rate=1, - activation=None, - use_bias=True, - kernel_initializer=None, - bias_initializer=init_ops.zeros_initializer(), - kernel_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - kernel_constraint=None, - bias_constraint=None, - trainable=True, - name=None, - **kwargs): - super(_Conv, self).__init__(trainable=trainable, name=name, - activity_regularizer=activity_regularizer, - **kwargs) - self.rank = rank - self.filters = filters - self.kernel_size = utils.normalize_tuple(kernel_size, rank, 'kernel_size') - self.strides = utils.normalize_tuple(strides, rank, 'strides') - self.padding = utils.normalize_padding(padding) - self.data_format = utils.normalize_data_format(data_format) - self.dilation_rate = utils.normalize_tuple( - dilation_rate, rank, 'dilation_rate') - self.activation = activation - self.use_bias = use_bias - self.kernel_initializer = kernel_initializer - self.bias_initializer = bias_initializer - self.kernel_regularizer = kernel_regularizer - self.bias_regularizer = bias_regularizer - self.kernel_constraint = kernel_constraint - self.bias_constraint = bias_constraint - self.input_spec = base.InputSpec(ndim=self.rank + 2) - - def build(self, input_shape): - input_shape = tensor_shape.TensorShape(input_shape) - if self.data_format == 'channels_first': - channel_axis = 1 - else: - channel_axis = -1 - if 
input_shape[channel_axis].value is None: - raise ValueError('The channel dimension of the inputs ' - 'should be defined. Found `None`.') - input_dim = input_shape[channel_axis].value - kernel_shape = self.kernel_size + (input_dim, self.filters) - - self.kernel = self.add_variable(name='kernel', - shape=kernel_shape, - initializer=self.kernel_initializer, - regularizer=self.kernel_regularizer, - constraint=self.kernel_constraint, - trainable=True, - dtype=self.dtype) - if self.use_bias: - self.bias = self.add_variable(name='bias', - shape=(self.filters,), - initializer=self.bias_initializer, - regularizer=self.bias_regularizer, - constraint=self.bias_constraint, - trainable=True, - dtype=self.dtype) - else: - self.bias = None - self.input_spec = base.InputSpec(ndim=self.rank + 2, - axes={channel_axis: input_dim}) - self._convolution_op = nn_ops.Convolution( - input_shape, - filter_shape=self.kernel.get_shape(), - dilation_rate=self.dilation_rate, - strides=self.strides, - padding=self.padding.upper(), - data_format=utils.convert_data_format(self.data_format, - self.rank + 2)) - self.built = True - - def call(self, inputs): - outputs = self._convolution_op(inputs, self.kernel) - - if self.use_bias: - if self.data_format == 'channels_first': - if self.rank == 1: - # nn.bias_add does not accept a 1D input tensor. - bias = array_ops.reshape(self.bias, (1, self.filters, 1)) - outputs += bias - if self.rank == 2: - outputs = nn.bias_add(outputs, self.bias, data_format='NCHW') - if self.rank == 3: - # As of Mar 2017, direct addition is significantly slower than - # bias_add when computing gradients. To use bias_add, we collapse Z - # and Y into a single dimension to obtain a 4D input tensor. 
- outputs_shape = outputs.shape.as_list() - if outputs_shape[0] is None: - outputs_shape[0] = -1 - outputs_4d = array_ops.reshape(outputs, - [outputs_shape[0], outputs_shape[1], - outputs_shape[2] * outputs_shape[3], - outputs_shape[4]]) - outputs_4d = nn.bias_add(outputs_4d, self.bias, data_format='NCHW') - outputs = array_ops.reshape(outputs_4d, outputs_shape) - else: - outputs = nn.bias_add(outputs, self.bias, data_format='NHWC') - - if self.activation is not None: - return self.activation(outputs) - return outputs - - def compute_output_shape(self, input_shape): - input_shape = tensor_shape.TensorShape(input_shape).as_list() - if self.data_format == 'channels_last': - space = input_shape[1:-1] - new_space = [] - for i in range(len(space)): - new_dim = utils.conv_output_length( - space[i], - self.kernel_size[i], - padding=self.padding, - stride=self.strides[i], - dilation=self.dilation_rate[i]) - new_space.append(new_dim) - return tensor_shape.TensorShape([input_shape[0]] + new_space + - [self.filters]) - else: - space = input_shape[2:] - new_space = [] - for i in range(len(space)): - new_dim = utils.conv_output_length( - space[i], - self.kernel_size[i], - padding=self.padding, - stride=self.strides[i], - dilation=self.dilation_rate[i]) - new_space.append(new_dim) - return tensor_shape.TensorShape([input_shape[0], self.filters] + - new_space) - - @tf_export('layers.Conv1D') -class Conv1D(_Conv): +class Conv1D(keras_layers.Conv1D, base.Layer): """1D convolution layer (e.g. temporal convolution). This layer creates a convolution kernel that is convolved @@ -294,8 +102,7 @@ class Conv1D(_Conv): trainable=True, name=None, **kwargs): - super(Convolution1D, self).__init__( - rank=1, + super(Conv1D, self).__init__( filters=filters, kernel_size=kernel_size, strides=strides, @@ -417,7 +224,7 @@ def conv1d(inputs, @tf_export('layers.Conv2D') -class Conv2D(_Conv): +class Conv2D(keras_layers.Conv2D, base.Layer): """2D convolution layer (e.g. 
spatial convolution over images). This layer creates a convolution kernel that is convolved @@ -493,7 +300,6 @@ class Conv2D(_Conv): name=None, **kwargs): super(Conv2D, self).__init__( - rank=2, filters=filters, kernel_size=kernel_size, strides=strides, @@ -622,7 +428,7 @@ def conv2d(inputs, @tf_export('layers.Conv3D') -class Conv3D(_Conv): +class Conv3D(keras_layers.Conv3D, base.Layer): """3D convolution layer (e.g. spatial convolution over volumes). This layer creates a convolution kernel that is convolved @@ -699,7 +505,6 @@ class Conv3D(_Conv): name=None, **kwargs): super(Conv3D, self).__init__( - rank=3, filters=filters, kernel_size=kernel_size, strides=strides, @@ -828,169 +633,8 @@ def conv3d(inputs, return layer.apply(inputs) -class _SeparableConv(_Conv): - """Abstract base layer for separable nD convolution. - - This layer performs a depthwise convolution that acts separately on - channels, followed by a pointwise convolution that mixes channels. - If `use_bias` is True and a bias initializer is provided, - it adds a bias vector to the output. - It then optionally applies an activation function to produce the final output. - - Arguments: - rank: An integer, the rank of the convolution, e.g. "2" for 2D convolution. - filters: Integer, the dimensionality of the output space (i.e. the number - of filters in the convolution). - kernel_size: A tuple or list of integers specifying the spatial - dimensions of the filters. Can be a single integer to specify the same - value for all spatial dimensions. - strides: A tuple or list of integers specifying the strides - of the convolution. Can be a single integer to specify the same value for - all spatial dimensions. - Specifying any `stride` value != 1 is incompatible with specifying - any `dilation_rate` value != 1. - padding: One of `"valid"` or `"same"` (case-insensitive). - data_format: A string, one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. 
- `channels_last` corresponds to inputs with shape - `(batch, ..., channels)` while `channels_first` corresponds to - inputs with shape `(batch, channels, ...)`. - dilation_rate: An integer or tuple/list of 2 integers, specifying - the dilation rate to use for dilated convolution. - Can be a single integer to specify the same value for - all spatial dimensions. - Currently, specifying any `dilation_rate` value != 1 is - incompatible with specifying any stride value != 1. - depth_multiplier: The number of depthwise convolution output channels for - each input channel. The total number of depthwise convolution output - channels will be equal to `num_filters_in * depth_multiplier`. - activation: Activation function. Set it to None to maintain a - linear activation. - use_bias: Boolean, whether the layer uses a bias. - depthwise_initializer: An initializer for the depthwise convolution kernel. - pointwise_initializer: An initializer for the pointwise convolution kernel. - bias_initializer: An initializer for the bias vector. If None, the default - initializer will be used. - depthwise_regularizer: Optional regularizer for the depthwise - convolution kernel. - pointwise_regularizer: Optional regularizer for the pointwise - convolution kernel. - bias_regularizer: Optional regularizer for the bias vector. - activity_regularizer: Optional regularizer function for the output. - depthwise_constraint: Optional projection function to be applied to the - depthwise kernel after being updated by an `Optimizer` (e.g. used for - norm constraints or value constraints for layer weights). The function - must take as input the unprojected variable and must return the - projected variable (which must have the same shape). Constraints are - not safe to use when doing asynchronous distributed training. - pointwise_constraint: Optional projection function to be applied to the - pointwise kernel after being updated by an `Optimizer`. 
- bias_constraint: Optional projection function to be applied to the - bias after being updated by an `Optimizer`. - trainable: Boolean, if `True` also add variables to the graph collection - `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`). - name: A string, the name of the layer. - """ - - def __init__(self, - rank, - filters, - kernel_size, - strides=1, - padding='valid', - data_format='channels_last', - dilation_rate=1, - depth_multiplier=1, - activation=None, - use_bias=True, - depthwise_initializer=None, - pointwise_initializer=None, - bias_initializer=init_ops.zeros_initializer(), - depthwise_regularizer=None, - pointwise_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - depthwise_constraint=None, - pointwise_constraint=None, - bias_constraint=None, - trainable=True, - name=None, - **kwargs): - super(_SeparableConv, self).__init__( - rank=rank, - filters=filters, - kernel_size=kernel_size, - strides=strides, - padding=padding, - data_format=data_format, - dilation_rate=dilation_rate, - activation=activation, - use_bias=use_bias, - bias_regularizer=bias_regularizer, - activity_regularizer=activity_regularizer, - bias_constraint=bias_constraint, - trainable=trainable, - name=name, - **kwargs) - self.depth_multiplier = depth_multiplier - self.depthwise_initializer = depthwise_initializer - self.pointwise_initializer = pointwise_initializer - self.depthwise_regularizer = depthwise_regularizer - self.pointwise_regularizer = pointwise_regularizer - self.depthwise_constraint = depthwise_constraint - self.pointwise_constraint = pointwise_constraint - - def build(self, input_shape): - input_shape = tensor_shape.TensorShape(input_shape) - if self.data_format == 'channels_first': - channel_axis = 1 - else: - channel_axis = -1 - if input_shape[channel_axis].value is None: - raise ValueError('The channel dimension of the inputs ' - 'should be defined. 
Found `None`.') - input_dim = input_shape[channel_axis].value - self.input_spec = base.InputSpec(ndim=self.rank + 2, - axes={channel_axis: input_dim}) - depthwise_kernel_shape = self.kernel_size + (input_dim, - self.depth_multiplier) - pointwise_kernel_shape = ( - 1,) * self.rank + (self.depth_multiplier * input_dim, self.filters) - - self.depthwise_kernel = self.add_variable( - name='depthwise_kernel', - shape=depthwise_kernel_shape, - initializer=self.depthwise_initializer, - regularizer=self.depthwise_regularizer, - constraint=self.depthwise_constraint, - trainable=True, - dtype=self.dtype) - self.pointwise_kernel = self.add_variable( - name='pointwise_kernel', - shape=pointwise_kernel_shape, - initializer=self.pointwise_initializer, - regularizer=self.pointwise_regularizer, - constraint=self.pointwise_constraint, - trainable=True, - dtype=self.dtype) - if self.use_bias: - self.bias = self.add_variable(name='bias', - shape=(self.filters,), - initializer=self.bias_initializer, - regularizer=self.bias_regularizer, - constraint=self.bias_constraint, - trainable=True, - dtype=self.dtype) - else: - self.bias = None - self.built = True - - def call(self, inputs): - raise NotImplementedError - - @tf_export('layers.SeparableConv1D') -class SeparableConv1D(_SeparableConv): +class SeparableConv1D(keras_layers.SeparableConv1D, base.Layer): """Depthwise separable 1D convolution. This layer performs a depthwise convolution that acts separately on @@ -1072,7 +716,6 @@ class SeparableConv1D(_SeparableConv): name=None, **kwargs): super(SeparableConv1D, self).__init__( - rank=1, filters=filters, kernel_size=kernel_size, strides=strides, @@ -1096,45 +739,9 @@ class SeparableConv1D(_SeparableConv): name=name, **kwargs) - def call(self, inputs): - if self.data_format == 'channels_last': - strides = (1,) + self.strides * 2 + (1,) - spatial_start_dim = 1 - else: - strides = (1, 1) + self.strides * 2 - spatial_start_dim = 2 - - # Explicitly broadcast inputs and kernels to 4D. 
- # TODO(fchollet): refactor when a native separable_conv1d op is available. - inputs = array_ops.expand_dims(inputs, spatial_start_dim) - depthwise_kernel = array_ops.expand_dims(self.depthwise_kernel, 0) - pointwise_kernel = array_ops.expand_dims(self.pointwise_kernel, 0) - dilation_rate = (1,) + self.dilation_rate - - outputs = nn.separable_conv2d( - inputs, - depthwise_kernel, - pointwise_kernel, - strides=strides, - padding=self.padding.upper(), - rate=dilation_rate, - data_format=utils.convert_data_format(self.data_format, ndim=4)) - - if self.use_bias: - outputs = nn.bias_add( - outputs, - self.bias, - data_format=utils.convert_data_format(self.data_format, ndim=4)) - - outputs = array_ops.squeeze(outputs, [spatial_start_dim]) - - if self.activation is not None: - return self.activation(outputs) - return outputs - @tf_export('layers.SeparableConv2D') -class SeparableConv2D(_SeparableConv): +class SeparableConv2D(keras_layers.SeparableConv2D, base.Layer): """Depthwise separable 2D convolution. This layer performs a depthwise convolution that acts separately on @@ -1221,7 +828,6 @@ class SeparableConv2D(_SeparableConv): name=None, **kwargs): super(SeparableConv2D, self).__init__( - rank=2, filters=filters, kernel_size=kernel_size, strides=strides, @@ -1245,31 +851,6 @@ class SeparableConv2D(_SeparableConv): name=name, **kwargs) - def call(self, inputs): - # Apply the actual ops. 
- if self.data_format == 'channels_last': - strides = (1,) + self.strides + (1,) - else: - strides = (1, 1) + self.strides - outputs = nn.separable_conv2d( - inputs, - self.depthwise_kernel, - self.pointwise_kernel, - strides=strides, - padding=self.padding.upper(), - rate=self.dilation_rate, - data_format=utils.convert_data_format(self.data_format, ndim=4)) - - if self.use_bias: - outputs = nn.bias_add( - outputs, - self.bias, - data_format=utils.convert_data_format(self.data_format, ndim=4)) - - if self.activation is not None: - return self.activation(outputs) - return outputs - @tf_export('layers.separable_conv1d') def separable_conv1d(inputs, @@ -1511,7 +1092,7 @@ def separable_conv2d(inputs, @tf_export('layers.Conv2DTranspose') -class Conv2DTranspose(Conv2D): +class Conv2DTranspose(keras_layers.Conv2DTranspose, base.Layer): """Transposed 2D convolution layer (sometimes called 2D Deconvolution). The need for transposed convolutions generally arises @@ -1576,8 +1157,8 @@ class Conv2DTranspose(Conv2D): name=None, **kwargs): super(Conv2DTranspose, self).__init__( - filters, - kernel_size, + filters=filters, + kernel_size=kernel_size, strides=strides, padding=padding, data_format=data_format, @@ -1593,120 +1174,6 @@ class Conv2DTranspose(Conv2D): trainable=trainable, name=name, **kwargs) - self.input_spec = base.InputSpec(ndim=4) - - def build(self, input_shape): - if len(input_shape) != 4: - raise ValueError('Inputs should have rank 4. Received input shape: ' + - str(input_shape)) - if self.data_format == 'channels_first': - channel_axis = 1 - else: - channel_axis = -1 - if input_shape[channel_axis] is None: - raise ValueError('The channel dimension of the inputs ' - 'should be defined. 
Found `None`.') - input_dim = input_shape[channel_axis] - self.input_spec = base.InputSpec(ndim=4, axes={channel_axis: input_dim}) - kernel_shape = self.kernel_size + (self.filters, input_dim) - - self.kernel = self.add_variable(name='kernel', - shape=kernel_shape, - initializer=self.kernel_initializer, - regularizer=self.kernel_regularizer, - constraint=self.kernel_constraint, - trainable=True, - dtype=self.dtype) - if self.use_bias: - self.bias = self.add_variable(name='bias', - shape=(self.filters,), - initializer=self.bias_initializer, - regularizer=self.bias_regularizer, - constraint=self.bias_constraint, - trainable=True, - dtype=self.dtype) - else: - self.bias = None - self.built = True - - def call(self, inputs): - inputs_shape = array_ops.shape(inputs) - batch_size = inputs_shape[0] - if self.data_format == 'channels_first': - c_axis, h_axis, w_axis = 1, 2, 3 - else: - c_axis, h_axis, w_axis = 3, 1, 2 - - height, width = inputs_shape[h_axis], inputs_shape[w_axis] - kernel_h, kernel_w = self.kernel_size - stride_h, stride_w = self.strides - - # Infer the dynamic output shape: - out_height = utils.deconv_output_length(height, - kernel_h, - self.padding, - stride_h) - out_width = utils.deconv_output_length(width, - kernel_w, - self.padding, - stride_w) - if self.data_format == 'channels_first': - output_shape = (batch_size, self.filters, out_height, out_width) - strides = (1, 1, stride_h, stride_w) - else: - output_shape = (batch_size, out_height, out_width, self.filters) - strides = (1, stride_h, stride_w, 1) - - output_shape_tensor = array_ops.stack(output_shape) - outputs = nn.conv2d_transpose( - inputs, - self.kernel, - output_shape_tensor, - strides, - padding=self.padding.upper(), - data_format=utils.convert_data_format(self.data_format, ndim=4)) - - if not context.executing_eagerly(): - # Infer the static output shape: - out_shape = inputs.get_shape().as_list() - out_shape[c_axis] = self.filters - out_shape[h_axis] = 
utils.deconv_output_length(out_shape[h_axis], - kernel_h, - self.padding, - stride_h) - out_shape[w_axis] = utils.deconv_output_length(out_shape[w_axis], - kernel_w, - self.padding, - stride_w) - outputs.set_shape(out_shape) - - if self.use_bias: - outputs = nn.bias_add( - outputs, - self.bias, - data_format=utils.convert_data_format(self.data_format, ndim=4)) - - if self.activation is not None: - return self.activation(outputs) - return outputs - - def compute_output_shape(self, input_shape): - input_shape = tensor_shape.TensorShape(input_shape).as_list() - output_shape = list(input_shape) - if self.data_format == 'channels_first': - c_axis, h_axis, w_axis = 1, 2, 3 - else: - c_axis, h_axis, w_axis = 3, 1, 2 - - kernel_h, kernel_w = self.kernel_size - stride_h, stride_w = self.strides - - output_shape[c_axis] = self.filters - output_shape[h_axis] = utils.deconv_output_length( - output_shape[h_axis], kernel_h, self.padding, stride_h) - output_shape[w_axis] = utils.deconv_output_length( - output_shape[w_axis], kernel_w, self.padding, stride_w) - return tensor_shape.TensorShape(output_shape) @tf_export('layers.conv2d_transpose') @@ -1806,7 +1273,7 @@ def conv2d_transpose(inputs, @tf_export('layers.Conv3DTranspose') -class Conv3DTranspose(Conv3D): +class Conv3DTranspose(keras_layers.Conv3DTranspose, base.Layer): """Transposed 3D convolution layer (sometimes called 3D Deconvolution). 
Arguments: @@ -1885,153 +1352,6 @@ class Conv3DTranspose(Conv3D): trainable=trainable, name=name, **kwargs) - self.input_spec = base.InputSpec(ndim=5) - - def build(self, input_shape): - if len(input_shape) != 5: - raise ValueError('Inputs should have rank 5, received input shape:', - str(input_shape)) - if self.data_format == 'channels_first': - channel_axis = 1 - else: - channel_axis = -1 - if input_shape[channel_axis] is None: - raise ValueError('The channel dimension of the inputs ' - 'should be defined, found None: ' + str(input_shape)) - input_dim = input_shape[channel_axis] - kernel_shape = self.kernel_size + (self.filters, input_dim) - - self.kernel = self.add_variable( - 'kernel', - shape=kernel_shape, - initializer=self.kernel_initializer, - regularizer=self.kernel_regularizer, - constraint=self.kernel_constraint, - trainable=True, - dtype=self.dtype) - if self.use_bias: - self.bias = self.add_variable( - 'bias', - shape=(self.filters,), - initializer=self.bias_initializer, - regularizer=self.bias_regularizer, - constraint=self.bias_constraint, - trainable=True, - dtype=self.dtype) - else: - self.bias = None - self.built = True - - def call(self, inputs): - inputs_shape = array_ops.shape(inputs) - batch_size = inputs_shape[0] - if self.data_format == 'channels_first': - c_axis, d_axis, h_axis, w_axis = 1, 2, 3, 4 - else: - c_axis, d_axis, h_axis, w_axis = 4, 1, 2, 3 - - self.input_spec = base.InputSpec(ndim=5, - axes={c_axis: inputs_shape[c_axis]}) - - depth = inputs_shape[d_axis] - height = inputs_shape[h_axis] - width = inputs_shape[w_axis] - - kernel_d, kernel_h, kernel_w = self.kernel_size - stride_d, stride_h, stride_w = self.strides - - # Infer the dynamic output shape: - out_depth = utils.deconv_output_length(depth, - kernel_d, - self.padding, - stride_d) - out_height = utils.deconv_output_length(height, - kernel_h, - self.padding, - stride_h) - out_width = utils.deconv_output_length(width, - kernel_w, - self.padding, - stride_w) - if 
self.data_format == 'channels_first': - output_shape = (batch_size, self.filters, out_depth, out_height, - out_width) - strides = (1, 1, stride_d, stride_h, stride_w) - else: - output_shape = (batch_size, out_depth, out_height, out_width, - self.filters) - strides = (1, stride_d, stride_h, stride_w, 1) - - output_shape_tensor = array_ops.stack(output_shape) - outputs = nn.conv3d_transpose( - inputs, - self.kernel, - output_shape_tensor, - strides, - data_format=utils.convert_data_format(self.data_format, ndim=5), - padding=self.padding.upper()) - - if not context.executing_eagerly(): - # Infer the static output shape: - out_shape = inputs.get_shape().as_list() - out_shape[c_axis] = self.filters - out_shape[d_axis] = utils.deconv_output_length(out_shape[d_axis], - kernel_d, - self.padding, - stride_d) - out_shape[h_axis] = utils.deconv_output_length(out_shape[h_axis], - kernel_h, - self.padding, - stride_h) - out_shape[w_axis] = utils.deconv_output_length(out_shape[w_axis], - kernel_w, - self.padding, - stride_w) - outputs.set_shape(out_shape) - - if self.use_bias: - outputs_shape = outputs.shape.as_list() - if outputs_shape[0] is None: - outputs_shape[0] = -1 - if self.data_format == 'channels_first': - outputs_4d = array_ops.reshape(outputs, [ - outputs_shape[0], outputs_shape[1], - outputs_shape[2] * outputs_shape[3], outputs_shape[4] - ]) - else: - outputs_4d = array_ops.reshape(outputs, [ - outputs_shape[0], outputs_shape[1] * outputs_shape[2], - outputs_shape[3], outputs_shape[4] - ]) - outputs_4d = nn.bias_add( - outputs_4d, - self.bias, - data_format=utils.convert_data_format(self.data_format, ndim=4)) - outputs = array_ops.reshape(outputs_4d, outputs_shape) - - if self.activation is not None: - return self.activation(outputs) - return outputs - - def compute_output_shape(self, input_shape): - input_shape = tensor_shape.TensorShape(input_shape).as_list() - output_shape = list(input_shape) - if self.data_format == 'channels_first': - c_axis, d_axis, h_axis, 
w_axis = 1, 2, 3, 4 - else: - c_axis, d_axis, h_axis, w_axis = 4, 1, 2, 3 - - kernel_d, kernel_h, kernel_w = self.kernel_size - stride_d, stride_h, stride_w = self.strides - - output_shape[c_axis] = self.filters - output_shape[d_axis] = utils.deconv_output_length( - output_shape[d_axis], kernel_d, self.padding, stride_d) - output_shape[h_axis] = utils.deconv_output_length( - output_shape[h_axis], kernel_h, self.padding, stride_h) - output_shape[w_axis] = utils.deconv_output_length( - output_shape[w_axis], kernel_w, self.padding, stride_w) - return tensor_shape.TensorShape(output_shape) @tf_export('layers.conv3d_transpose') diff --git a/tensorflow/python/layers/core.py b/tensorflow/python/layers/core.py index e598d9f83ab21f2dd5fabb3dd37fa0bfb5f003a4..6d8e9eac878bb2eb65bfa29e872a0576a39af662 100644 --- a/tensorflow/python/layers/core.py +++ b/tensorflow/python/layers/core.py @@ -27,23 +27,14 @@ import six from six.moves import xrange # pylint: disable=redefined-builtin import numpy as np -from tensorflow.python.eager import context -from tensorflow.python.framework import ops -from tensorflow.python.framework import tensor_shape +from tensorflow.python.keras._impl.keras import layers as keras_layers from tensorflow.python.layers import base -from tensorflow.python.layers import utils -from tensorflow.python.ops import array_ops from tensorflow.python.ops import init_ops -from tensorflow.python.ops import math_ops -from tensorflow.python.ops import gen_math_ops -from tensorflow.python.ops import nn -from tensorflow.python.ops import nn_ops -from tensorflow.python.ops import standard_ops from tensorflow.python.util.tf_export import tf_export @tf_export('layers.Dense') -class Dense(base.Layer): +class Dense(keras_layers.Dense, base.Layer): """Densely-connected layer class. 
This layer implements the operation: @@ -108,73 +99,19 @@ class Dense(base.Layer): trainable=True, name=None, **kwargs): - super(Dense, self).__init__(trainable=trainable, name=name, + super(Dense, self).__init__(units=units, + activation=activation, + use_bias=use_bias, + kernel_initializer=kernel_initializer, + bias_initializer=bias_initializer, + kernel_regularizer=kernel_regularizer, + bias_regularizer=bias_regularizer, activity_regularizer=activity_regularizer, + kernel_constraint=kernel_constraint, + bias_constraint=bias_constraint, + trainable=trainable, + name=name, **kwargs) - self.units = units - self.activation = activation - self.use_bias = use_bias - self.kernel_initializer = kernel_initializer - self.bias_initializer = bias_initializer - self.kernel_regularizer = kernel_regularizer - self.bias_regularizer = bias_regularizer - self.kernel_constraint = kernel_constraint - self.bias_constraint = bias_constraint - self.input_spec = base.InputSpec(min_ndim=2) - - def build(self, input_shape): - input_shape = tensor_shape.TensorShape(input_shape) - if input_shape[-1].value is None: - raise ValueError('The last dimension of the inputs to `Dense` ' - 'should be defined. 
Found `None`.') - self.input_spec = base.InputSpec(min_ndim=2, - axes={-1: input_shape[-1].value}) - self.kernel = self.add_variable('kernel', - shape=[input_shape[-1].value, self.units], - initializer=self.kernel_initializer, - regularizer=self.kernel_regularizer, - constraint=self.kernel_constraint, - dtype=self.dtype, - trainable=True) - if self.use_bias: - self.bias = self.add_variable('bias', - shape=[self.units,], - initializer=self.bias_initializer, - regularizer=self.bias_regularizer, - constraint=self.bias_constraint, - dtype=self.dtype, - trainable=True) - else: - self.bias = None - self.built = True - - def call(self, inputs): - inputs = ops.convert_to_tensor(inputs, dtype=self.dtype) - shape = inputs.get_shape().as_list() - if len(shape) > 2: - # Broadcasting is required for the inputs. - outputs = standard_ops.tensordot(inputs, self.kernel, [[len(shape) - 1], - [0]]) - # Reshape the output back to the original ndim of the input. - if not context.executing_eagerly(): - output_shape = shape[:-1] + [self.units] - outputs.set_shape(output_shape) - else: - outputs = gen_math_ops.mat_mul(inputs, self.kernel) - if self.use_bias: - outputs = nn.bias_add(outputs, self.bias) - if self.activation is not None: - return self.activation(outputs) # pylint: disable=not-callable - return outputs - - def compute_output_shape(self, input_shape): - input_shape = tensor_shape.TensorShape(input_shape) - input_shape = input_shape.with_rank_at_least(2) - if input_shape[-1].value is None: - raise ValueError( - 'The innermost dimension of input_shape must be defined, but saw: %s' - % input_shape) - return input_shape[:-1].concatenate(self.units) @tf_export('layers.dense') @@ -254,7 +191,7 @@ def dense( @tf_export('layers.Dropout') -class Dropout(base.Layer): +class Dropout(keras_layers.Dropout, base.Layer): """Applies Dropout to the input. 
Dropout consists in randomly setting a fraction `rate` of input units to 0 @@ -282,31 +219,14 @@ class Dropout(base.Layer): seed=None, name=None, **kwargs): - super(Dropout, self).__init__(name=name, **kwargs) - self.rate = rate - self.noise_shape = noise_shape - self.seed = seed - - def _get_noise_shape(self, inputs): - # Subclasses of `Dropout` may implement `_get_noise_shape(self, inputs)`, - # which will override `self.noise_shape`, and allows for custom noise - # shapes with dynamically sized inputs. - if self.noise_shape is None: - return self.noise_shape - return nn_ops._get_noise_shape(inputs, self.noise_shape) + super(Dropout, self).__init__(rate=rate, + noise_shape=noise_shape, + seed=seed, + name=name, + **kwargs) def call(self, inputs, training=False): - - def dropped_inputs(): - return nn.dropout(inputs, 1 - self.rate, - noise_shape=self._get_noise_shape(inputs), - seed=self.seed) - return utils.smart_cond(training, - dropped_inputs, - lambda: array_ops.identity(inputs)) - - def compute_output_shape(self, input_shape): - return input_shape + return super(Dropout, self).call(inputs, training=training) @tf_export('layers.dropout') @@ -352,7 +272,7 @@ def dropout(inputs, @tf_export('layers.Flatten') -class Flatten(base.Layer): +class Flatten(keras_layers.Flatten, base.Layer): """Flattens an input tensor while preserving the batch axis (axis 0). 
Examples: @@ -367,25 +287,7 @@ class Flatten(base.Layer): # now `y` has shape `(None, None)` ``` """ - - def __init__(self, **kwargs): - super(Flatten, self).__init__(**kwargs) - self.input_spec = base.InputSpec(min_ndim=2) - - def call(self, inputs): - outputs = array_ops.reshape(inputs, (array_ops.shape(inputs)[0], -1)) - if not context.executing_eagerly(): - outputs.set_shape(self.compute_output_shape(inputs.get_shape())) - return outputs - - def compute_output_shape(self, input_shape): - input_shape = tensor_shape.TensorShape(input_shape).as_list() - output_shape = [input_shape[0]] - if all(input_shape[1:]): - output_shape += [np.prod(input_shape[1:])] - else: - output_shape += [None] - return tensor_shape.TensorShape(output_shape) + pass @tf_export('layers.flatten') diff --git a/tensorflow/python/layers/normalization.py b/tensorflow/python/layers/normalization.py index 83b201e6420d48cfab38048d6638a9f9185d7d6c..33284b0d695272db5a4e0d757d6f24b1930068de 100644 --- a/tensorflow/python/layers/normalization.py +++ b/tensorflow/python/layers/normalization.py @@ -24,26 +24,14 @@ import six from six.moves import xrange # pylint: disable=redefined-builtin import numpy as np -from tensorflow.python.eager import context -from tensorflow.python.framework import constant_op -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops -from tensorflow.python.framework import tensor_shape +from tensorflow.python.keras._impl.keras import layers as keras_layers from tensorflow.python.layers import base -from tensorflow.python.layers import utils -from tensorflow.python.ops import array_ops from tensorflow.python.ops import init_ops -from tensorflow.python.ops import math_ops -from tensorflow.python.ops import nn -from tensorflow.python.ops import resource_variable_ops -from tensorflow.python.ops import state_ops -from tensorflow.python.training import distribute as distribute_lib -from tensorflow.python.training import moving_averages from 
tensorflow.python.util.tf_export import tf_export @tf_export('layers.BatchNormalization') -class BatchNormalization(base.Layer): +class BatchNormalization(keras_layers.BatchNormalization, base.Layer): """Batch Normalization layer from http://arxiv.org/abs/1502.03167. "Batch Normalization: Accelerating Deep Network Training by Reducing @@ -143,485 +131,31 @@ class BatchNormalization(base.Layer): name=None, **kwargs): super(BatchNormalization, self).__init__( - name=name, trainable=trainable, **kwargs) - if isinstance(axis, list): - self.axis = axis[:] - else: - self.axis = axis - self.momentum = momentum - self.epsilon = epsilon - self.center = center - self.scale = scale - self.beta_initializer = beta_initializer - self.gamma_initializer = gamma_initializer - self.moving_mean_initializer = moving_mean_initializer - self.moving_variance_initializer = moving_variance_initializer - self.beta_regularizer = beta_regularizer - self.gamma_regularizer = gamma_regularizer - self.beta_constraint = beta_constraint - self.gamma_constraint = gamma_constraint - self.renorm = renorm - self.virtual_batch_size = virtual_batch_size - self.adjustment = adjustment - if fused is None: - fused = True - - self.fused = fused - self._bessels_correction_test_only = True - - if renorm: - renorm_clipping = renorm_clipping or {} - keys = ['rmax', 'rmin', 'dmax'] - if set(renorm_clipping) - set(keys): - raise ValueError('renorm_clipping %s contains keys not in %s' % - (renorm_clipping, keys)) - self.renorm_clipping = renorm_clipping - self.renorm_momentum = renorm_momentum - - def _add_tower_local_variable(self, *args, **kwargs): - tower_context = distribute_lib.get_tower_context() - with tower_context.tower_local_var_scope('mean'): - return self.add_variable(*args, **kwargs) - - def build(self, input_shape): - input_shape = tensor_shape.TensorShape(input_shape) - if not input_shape.ndims: - raise ValueError('Input has undefined rank:', input_shape) - ndims = len(input_shape) - - # Convert axis 
to list and resolve negatives - if isinstance(self.axis, int): - self.axis = [self.axis] - - if not isinstance(self.axis, list): - raise TypeError('axis must be int or list, type given: %s' - % type(self.axis)) - - for idx, x in enumerate(self.axis): - if x < 0: - self.axis[idx] = ndims + x - - # Validate axes - for x in self.axis: - if x < 0 or x >= ndims: - raise ValueError('Invalid axis: %d' % x) - if len(self.axis) != len(set(self.axis)): - raise ValueError('Duplicate axis: %s' % self.axis) - - if self.virtual_batch_size is not None: - if self.virtual_batch_size <= 0: - raise ValueError('virtual_batch_size must be a positive integer that ' - 'divides the true batch size of the input Tensor') - # If using virtual batches, the first dimension must be the batch - # dimension and cannot be the batch norm axis - if 0 in self.axis: - raise ValueError('When using virtual_batch_size, the batch dimension ' - 'must be 0 and thus axis cannot include 0') - if self.adjustment is not None: - raise ValueError('When using virtual_batch_size, adjustment cannot ' - 'be specified') - - if self.fused: - # Currently fused batch norm doesn't support renorm. It also only supports - # an input tensor of rank 4 and a channel dimension on axis 1 or 3. - # TODO(yaozhang): if input is not 4D, reshape it to 4D and reshape the - # output back to its original shape accordingly. - self.fused = (not self.renorm and - ndims == 4 and - self.axis in [[1], [3]] and - self.virtual_batch_size is None and - self.adjustment is None) - # TODO(chrisying): fused batch norm is currently not supported for - # multi-axis batch norm and by extension virtual batches. In some cases, - # it might be possible to use fused batch norm but would require reshaping - # the Tensor to 4D with the axis in 1 or 3 (preferred 1) which is - # particularly tricky. 
A compromise might be to just support the most - # common use case (turning 5D w/ virtual batch to NCHW) - - if self.fused: - if self.axis == [1]: - self._data_format = 'NCHW' - elif self.axis == [3]: - self._data_format = 'NHWC' - else: - raise ValueError('Unsupported axis, fused batch norm only supports ' - 'axis == [1] or axis == [3]') - - # Raise parameters of fp16 batch norm to fp32 - if self.dtype == dtypes.float16 or self.dtype == dtypes.bfloat16: - param_dtype = dtypes.float32 - else: - param_dtype = self.dtype or dtypes.float32 - - axis_to_dim = {x: input_shape[x].value for x in self.axis} - for x in axis_to_dim: - if axis_to_dim[x] is None: - raise ValueError('Input has undefined `axis` dimension. Input shape: ', - input_shape) - self.input_spec = base.InputSpec(ndim=ndims, axes=axis_to_dim) - - if len(axis_to_dim) == 1 and self.virtual_batch_size is None: - # Single axis batch norm (most common/default use-case) - param_shape = (list(axis_to_dim.values())[0],) - else: - # Parameter shape is the original shape but with 1 in all non-axis dims - param_shape = [axis_to_dim[i] if i in axis_to_dim - else 1 for i in range(ndims)] - if self.virtual_batch_size is not None: - # When using virtual batches, add an extra dim at index 1 - param_shape.insert(1, 1) - for idx, x in enumerate(self.axis): - self.axis[idx] = x + 1 # Account for added dimension - - if self.scale: - self.gamma = self.add_variable( - name='gamma', - shape=param_shape, - dtype=param_dtype, - initializer=self.gamma_initializer, - regularizer=self.gamma_regularizer, - constraint=self.gamma_constraint, - trainable=True) - else: - self.gamma = None - if self.fused: - self._gamma_const = array_ops.constant( - 1.0, dtype=param_dtype, shape=param_shape) - - if self.center: - self.beta = self.add_variable( - name='beta', - shape=param_shape, - dtype=param_dtype, - initializer=self.beta_initializer, - regularizer=self.beta_regularizer, - constraint=self.beta_constraint, - trainable=True) - else: - 
self.beta = None - if self.fused: - self._beta_const = array_ops.constant( - 0.0, dtype=param_dtype, shape=param_shape) - - # Disable variable partitioning when creating the moving mean and variance - try: - if self._scope: - partitioner = self._scope.partitioner - self._scope.set_partitioner(None) - else: - partitioner = None - self.moving_mean = self._add_tower_local_variable( - name='moving_mean', - shape=param_shape, - dtype=param_dtype, - initializer=self.moving_mean_initializer, - trainable=False) - - self.moving_variance = self._add_tower_local_variable( - name='moving_variance', - shape=param_shape, - dtype=param_dtype, - initializer=self.moving_variance_initializer, - trainable=False) - - if self.renorm: - # Create variables to maintain the moving mean and standard deviation. - # These are used in training and thus are different from the moving - # averages above. The renorm variables are colocated with moving_mean - # and moving_variance. - # NOTE: below, the outer `with device` block causes the current device - # stack to be cleared. The nested ones use a `lambda` to set the desired - # device and ignore any devices that may be set by the custom getter. - def _renorm_variable(name, shape): - var = self._add_tower_local_variable( - name=name, - shape=shape, - dtype=param_dtype, - initializer=init_ops.zeros_initializer(), - trainable=False) - return var - - with distribute_lib.get_distribution_strategy().colocate_vars_with( - self.moving_mean): - self.renorm_mean = _renorm_variable('renorm_mean', param_shape) - self.renorm_mean_weight = _renorm_variable('renorm_mean_weight', ()) - # We initialize renorm_stddev to 0, and maintain the (0-initialized) - # renorm_stddev_weight. This allows us to (1) mix the average - # stddev with the minibatch stddev early in training, and (2) compute - # the unbiased average stddev by dividing renorm_stddev by the weight. 
- with distribute_lib.get_distribution_strategy().colocate_vars_with( - self.moving_variance): - self.renorm_stddev = _renorm_variable('renorm_stddev', param_shape) - self.renorm_stddev_weight = _renorm_variable('renorm_stddev_weight', - ()) - finally: - if partitioner: - self._scope.set_partitioner(partitioner) - self.built = True - - def _assign_moving_average(self, variable, value, momentum): - with ops.name_scope(None, 'AssignMovingAvg', - [variable, value, momentum]) as scope: - decay = ops.convert_to_tensor(1.0 - momentum, name='decay') - if decay.dtype != variable.dtype.base_dtype: - decay = math_ops.cast(decay, variable.dtype.base_dtype) - update_delta = (variable - value) * decay - return state_ops.assign_sub(variable, update_delta, name=scope) - - def _fused_batch_norm(self, inputs, training): - """Returns the output of fused batch norm.""" - beta = self.beta if self.center else self._beta_const - gamma = self.gamma if self.scale else self._gamma_const - - def _fused_batch_norm_training(): - return nn.fused_batch_norm( - inputs, - gamma, - beta, - epsilon=self.epsilon, - data_format=self._data_format) - - def _fused_batch_norm_inference(): - return nn.fused_batch_norm( - inputs, - gamma, - beta, - mean=self.moving_mean, - variance=self.moving_variance, - epsilon=self.epsilon, - is_training=False, - data_format=self._data_format) - - output, mean, variance = utils.smart_cond( - training, _fused_batch_norm_training, _fused_batch_norm_inference) - if not self._bessels_correction_test_only: - # Remove Bessel's correction to be consistent with non-fused batch norm. - # Note that the variance computed by fused batch norm is - # with Bessel's correction. 
- sample_size = math_ops.cast( - array_ops.size(inputs) / array_ops.size(variance), variance.dtype) - factor = (sample_size - math_ops.cast(1.0, variance.dtype)) / sample_size - variance *= factor - - training_value = utils.constant_value(training) - if training_value is None: - momentum = utils.smart_cond(training, lambda: self.momentum, lambda: 1.0) - else: - momentum = ops.convert_to_tensor(self.momentum) - if training_value or training_value is None: - mean_update = self._assign_moving_average(self.moving_mean, mean, - momentum) - variance_update = self._assign_moving_average(self.moving_variance, - variance, momentum) - self.add_update(mean_update, inputs=inputs) - self.add_update(variance_update, inputs=inputs) - - return output - - def _renorm_correction_and_moments(self, mean, variance, training): - """Returns the correction and update values for renorm.""" - stddev = math_ops.sqrt(variance + self.epsilon) - # Compute the average mean and standard deviation, as if they were - # initialized with this batch's moments. - mixed_renorm_mean = (self.renorm_mean + - (1. - self.renorm_mean_weight) * mean) - mixed_renorm_stddev = (self.renorm_stddev + - (1. - self.renorm_stddev_weight) * stddev) - # Compute the corrections for batch renorm. - r = stddev / mixed_renorm_stddev - d = (mean - mixed_renorm_mean) / mixed_renorm_stddev - # Ensure the corrections use pre-update moving averages. - with ops.control_dependencies([r, d]): - mean = array_ops.identity(mean) - stddev = array_ops.identity(stddev) - rmin, rmax, dmax = [self.renorm_clipping.get(key) - for key in ['rmin', 'rmax', 'dmax']] - if rmin is not None: - r = math_ops.maximum(r, rmin) - if rmax is not None: - r = math_ops.minimum(r, rmax) - if dmax is not None: - d = math_ops.maximum(d, -dmax) - d = math_ops.minimum(d, dmax) - # When not training, use r=1, d=0. 
- r = utils.smart_cond(training, lambda: r, lambda: array_ops.ones_like(r)) - d = utils.smart_cond(training, lambda: d, lambda: array_ops.zeros_like(d)) - - def _update_renorm_variable(var, weight, value): - """Updates a moving average and weight, returns the unbiased value.""" - value = array_ops.identity(value) - def _do_update(): - """Updates the var and weight, returns their updated ratio.""" - # Update the variables without zero debiasing. The debiasing will be - # accomplished by dividing the exponential moving average by the weight. - # For example, after a single update, the moving average would be - # (1-decay) * value. and the weight will be 1-decay, with their ratio - # giving the value. - # Make sure the weight is not updated until before r and d computation. - with ops.control_dependencies([value]): - weight_value = array_ops.constant(1., dtype=weight.dtype) - new_var = self._assign_moving_average(var, value, self.renorm_momentum) - new_weight = self._assign_moving_average(weight, weight_value, - self.renorm_momentum) - # TODO(yuefengz): the updates to var and weighted can not be batched - # together if we fetch their updated values here. Consider calculating - # new values and delaying the updates. - return new_var / new_weight - - def _fake_update(): - return array_ops.identity(var) - return utils.smart_cond(training, _do_update, _fake_update) - - # TODO(yuefengz): colocate the operations - new_mean = _update_renorm_variable(self.renorm_mean, - self.renorm_mean_weight, mean) - new_stddev = _update_renorm_variable(self.renorm_stddev, - self.renorm_stddev_weight, stddev) - # Make sqrt(moving_variance + epsilon) = new_stddev. 
- new_variance = math_ops.square(new_stddev) - self.epsilon - - return (r, d, new_mean, new_variance) + axis=axis, + momentum=momentum, + epsilon=epsilon, + center=center, + scale=scale, + beta_initializer=beta_initializer, + gamma_initializer=gamma_initializer, + moving_mean_initializer=moving_mean_initializer, + moving_variance_initializer=moving_variance_initializer, + beta_regularizer=beta_regularizer, + gamma_regularizer=gamma_regularizer, + beta_constraint=beta_constraint, + gamma_constraint=gamma_constraint, + renorm=renorm, + renorm_clipping=renorm_clipping, + renorm_momentum=renorm_momentum, + fused=fused, + trainable=trainable, + virtual_batch_size=virtual_batch_size, + adjustment=adjustment, + name=name, + **kwargs) def call(self, inputs, training=False): - in_eager_mode = context.executing_eagerly() - if self.virtual_batch_size is not None: - # Virtual batches (aka ghost batches) can be simulated by reshaping the - # Tensor and reusing the existing batch norm implementation - original_shape = [-1] + inputs.shape.as_list()[1:] - expanded_shape = [self.virtual_batch_size, -1] + original_shape[1:] - - # Will cause errors if virtual_batch_size does not divide the batch size - inputs = array_ops.reshape(inputs, expanded_shape) - - def undo_virtual_batching(outputs): - outputs = array_ops.reshape(outputs, original_shape) - return outputs - - if self.fused: - outputs = self._fused_batch_norm(inputs, training=training) - if self.virtual_batch_size is not None: - # Currently never reaches here since fused_batch_norm does not support - # virtual batching - return undo_virtual_batching(outputs) - return outputs - - # Compute the axes along which to reduce the mean / variance - input_shape = inputs.get_shape() - ndims = len(input_shape) - reduction_axes = [i for i in range(ndims) if i not in self.axis] - if self.virtual_batch_size is not None: - del reduction_axes[1] # Do not reduce along virtual batch dim - - # Broadcasting only necessary for single-axis batch 
norm where the axis is - # not the last dimension - broadcast_shape = [1] * ndims - broadcast_shape[self.axis[0]] = input_shape[self.axis[0]].value - def _broadcast(v): - if (v is not None and - len(v.get_shape()) != ndims and - reduction_axes != list(range(ndims - 1))): - return array_ops.reshape(v, broadcast_shape) - return v - - scale, offset = _broadcast(self.gamma), _broadcast(self.beta) - - def _compose_transforms(scale, offset, then_scale, then_offset): - if then_scale is not None: - scale *= then_scale - offset *= then_scale - if then_offset is not None: - offset += then_offset - return (scale, offset) - - # Determine a boolean value for `training`: could be True, False, or None. - training_value = utils.constant_value(training) - if training_value is not False: - if self.adjustment: - adj_scale, adj_bias = self.adjustment(array_ops.shape(inputs)) - # Adjust only during training. - adj_scale = utils.smart_cond(training, - lambda: adj_scale, - lambda: array_ops.ones_like(adj_scale)) - adj_bias = utils.smart_cond(training, - lambda: adj_bias, - lambda: array_ops.zeros_like(adj_bias)) - scale, offset = _compose_transforms(adj_scale, adj_bias, scale, offset) - - # Some of the computations here are not necessary when training==False - # but not a constant. However, this makes the code simpler. 
- keep_dims = self.virtual_batch_size is not None or len(self.axis) > 1 - mean, variance = nn.moments(inputs, reduction_axes, keep_dims=keep_dims) - - moving_mean = self.moving_mean - moving_variance = self.moving_variance - - mean = utils.smart_cond(training, - lambda: mean, - lambda: moving_mean) - variance = utils.smart_cond(training, - lambda: variance, - lambda: moving_variance) - - if self.renorm: - r, d, new_mean, new_variance = self._renorm_correction_and_moments( - mean, variance, training) - # When training, the normalized values (say, x) will be transformed as - # x * gamma + beta without renorm, and (x * r + d) * gamma + beta - # = x * (r * gamma) + (d * gamma + beta) with renorm. - r = _broadcast(array_ops.stop_gradient(r, name='renorm_r')) - d = _broadcast(array_ops.stop_gradient(d, name='renorm_d')) - scale, offset = _compose_transforms(r, d, scale, offset) - else: - new_mean, new_variance = mean, variance - - if self.virtual_batch_size is not None: - # This isn't strictly correct since in ghost batch norm, you are - # supposed to sequentially update the moving_mean and moving_variance - # with each sub-batch. However, since the moving statistics are only - # used during evaluation, it is more efficient to just update in one - # step and should not make a significant difference in the result. 
- new_mean = math_ops.reduce_mean(new_mean, - axis=1, keep_dims=True) - new_variance = math_ops.reduce_mean(new_variance, - axis=1, keep_dims=True) - - def _do_update(var, value): - if in_eager_mode and not self.trainable: - return - - return self._assign_moving_average(var, value, self.momentum) - - mean_update = utils.smart_cond( - training, - lambda: _do_update(self.moving_mean, new_mean), - lambda: self.moving_mean) - variance_update = utils.smart_cond( - training, - lambda: _do_update(self.moving_variance, new_variance), - lambda: self.moving_variance) - if not context.executing_eagerly(): - self.add_update(mean_update, inputs=inputs) - self.add_update(variance_update, inputs=inputs) - - else: - mean, variance = self.moving_mean, self.moving_variance - - outputs = nn.batch_normalization(inputs, - _broadcast(mean), - _broadcast(variance), - offset, - scale, - self.epsilon) - # If some components of the shape got lost due to adjustments, fix that. - outputs.set_shape(input_shape) - - if self.virtual_batch_size is not None: - return undo_virtual_batching(outputs) - - return outputs - - def compute_output_shape(self, input_shape): - return input_shape + return super(BatchNormalization, self).call(inputs, training=training) @tf_export('layers.batch_normalization') diff --git a/tensorflow/python/layers/pooling.py b/tensorflow/python/layers/pooling.py index 50503ce093fbc251b11c4d5cbccb2a2683d92e7a..75abe56f51f2a206ea3e5a5dad032446c150293a 100644 --- a/tensorflow/python/layers/pooling.py +++ b/tensorflow/python/layers/pooling.py @@ -13,92 +13,19 @@ # limitations under the License. # ============================================================================= -# pylint: disable=unused-import,g-bad-import-order """Contains the pooling layer classes and their functional aliases. 
""" from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.python.eager import context -from tensorflow.python.framework import tensor_shape +from tensorflow.python.keras._impl.keras import layers as keras_layers from tensorflow.python.layers import base -from tensorflow.python.layers import utils -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import nn from tensorflow.python.util.tf_export import tf_export -class _Pooling1D(base.Layer): - """Pooling layer for arbitrary pooling functions, for 1D inputs. - - This class only exists for code reuse. It will never be an exposed API. - - Arguments: - pool_function: The pooling function to apply, e.g. `tf.nn.max_pool`. - pool_size: An integer or tuple/list of a single integer, - representing the size of the pooling window. - strides: An integer or tuple/list of a single integer, specifying the - strides of the pooling operation. - padding: A string. The padding method, either 'valid' or 'same'. - Case-insensitive. - data_format: A string, one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape - `(batch, length, channels)` while `channels_first` corresponds to - inputs with shape `(batch, channels, length)`. - name: A string, the name of the layer. 
- """ - - def __init__(self, pool_function, pool_size, strides, - padding='valid', data_format='channels_last', - name=None, **kwargs): - super(_Pooling1D, self).__init__(name=name, **kwargs) - self.pool_function = pool_function - self.pool_size = utils.normalize_tuple(pool_size, 1, 'pool_size') - self.strides = utils.normalize_tuple(strides, 1, 'strides') - self.padding = utils.normalize_padding(padding) - self.data_format = utils.normalize_data_format(data_format) - self.input_spec = base.InputSpec(ndim=3) - - def call(self, inputs): - # There is no TF op for 1D pooling, hence we make the inputs 4D. - if self.data_format == 'channels_last': - # input is NWC, make it NHWC - inputs = array_ops.expand_dims(inputs, 1) - # pool on the W dim - pool_shape = (1, 1) + self.pool_size + (1,) - strides = (1, 1) + self.strides + (1,) - data_format = 'NHWC' - else: - # input is NCW, make it NCHW - inputs = array_ops.expand_dims(inputs, 2) - # pool on the W dim - pool_shape = (1, 1, 1) + self.pool_size - strides = (1, 1, 1) + self.strides - data_format = 'NCHW' - - outputs = self.pool_function( - inputs, - ksize=pool_shape, - strides=strides, - padding=self.padding.upper(), - data_format=data_format) - - if self.data_format == 'channels_last': - return array_ops.squeeze(outputs, 1) - else: - return array_ops.squeeze(outputs, 2) - - def compute_output_shape(self, input_shape): - input_shape = tensor_shape.TensorShape(input_shape).as_list() - length = utils.conv_output_length(input_shape[1], self.pool_size[0], - self.padding, self.strides[0]) - return tensor_shape.TensorShape([input_shape[0], length, input_shape[2]]) - - @tf_export('layers.AveragePooling1D') -class AveragePooling1D(_Pooling1D): +class AveragePooling1D(keras_layers.AveragePooling1D, base.Layer): """Average Pooling layer for 1D inputs. 
Arguments: @@ -119,8 +46,9 @@ class AveragePooling1D(_Pooling1D): def __init__(self, pool_size, strides, padding='valid', data_format='channels_last', name=None, **kwargs): + if strides is None: + raise ValueError('Argument `strides` must not be None.') super(AveragePooling1D, self).__init__( - nn.avg_pool, pool_size=pool_size, strides=strides, padding=padding, @@ -165,7 +93,7 @@ def average_pooling1d(inputs, pool_size, strides, @tf_export('layers.MaxPooling1D') -class MaxPooling1D(_Pooling1D): +class MaxPooling1D(keras_layers.MaxPooling1D, base.Layer): """Max Pooling layer for 1D inputs. Arguments: @@ -186,8 +114,9 @@ class MaxPooling1D(_Pooling1D): def __init__(self, pool_size, strides, padding='valid', data_format='channels_last', name=None, **kwargs): + if strides is None: + raise ValueError('Argument `strides` must not be None.') super(MaxPooling1D, self).__init__( - nn.max_pool, pool_size=pool_size, strides=strides, padding=padding, @@ -231,79 +160,8 @@ def max_pooling1d(inputs, pool_size, strides, return layer.apply(inputs) -class _Pooling2D(base.Layer): - """Pooling layer for arbitrary pooling functions, for 2D inputs (e.g. images). - - This class only exists for code reuse. It will never be an exposed API. - - Arguments: - pool_function: The pooling function to apply, e.g. `tf.nn.max_pool`. - pool_size: An integer or tuple/list of 2 integers: (pool_height, pool_width) - specifying the size of the pooling window. - Can be a single integer to specify the same value for - all spatial dimensions. - strides: An integer or tuple/list of 2 integers, - specifying the strides of the pooling operation. - Can be a single integer to specify the same value for - all spatial dimensions. - padding: A string. The padding method, either 'valid' or 'same'. - Case-insensitive. - data_format: A string, one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. 
- `channels_last` corresponds to inputs with shape - `(batch, height, width, channels)` while `channels_first` corresponds to - inputs with shape `(batch, channels, height, width)`. - name: A string, the name of the layer. - """ - - def __init__(self, pool_function, pool_size, strides, - padding='valid', data_format='channels_last', - name=None, **kwargs): - super(_Pooling2D, self).__init__(name=name, **kwargs) - self.pool_function = pool_function - self.pool_size = utils.normalize_tuple(pool_size, 2, 'pool_size') - self.strides = utils.normalize_tuple(strides, 2, 'strides') - self.padding = utils.normalize_padding(padding) - self.data_format = utils.normalize_data_format(data_format) - self.input_spec = base.InputSpec(ndim=4) - - def call(self, inputs): - if self.data_format == 'channels_last': - pool_shape = (1,) + self.pool_size + (1,) - strides = (1,) + self.strides + (1,) - else: - pool_shape = (1, 1) + self.pool_size - strides = (1, 1) + self.strides - outputs = self.pool_function( - inputs, - ksize=pool_shape, - strides=strides, - padding=self.padding.upper(), - data_format=utils.convert_data_format(self.data_format, 4)) - return outputs - - def compute_output_shape(self, input_shape): - input_shape = tensor_shape.TensorShape(input_shape).as_list() - if self.data_format == 'channels_first': - rows = input_shape[2] - cols = input_shape[3] - else: - rows = input_shape[1] - cols = input_shape[2] - rows = utils.conv_output_length(rows, self.pool_size[0], self.padding, - self.strides[0]) - cols = utils.conv_output_length(cols, self.pool_size[1], self.padding, - self.strides[1]) - if self.data_format == 'channels_first': - return tensor_shape.TensorShape( - [input_shape[0], input_shape[1], rows, cols]) - else: - return tensor_shape.TensorShape( - [input_shape[0], rows, cols, input_shape[3]]) - - @tf_export('layers.AveragePooling2D') -class AveragePooling2D(_Pooling2D): +class AveragePooling2D(keras_layers.AveragePooling2D, base.Layer): """Average pooling layer for 
2D inputs (e.g. images). Arguments: @@ -328,8 +186,9 @@ class AveragePooling2D(_Pooling2D): def __init__(self, pool_size, strides, padding='valid', data_format='channels_last', name=None, **kwargs): + if strides is None: + raise ValueError('Argument `strides` must not be None.') super(AveragePooling2D, self).__init__( - nn.avg_pool, pool_size=pool_size, strides=strides, padding=padding, data_format=data_format, name=name, **kwargs) @@ -373,7 +232,7 @@ def average_pooling2d(inputs, @tf_export('layers.MaxPooling2D') -class MaxPooling2D(_Pooling2D): +class MaxPooling2D(keras_layers.MaxPooling2D, base.Layer): """Max pooling layer for 2D inputs (e.g. images). Arguments: @@ -398,8 +257,9 @@ class MaxPooling2D(_Pooling2D): def __init__(self, pool_size, strides, padding='valid', data_format='channels_last', name=None, **kwargs): + if strides is None: + raise ValueError('Argument `strides` must not be None.') super(MaxPooling2D, self).__init__( - nn.max_pool, pool_size=pool_size, strides=strides, padding=padding, data_format=data_format, name=name, **kwargs) @@ -442,90 +302,8 @@ def max_pooling2d(inputs, return layer.apply(inputs) -class _Pooling3D(base.Layer): - """Pooling layer for arbitrary pooling functions, for 3D inputs. - - This class only exists for code reuse. It will never be an exposed API. - - Arguments: - pool_function: The pooling function to apply, e.g. `tf.nn.max_pool`. - pool_size: An integer or tuple/list of 3 integers: - (pool_depth, pool_height, pool_width) - specifying the size of the pooling window. - Can be a single integer to specify the same value for - all spatial dimensions. - strides: An integer or tuple/list of 3 integers, - specifying the strides of the pooling operation. - Can be a single integer to specify the same value for - all spatial dimensions. - padding: A string. The padding method, either 'valid' or 'same'. - Case-insensitive. - data_format: A string, one of `channels_last` (default) or `channels_first`. 
- The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape - `(batch, depth, height, width, channels)` - while `channels_first` corresponds to - inputs with shape `(batch, channels, depth, height, width)`. - name: A string, the name of the layer. - """ - - def __init__(self, pool_function, pool_size, strides, - padding='valid', data_format='channels_last', - name=None, **kwargs): - super(_Pooling3D, self).__init__(name=name, **kwargs) - self.pool_function = pool_function - self.pool_size = utils.normalize_tuple(pool_size, 3, 'pool_size') - self.strides = utils.normalize_tuple(strides, 3, 'strides') - self.padding = utils.normalize_padding(padding) - self.data_format = utils.normalize_data_format(data_format) - self.input_spec = base.InputSpec(ndim=5) - - def call(self, inputs): - pool_shape = (1,) + self.pool_size + (1,) - strides = (1,) + self.strides + (1,) - - if self.data_format == 'channels_first': - # TF does not support `channels_first` with 3D pooling operations, - # so we must handle this case manually. - # TODO(fchollet): remove this when TF pooling is feature-complete. 
- inputs = array_ops.transpose(inputs, (0, 2, 3, 4, 1)) - - outputs = self.pool_function( - inputs, - ksize=pool_shape, - strides=strides, - padding=self.padding.upper()) - - if self.data_format == 'channels_first': - outputs = array_ops.transpose(outputs, (0, 4, 1, 2, 3)) - return outputs - - def compute_output_shape(self, input_shape): - input_shape = tensor_shape.TensorShape(input_shape).as_list() - if self.data_format == 'channels_first': - len_dim1 = input_shape[2] - len_dim2 = input_shape[3] - len_dim3 = input_shape[4] - else: - len_dim1 = input_shape[1] - len_dim2 = input_shape[2] - len_dim3 = input_shape[3] - len_dim1 = utils.conv_output_length(len_dim1, self.pool_size[0], - self.padding, self.strides[0]) - len_dim2 = utils.conv_output_length(len_dim2, self.pool_size[1], - self.padding, self.strides[1]) - len_dim3 = utils.conv_output_length(len_dim3, self.pool_size[2], - self.padding, self.strides[2]) - if self.data_format == 'channels_first': - return tensor_shape.TensorShape( - [input_shape[0], input_shape[1], len_dim1, len_dim2, len_dim3]) - else: - return tensor_shape.TensorShape( - [input_shape[0], len_dim1, len_dim2, len_dim3, input_shape[4]]) - - @tf_export('layers.AveragePooling3D') -class AveragePooling3D(_Pooling3D): +class AveragePooling3D(keras_layers.AveragePooling3D, base.Layer): """Average pooling layer for 3D inputs (e.g. volumes). 
Arguments: @@ -552,8 +330,9 @@ class AveragePooling3D(_Pooling3D): def __init__(self, pool_size, strides, padding='valid', data_format='channels_last', name=None, **kwargs): + if strides is None: + raise ValueError('Argument `strides` must not be None.') super(AveragePooling3D, self).__init__( - nn.avg_pool3d, pool_size=pool_size, strides=strides, padding=padding, data_format=data_format, name=name, **kwargs) @@ -599,7 +378,7 @@ def average_pooling3d(inputs, @tf_export('layers.MaxPooling3D') -class MaxPooling3D(_Pooling3D): +class MaxPooling3D(keras_layers.MaxPooling3D, base.Layer): """Max pooling layer for 3D inputs (e.g. volumes). Arguments: @@ -626,8 +405,9 @@ class MaxPooling3D(_Pooling3D): def __init__(self, pool_size, strides, padding='valid', data_format='channels_last', name=None, **kwargs): + if strides is None: + raise ValueError('Argument `strides` must not be None.') super(MaxPooling3D, self).__init__( - nn.max_pool3d, pool_size=pool_size, strides=strides, padding=padding, data_format=data_format, name=name, **kwargs) diff --git a/tensorflow/python/layers/utils_test.py b/tensorflow/python/layers/utils_test.py index c941aad7bc63dbb891fbe78cd2a47dd6805bf231..7e94dda648166780af002ce6b979a751a0ced846 100644 --- a/tensorflow/python/layers/utils_test.py +++ b/tensorflow/python/layers/utils_test.py @@ -19,7 +19,6 @@ from __future__ import division from __future__ import print_function from tensorflow.python.layers import utils -from tensorflow.python.ops import array_ops from tensorflow.python.platform import test @@ -89,33 +88,5 @@ class ConvUtilsTest(test.TestCase): self.assertEqual(6, utils.deconv_output_length(4, 2, 'full', 2)) -class GraphUtilsTest(test.TestCase): - - def testGetReachableFromInputs(self): - - with self.test_session(): - pl_1 = array_ops.placeholder(shape=None, dtype='float32') - pl_2 = array_ops.placeholder(shape=None, dtype='float32') - pl_3 = array_ops.placeholder(shape=None, dtype='float32') - x_1 = pl_1 + pl_2 - x_2 = pl_2 * 2 - x_3 = 
pl_3 + 1 - x_4 = x_1 + x_2 - x_5 = x_3 * pl_1 - - self.assertEqual( - utils.get_reachable_from_inputs([pl_1]), - {pl_1, x_1, x_4, x_5}) - self.assertEqual( - utils.get_reachable_from_inputs([pl_1, pl_2]), - {pl_1, pl_2, x_1, x_2, x_4, x_5}) - self.assertEqual( - utils.get_reachable_from_inputs([pl_3]), - {pl_3, x_3, x_5}) - self.assertEqual( - utils.get_reachable_from_inputs([x_3]), - {x_3, x_5}) - - if __name__ == '__main__': test.main() diff --git a/tensorflow/python/lib/core/bfloat16.cc b/tensorflow/python/lib/core/bfloat16.cc index 7f07deebef3d8e8f24f73a42f29f4ade4cae568d..77fa2c1f66d2214dbb08e4d0ad3437fa4fe02822 100644 --- a/tensorflow/python/lib/core/bfloat16.cc +++ b/tensorflow/python/lib/core/bfloat16.cc @@ -616,8 +616,8 @@ bool Initialize() { }; // Comparisons - const std::array compare_types = {npy_bfloat16_, npy_bfloat16_, - NPY_BOOL}; + const std::array compare_types = { + {npy_bfloat16_, npy_bfloat16_, NPY_BOOL}}; if (!register_ufunc("equal", CompareUFunc, compare_types)) { diff --git a/tensorflow/python/lib/core/py_exception_registry.cc b/tensorflow/python/lib/core/py_exception_registry.cc new file mode 100644 index 0000000000000000000000000000000000000000..6637de632b48e4dfc8219543161464b10dcdbe12 --- /dev/null +++ b/tensorflow/python/lib/core/py_exception_registry.cc @@ -0,0 +1,50 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/python/lib/core/py_exception_registry.h" + +#include + +namespace tensorflow { + +PyExceptionRegistry* PyExceptionRegistry::singleton_ = nullptr; + +void PyExceptionRegistry::Init(PyObject* code_to_exc_type_map) { + DCHECK(singleton_ == nullptr) << "PyExceptionRegistry::Init() already called"; + singleton_ = new PyExceptionRegistry; + + DCHECK(PyDict_Check(code_to_exc_type_map)); + PyObject* key; + PyObject* value; + Py_ssize_t pos = 0; + while (PyDict_Next(code_to_exc_type_map, &pos, &key, &value)) { + TF_Code code = static_cast(PyLong_AsLong(key)); + singleton_->exc_types_[code] = value; + // The exception classes should also have the lifetime of the process, but + // incref just in case. + Py_INCREF(value); + } +} + +PyObject* PyExceptionRegistry::Lookup(TF_Code code) { + DCHECK(singleton_ != nullptr) << "Must call PyExceptionRegistry::Init() " + "before PyExceptionRegistry::Lookup()"; + DCHECK_NE(code, TF_OK); + DCHECK(singleton_->exc_types_.find(code) != singleton_->exc_types_.end()) + << "Unknown error code passed to PyExceptionRegistry::Lookup: " << code; + return singleton_->exc_types_[code]; +} + +} // namespace tensorflow diff --git a/tensorflow/python/lib/core/py_exception_registry.h b/tensorflow/python/lib/core/py_exception_registry.h new file mode 100644 index 0000000000000000000000000000000000000000..2b0f23b548c16130dee9a8ec086ae0283f1506e1 --- /dev/null +++ b/tensorflow/python/lib/core/py_exception_registry.h @@ -0,0 +1,73 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_PYTHON_LIB_CORE_PY_EXCEPTION_REGISTRY_H_ +#define TENSORFLOW_PYTHON_LIB_CORE_PY_EXCEPTION_REGISTRY_H_ + +#include + +#include "tensorflow/c/c_api.h" +#include "tensorflow/core/platform/logging.h" + +#ifndef PyObject_HEAD +struct _object; +typedef _object PyObject; +#endif + +namespace tensorflow { + +// Global registry mapping C API error codes to the corresponding custom Python +// exception type. This is used to expose the exception types to C extension +// code (i.e. so we can raise custom exceptions via SWIG). +// +// Init() must be called exactly once at the beginning of the process before +// Lookup() can be used. +// +// Example usage: +// TF_Status* status = TF_NewStatus(); +// TF_Foo(..., status); +// +// if (TF_GetCode(status) != TF_OK) { +// PyObject* exc_type = PyExceptionRegistry::Lookup(TF_GetCode(status)); +// // Arguments to OpError base class. Set `node_def` and `op` to None. +// PyObject* args = +// Py_BuildValue("sss", nullptr, nullptr, TF_Message(status)); +// PyErr_SetObject(exc_type, args); +// Py_DECREF(args); +// TF_DeleteStatus(status); +// return NULL; +// } +class PyExceptionRegistry { + public: + // Initializes the process-wide registry. Should be called exactly once near + // the beginning of the process. The arguments are the various Python + // exception types (e.g. `cancelled_exc` corresponds to + // errors.CancelledError). + static void Init(PyObject* code_to_exc_type_map); + + // Returns the Python exception type corresponding to `code`. 
Init() must be + // called before using this function. `code` should not be TF_OK. + static PyObject* Lookup(TF_Code code); + + private: + static PyExceptionRegistry* singleton_; + PyExceptionRegistry() = default; + + // Maps error codes to the corresponding Python exception type. + std::map exc_types_; +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_PYTHON_LIB_CORE_PY_EXCEPTION_REGISTRY_H_ diff --git a/tensorflow/python/lib/core/py_exception_registry.i b/tensorflow/python/lib/core/py_exception_registry.i new file mode 100644 index 0000000000000000000000000000000000000000..e872b74985e03e203c8aeb8fdec8a3e67f03e1f9 --- /dev/null +++ b/tensorflow/python/lib/core/py_exception_registry.i @@ -0,0 +1,28 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +%include "tensorflow/python/platform/base.i" + +%{ +#include "tensorflow/python/lib/core/py_exception_registry.h" +%} + +%ignoreall + +%unignore tensorflow::PyExceptionRegistry; +%unignore tensorflow::PyExceptionRegistry::Init; + +%include "tensorflow/python/lib/core/py_exception_registry.h" +%unignoreall diff --git a/tensorflow/python/lib/core/py_func.cc b/tensorflow/python/lib/core/py_func.cc index 22317a348c9d5472486ad118d865341ffb6ad829..8c6bb7955a4e29daddd92860e41d7105192eb24b 100644 --- a/tensorflow/python/lib/core/py_func.cc +++ b/tensorflow/python/lib/core/py_func.cc @@ -126,6 +126,9 @@ Status NumericNpDTypeToTfDType(const int np, DataType* tf) { case NPY_INT8: *tf = DT_INT8; break; + case NPY_UINT16: + *tf = DT_UINT16; + break; case NPY_INT16: *tf = DT_INT16; break; diff --git a/tensorflow/python/lib/core/py_seq_tensor.cc b/tensorflow/python/lib/core/py_seq_tensor.cc index 8247d354db62532c10c5acc9875cc08289cd31bf..32ea737a99067877e7f527e44d261a0b7c2eb07e 100644 --- a/tensorflow/python/lib/core/py_seq_tensor.cc +++ b/tensorflow/python/lib/core/py_seq_tensor.cc @@ -20,6 +20,7 @@ limitations under the License. 
#include "tensorflow/core/framework/types.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/core/stringpiece.h" +#include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/platform/types.h" #include "tensorflow/python/lib/core/numpy.h" #include "tensorflow/python/lib/core/py_util.h" @@ -77,9 +78,9 @@ string PyRepr(PyObject* obj) { bool IsPyDimension(PyObject* obj) { const char* tp_name = obj->ob_type->tp_name; if (strcmp(tp_name, "Dimension") != 0) return false; - bool ret = - StringPiece(PyRepr(PyType(obj))) - .ends_with("tensorflow.python.framework.tensor_shape.Dimension'>"); + bool ret = str_util::EndsWith( + PyRepr(PyType(obj)), + "tensorflow.python.framework.tensor_shape.Dimension'>"); return ret; } diff --git a/tensorflow/python/lib/io/py_record_reader.cc b/tensorflow/python/lib/io/py_record_reader.cc index 5fcb51b3b25e49192f651c648f48a1760ed23084..9500fc6a7c49d13ce0d6954f237f43ffba0869a0 100644 --- a/tensorflow/python/lib/io/py_record_reader.cc +++ b/tensorflow/python/lib/io/py_record_reader.cc @@ -43,9 +43,10 @@ PyRecordReader* PyRecordReader::New(const string& filename, uint64 start_offset, reader->offset_ = start_offset; reader->file_ = file.release(); + static const uint64 kReaderBufferSize = 16 * 1024 * 1024; RecordReaderOptions options = RecordReaderOptions::CreateRecordReaderOptions(compression_type_string); - + options.buffer_size = kReaderBufferSize; reader->reader_ = new RecordReader(reader->file_, options); return reader; } diff --git a/tensorflow/python/lib/io/tf_record.py b/tensorflow/python/lib/io/tf_record.py index 6fcf9c91d831e3a89552b522040e8e8647114a2f..bf2d6f68b55d78f9570d3854804e3d1316176c99 100644 --- a/tensorflow/python/lib/io/tf_record.py +++ b/tensorflow/python/lib/io/tf_record.py @@ -78,8 +78,7 @@ def tf_record_iterator(path, options=None): try: while True: try: - with errors.raise_exception_on_not_ok_status() as status: - reader.GetNext(status) + reader.GetNext() except 
errors.OutOfRangeError: break yield reader.record() diff --git a/tensorflow/python/ops/array_grad.py b/tensorflow/python/ops/array_grad.py index 3c6a5c9e562ff9765c2ef47555871c94cd6feb1e..3678bd4c1f6a4500622b6d9e8334cb1ebae46578 100644 --- a/tensorflow/python/ops/array_grad.py +++ b/tensorflow/python/ops/array_grad.py @@ -196,7 +196,7 @@ def _ConcatGradHelper(op, grad, start_value_index, end_value_index, dim_index): array_ops.where( math_ops.logical_and(grad.indices >= start, grad.indices < end)), - squeeze_dims=[1]) + axis=[1]) new_indices = array_ops.gather(grad.indices, indices_to_select) - start new_values = array_ops.gather(grad.values, indices_to_select) out_grads.append(ops.IndexedSlices(new_values, new_indices, size)) @@ -255,10 +255,15 @@ def _SliceGrad(op, grad): @ops.RegisterGradient("StridedSlice") def _StridedSliceGrad(op, grad): """Gradient for StridedSlice op.""" - x = array_ops.shape(op.inputs[0]) begin = op.inputs[1] end = op.inputs[2] strides = op.inputs[3] + # StridedSliceGrad requires `x`, `begin`, `end` and `strides` to be of the + # same dtype so we build a shape of the same type as other args. + # Note that the choice of `begin` for specifying `out_type` is arbitrary. + # We could choose any of {begin|end|strides}.dtype since they are required to + # be the same. 
+ x = array_ops.shape(op.inputs[0], out_type=begin.dtype) return array_ops.strided_slice_grad( x, diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py index 207866610b01b15760b107193f32bcf03b03bf9a..bbffff04831829e3ec44337416819b967b7f55ba 100644 --- a/tensorflow/python/ops/array_ops.py +++ b/tensorflow/python/ops/array_ops.py @@ -144,6 +144,7 @@ def identity(input, name=None): # pylint: disable=redefined-builtin # pylint: disable=redefined-builtin,protected-access @tf_export("expand_dims") +@deprecation.deprecated_args(None, "Use the `axis` argument instead", "dim") def expand_dims(input, axis=None, name=None, dim=None): """Inserts a dimension of 1 into a tensor's shape. @@ -193,11 +194,7 @@ def expand_dims(input, axis=None, name=None, dim=None): Raises: ValueError: if both `dim` and `axis` are specified. """ - # TODO(aselle): Remove argument dim - if dim is not None: - if axis is not None: - raise ValueError("can't specify both 'dim' and 'axis'") - axis = dim + axis = deprecation.deprecated_argument_lookup("axis", axis, "dim", dim) return gen_array_ops.expand_dims(input, axis, name) @@ -387,7 +384,10 @@ def size_internal(input, name=None, optimize=True, out_type=dtypes.int32): """ if context.executing_eagerly() and not isinstance( input, (sparse_tensor.SparseTensor, sparse_tensor.SparseTensorValue)): - return np.prod(ops.convert_to_tensor(input)._shape_tuple()) # pylint: disable=protected-access + input = ops.convert_to_tensor(input) + np_out_type = out_type.as_numpy_dtype + num_elements = np.prod(input._shape_tuple(), dtype=np_out_type) # pylint: disable=protected-access + return ops.convert_to_tensor(num_elements, dtype=out_type) with ops.name_scope(name, "Size", [input]) as name: if isinstance(input, (sparse_tensor.SparseTensor, sparse_tensor.SparseTensorValue)): @@ -1057,9 +1057,7 @@ def unstack(value, num=None, axis=0, name="unstack"): `value[:, i, :, :]` and each tensor in `output` will have shape `(A, C, D)`. Etc.
- This is the opposite of stack. The numpy equivalent is - - tf.unstack(x, n) = np.unstack(x) + This is the opposite of stack. Args: value: A rank `R > 0` `Tensor` to be unstacked. @@ -1232,7 +1230,7 @@ def boolean_mask(tensor, mask, name="boolean_mask", axis=None): def _apply_mask_1d(reshaped_tensor, mask, axis=None): """Mask tensor along dimension 0 with a 1-D mask.""" - indices = squeeze(where(mask), squeeze_dims=[1]) + indices = squeeze(where(mask), axis=[1]) return gather(reshaped_tensor, indices, axis=axis) with ops.name_scope(name, values=[tensor, mask]): @@ -1563,6 +1561,16 @@ def matrix_transpose(a, name="matrix_transpose", conjugate=False): # pylint: enable=invalid-name +def _constant_if_small(value, shape, dtype, name): + try: + if np.prod(shape) < 1000: + return constant(value, shape=shape, dtype=dtype, name=name) + except TypeError: + # Happens when shape is a Tensor, list with Tensor elements, etc. + pass + return None + + @tf_export("zeros") def zeros(shape, dtype=dtypes.float32, name=None): """Creates a tensor with all elements set to zero. @@ -1593,8 +1601,15 @@ def zeros(shape, dtype=dtypes.float32, name=None): zero = "" else: zero = 0 + if not isinstance(shape, ops.Tensor): try: + # Create a constant if it won't be very big. Otherwise create a fill op + # to prevent serialized GraphDefs from becoming too large. + output = _constant_if_small(zero, shape, dtype, name) + if output is not None: + return output + # Go through tensor shapes to get int64-if-needed semantics shape = constant_op._tensor_shape_tensor_conversion_function( tensor_shape.TensorShape(shape)) @@ -1726,6 +1741,12 @@ def ones(shape, dtype=dtypes.float32, name=None): one = True if dtype == dtypes.bool else 1 if not isinstance(shape, ops.Tensor): try: + # Create a constant if it won't be very big. Otherwise create a fill op + # to prevent serialized GraphDefs from becoming too large. 
+ output = _constant_if_small(one, shape, dtype, name) + if output is not None: + return output + # Go through tensor shapes to get int64-if-needed semantics shape = constant_op._tensor_shape_tensor_conversion_function( tensor_shape.TensorShape(shape)) @@ -2555,6 +2576,8 @@ def sequence_mask(lengths, maxlen=None, dtype=dtypes.bool, name=None): @tf_export("squeeze") +@deprecation.deprecated_args(None, "Use the `axis` argument instead", + "squeeze_dims") def squeeze(input, axis=None, name=None, squeeze_dims=None): # pylint: disable=redefined-builtin """Removes dimensions of size 1 from the shape of a tensor. @@ -2595,10 +2618,8 @@ def squeeze(input, axis=None, name=None, squeeze_dims=None): Raises: ValueError: When both `squeeze_dims` and `axis` are specified. """ - if squeeze_dims is not None: - if axis is not None: - raise ValueError("Cannot specify both 'squeeze_dims' and 'axis'") - axis = squeeze_dims + axis = deprecation.deprecated_argument_lookup( + "axis", axis, "squeeze_dims", squeeze_dims) if np.isscalar(axis): axis = [axis] return gen_array_ops.squeeze(input, axis, name) diff --git a/tensorflow/python/ops/batch_norm_benchmark.py b/tensorflow/python/ops/batch_norm_benchmark.py index 5d68b47aeaef3a90973387ecd5b265eef1e96a5f..d83b81909755df8d187232e15ecda48b1cbf4557 100644 --- a/tensorflow/python/ops/batch_norm_benchmark.py +++ b/tensorflow/python/ops/batch_norm_benchmark.py @@ -25,6 +25,7 @@ import time from tensorflow.python.client import session as session_lib from tensorflow.python.framework import constant_op from tensorflow.python.framework import ops +from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import gen_nn_ops from tensorflow.python.ops import gradients_impl @@ -39,7 +40,7 @@ from tensorflow.python.platform import test def batch_norm_op(tensor, mean, variance, beta, gamma, scale): """Fused kernel for batch normalization.""" # _batch_norm_with_global_normalization is 
deprecated in v9 - ops.get_default_graph().graph_def_versions.producer = 8 + test_util.set_producer_version(ops.get_default_graph(), 8) # pylint: disable=protected-access return gen_nn_ops._batch_norm_with_global_normalization( tensor, mean, variance, beta, gamma, 0.001, scale) diff --git a/tensorflow/python/ops/boosted_trees_ops.py b/tensorflow/python/ops/boosted_trees_ops.py index 174d00987f9f76b4b07be73e5c29435bed7dfa06..2a2bcdd9d69b7a0aed1e7f3d3197cf6d7dd98451 100644 --- a/tensorflow/python/ops/boosted_trees_ops.py +++ b/tensorflow/python/ops/boosted_trees_ops.py @@ -115,7 +115,7 @@ class TreeEnsemble(object): def get_stamp_token(self): """Returns the current stamp token of the resource.""" - stamp_token, _, _, _ = ( + stamp_token, _, _, _, _ = ( gen_boosted_trees_ops.boosted_trees_get_ensemble_states( self.resource_handle)) return stamp_token @@ -124,17 +124,20 @@ class TreeEnsemble(object): """Returns states of the tree ensemble. Returns: - stamp_token, num_trees, num_finalized_trees, num_attempted_layers. + stamp_token, num_trees, num_finalized_trees, num_attempted_layers and + range of the nodes in the latest layer. """ - stamp_token, num_trees, num_finalized_trees, num_attempted_layers = ( - gen_boosted_trees_ops.boosted_trees_get_ensemble_states( - self.resource_handle)) + (stamp_token, num_trees, num_finalized_trees, num_attempted_layers, + nodes_range) = ( + gen_boosted_trees_ops.boosted_trees_get_ensemble_states( + self.resource_handle)) # Use identity to give names. return (array_ops.identity(stamp_token, name='stamp_token'), array_ops.identity(num_trees, name='num_trees'), array_ops.identity(num_finalized_trees, name='num_finalized_trees'), array_ops.identity( - num_attempted_layers, name='num_attempted_layers')) + num_attempted_layers, name='num_attempted_layers'), + array_ops.identity(nodes_range, name='last_layer_nodes_range')) def serialize(self): """Serializes the ensemble into proto and returns the serialized proto. 
diff --git a/tensorflow/python/ops/clip_ops.py b/tensorflow/python/ops/clip_ops.py index 49f8c665313562cb20dbe4494103ded16646c741..75c459a9cf10a90f6043d304b302e0a0806bf045 100644 --- a/tensorflow/python/ops/clip_ops.py +++ b/tensorflow/python/ops/clip_ops.py @@ -26,6 +26,7 @@ from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gen_array_ops from tensorflow.python.ops import gen_nn_ops from tensorflow.python.ops import math_ops from tensorflow.python.util.tf_export import tf_export @@ -70,6 +71,35 @@ def clip_by_value(t, clip_value_min, clip_value_max, _ = t.shape.merge_with(t_max.shape) return t_max + # TODO(scottzhu): switch to use new implementation in 2 weeks. + # return gen_math_ops.clip_by_value( + # t, clip_value_min, clip_value_max, name=name) + + +# TODO(scottzhu): switch to use new implementation in 2 weeks. +# @ops.RegisterGradient("ClipByValue") +def _clip_by_value_grad(op, grad): + """Returns grad of clip_by_value.""" + x = op.inputs[0] + y = op.inputs[1] + z = op.inputs[2] + gdtype = grad.dtype + sx = array_ops.shape(x) + sy = array_ops.shape(y) + sz = array_ops.shape(z) + gradshape = array_ops.shape(grad) + zeros = array_ops.zeros(gradshape, gdtype) + xymask = math_ops.less(x, y) + xzmask = math_ops.greater(x, z) + rx, ry = gen_array_ops.broadcast_gradient_args(sx, sy) + rx, rz = gen_array_ops.broadcast_gradient_args(sx, sz) + xgrad = array_ops.where(math_ops.logical_or(xymask, xzmask), zeros, grad) + ygrad = array_ops.where(xymask, grad, zeros) + zgrad = array_ops.where(xzmask, grad, zeros) + gx = array_ops.reshape(math_ops.reduce_sum(xgrad, rx), sx) + gy = array_ops.reshape(math_ops.reduce_sum(ygrad, ry), sy) + gz = array_ops.reshape(math_ops.reduce_sum(zgrad, rz), sz) + return (gx, gy, gz) @tf_export("clip_by_norm") diff --git a/tensorflow/python/ops/control_flow_grad.py
b/tensorflow/python/ops/control_flow_grad.py index 45955554cab130597e106660ff1fb4cdf7e9aeb1..6a551deb5ba55871b3a3fb144a6ecd2a3cbfcbd8 100644 --- a/tensorflow/python/ops/control_flow_grad.py +++ b/tensorflow/python/ops/control_flow_grad.py @@ -20,6 +20,7 @@ from __future__ import print_function from six.moves import xrange # pylint: disable=redefined-builtin +from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor from tensorflow.python.ops import control_flow_ops @@ -74,6 +75,11 @@ def _SwitchGrad(op, *grad): # At this point, we have created zero_grad guarded by the right switch. # Unfortunately, we may still get None here for not trainable data types. if zero_grad is None: + # For resource variables we get None always on the other branch, so bypass + # this. + if op.inputs[0].dtype == dtypes.resource: + return merge( + [grad[op_ctxt.branch]] * 2, name="cond_resource_grad")[0], None return None, None return merge(grad, name="cond_grad")[0], None else: diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py index 710287012eadea08d6c5be51a8e1be6cce6a5f65..a1bfe450c8986f448edba12b7431ccdd59c6f246 100644 --- a/tensorflow/python/ops/control_flow_ops.py +++ b/tensorflow/python/ops/control_flow_ops.py @@ -609,13 +609,13 @@ def _EnforceShapeInvariant(merge_var, next_var): """Check if the shapes of the loops variables are invariants. Args: - merge_vars: The list of tensors representing the initial values of the + merge_var: The list of tensors representing the initial values of the loop variables. - next_vars: The list of tensors representing the values of the loop + next_var: The list of tensors representing the values of the loop variables after one loop iteration. 
Raises: - ValueError: If any tensor in `merge_vars` has a more specific shape than + ValueError: If any tensor in `merge_var` has a more specific shape than its correspnding tensor in `next_var`. """ if isinstance(merge_var, ops.Tensor): @@ -833,7 +833,7 @@ class GradLoopState(object): if outer_grad_state: outer_forward_ctxt = outer_grad_state.forward_context else: - if not hasattr(forward_ctxt, 'outer_context'): + if not hasattr(forward_ctxt, "outer_context"): raise ValueError("Failed to call gradients on a while loop without" "properly serializing graph via MetaGraphDef") outer_forward_ctxt = forward_ctxt.outer_context @@ -1598,6 +1598,16 @@ class ControlFlowContext(object): last_context = self._context_stack.pop() graph._set_control_flow_context(last_context) + def EnterGradientColocation(self, op, gradient_uid): + """Start building a gradient colocated with an op.""" + if self._outer_context: + self._outer_context.EnterGradientColocation(op, gradient_uid) + + def ExitGradientColocation(self, op, gradient_uid): + """Stop building a gradient colocated with an op.""" + if self._outer_context: + self._outer_context.ExitGradientColocation(op, gradient_uid) + def ExitResult(self, result): """Make a list of tensors available in the outer context.""" if self._outer_context: @@ -2369,7 +2379,15 @@ class WhileContext(ControlFlowContext): def AddValue(self, val): """Add `val` to the current context and its outer context recursively.""" result = val - if val.name not in self._values: + new_value = val.name not in self._values + # Don't treat ops in this context as new values. Usually all known values + # are in self._values, except when we're importing a while loop inside this + # WhileContext. Since there's a cycle in this case, `val` may be part of the + # imported while loop but not yet processed by this context and added to + # self._values in _AddOpInternal. We only want to process external input + # tensors to the while loop here.
+ new_value &= val.op._control_flow_context is not self # pylint: disable=protected-access + if new_value: self._values.add(val.name) # If we are in a grad context and val is from its forward context, @@ -2955,7 +2973,7 @@ class WhileContext(ControlFlowContext): packed_exit_vars = nest.pack_sequence_as( structure=original_body_result, flat_sequence=exit_vars_with_tensor_arrays) - return (packed_exit_vars[0] if len(exit_vars) == 1 else packed_exit_vars) + return packed_exit_vars[0] if len(exit_vars) == 1 else packed_exit_vars def _FixControlInputsAndContext(self, enters): graph = ops.get_default_graph() @@ -3184,12 +3202,18 @@ def while_loop(cond, body = lambda i, lv: (i + 1, orig_body(*lv)) if context.executing_eagerly(): + try_to_pack = len(loop_vars) == 1 + packed = False # whether the body result was packed into a 1-item tuple + while cond(*loop_vars): loop_vars = body(*loop_vars) + if try_to_pack and not isinstance(loop_vars, (list, _basetuple)): + packed = True + loop_vars = (loop_vars,) if maximum_iterations is not None: return loop_vars[1] else: - return loop_vars + return loop_vars[0] if packed else loop_vars if shape_invariants is not None: if maximum_iterations is not None: diff --git a/tensorflow/python/ops/control_flow_ops_test.py b/tensorflow/python/ops/control_flow_ops_test.py index f22f3059d139d1bb7c7db57a2939184f1089f397..289df6f3016e9df6a42d694ae854b4f22fdf84f9 100644 --- a/tensorflow/python/ops/control_flow_ops_test.py +++ b/tensorflow/python/ops/control_flow_ops_test.py @@ -947,5 +947,28 @@ class CaseTest(test_util.TensorFlowTestCase): sess.run(output, feed_dict={x: 4}) +@test_util.with_c_api +class WhileLoopTestCase(test_util.TensorFlowTestCase): + + @test_util.run_in_graph_and_eager_modes() + def testWhileLoopWithSingleVariable(self): + i = constant_op.constant(0) + c = lambda i: math_ops.less(i, 10) + b = lambda i: math_ops.add(i, 1) + r = control_flow_ops.while_loop(c, b, [i]) + + self.assertEqual(self.evaluate(r), 10) + + 
@test_util.run_in_graph_and_eager_modes() + def testEagerWhileLoopWithSingleVariable_bodyReturnsTuple(self): + i = constant_op.constant(0) + c = lambda i: math_ops.less(i, 10) + b = lambda i: (math_ops.add(i, 1),) + r = control_flow_ops.while_loop(c, b, [i]) + + # Expect a tuple since that is what the body returns. + self.assertEqual(self.evaluate(r), (10,)) + + if __name__ == "__main__": googletest.main() diff --git a/tensorflow/python/ops/cudnn_rnn_grad.py b/tensorflow/python/ops/cudnn_rnn_grad.py new file mode 100644 index 0000000000000000000000000000000000000000..97331bb5b5cdecee7d65fe8d712e22fd80f25bae --- /dev/null +++ b/tensorflow/python/ops/cudnn_rnn_grad.py @@ -0,0 +1,47 @@ +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Gradients for CudnnRNN operators.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.framework import ops +from tensorflow.python.ops import gen_cudnn_rnn_ops + + +@ops.RegisterGradient("CudnnRNN") +def _cudnn_rnn_backward(op, *grads): + """Gradients for the CudnnRNN op.""" + if not op.get_attr("is_training"): + raise ValueError( + "CudnnRNN must set is_training to True to be used in gradients") + return gen_cudnn_rnn_ops.cudnn_rnn_backprop( + input=op.inputs[0], + input_h=op.inputs[1], + input_c=op.inputs[2], + params=op.inputs[3], + output=op.outputs[0], + output_h=op.outputs[1], + output_c=op.outputs[2], + output_backprop=grads[0], + output_h_backprop=grads[1], + output_c_backprop=grads[2], + reserve_space=op.outputs[3], + dropout=op.get_attr("dropout"), + seed=op.get_attr("seed"), + seed2=op.get_attr("seed2"), + rnn_mode=op.get_attr("rnn_mode"), + input_mode=op.get_attr("input_mode"), + direction=op.get_attr("direction")) diff --git a/tensorflow/python/ops/data_flow_ops.py b/tensorflow/python/ops/data_flow_ops.py index cb725199a8501d2a894f18a9b57c23de4e349374..62c5adc385a2e87d27298c72f8dd2f67303119df 100644 --- a/tensorflow/python/ops/data_flow_ops.py +++ b/tensorflow/python/ops/data_flow_ops.py @@ -571,7 +571,7 @@ class QueueBase(object): name=name) def is_closed(self, name=None): - """ Returns true if queue is closed. + """Returns true if queue is closed. This operation returns true if the queue is closed and false if the queue is open. @@ -1563,7 +1563,7 @@ class BaseStagingArea(object): of the staging area. Args: - vals: A tensor, a list or tuple of tensors, or a dictionary.. + vals: A tensor, a list or tuple of tensors, or a dictionary.
Returns: A (tensors, indices) tuple where `tensors` is a list of `Tensor` objects @@ -1582,7 +1582,7 @@ class BaseStagingArea(object): (sorted(vals.keys()), sorted(self._names))) # The order of values in `self._names` indicates the order in which the # tensors in the dictionary `vals` must be listed. - vals, indices, n = zip(*[(vals[k], i, k) + vals, indices, _ = zip(*[(vals[k], i, k) for i, k in enumerate(self._names) if k in vals]) else: @@ -1612,7 +1612,7 @@ class BaseStagingArea(object): for val, i in zip(vals, indices): dtype, shape = self._dtypes[i], self._shapes[i] # Check dtype - if not val.dtype == dtype: + if val.dtype != dtype: raise ValueError("Datatypes do not match. '%s' != '%s'" % (str(val.dtype), str(dtype))) @@ -1626,7 +1626,7 @@ class BaseStagingArea(object): def _create_device_transfers(self, tensors): """Encode inter-device transfers if the current device - is not the same as the Staging Area's device + is not the same as the Staging Area's device. """ if not isinstance(tensors, (tuple, list)): @@ -1739,11 +1739,6 @@ class StagingArea(BaseStagingArea): Args: dtypes: A list of types. The length of dtypes must equal the number of tensors in each element. - capacity: (Optional.) Maximum number of elements. - An integer. If zero, the Staging Area is unbounded - memory_limit: (Optional.) Maximum number of bytes of all tensors - in the Staging Area. - An integer. If zero, the Staging Area is unbounded shapes: (Optional.) Constraints on the shapes of tensors in an element. A list of shape tuples or None. This list is the same length as dtypes. If the shape of any tensors in the element are constrained, @@ -1754,6 +1749,11 @@ class StagingArea(BaseStagingArea): shared_name: (Optional.) A name to be used for the shared object. By passing the same name to two different python objects they will share the underlying staging area. Must be a string. + capacity: (Optional.) Maximum number of elements. + An integer. 
If zero, the Staging Area is unbounded + memory_limit: (Optional.) Maximum number of bytes of all tensors + in the Staging Area. + An integer. If zero, the Staging Area is unbounded Raises: ValueError: If one of the arguments is invalid. @@ -1782,7 +1782,7 @@ class StagingArea(BaseStagingArea): """ with ops.name_scope(name, "%s_put" % self._name, self._scope_vals(values)) as scope: - + if not isinstance(values, (list, tuple, dict)): values = [values] @@ -1911,7 +1911,8 @@ class StagingArea(BaseStagingArea): class MapStagingArea(BaseStagingArea): - """A `MapStagingArea` is a TensorFlow data structure that stores tensors across multiple steps, and exposes operations that can put and get tensors. + """A `MapStagingArea` is a TensorFlow data structure that stores tensors + across multiple steps, and exposes operations that can put and get tensors. Each `MapStagingArea` element is a (key, value) pair. Only int64 keys are supported, other types should be @@ -2375,7 +2376,7 @@ class RecordInput(object): return records else: with ops.name_scope(self._name): - batch_list = [[] for i in six.moves.range(self._batches)] + batch_list = [[] for _ in six.moves.range(self._batches)] records = array_ops.split(records, self._batch_size, 0) records = [array_ops.reshape(record, []) for record in records] for index, protobuf in zip(six.moves.range(len(records)), records): diff --git a/tensorflow/python/ops/distributions/BUILD b/tensorflow/python/ops/distributions/BUILD index 9d9ede7ad75f4eafa91ad051458afbcb6dc8f7b5..e7ad028376b841fc485f7c9cded2a3cdd9dcc153 100644 --- a/tensorflow/python/ops/distributions/BUILD +++ b/tensorflow/python/ops/distributions/BUILD @@ -8,9 +8,13 @@ licenses(["notice"]) # Apache 2.0 py_library( name = "distributions", - srcs = glob(["*.py"]), + srcs = glob( + ["*.py"], + exclude = ["util.py"], + ), srcs_version = "PY2AND3", deps = [ + ":util", "//tensorflow/python:array_ops", "//tensorflow/python:check_ops", "//tensorflow/python:control_flow_ops", @@ -26,3 
+30,23 @@ py_library( "@six_archive//:six", ], ) + +py_library( + name = "util", + srcs = ["util.py"], + srcs_version = "PY2AND3", + deps = [ + "//tensorflow/python:array_ops", + "//tensorflow/python:check_ops", + "//tensorflow/python:control_flow_ops", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:math_ops", + "//tensorflow/python:nn", + "//tensorflow/python:nn_ops", + "//tensorflow/python:random_ops", + "//tensorflow/python:special_math_ops", + "//tensorflow/python:tensor_util", + "//third_party/py/numpy", + "@six_archive//:six", + ], +) diff --git a/tensorflow/python/ops/distributions/bernoulli.py b/tensorflow/python/ops/distributions/bernoulli.py index 68aaf3815e7e2b21c9550562aa49195569c8ea43..2c9f0e9a32dd3f2caa81befebc06dcc740c832cb 100644 --- a/tensorflow/python/ops/distributions/bernoulli.py +++ b/tensorflow/python/ops/distributions/bernoulli.py @@ -72,7 +72,7 @@ class Bernoulli(distribution.Distribution): ValueError: If p and logits are passed, or if neither are passed. """ parameters = locals() - with ops.name_scope(name): + with ops.name_scope(name) as name: self._logits, self._probs = distribution_util.get_logits_and_probs( logits=logits, probs=probs, diff --git a/tensorflow/python/ops/distributions/beta.py b/tensorflow/python/ops/distributions/beta.py index 469bcadb8ea3a0ec2a85d3a72c0ca5ba08796856..8beab99bf868cdb16dc842eae113cf55dd565131 100644 --- a/tensorflow/python/ops/distributions/beta.py +++ b/tensorflow/python/ops/distributions/beta.py @@ -151,7 +151,7 @@ class Beta(distribution.Distribution): name: Python `str` name prefixed to Ops created by this class. 
""" parameters = locals() - with ops.name_scope(name, values=[concentration1, concentration0]): + with ops.name_scope(name, values=[concentration1, concentration0]) as name: self._concentration1 = self._maybe_assert_valid_concentration( ops.convert_to_tensor(concentration1, name="concentration1"), validate_args) @@ -323,7 +323,7 @@ class BetaWithSoftplusConcentration(Beta): name="BetaWithSoftplusConcentration"): parameters = locals() with ops.name_scope(name, values=[concentration1, - concentration0]) as ns: + concentration0]) as name: super(BetaWithSoftplusConcentration, self).__init__( concentration1=nn.softplus(concentration1, name="softplus_concentration1"), @@ -331,7 +331,7 @@ class BetaWithSoftplusConcentration(Beta): name="softplus_concentration0"), validate_args=validate_args, allow_nan_stats=allow_nan_stats, - name=ns) + name=name) self._parameters = parameters diff --git a/tensorflow/python/ops/distributions/bijector_impl.py b/tensorflow/python/ops/distributions/bijector_impl.py index ed435557fde7a2e8a0a4f7eef4e240daef0565e7..4ebc600d034603a80d4fae93b1339e1a1feea038 100644 --- a/tensorflow/python/ops/distributions/bijector_impl.py +++ b/tensorflow/python/ops/distributions/bijector_impl.py @@ -23,7 +23,6 @@ import collections import contextlib import re -import numpy as np import six from tensorflow.python.framework import dtypes @@ -31,8 +30,8 @@ from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import tensor_util from tensorflow.python.ops import array_ops +from tensorflow.python.ops import check_ops from tensorflow.python.ops import math_ops -from tensorflow.python.util.tf_export import tf_export __all__ = [ @@ -41,23 +40,24 @@ __all__ = [ class _Mapping(collections.namedtuple( - "_Mapping", ["x", "y", "ildj", "kwargs"])): + "_Mapping", ["x", "y", "ildj_map", "kwargs"])): """Helper class to make it easier to manage caching in `Bijector`.""" - def __new__(cls, x=None, y=None, 
ildj=None, kwargs=None): + def __new__(cls, x=None, y=None, ildj_map=None, kwargs=None): """Custom __new__ so namedtuple items have defaults. Args: x: `Tensor`. Forward. y: `Tensor`. Inverse. - ildj: `Tensor`. Inverse log det Jacobian. + ildj_map: `Dictionary`. This is a mapping from event_ndims to a `Tensor` + representing the inverse log det jacobian. kwargs: Python dictionary. Extra args supplied to forward/inverse/etc functions. Returns: mapping: New instance of _Mapping. """ - return super(_Mapping, cls).__new__(cls, x, y, ildj, kwargs) + return super(_Mapping, cls).__new__(cls, x, y, ildj_map, kwargs) @property def x_key(self): @@ -69,13 +69,14 @@ class _Mapping(collections.namedtuple( """Returns key used for caching X=g^{-1}(Y).""" return (self.y,) + self._deep_tuple(tuple(sorted(self.kwargs.items()))) - def merge(self, x=None, y=None, ildj=None, kwargs=None, mapping=None): + def merge(self, x=None, y=None, ildj_map=None, kwargs=None, mapping=None): """Returns new _Mapping with args merged with self. Args: x: `Tensor`. Forward. y: `Tensor`. Inverse. - ildj: `Tensor`. Inverse log det Jacobian. + ildj_map: `Dictionary`. This is a mapping from event_ndims to a `Tensor` + representing the inverse log det jacobian. kwargs: Python dictionary. Extra args supplied to forward/inverse/etc functions. mapping: Instance of _Mapping to merge. Can only be specified if no other @@ -88,15 +89,30 @@ class _Mapping(collections.namedtuple( ValueError: if mapping and any other arg is not `None`. 
""" if mapping is None: - mapping = _Mapping(x=x, y=y, ildj=ildj, kwargs=kwargs) - elif not all(arg is None for arg in [x, y, ildj, kwargs]): - raise ValueError("Cannot specify mapping and individual args.") + mapping = _Mapping(x=x, y=y, ildj_map=ildj_map, kwargs=kwargs) + elif any(arg is not None for arg in [x, y, ildj_map, kwargs]): + raise ValueError("Cannot simultaneously specify mapping and individual " + "arguments.") + return _Mapping( x=self._merge(self.x, mapping.x), y=self._merge(self.y, mapping.y), - ildj=self._merge(self.ildj, mapping.ildj), + ildj_map=self._merge_dicts(self.ildj_map, mapping.ildj_map), kwargs=self._merge(self.kwargs, mapping.kwargs)) + def _merge_dicts(self, old=None, new=None): + """Helper to merge two dictionaries.""" + old = dict() if old is None else old + new = dict() if new is None else new + for k, v in six.iteritems(new): + val = old.get(k, None) + if val is not None and val != v: + raise ValueError("Found different value for existing key " + "(key:{} old_value:{} new_value:{}".format( + k, old[k], v)) + old[k] = v + return old + def _merge(self, old, new): """Helper to merge which handles merging one value.""" if old is None: @@ -112,7 +128,6 @@ class _Mapping(collections.namedtuple( @six.add_metaclass(abc.ABCMeta) -@tf_export("distributions.bijectors.Bijector") class Bijector(object): r"""Interface for transformations of a `Distribution` sample. @@ -137,11 +152,11 @@ class Bijector(object): 2. Inverse\ Useful for "reversing" a transformation to compute one probability in terms of another. - 3. `(log o det o Jacobian o inverse)(x)`\ + 3. `log_det_jacobian(x)`\ "The log of the determinant of the matrix of all first-order partial derivatives of the inverse function."\ Useful for inverting a transformation to compute one probability in terms - of another. Geometrically, the det(Jacobian) is the volume of the + of another. 
Geometrically, the Jacobian determinant is the volume of the transformation and is used to scale the probability. By convention, transformations of random variables are named in terms of the @@ -164,7 +179,7 @@ class Bijector(object): ```python def transformed_log_prob(bijector, log_prob, x): - return (bijector.inverse_log_det_jacobian(x) + + return (bijector.inverse_log_det_jacobian(x, event_ndims=0) + log_prob(bijector.inverse(x))) ``` @@ -199,9 +214,11 @@ class Bijector(object): ```python class Exp(Bijector): - def __init__(self, event_ndims=0, validate_args=False, name="exp"): + def __init__(self, validate_args=False, name="exp"): super(Exp, self).__init__( - event_ndims=event_ndims, validate_args=validate_args, name=name) + validate_args=validate_args, + forward_min_event_ndims=0, + name=name) def _forward(self, x): return math_ops.exp(x) @@ -213,10 +230,11 @@ class Bijector(object): return -self._forward_log_det_jacobian(self._inverse(y)) def _forward_log_det_jacobian(self, x): - if self.event_ndims is None: - raise ValueError("Jacobian requires known event_ndims.") - event_dims = array_ops.shape(x)[-self.event_ndims:] - return math_ops.reduce_sum(x, axis=event_dims) + # Notice that we needn't do any reducing, even when`event_ndims > 0`. + # The base Bijector class will handle reducing for us; it knows how + # to do so because we called `super` `__init__` with + # `forward_min_event_ndims = 0`. + return x ``` - "Affine" @@ -237,18 +255,50 @@ class Bijector(object): MultivariateNormal(inv(sqrtSigma) * (y - mu); 0, I_d) ``` - #### Jacobian + #### Min_event_ndims and Naming + + Bijectors are named for the dimensionality of data they act on (i.e. without + broadcasting). We can think of bijectors having an intrinsic `min_event_ndims` + , which is the minimum number of dimensions for the bijector act on. For + instance, a Cholesky decomposition requires a matrix, and hence + `min_event_ndims=2`. 
+ + Some examples: + + `AffineScalar: min_event_ndims=0` + `Affine: min_event_ndims=1` + `Cholesky: min_event_ndims=2` + `Exp: min_event_ndims=0` + `Sigmoid: min_event_ndims=0` + `SoftmaxCentered: min_event_ndims=1` + + Note the difference between `Affine` and `AffineScalar`. `AffineScalar` + operates on scalar events, whereas `Affine` operates on vector-valued events. - The Jacobian is a reduction over event dims. To see this, consider the `Exp` - `Bijector` applied to a `Tensor` which has sample, batch, and event (S, B, E) - shape semantics. Suppose the `Tensor`'s partitioned-shape is `(S=[4], B=[2], - E=[3, 3])`. The shape of the `Tensor` returned by `forward` and `inverse` is - unchanged, i.e., `[4, 2, 3, 3]`. However the shape returned by - `inverse_log_det_jacobian` is `[4, 2]` because the Jacobian is a reduction - over the event dimensions. + More generally, there is a `forward_min_event_ndims` and an + `inverse_min_event_ndims`. In most cases, these will be the same. + However, for some shape changing bijectors, these will be different + (e.g. a bijector which pads an extra dimension at the end, might have + `forward_min_event_ndims=0` and `inverse_min_event_ndims=1`. - It is sometimes useful to implement the inverse Jacobian as the negative - forward Jacobian. For example, + + #### Jacobian Determinant + + The Jacobian determinant is a reduction over `event_ndims - min_event_ndims` + (`forward_min_event_ndims` for `forward_log_det_jacobian` and + `inverse_min_event_ndims` for `inverse_log_det_jacobian`). + To see this, consider the `Exp` `Bijector` applied to a `Tensor` which has + sample, batch, and event (S, B, E) shape semantics. Suppose the `Tensor`'s + partitioned-shape is `(S=[4], B=[2], E=[3, 3])`. The shape of the `Tensor` + returned by `forward` and `inverse` is unchanged, i.e., `[4, 2, 3, 3]`. + However the shape returned by `inverse_log_det_jacobian` is `[4, 2]` because + the Jacobian determinant is a reduction over the event dimensions. 
+ + Another example is the `Affine` `Bijector`. Because `min_event_ndims = 1`, the + Jacobian determinant reduction is over `event_ndims - 1`. + + It is sometimes useful to implement the inverse Jacobian determinant as the + negative forward Jacobian determinant. For example, ```python def _inverse_log_det_jacobian(self, y): @@ -279,9 +329,54 @@ class Bijector(object): The claim follows from [properties of determinant]( https://en.wikipedia.org/wiki/Determinant#Multiplicativity_and_matrix_groups). - Generally its preferable to directly implement the inverse Jacobian. This - should have superior numerical stability and will often share subgraphs with - the `_inverse` implementation. + Generally its preferable to directly implement the inverse Jacobian + determinant. This should have superior numerical stability and will often + share subgraphs with the `_inverse` implementation. + + #### Is_constant_jacobian + + Certain bijectors will have constant jacobian matrices. For instance, the + `Affine` bijector encodes multiplication by a matrix plus a shift, with + jacobian matrix, the same aforementioned matrix. + + `is_constant_jacobian` encodes the fact that the jacobian matrix is constant. + The semantics of this argument are the following: + + * Repeated calls to "log_det_jacobian" functions with the same + `event_ndims` (but not necessarily same input), will return the first + computed jacobian (because the matrix is constant, and hence is input + independent). + * `log_det_jacobian` implementations are merely broadcastable to the true + `log_det_jacobian` (because, again, the jacobian matrix is input + independent). Specifically, `log_det_jacobian` is implemented as the + log jacobian determinant for a single input. 
+ + ```python + class Identity(Bijector): + + def __init__(self, validate_args=False, name="identity"): + super(Identity, self).__init__( + is_constant_jacobian=True, + validate_args=validate_args, + forward_min_event_ndims=0, + name=name) + + def _forward(self, x): + return x + + def _inverse(self, y): + return y + + def _inverse_log_det_jacobian(self, y): + return -self._forward_log_det_jacobian(self._inverse(y)) + + def _forward_log_det_jacobian(self, x): + # The full log jacobian determinant would be array_ops.zero_like(x). + # However, we circumvent materializing that, since the jacobian + # calculation is input independent, and we specify it for one input. + return constant_op.constant(0., x.dtype.base_dtype) + + ``` #### Subclass Requirements @@ -364,14 +459,14 @@ class Bijector(object): ==> (-1., 1.) # The |dX/dY| is constant, == 1. So Log|dX/dY| == 0. - abs.inverse_log_det_jacobian(1.) + abs.inverse_log_det_jacobian(1., event_ndims=0) ==> (0., 0.) # Special case handling of 0. abs.inverse(0.) ==> (0., 0.) - abs.inverse_log_det_jacobian(0.) + abs.inverse_log_det_jacobian(0., event_ndims=0) ==> (0., 0.) ``` @@ -379,11 +474,12 @@ class Bijector(object): @abc.abstractmethod def __init__(self, - event_ndims=None, graph_parents=None, is_constant_jacobian=False, validate_args=False, dtype=None, + forward_min_event_ndims=None, + inverse_min_event_ndims=None, name=None): """Constructs Bijector. @@ -392,42 +488,61 @@ class Bijector(object): Examples: ```python - # Create the Y = g(X) = X transform which operates on vector events. - identity = Identity(event_ndims=1) + # Create the Y = g(X) = X transform. + identity = Identity() - # Create the Y = g(X) = exp(X) transform which operates on matrices. - exp = Exp(event_ndims=2) + # Create the Y = g(X) = exp(X) transform. + exp = Exp() ``` See `Bijector` subclass docstring for more details and specific examples. Args: - event_ndims: number of dimensions associated with event coordinates. 
graph_parents: Python list of graph prerequisites of this `Bijector`. - is_constant_jacobian: Python `bool` indicating that the Jacobian is not a - function of the input. + is_constant_jacobian: Python `bool` indicating that the Jacobian matrix is + not a function of the input. validate_args: Python `bool`, default `False`. Whether to validate input with asserts. If `validate_args` is `False`, and the inputs are invalid, correct behavior is not guaranteed. dtype: `tf.dtype` supported by this `Bijector`. `None` means dtype is not enforced. + forward_min_event_ndims: Python `integer` indicating the minimum number of + dimensions `forward` operates on. + inverse_min_event_ndims: Python `integer` indicating the minimum number of + dimensions `inverse` operates on. Will be set to + `forward_min_event_ndims` by default, if no value is provided. name: The name to give Ops created by the initializer. Raises: + ValueError: If neither `forward_min_event_ndims` and + `inverse_min_event_ndims` are specified, or if either of them is + negative. ValueError: If a member of `graph_parents` is not a `Tensor`. 
""" - self._event_ndims = ( - ops.convert_to_tensor(event_ndims, dtype=dtypes.int32) - if event_ndims is not None else None) self._graph_parents = graph_parents or [] + + if forward_min_event_ndims is None and inverse_min_event_ndims is None: + raise ValueError("Must specify at least one of `forward_min_event_ndims` " + "and `inverse_min_event_ndims`.") + elif inverse_min_event_ndims is None: + inverse_min_event_ndims = forward_min_event_ndims + elif forward_min_event_ndims is None: + forward_min_event_ndims = inverse_min_event_ndims + + if forward_min_event_ndims < 0: + raise ValueError("forward_min_event_ndims must be a non-negative " + "integer.") + if inverse_min_event_ndims < 0: + raise ValueError("inverse_min_event_ndims must be a non-negative " + "integer.") + self._forward_min_event_ndims = forward_min_event_ndims + self._inverse_min_event_ndims = inverse_min_event_ndims self._is_constant_jacobian = is_constant_jacobian + self._constant_ildj_map = {} self._validate_args = validate_args self._dtype = dtype self._from_y = {} self._from_x = {} - # Using abbreviation ildj for "inverse log det Jacobian." - # This variable is not `None` iff is_constant_jacobian is `True`. - self._constant_ildj = None if name: self._name = name else: @@ -442,21 +557,27 @@ class Bijector(object): if t is None or not tensor_util.is_tensor(t): raise ValueError("Graph parent item %d is not a Tensor; %s." 
% (i, t)) - @property - def event_ndims(self): - """Returns then number of event dimensions this bijector operates on.""" - return self._event_ndims - @property def graph_parents(self): """Returns this `Bijector`'s graph_parents as a Python list.""" return self._graph_parents + @property + def forward_min_event_ndims(self): + """Returns the minimal number of dimensions bijector.forward operates on.""" + return self._forward_min_event_ndims + + @property + def inverse_min_event_ndims(self): + """Returns the minimal number of dimensions bijector.inverse operates on.""" + return self._inverse_min_event_ndims + @property def is_constant_jacobian(self): - """Returns true iff the Jacobian is not a function of x. + """Returns true iff the Jacobian matrix is not a function of x. - Note: Jacobian is either constant for both forward and inverse or neither. + Note: Jacobian matrix is either constant for both forward and inverse or + neither. Returns: is_constant_jacobian: Python `bool`. @@ -653,36 +774,57 @@ class Bijector(object): return self._call_inverse(y, name) def _inverse_log_det_jacobian(self, y): - """Subclass implementation of `inverse_log_det_jacobian` public function.""" + """Subclass implementation of `inverse_log_det_jacobian` public function. + + In particular, this method differs from the public function, in that it + does not take `event_ndims`. Thus, this implements the minimal Jacobian + determinant calculation (i.e. over `inverse_min_event_ndims`). + + Args: + y: `Tensor`. The input to the "inverse_log_det_jacobian" evaluation. + Returns: + inverse_log_det_jacobian: `Tensor`, if this bijector is injective. + If not injective, returns the k-tuple containing jacobians for the + unique `k` points `(x1, ..., xk)` such that `g(xi) = y`. 
+ """ raise NotImplementedError("inverse_log_det_jacobian not implemented.") - def _call_inverse_log_det_jacobian(self, y, name, **kwargs): + def _call_inverse_log_det_jacobian(self, y, event_ndims, name, **kwargs): with self._name_scope(name, [y]): - if self._constant_ildj is not None: - return self._constant_ildj + if event_ndims in self._constant_ildj_map: + return self._constant_ildj_map[event_ndims] y = ops.convert_to_tensor(y, name="y") self._maybe_assert_dtype(y) if not self._is_injective: # No caching for non-injective - return self._inverse_log_det_jacobian(y, **kwargs) + ildjs = self._inverse_log_det_jacobian(y, **kwargs) + return tuple(self._reduce_jacobian_det_over_event( + y, ildj, self.inverse_min_event_ndims, event_ndims) + for ildj in ildjs) mapping = self._lookup(y=y, kwargs=kwargs) - if mapping.ildj is not None: - return mapping.ildj + if mapping.ildj_map is not None and event_ndims in mapping.ildj_map: + return mapping.ildj_map[event_ndims] try: x = None # Not needed; leave cache as is. ildj = self._inverse_log_det_jacobian(y, **kwargs) + ildj = self._reduce_jacobian_det_over_event( + y, ildj, self.inverse_min_event_ndims, event_ndims) except NotImplementedError as original_exception: try: x = mapping.x if mapping.x is not None else self._inverse(y, **kwargs) ildj = -self._forward_log_det_jacobian(x, **kwargs) + ildj = self._reduce_jacobian_det_over_event( + x, ildj, self.forward_min_event_ndims, event_ndims) except NotImplementedError: raise original_exception - mapping = mapping.merge(x=x, ildj=ildj) + + mapping = mapping.merge(x=x, ildj_map={event_ndims: ildj}) self._cache(mapping) if self.is_constant_jacobian: - self._constant_ildj = mapping.ildj - return mapping.ildj + self._constant_ildj_map[event_ndims] = ildj + return ildj - def inverse_log_det_jacobian(self, y, name="inverse_log_det_jacobian"): + def inverse_log_det_jacobian( + self, y, event_ndims, name="inverse_log_det_jacobian"): """Returns the (log o det o Jacobian o inverse)(y). 
Mathematically, returns: `log(det(dX/dY))(Y)`. (Recall that: `X=g^{-1}(Y)`.) @@ -691,7 +833,12 @@ class Bijector(object): evaluated at `g^{-1}(y)`. Args: - y: `Tensor`. The input to the "inverse" Jacobian evaluation. + y: `Tensor`. The input to the "inverse" Jacobian determinant evaluation. + event_ndims: Number of dimensions in the probabilistic events being + transformed. Must be greater than or equal to + `self.inverse_min_event_ndims`. The result is summed over the final + dimensions to produce a scalar Jacobian determinant for each event, + i.e. it has shape `y.shape.ndims - event_ndims` dimensions. name: The name to give this op. Returns: @@ -705,45 +852,74 @@ class Bijector(object): `self.dtype`. NotImplementedError: if `_inverse_log_det_jacobian` is not implemented. """ - return self._call_inverse_log_det_jacobian(y, name) + with ops.control_dependencies(self._check_valid_event_ndims( + min_event_ndims=self.inverse_min_event_ndims, event_ndims=event_ndims)): + return self._call_inverse_log_det_jacobian(y, event_ndims, name) def _forward_log_det_jacobian(self, x): - """Subclass implementation of `forward_log_det_jacobian`.""" + """Subclass implementation of `forward_log_det_jacobian` public function. + + In particular, this method differs from the public function, in that it + does not take `event_ndims`. Thus, this implements the minimal Jacobian + determinant calculation (i.e. over `forward_min_event_ndims`). + + Args: + x: `Tensor`. The input to the "forward_log_det_jacobian" evaluation. + + Returns: + forward_log_det_jacobian: `Tensor`, if this bijector is injective. + If not injective, returns the k-tuple containing jacobians for the + unique `k` points `(x1, ..., xk)` such that `g(xi) = y`. 
+ """ + raise NotImplementedError( "forward_log_det_jacobian not implemented.") - def _call_forward_log_det_jacobian(self, x, name, **kwargs): + def _call_forward_log_det_jacobian(self, x, event_ndims, name, **kwargs): with self._name_scope(name, [x]): - if self._constant_ildj is not None: + if event_ndims in self._constant_ildj_map: # Need "-1. *" to avoid invalid-unary-operand-type linter warning. - return -1. * self._constant_ildj + return -1. * self._constant_ildj_map[event_ndims] x = ops.convert_to_tensor(x, name="x") self._maybe_assert_dtype(x) if not self._is_injective: - return self._forward_log_det_jacobian(x, **kwargs) # No caching. + fldjs = self._forward_log_det_jacobian(x, **kwargs) # No caching. + return tuple(self._reduce_jacobian_det_over_event( + x, fldj, self.forward_min_event_ndims, event_ndims) + for fldj in fldjs) mapping = self._lookup(x=x, kwargs=kwargs) - if mapping.ildj is not None: - return -mapping.ildj + if mapping.ildj_map is not None and event_ndims in mapping.ildj_map: + return -mapping.ildj_map[event_ndims] try: y = None # Not needed; leave cache as is. 
ildj = -self._forward_log_det_jacobian(x, **kwargs) + ildj = self._reduce_jacobian_det_over_event( + x, ildj, self.forward_min_event_ndims, event_ndims) except NotImplementedError as original_exception: try: y = mapping.y if mapping.y is not None else self._forward(x, **kwargs) ildj = self._inverse_log_det_jacobian(y, **kwargs) + ildj = self._reduce_jacobian_det_over_event( + y, ildj, self.inverse_min_event_ndims, event_ndims) except NotImplementedError: raise original_exception - mapping = mapping.merge(y=y, ildj=ildj) + mapping = mapping.merge(y=y, ildj_map={event_ndims: ildj}) self._cache(mapping) if self.is_constant_jacobian: - self._constant_ildj = mapping.ildj - return -mapping.ildj + self._constant_ildj_map[event_ndims] = ildj + return -ildj - def forward_log_det_jacobian(self, x, name="forward_log_det_jacobian"): + def forward_log_det_jacobian( + self, x, event_ndims, name="forward_log_det_jacobian"): """Returns both the forward_log_det_jacobian. Args: - x: `Tensor`. The input to the "forward" Jacobian evaluation. + x: `Tensor`. The input to the "forward" Jacobian determinant evaluation. + event_ndims: Number of dimensions in the probabilistic events being + transformed. Must be greater than or equal to + `self.forward_min_event_ndims`. The result is summed over the final + dimensions to produce a scalar Jacobian determinant for each event, + i.e. it has shape `x.shape.ndims - event_ndims` dimensions. name: The name to give this op. 
Returns: @@ -761,7 +937,9 @@ class Bijector(object): raise NotImplementedError( "forward_log_det_jacobian cannot be implemented for non-injective " "transforms.") - return self._call_forward_log_det_jacobian(x, name) + with ops.control_dependencies(self._check_valid_event_ndims( + min_event_ndims=self.forward_min_event_ndims, event_ndims=event_ndims)): + return self._call_forward_log_det_jacobian(x, event_ndims, name) @contextlib.contextmanager def _name_scope(self, name=None, values=None): @@ -779,9 +957,6 @@ class Bijector(object): def _cache(self, mapping): """Helper which stores mapping info in forward/inverse dicts.""" - if self._constant_ildj is not None: - # Fold in ildj if known constant Jacobian. - mapping = mapping.merge(ildj=self._constant_ildj) # Merging from lookup is an added check that we're not overwriting anything # which is not None. mapping = mapping.merge(mapping=self._lookup( @@ -803,22 +978,66 @@ class Bijector(object): return self._from_y.get(mapping.y_key, mapping) return mapping - def _event_dims_tensor(self, sample): - """Return a 1D `int32` tensor: `range(rank(sample))[-event_ndims:]`.""" - if self.event_ndims is None: - raise ValueError("Jacobian cannot be computed with unknown event_ndims") - static_event_ndims = tensor_util.constant_value(self.event_ndims) - static_rank = sample.get_shape().ndims - if static_event_ndims is not None and static_rank is not None: - return ops.convert_to_tensor( - static_rank + np.arange(-static_event_ndims, 0).astype(np.int32)) - - if static_event_ndims is not None: - event_range = np.arange(-static_event_ndims, 0).astype(np.int32) - else: - event_range = math_ops.range(-self.event_ndims, 0, dtype=dtypes.int32) - - if static_rank is not None: - return event_range + static_rank + def _reduce_jacobian_det_over_event( + self, y, ildj, min_event_ndims, event_ndims): + """Reduce jacobian over event_ndims - min_event_ndims.""" + if not self.is_constant_jacobian: + return math_ops.reduce_sum( + ildj, + 
self._get_event_reduce_dims(min_event_ndims, event_ndims)) + + # In this case, we need to tile the jacobian over the event and reduce. + y_rank = array_ops.rank(y) + y_shape = array_ops.shape(y)[ + y_rank - event_ndims : y_rank - min_event_ndims] + + ones = array_ops.ones(y_shape, ildj.dtype) + reduced_ildj = math_ops.reduce_sum( + ones * ildj, + axis=self._get_event_reduce_dims(min_event_ndims, event_ndims)) + # The multiplication by ones can change the inferred static shape so we try + # to recover as much as possible. + if (isinstance(event_ndims, int) and + y.get_shape().ndims and ildj.get_shape().ndims): + y_shape = y.get_shape() + y_shape = y_shape[y_shape.ndims - event_ndims : + y_shape.ndims - min_event_ndims] + ildj_shape = ildj.get_shape() + broadcast_shape = array_ops.broadcast_static_shape( + ildj_shape, y_shape) + reduced_ildj.set_shape( + broadcast_shape[: broadcast_shape.ndims - ( + event_ndims - min_event_ndims)]) + + return reduced_ildj + + def _get_event_reduce_dims(self, min_event_ndims, event_ndims): + """Compute the reduction dimensions given event_ndims.""" + min_event_ndims_ = (min_event_ndims if isinstance(min_event_ndims, int) + else tensor_util.constant_value(min_event_ndims)) + event_ndims_ = (event_ndims if isinstance(event_ndims, int) + else tensor_util.constant_value(event_ndims)) + + if min_event_ndims_ is not None and event_ndims_ is not None: + return [-index for index in range(1, event_ndims_ - min_event_ndims_ + 1)] else: - return event_range + array_ops.rank(sample) + reduce_ndims = event_ndims - min_event_ndims + return math_ops.range(-reduce_ndims, 0) + + def _check_valid_event_ndims(self, min_event_ndims, event_ndims): + """Check whether event_ndims is atleast min_event_ndims.""" + min_event_ndims_ = (min_event_ndims if isinstance(min_event_ndims, int) + else tensor_util.constant_value(min_event_ndims)) + event_ndims_ = (event_ndims if isinstance(event_ndims, int) + else tensor_util.constant_value(event_ndims)) + + if 
min_event_ndims_ is not None and event_ndims_ is not None: + if min_event_ndims_ > event_ndims_: + raise ValueError("event_ndims ({}) must be larger than " + "min_event_ndims ({})".format( + event_ndims_, min_event_ndims_)) + return [] + + if self.validate_args: + return [check_ops.assert_greater_equal(event_ndims, min_event_ndims)] + return [] diff --git a/tensorflow/python/ops/distributions/bijector_test_util.py b/tensorflow/python/ops/distributions/bijector_test_util.py index ff3535c62642d98bdd9b18808f45deae27d6d88d..784bfd58352f4035cd1bd4caa91eba6e6dc8d30d 100644 --- a/tensorflow/python/ops/distributions/bijector_test_util.py +++ b/tensorflow/python/ops/distributions/bijector_test_util.py @@ -79,9 +79,7 @@ def assert_scalar_congruency(bijector, Raises: AssertionError: If tests fail. """ - # Checks and defaults. - assert bijector.event_ndims.eval() == 0 if sess is None: sess = ops.get_default_session() @@ -111,7 +109,10 @@ def assert_scalar_congruency(bijector, # (b - a) = \int_a^b dx = \int_{y(a)}^{y(b)} |dx/dy| dy # "change_measure_dy_dx" below is a Monte Carlo approximation to the right # hand side, which should then be close to the left, which is (b - a). - dy_dx = math_ops.exp(bijector.inverse_log_det_jacobian(uniform_y_samps)) + # We assume event_ndims=0 because we assume scalar -> scalar. The log_det + # methods will handle whether they expect event_ndims > 0. + dy_dx = math_ops.exp(bijector.inverse_log_det_jacobian( + uniform_y_samps, event_ndims=0)) # E[|dx/dy|] under Uniform[lower_y, upper_y] # = \int_{y(a)}^{y(b)} |dx/dy| dP(u), where dP(u) is the uniform measure expectation_of_dy_dx_under_uniform = math_ops.reduce_mean(dy_dx) @@ -121,7 +122,8 @@ def assert_scalar_congruency(bijector, # We'll also check that dy_dx = 1 / dx_dy. 
dx_dy = math_ops.exp( - bijector.forward_log_det_jacobian(bijector.inverse(uniform_y_samps))) + bijector.forward_log_det_jacobian( + bijector.inverse(uniform_y_samps), event_ndims=0)) [ forward_on_10_pts_v, @@ -158,7 +160,8 @@ def assert_scalar_congruency(bijector, dy_dx_v, np.divide(1., dx_dy_v), atol=1e-5, rtol=1e-3) -def assert_bijective_and_finite(bijector, x, y, atol=0, rtol=1e-5, sess=None): +def assert_bijective_and_finite( + bijector, x, y, event_ndims, atol=0, rtol=1e-5, sess=None): """Assert that forward/inverse (along with jacobians) are inverses and finite. It is recommended to use x and y values that are very very close to the edge @@ -168,6 +171,8 @@ def assert_bijective_and_finite(bijector, x, y, atol=0, rtol=1e-5, sess=None): bijector: A Bijector instance. x: np.array of values in the domain of bijector.forward. y: np.array of values in the domain of bijector.inverse. + event_ndims: Integer describing the number of event dimensions this bijector + operates on. atol: Absolute tolerance. rtol: Relative tolerance. sess: TensorFlow session. Defaults to the default session. 
@@ -197,10 +202,10 @@ def assert_bijective_and_finite(bijector, x, y, atol=0, rtol=1e-5, sess=None): ] = sess.run([ bijector.inverse(f_x), bijector.forward(g_y), - bijector.inverse_log_det_jacobian(f_x), - bijector.forward_log_det_jacobian(x), - bijector.inverse_log_det_jacobian(y), - bijector.forward_log_det_jacobian(g_y), + bijector.inverse_log_det_jacobian(f_x, event_ndims=event_ndims), + bijector.forward_log_det_jacobian(x, event_ndims=event_ndims), + bijector.inverse_log_det_jacobian(y, event_ndims=event_ndims), + bijector.forward_log_det_jacobian(g_y, event_ndims=event_ndims), f_x, g_y, ]) diff --git a/tensorflow/python/ops/distributions/categorical.py b/tensorflow/python/ops/distributions/categorical.py index 9161e3fa9f5f7f844e7f4926992c954acae246d6..8f25b1149c3c8b220fe2bea95c33854ee2f6275a 100644 --- a/tensorflow/python/ops/distributions/categorical.py +++ b/tensorflow/python/ops/distributions/categorical.py @@ -183,7 +183,7 @@ class Categorical(distribution.Distribution): name: Python `str` name prefixed to Ops created by this class. 
""" parameters = locals() - with ops.name_scope(name, values=[logits, probs]): + with ops.name_scope(name, values=[logits, probs]) as name: self._logits, self._probs = distribution_util.get_logits_and_probs( logits=logits, probs=probs, @@ -311,7 +311,7 @@ class Categorical(distribution.Distribution): nn_ops.log_softmax(self.logits) * self.probs, axis=-1) def _mode(self): - ret = math_ops.argmax(self.logits, dimension=self._batch_rank) + ret = math_ops.argmax(self.logits, axis=self._batch_rank) ret = math_ops.cast(ret, self.dtype) ret.set_shape(self.batch_shape) return ret diff --git a/tensorflow/python/ops/distributions/dirichlet.py b/tensorflow/python/ops/distributions/dirichlet.py index 25afeec936069b9cbf926cdc3bbb79226a79aa30..eafcd5c78f7752dc3f9f5be8f8dbe7ac368fd3be 100644 --- a/tensorflow/python/ops/distributions/dirichlet.py +++ b/tensorflow/python/ops/distributions/dirichlet.py @@ -155,7 +155,7 @@ class Dirichlet(distribution.Distribution): name: Python `str` name prefixed to Ops created by this class. """ parameters = locals() - with ops.name_scope(name, values=[concentration]): + with ops.name_scope(name, values=[concentration]) as name: self._concentration = self._maybe_assert_valid_concentration( ops.convert_to_tensor(concentration, name="concentration"), validate_args) diff --git a/tensorflow/python/ops/distributions/dirichlet_multinomial.py b/tensorflow/python/ops/distributions/dirichlet_multinomial.py index 03a98c56ba509ea1f70f12a74ba67b903013cf70..fe0ed7e07d59654b8a4c7ac989212913697c6832 100644 --- a/tensorflow/python/ops/distributions/dirichlet_multinomial.py +++ b/tensorflow/python/ops/distributions/dirichlet_multinomial.py @@ -192,7 +192,7 @@ class DirichletMultinomial(distribution.Distribution): name: Python `str` name prefixed to Ops created by this class. 
""" parameters = locals() - with ops.name_scope(name, values=[total_count, concentration]): + with ops.name_scope(name, values=[total_count, concentration]) as name: # Broadcasting works because: # * The broadcasting convention is to prepend dimensions of size [1], and # we use the last dimension for the distribution, whereas diff --git a/tensorflow/python/ops/distributions/distribution.py b/tensorflow/python/ops/distributions/distribution.py index 7c43bf54fc783815127f03cc287ab0fc4349beb5..3815abf72de1e2b5278706315a10899dccca4182 100644 --- a/tensorflow/python/ops/distributions/distribution.py +++ b/tensorflow/python/ops/distributions/distribution.py @@ -434,13 +434,17 @@ class Distribution(_BaseDistribution): for i, t in enumerate(graph_parents): if t is None or not tensor_util.is_tensor(t): raise ValueError("Graph parent item %d is not a Tensor; %s." % (i, t)) + if not name or name[-1] != "/": # `name` is not a name scope + non_unique_name = name or type(self).__name__ + with ops.name_scope(non_unique_name) as name: + pass self._dtype = dtype self._reparameterization_type = reparameterization_type self._allow_nan_stats = allow_nan_stats self._validate_args = validate_args self._parameters = parameters or {} self._graph_parents = graph_parents - self._name = name or type(self).__name__ + self._name = name @classmethod def param_shapes(cls, sample_shape, name="DistributionParamShapes"): diff --git a/tensorflow/python/ops/distributions/distributions.py b/tensorflow/python/ops/distributions/distributions.py index 9df7d148a583e533475276e090bcb02cb705290f..7c4b8697d818a88000fc2e84aec461d2918386a6 100644 --- a/tensorflow/python/ops/distributions/distributions.py +++ b/tensorflow/python/ops/distributions/distributions.py @@ -19,7 +19,6 @@ from __future__ import print_function # pylint: disable=wildcard-import,unused-import -from tensorflow.python.ops.distributions import bijectors from tensorflow.python.ops.distributions.bernoulli import Bernoulli from 
tensorflow.python.ops.distributions.beta import Beta from tensorflow.python.ops.distributions.categorical import Categorical @@ -40,7 +39,6 @@ from tensorflow.python.util.all_util import remove_undocumented _allowed_symbols = [ - "bijectors", "Bernoulli", "Beta", "Categorical", diff --git a/tensorflow/python/ops/distributions/exponential.py b/tensorflow/python/ops/distributions/exponential.py index 6345a76d485c64659aa01fa1611cd27426d8c8a5..cf0e729e1a189d94381d56aedb5a753862a20962 100644 --- a/tensorflow/python/ops/distributions/exponential.py +++ b/tensorflow/python/ops/distributions/exponential.py @@ -95,7 +95,7 @@ class Exponential(gamma.Gamma): # true in the parent class "Gamma." Therefore, passing # allow_nan_stats=True # through to the parent class results in unnecessary asserts. - with ops.name_scope(name, values=[rate]): + with ops.name_scope(name, values=[rate]) as name: self._rate = ops.convert_to_tensor(rate, name="rate") super(Exponential, self).__init__( concentration=array_ops.ones([], dtype=self._rate.dtype), @@ -144,7 +144,7 @@ class ExponentialWithSoftplusRate(Exponential): allow_nan_stats=True, name="ExponentialWithSoftplusRate"): parameters = locals() - with ops.name_scope(name, values=[rate]): + with ops.name_scope(name, values=[rate]) as name: super(ExponentialWithSoftplusRate, self).__init__( rate=nn.softplus(rate, name="softplus_rate"), validate_args=validate_args, diff --git a/tensorflow/python/ops/distributions/gamma.py b/tensorflow/python/ops/distributions/gamma.py index adb1f4f9a879e44cf8cb4cafd22b92554f487712..d39f7c56d39ae1681c32bacd9771f4f2372d3f98 100644 --- a/tensorflow/python/ops/distributions/gamma.py +++ b/tensorflow/python/ops/distributions/gamma.py @@ -127,7 +127,7 @@ class Gamma(distribution.Distribution): TypeError: if `concentration` and `rate` are different dtypes. 
""" parameters = locals() - with ops.name_scope(name, values=[concentration, rate]): + with ops.name_scope(name, values=[concentration, rate]) as name: with ops.control_dependencies([ check_ops.assert_positive(concentration), check_ops.assert_positive(rate), @@ -262,7 +262,7 @@ class GammaWithSoftplusConcentrationRate(Gamma): allow_nan_stats=True, name="GammaWithSoftplusConcentrationRate"): parameters = locals() - with ops.name_scope(name, values=[concentration, rate]): + with ops.name_scope(name, values=[concentration, rate]) as name: super(GammaWithSoftplusConcentrationRate, self).__init__( concentration=nn.softplus(concentration, name="softplus_concentration"), diff --git a/tensorflow/python/ops/distributions/identity_bijector.py b/tensorflow/python/ops/distributions/identity_bijector.py index 2972c3554b3639a1ae30a4167f73613b1ff8add2..8628e68f967337fb81187bae9576a168e1cd5a36 100644 --- a/tensorflow/python/ops/distributions/identity_bijector.py +++ b/tensorflow/python/ops/distributions/identity_bijector.py @@ -20,7 +20,6 @@ from __future__ import print_function from tensorflow.python.framework import constant_op from tensorflow.python.ops.distributions import bijector -from tensorflow.python.util.tf_export import tf_export __all__ = [ @@ -28,7 +27,6 @@ __all__ = [ ] -@tf_export("distributions.bijectors.Identity") class Identity(bijector.Bijector): """Compute Y = g(X) = X. @@ -37,7 +35,7 @@ class Identity(bijector.Bijector): ```python # Create the Y=g(X)=X transform which is intended for Tensors with 1 batch # ndim and 1 event ndim (i.e., vector of vectors). 
- identity = Identity(event_ndims=1) + identity = Identity() x = [[1., 2], [3, 4]] x == identity.forward(x) == identity.inverse(x) @@ -45,10 +43,10 @@ class Identity(bijector.Bijector): """ - def __init__(self, validate_args=False, event_ndims=0, name="identity"): + def __init__(self, validate_args=False, name="identity"): super(Identity, self).__init__( + forward_min_event_ndims=0, is_constant_jacobian=True, - event_ndims=event_ndims, validate_args=validate_args, name=name) diff --git a/tensorflow/python/ops/distributions/laplace.py b/tensorflow/python/ops/distributions/laplace.py index e98ac855c58efa1ef3ccef2de24f329d839bac26..3ccfc618d1157722bbc4ce2d5f88fbcd4fb1ba10 100644 --- a/tensorflow/python/ops/distributions/laplace.py +++ b/tensorflow/python/ops/distributions/laplace.py @@ -101,7 +101,7 @@ class Laplace(distribution.Distribution): TypeError: if `loc` and `scale` are of different dtype. """ parameters = locals() - with ops.name_scope(name, values=[loc, scale]): + with ops.name_scope(name, values=[loc, scale]) as name: with ops.control_dependencies([check_ops.assert_positive(scale)] if validate_args else []): self._loc = array_ops.identity(loc, name="loc") @@ -218,7 +218,7 @@ class LaplaceWithSoftplusScale(Laplace): allow_nan_stats=True, name="LaplaceWithSoftplusScale"): parameters = locals() - with ops.name_scope(name, values=[loc, scale]): + with ops.name_scope(name, values=[loc, scale]) as name: super(LaplaceWithSoftplusScale, self).__init__( loc=loc, scale=nn.softplus(scale, name="softplus_scale"), diff --git a/tensorflow/python/ops/distributions/multinomial.py b/tensorflow/python/ops/distributions/multinomial.py index 4ae67a009b0a4052f6e23e2e42262bb7c42f1c14..ab77f5c1f815f36edb989c5b315ef90fb198b001 100644 --- a/tensorflow/python/ops/distributions/multinomial.py +++ b/tensorflow/python/ops/distributions/multinomial.py @@ -183,7 +183,7 @@ class Multinomial(distribution.Distribution): name: Python `str` name prefixed to Ops created by this class. 
""" parameters = locals() - with ops.name_scope(name, values=[total_count, logits, probs]): + with ops.name_scope(name, values=[total_count, logits, probs]) as name: self._total_count = ops.convert_to_tensor(total_count, name="total_count") if validate_args: self._total_count = ( diff --git a/tensorflow/python/ops/distributions/normal.py b/tensorflow/python/ops/distributions/normal.py index 32e8a49c81bc4b23d8897639998dd33942b41a80..20d4420e91886cf84855bd585cfc4869fc674178 100644 --- a/tensorflow/python/ops/distributions/normal.py +++ b/tensorflow/python/ops/distributions/normal.py @@ -132,7 +132,7 @@ class Normal(distribution.Distribution): TypeError: if `loc` and `scale` have different `dtype`. """ parameters = locals() - with ops.name_scope(name, values=[loc, scale]): + with ops.name_scope(name, values=[loc, scale]) as name: with ops.control_dependencies([check_ops.assert_positive(scale)] if validate_args else []): self._loc = array_ops.identity(loc, name="loc") @@ -244,7 +244,7 @@ class NormalWithSoftplusScale(Normal): allow_nan_stats=True, name="NormalWithSoftplusScale"): parameters = locals() - with ops.name_scope(name, values=[scale]): + with ops.name_scope(name, values=[scale]) as name: super(NormalWithSoftplusScale, self).__init__( loc=loc, scale=nn.softplus(scale, name="softplus_scale"), diff --git a/tensorflow/python/ops/distributions/student_t.py b/tensorflow/python/ops/distributions/student_t.py index 9d9e65b4e8d6d2e40bf9c263339f899439c842c3..961b07a7bdac34b76ca1a594fa3df5e97951c76b 100644 --- a/tensorflow/python/ops/distributions/student_t.py +++ b/tensorflow/python/ops/distributions/student_t.py @@ -158,7 +158,7 @@ class StudentT(distribution.Distribution): TypeError: if loc and scale are different dtypes. 
""" parameters = locals() - with ops.name_scope(name, values=[df, loc, scale]): + with ops.name_scope(name, values=[df, loc, scale]) as name: with ops.control_dependencies([check_ops.assert_positive(df)] if validate_args else []): self._df = array_ops.identity(df, name="df") @@ -350,7 +350,7 @@ class StudentTWithAbsDfSoftplusScale(StudentT): allow_nan_stats=True, name="StudentTWithAbsDfSoftplusScale"): parameters = locals() - with ops.name_scope(name, values=[df, scale]): + with ops.name_scope(name, values=[df, scale]) as name: super(StudentTWithAbsDfSoftplusScale, self).__init__( df=math_ops.floor(math_ops.abs(df)), loc=loc, diff --git a/tensorflow/python/ops/distributions/transformed_distribution.py b/tensorflow/python/ops/distributions/transformed_distribution.py index 1efcf9d32e9ea9924bb080459efb7015e33ccd54..6aa6ec40d9b2ac4d156ba132e75e1846c870b772 100644 --- a/tensorflow/python/ops/distributions/transformed_distribution.py +++ b/tensorflow/python/ops/distributions/transformed_distribution.py @@ -197,8 +197,7 @@ class TransformedDistribution(distribution_lib.Distribution): distribution=ds.Normal(loc=0., scale=1.), bijector=ds.bijectors.Affine( shift=-1., - scale_identity_multiplier=2., - event_ndims=0), + scale_identity_multiplier=2.) name="NormalTransformedDistribution") ``` @@ -258,7 +257,7 @@ class TransformedDistribution(distribution_lib.Distribution): parameters = locals() name = name or (("" if bijector is None else bijector.name) + distribution.name) - with ops.name_scope(name, values=[event_shape, batch_shape]): + with ops.name_scope(name, values=[event_shape, batch_shape]) as name: # For convenience we define some handy constants. self._zero = constant_op.constant(0, dtype=dtypes.int32, name="zero") self._empty = constant_op.constant([], dtype=dtypes.int32, name="empty") @@ -419,48 +418,51 @@ class TransformedDistribution(distribution_lib.Distribution): # For caching to work, it is imperative that the bijector is the first to # modify the input. 
x = self.bijector.inverse(y) - ildj = self.bijector.inverse_log_det_jacobian(y) + event_ndims = self._maybe_get_event_ndims_statically() + + ildj = self.bijector.inverse_log_det_jacobian(y, event_ndims=event_ndims) if self.bijector._is_injective: # pylint: disable=protected-access - return self._finish_log_prob_for_one_fiber(y, x, ildj) + return self._finish_log_prob_for_one_fiber(y, x, ildj, event_ndims) lp_on_fibers = [ - self._finish_log_prob_for_one_fiber(y, x_i, ildj_i) + self._finish_log_prob_for_one_fiber(y, x_i, ildj_i, event_ndims) for x_i, ildj_i in zip(x, ildj)] return math_ops.reduce_logsumexp(array_ops.stack(lp_on_fibers), axis=0) - def _finish_log_prob_for_one_fiber(self, y, x, ildj): + def _finish_log_prob_for_one_fiber(self, y, x, ildj, event_ndims): """Finish computation of log_prob on one element of the inverse image.""" x = self._maybe_rotate_dims(x, rotate_right=True) log_prob = self.distribution.log_prob(x) if self._is_maybe_event_override: log_prob = math_ops.reduce_sum(log_prob, self._reduce_event_indices) log_prob += math_ops.cast(ildj, log_prob.dtype) - if self._is_maybe_event_override: + if self._is_maybe_event_override and isinstance(event_ndims, int): log_prob.set_shape(array_ops.broadcast_static_shape( - y.get_shape().with_rank_at_least(1)[:-1], self.batch_shape)) + x.get_shape().with_rank_at_least(1)[:-event_ndims], self.batch_shape)) return log_prob def _prob(self, y): x = self.bijector.inverse(y) - ildj = self.bijector.inverse_log_det_jacobian(y) + event_ndims = self._maybe_get_event_ndims_statically() + ildj = self.bijector.inverse_log_det_jacobian(y, event_ndims=event_ndims) if self.bijector._is_injective: # pylint: disable=protected-access - return self._finish_prob_for_one_fiber(y, x, ildj) + return self._finish_prob_for_one_fiber(y, x, ildj, event_ndims) prob_on_fibers = [ - self._finish_prob_for_one_fiber(y, x_i, ildj_i) + self._finish_prob_for_one_fiber(y, x_i, ildj_i, event_ndims) for x_i, ildj_i in zip(x, ildj)] return 
sum(prob_on_fibers) - def _finish_prob_for_one_fiber(self, y, x, ildj): + def _finish_prob_for_one_fiber(self, y, x, ildj, event_ndims): """Finish computation of prob on one element of the inverse image.""" x = self._maybe_rotate_dims(x, rotate_right=True) prob = self.distribution.prob(x) if self._is_maybe_event_override: prob = math_ops.reduce_prod(prob, self._reduce_event_indices) prob *= math_ops.exp(math_ops.cast(ildj, prob.dtype)) - if self._is_maybe_event_override: + if self._is_maybe_event_override and isinstance(event_ndims, int): prob.set_shape(array_ops.broadcast_static_shape( - y.get_shape().with_rank_at_least(1)[:-1], self.batch_shape)) + y.get_shape().with_rank_at_least(1)[:-event_ndims], self.batch_shape)) return prob def _log_cdf(self, y): @@ -545,10 +547,17 @@ class TransformedDistribution(distribution_lib.Distribution): _ones_like(self.distribution.batch_shape_tensor()) ], 0) entropy = array_ops.tile(entropy, multiples) - dummy = array_ops.zeros([], self.dtype) - entropy -= math_ops.cast( - self.bijector.inverse_log_det_jacobian(dummy), - entropy.dtype) + dummy = array_ops.zeros( + shape=array_ops.concat( + [self.batch_shape_tensor(), self.event_shape_tensor()], + 0), + dtype=self.dtype) + event_ndims = (self.event_shape.ndims if self.event_shape.ndims is not None + else array_ops.size(self.event_shape_tensor())) + ildj = self.bijector.inverse_log_det_jacobian( + dummy, event_ndims=event_ndims) + + entropy -= math_ops.cast(ildj, entropy.dtype) entropy.set_shape(self.batch_shape) return entropy @@ -610,3 +619,16 @@ class TransformedDistribution(distribution_lib.Distribution): n = (ndims - self._rotate_ndims) if rotate_right else self._rotate_ndims return array_ops.transpose( x, _concat_vectors(math_ops.range(n, ndims), math_ops.range(0, n))) + + def _maybe_get_event_ndims_statically(self): + if self.event_shape.ndims is not None: + return self.event_shape.ndims + + event_ndims = array_ops.size(self.event_shape_tensor()) + + static_event_ndims = 
tensor_util.constant_value(event_ndims) + + if static_event_ndims is not None: + return static_event_ndims + + return event_ndims diff --git a/tensorflow/python/ops/distributions/uniform.py b/tensorflow/python/ops/distributions/uniform.py index ec623b55eb0067e16599c18c9c504635da863907..087797c653bb3ee294b747dd7da8ff2896e79fc6 100644 --- a/tensorflow/python/ops/distributions/uniform.py +++ b/tensorflow/python/ops/distributions/uniform.py @@ -103,7 +103,7 @@ class Uniform(distribution.Distribution): InvalidArgumentError: if `low >= high` and `validate_args=False`. """ parameters = locals() - with ops.name_scope(name, values=[low, high]): + with ops.name_scope(name, values=[low, high]) as name: with ops.control_dependencies([ check_ops.assert_less( low, high, message="uniform not defined when low >= high.") @@ -166,7 +166,8 @@ class Uniform(distribution.Distribution): return self.low + self.range() * samples def _prob(self, x): - broadcasted_x = x * array_ops.ones(self.batch_shape_tensor()) + broadcasted_x = x * array_ops.ones( + self.batch_shape_tensor(), dtype=x.dtype) return array_ops.where( math_ops.is_nan(broadcasted_x), broadcasted_x, diff --git a/tensorflow/python/ops/distributions/util.py b/tensorflow/python/ops/distributions/util.py index 0fe6aa30f945dc7682a53fa6495823288cf111b7..2e067eab459050e30d220bdb7ff0d65cb9c552f7 100644 --- a/tensorflow/python/ops/distributions/util.py +++ b/tensorflow/python/ops/distributions/util.py @@ -58,8 +58,7 @@ def assert_close( if data is None: data = [ message, - "Condition x ~= y did not hold element-wise: x = ", x.name, x, "y = ", - y.name, y + "Condition x ~= y did not hold element-wise: x = ", x, "y = ", y ] if x.dtype.is_integer: @@ -95,7 +94,7 @@ def assert_integer_form( x = ops.convert_to_tensor(x, name="x") if x.dtype.is_integer: return control_flow_ops.no_op() - message = message or "{} has non-integer components".format(x.op.name) + message = message or "{} has non-integer components".format(x) if int_dtype is None: 
try: int_dtype = { @@ -123,13 +122,13 @@ def embed_check_nonnegative_integer_form( x = ops.convert_to_tensor(x, name="x") assertions = [ check_ops.assert_non_negative( - x, message="'{}' must be non-negative.".format(x.op.name)), + x, message="'{}' must be non-negative.".format(x)), ] if not x.dtype.is_integer: assertions += [ assert_integer_form( x, message="'{}' cannot contain fractional components.".format( - x.op.name)), + x)), ] return control_flow_ops.with_dependencies(assertions, x) @@ -434,7 +433,7 @@ def embed_check_integer_casting_closed( and not _is_integer_like_by_dtype(target_dtype)): raise TypeError("At least one of {}.dtype ({}) and target_dtype ({}) " "must be integer-type.".format( - x.op.name, x.dtype.name, target_dtype.name)) + x, x.dtype.name, target_dtype.name)) assertions = [] if assert_nonnegative: @@ -683,7 +682,7 @@ def pick_vector(cond, cond = ops.convert_to_tensor(cond, name="cond") if cond.dtype != dtypes.bool: raise TypeError("%s.dtype=%s which is not %s" % - (cond.name, cond.dtype, dtypes.bool)) + (cond, cond.dtype, dtypes.bool)) cond_value_static = tensor_util.constant_value(cond) if cond_value_static is not None: return true_vector if cond_value_static else false_vector @@ -692,8 +691,8 @@ def pick_vector(cond, if true_vector.dtype != false_vector.dtype: raise TypeError( "%s.dtype=%s does not match %s.dtype=%s" - % (true_vector.name, true_vector.dtype, - false_vector.name, false_vector.dtype)) + % (true_vector, true_vector.dtype, + false_vector, false_vector.dtype)) n = array_ops.shape(true_vector)[0] return array_ops.slice( array_ops.concat([true_vector, false_vector], 0), diff --git a/tensorflow/python/ops/embedding_ops.py b/tensorflow/python/ops/embedding_ops.py index f0120f2957db12caf6a513fde9aa8c756aff8bad..9e46739bc1b6899b9b1fea898254f6e61fb7b8ba 100644 --- a/tensorflow/python/ops/embedding_ops.py +++ b/tensorflow/python/ops/embedding_ops.py @@ -331,11 +331,11 @@ def embedding_lookup_sparse(params, representing sharded 
embedding tensors. Alternatively, a `PartitionedVariable`, created by partitioning along dimension 0. Each element must be appropriately sized for the given `partition_strategy`. - sp_ids: N x M SparseTensor of int64 ids (typically from FeatureValueToId), + sp_ids: N x M `SparseTensor` of int64 ids (typically from FeatureValueToId), where N is typically batch size and M is arbitrary. - sp_weights: either a SparseTensor of float / double weights, or None to - indicate all weights should be taken to be 1. If specified, sp_weights - must have exactly the same shape and indices as sp_ids. + sp_weights: either a `SparseTensor` of float / double weights, or `None` to + indicate all weights should be taken to be 1. If specified, `sp_weights` + must have exactly the same shape and indices as `sp_ids`. partition_strategy: A string specifying the partitioning strategy, relevant if `len(params) > 1`. Currently `"div"` and `"mod"` are supported. Default is `"mod"`. See `tf.nn.embedding_lookup` for more details. @@ -351,39 +351,43 @@ def embedding_lookup_sparse(params, Returns: A dense tensor representing the combined embeddings for the - sparse ids. For each row in the dense tensor represented by sp_ids, the op + sparse ids. For each row in the dense tensor represented by `sp_ids`, the op looks up the embeddings for all ids in that row, multiplies them by the corresponding weight, and combines these embeddings as specified. In other words, if - shape(combined params) = [p0, p1, ..., pm] + `shape(combined params) = [p0, p1, ..., pm]` and - shape(sp_ids) = shape(sp_weights) = [d0, d1, ..., dn] + `shape(sp_ids) = shape(sp_weights) = [d0, d1, ..., dn]` then - shape(output) = [d0, d1, ..., dn-1, p1, ..., pm]. + `shape(output) = [d0, d1, ..., dn-1, p1, ..., pm]`. 
For instance, if params is a 10x20 matrix, and sp_ids / sp_weights are + ```python [0, 0]: id 1, weight 2.0 [0, 1]: id 3, weight 0.5 [1, 0]: id 0, weight 1.0 [2, 3]: id 1, weight 3.0 + ``` with `combiner`="mean", then the output will be a 3x20 matrix where + ```python output[0, :] = (params[1, :] * 2.0 + params[3, :] * 0.5) / (2.0 + 0.5) output[1, :] = (params[0, :] * 1.0) / 1.0 output[2, :] = (params[1, :] * 3.0) / 3.0 + ``` Raises: - TypeError: If sp_ids is not a SparseTensor, or if sp_weights is neither - None nor SparseTensor. - ValueError: If combiner is not one of {"mean", "sqrtn", "sum"}. + TypeError: If `sp_ids` is not a `SparseTensor`, or if `sp_weights` is + neither `None` nor `SparseTensor`. + ValueError: If `combiner` is not one of {"mean", "sqrtn", "sum"}. """ if combiner is None: logging.warn("The default value of combiner will change from \"mean\" " diff --git a/tensorflow/python/ops/functional_ops.py b/tensorflow/python/ops/functional_ops.py index a840b1eddfc6922dc310490e8166efd73480c437..161f6f36596279ee0dc7d04d454d670167ba798b 100644 --- a/tensorflow/python/ops/functional_ops.py +++ b/tensorflow/python/ops/functional_ops.py @@ -1,4 +1,4 @@ -# Copyright 2015 The TensorFlow Authors. All Rights Reserved. +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -27,22 +27,24 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.core.framework import attr_value_pb2 from tensorflow.python.eager import context from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import function from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import gen_functional_ops +from tensorflow.python.ops import math_ops from tensorflow.python.ops import tensor_array_ops from tensorflow.python.ops import variable_scope as vs -# go/tf-wildcard-import -# pylint: disable=wildcard-import -from tensorflow.python.ops.gen_functional_ops import * -# pylint: enable=wildcard-import # pylint: disable=unused-import -from tensorflow.python.ops.gen_functional_ops import symbolic_gradient +from tensorflow.python.ops.gen_functional_ops import remote_call # pylint: enable=unused-import +from tensorflow.python.ops.gen_functional_ops import symbolic_gradient from tensorflow.python.util import nest from tensorflow.python.util.tf_export import tf_export @@ -365,7 +367,15 @@ def map_fn(fn, elems, dtype=None, parallel_iterations=10, back_prop=True, dtype_flat = output_flatten(dtype) # Convert elems to tensor array. n may be known statically. 
- n = elems_flat[0].shape[0].value or array_ops.shape(elems_flat[0])[0] + static_shape = elems_flat[0].shape + if static_shape.ndims is not None and static_shape.ndims < 1: + if len(elems_flat) == 1: + raise ValueError("elems must be a 1+ dimensional Tensor, not a scalar") + else: + raise ValueError( + "elements in elems must be 1+ dimensional Tensors, not scalars" + ) + n = static_shape[0].value or array_ops.shape(elems_flat[0])[0] # TensorArrays are always flat elems_ta = [ @@ -634,3 +644,249 @@ def scan(fn, elems, initializer=None, parallel_iterations=10, back_prop=True, varscope.set_caching_device(None) return output_pack(results_flat) + + +# pylint: disable=invalid-name +def If(cond, inputs, then_branch, else_branch, name=None): + r"""output = Cond(inputs) ? then_branch(inputs) : else_branch(inputs). + + Args: + cond: A `Tensor`. A scalar. If the scalar is not a boolean, the scalar is + converted to a boolean according to the following rule: if the + scalar is a numerical value, non-zero means True and zero means + False; if the scalar is a string, non-empty means True and empty + means False. + inputs: A list of input tensors. + then_branch: A function takes 'inputs' and returns a list of tensors, + whose types are the same as what else_branch returns. + else_branch: A function takes 'inputs' and returns a list of tensors. + whose types are the same as what then_branch returns. + name: A name for the operation (optional). + + Returns: + A list of tensors returned by either then_branch(inputs) + or else_branch(inputs). + """ + # pylint: disable=protected-access + return gen_functional_ops._if( + cond, + inputs, [_.type for _ in then_branch.definition.signature.output_arg], + then_branch, + else_branch, + name=name) + + +def Gradient(inputs, f, name=None): + r"""Computes the gradient function for function f via backpropagation. + + Args: + inputs: A list of tensors of size N + M. + f: The function we want to compute the gradient for. 
+ + The function 'f' must be a numerical function which takes N inputs and + produces M outputs. Its gradient function 'g', which is a function + taking N + M inputs and produces N outputs. + + I.e. if we have + (y1, y2, ..., yM) = f(x1, x2, ..., xN), + then, g is + (dL/dx1, dL/dx2, ..., dL/dxN) = g(x1, x2, ..., xN, + dL/dy1, dL/dy2, ..., dL/dyM), + + where L is a scalar-value function of (x1, x2, ..., xN) (e.g., the + loss function). dL/dxi is the partial derivative of L with respect + to xi. + + name: A name for the operation (optional). + + Returns: + A list of tensors of size N. + """ + # TODO(zhifengc): Pretty-print the above spec in latex. + # TODO(zhfiengc): Needs some math expert to say the comment above better. + tlist = [_.type for _ in f.definition.signature.input_arg] + return symbolic_gradient(input=inputs, Tout=tlist, f=f, name=name) + + +# pylint: disable=invalid-name,protected-access +def While(input_, cond, body, name=None, hostmem=None): + r"""output = input; While (Cond(output)) { output = Body(output) }. + + Args: + input_: A list of `Tensor` objects. + A list of input tensors whose types are T. + cond: . A function takes 'input' and returns a tensor. If the tensor is + a scalar of non-boolean, the scalar is converted to a boolean + according to the following rule: if the scalar is a numerical + value, non-zero means True and zero means False; if the scalar is + a string, non-empty means True and empty means False. If the + tensor is not a scalar, non-emptiness means True and False + otherwise. + body: . A funcion takes a list of tensors and returns another + list tensors. Both lists have the same types as specified + by T. + name: A name for the operation (optional). + hostmem: A list of integer. If i is in the list, input[i] is a + host memory tensor. + + Returns: + A list of `Tensor` objects. Has the same type as `input`. + A list of output tensors whose types are T. 
+ """ + ret = gen_functional_ops._while(input_, cond, body, name=name) + if hostmem: + input_attr = attr_value_pb2.AttrValue() + input_attr.list.i.extend(hostmem) + ret[0].op._set_attr("_input_hostmem", input_attr) # pylint: disable=protected-access + + output_attr = attr_value_pb2.AttrValue() + output_attr.list.i.extend(hostmem) + ret[0].op._set_attr("_output_hostmem", output_attr) # pylint: disable=protected-access + return ret + + +# b/36459430 +# +# Ideally, we do not need this rewrite For loop into a While loop. +# However, today, if a While runs on GPU and the condition returns a +# boolean, the While kernel crashes. Even if we fix the crash, the +# bool needs to be copied between GPU and CPU. So, a for loop is much +# preferred when running on GPU. +# +# On the other hand, For op has no directly XLA kernel. So, when we run +# a for loop, we need to rewrite it using a While op. +# +# It should be possible and probably better to write a XLA C++ kernel +# implementing the logic in _ForUsingWhile. +def _ForUsingWhile(start, + limit, + delta, + inputs, + forbody, + name=None, + hostmem=None): + """Helper to implement a For loop using a While.""" + # To support negative delta (e.g., range(100, 0, -3)), we iterate + # over the range(n) and use iter * delta + start as the real + # iteration index. (e.g., for i in range(34): iter = i * (-3) + + # 100). + d = math_ops.abs(delta) + # XLA on TPUs doesn't support integer division + n = math_ops.cast( + math_ops.cast((math_ops.abs(limit - start) + d - 1), dtypes.float32) / + math_ops.cast(d, dtypes.float32), dtypes.int32) + + # Carried loop variables ("extra_args") are implicitly added to the input list + # of the WhileBody function. WhileCond does not call forbody, and so does not + # depend on any of forbody's extra_args. Since WhileCond and WhileBody + # must have identical inputs, we have to augment the cond signature to take + # the same types as the carried loop variables. 
+ body_sig = [dtypes.int32] * 4 + list(forbody.declared_input_types)[1:] + cond_sig = body_sig + [t.dtype for t in forbody.captured_inputs] + + cond_name = "%s_Cond" % forbody.name + + @function.Defun(*cond_sig, func_name=cond_name) + def WhileCond(i, n, *args): + del args + return i < n + + body_name = "%s_Body" % forbody.name + + @function.Defun(*body_sig, func_name=body_name) + def WhileBody(i, n, start, delta, *args): + """A While wrapper for forbody that handles loop-carried captured inputs.""" + for_result = forbody(start + i * delta, *args) + # Nullary functions return an Operation. Normal functions can't do this + # because their return values are converted to Tensors. + if isinstance(for_result, ops.Operation): + for_result = () + # Unary functions return a single Tensor value. + elif isinstance(for_result, ops.Tensor): + for_result = (for_result,) + extra_args = tuple(function.get_extra_args()) + return (i + 1, n, start, delta) + tuple(for_result) + extra_args + + if hostmem is not None: + hostmem = [(4 + _) for _ in hostmem] + + results = While( + input_=[0, n, start, delta] + inputs + WhileBody.captured_inputs, + cond=WhileCond, + body=WhileBody, + name=name, + hostmem=hostmem) + # Slice off the loop-carried captured inputs. + return list(results[4:len(results) - len(WhileBody.captured_inputs)]) + + +def For(start, + limit, + delta, + inputs, + body, + name=None, + hostmem=None, + rewrite_with_while=None): + r"""out = input; for i in range(start, limit, delta) out = body(i, out). + + Args: + start: A `Tensor` of type `int32`. + limit: A `Tensor` of type `int32`. + delta: A `Tensor` of type `int32`. + inputs: A list of `Tensor` objects. + A list of input tensors whose types are T. + body: A function takes a list of tensors and returns another + list of tensors. Both lists have the same types as (int32, T...). + name: A name for the operation (optional). + hostmem: A list of integer. If i is in the list, inputs[i] is a + host memory tensor. 
In other words, (i+1)-th argument of the body + function is expecting a host memory. + rewrite_with_while: If True, using While op to implement the For. + + Returns: + A list of `Tensor` objects. Has the same type as `input`. + A list of output tensors whose types are T. + """ + if rewrite_with_while: + return _ForUsingWhile(start, limit, delta, inputs, body, name, hostmem) + if body.captured_inputs: + wrapper_name = "%s_BodyWrapper" % body.name + + @function.Defun(*body.declared_input_types, func_name=wrapper_name) + def BodyWrapper(*args): + """A wrapper for body that handles loop-carried captured inputs.""" + body_result = body(*args) + extra_args = tuple(function.get_extra_args()) + # Nullary functions return an Operation. Normal functions can't do this + # because their return values are converted to Tensors. + if isinstance(body_result, ops.Operation): + return extra_args + # Unary functions return a single Tensor value. + elif not isinstance(body_result, tuple): + return (body_result,) + extra_args + # N-ary functions return a tuple of Tensors. + else: + return body_result + extra_args + + inputs += BodyWrapper.captured_inputs + ret = gen_functional_ops._for( + start, limit, delta, inputs, BodyWrapper, name=name) + # Slice off the loop-carried captured inputs. 
+ ret = ret[:-len(BodyWrapper.captured_inputs)] + else: + ret = gen_functional_ops._for(start, limit, delta, inputs, body, name=name) + if hostmem: + num_for_params = 3 # start/limit/delta + + input_attr = attr_value_pb2.AttrValue() + input_attr.list.i.extend([num_for_params + i for i in hostmem]) + ret[0].op._set_attr("_input_hostmem", input_attr) # pylint: disable=protected-access + + output_attr = attr_value_pb2.AttrValue() + output_attr.list.i.extend(hostmem) + ret[0].op._set_attr("_output_hostmem", output_attr) # pylint: disable=protected-access + return ret + + +# pylint: enable=invalid-name,protected-access diff --git a/tensorflow/python/ops/gradients_impl.py b/tensorflow/python/ops/gradients_impl.py index 44473ec69c8ac6cf565f635621eebff7bc403225..13420b7f0ee5f2c186ff99409588b827b281c95f 100644 --- a/tensorflow/python/ops/gradients_impl.py +++ b/tensorflow/python/ops/gradients_impl.py @@ -208,7 +208,10 @@ def _AsList(x): return x if isinstance(x, (list, tuple)) else [x] -def _DefaultGradYs(grad_ys, ys, colocate_gradients_with_ops): +def _DefaultGradYs(grad_ys, + ys, + colocate_gradients_with_ops, + gradient_uid="__unsupported__"): """Fill in default values for grad_ys. Args: @@ -216,6 +219,9 @@ def _DefaultGradYs(grad_ys, ys, colocate_gradients_with_ops): ys: List of tensors. colocate_gradients_with_ops: If True, try colocating gradients with the corresponding op. + gradient_uid: A unique identifier within the graph indicating + which invocation of gradients is being executed. Used to cluster + ops for compilation. Returns: A list of gradients to use, without None. 
@@ -231,7 +237,7 @@ def _DefaultGradYs(grad_ys, ys, colocate_gradients_with_ops): for i in xrange(len(grad_ys)): grad_y = grad_ys[i] y = ys[i] - with _maybe_colocate_with(y.op, colocate_gradients_with_ops): + with _maybe_colocate_with(y.op, gradient_uid, colocate_gradients_with_ops): if grad_y is None: if y.dtype.is_complex: raise TypeError( @@ -338,10 +344,10 @@ def _StopOps(from_ops, stop_gradient_ops, pending_count): @contextlib.contextmanager -def _maybe_colocate_with(op, colocate_gradients_with_ops): +def _maybe_colocate_with(op, gradient_uid, colocate_gradients_with_ops): # pylint: disable=invalid-name """Context to colocate with `op` if `colocate_gradients_with_ops`.""" if colocate_gradients_with_ops: - with ops.colocate_with(op): + with ops._colocate_with_for_gradient(op, gradient_uid): # pylint: disable=protected-access yield else: yield @@ -506,6 +512,9 @@ def _GradientsHelper(ys, xs, grad_ys, name, colocate_gradients_with_ops, with ops.name_scope( name, "gradients", list(ys) + list(xs) + list(stop_gradients) + list(grad_ys)) as grad_scope: + # Get a uid for this call to gradients that can be used to help + # cluster ops for compilation. + gradient_uid = ops.get_default_graph().unique_name("uid") ys = ops.convert_n_to_tensor_or_indexed_slices(ys, name="y") xs = [ x.handle if resource_variable_ops.is_resource_variable(x) else x @@ -513,7 +522,8 @@ def _GradientsHelper(ys, xs, grad_ys, name, colocate_gradients_with_ops, ] xs = ops.internal_convert_n_to_tensor_or_indexed_slices( xs, name="x", as_ref=True) - grad_ys = _DefaultGradYs(grad_ys, ys, colocate_gradients_with_ops) + grad_ys = _DefaultGradYs(grad_ys, ys, colocate_gradients_with_ops, + gradient_uid) # The approach we take here is as follows: Create a list of all ops in the # subgraph between the ys and xs. Visit these ops in reverse order of ids @@ -570,10 +580,11 @@ def _GradientsHelper(ys, xs, grad_ys, name, colocate_gradients_with_ops, while queue: # generate gradient subgraph for op. 
op = queue.popleft() - with _maybe_colocate_with(op, colocate_gradients_with_ops): + with _maybe_colocate_with(op, gradient_uid, colocate_gradients_with_ops): if loop_state: loop_state.EnterGradWhileContext(op, before=True) - out_grads = _AggregatedGrads(grads, op, loop_state, aggregation_method) + out_grads = _AggregatedGrads(grads, op, gradient_uid, loop_state, + aggregation_method) if loop_state: loop_state.ExitGradWhileContext(op, before=True) @@ -633,7 +644,10 @@ def _GradientsHelper(ys, xs, grad_ys, name, colocate_gradients_with_ops, if gate_gradients and len([x for x in in_grads if x is not None]) > 1: with ops.device(None): - with ops.colocate_with(None, ignore_existing=True): + with ops._colocate_with_for_gradient( # pylint: disable=protected-access + None, + gradient_uid, + ignore_existing=True): in_grads = control_flow_ops.tuple(in_grads) _LogOpGradients(op, out_grads, in_grads) else: @@ -789,7 +803,7 @@ def _LogOpGradients(op, out_grads, in_grads): ", ".join([x.name for x in in_grads if _FilterGrad(x)])) -def _MultiDeviceAddN(tensor_list): +def _MultiDeviceAddN(tensor_list, gradient_uid): """Adds tensors from potentially multiple devices.""" # Basic function structure comes from control_flow_ops.group(). # Sort tensors according to their devices. @@ -808,7 +822,10 @@ def _MultiDeviceAddN(tensor_list): for dev in sorted(six.iterkeys(tensors_on_device), key=DeviceKey): tensors = tensors_on_device[dev] - with ops.colocate_with(tensors[0].op, ignore_existing=True): + with ops._colocate_with_for_gradient( # pylint: disable=protected-access + tensors[0].op, + gradient_uid, + ignore_existing=True): summands.append(math_ops.add_n(tensors)) return math_ops.add_n(summands) @@ -834,12 +851,19 @@ class AggregationMethod(object): EXPERIMENTAL_ACCUMULATE_N = 2 -def _AggregatedGrads(grads, op, loop_state, aggregation_method=None): +def _AggregatedGrads(grads, + op, + gradient_uid, + loop_state, + aggregation_method=None): """Get the aggregated gradients for op. 
Args: grads: The map of memoized gradients. op: The op to get gradients for. + gradient_uid: A unique identifier within the graph indicating + which invocation of gradients is being executed. Used to cluster + ops for compilation. loop_state: An object for maintaining the state of the while loops in the graph. It is of type ControlFlowState. None if the graph contains no while loops. @@ -916,7 +940,7 @@ def _AggregatedGrads(grads, op, loop_state, aggregation_method=None): out_grads[i] = running_sum else: used = "add_n" - out_grads[i] = _MultiDeviceAddN(out_grad) + out_grads[i] = _MultiDeviceAddN(out_grad, gradient_uid) logging.vlog(2, " _AggregatedGrads %d x %s using %s", len(out_grad), tensor_shape, used) else: diff --git a/tensorflow/python/ops/gradients_test.py b/tensorflow/python/ops/gradients_test.py index c94f1396b28e2124c6e5123cf711ac86abf174ab..0603d3b6706b960a0fa9d9b33d383dd0c9063780 100644 --- a/tensorflow/python/ops/gradients_test.py +++ b/tensorflow/python/ops/gradients_test.py @@ -44,6 +44,7 @@ from tensorflow.python.ops import gradients_impl from tensorflow.python.ops import math_grad # pylint: disable=unused-import from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn_grad # pylint: disable=unused-import +from tensorflow.python.ops import resource_variable_ops from tensorflow.python.ops import state_grad # pylint: disable=unused-import from tensorflow.python.ops import tensor_array_grad # pylint: disable=unused-import from tensorflow.python.ops import tensor_array_ops @@ -810,5 +811,29 @@ class OnlyRealGradientsTest(test_util.TensorFlowTestCase): gradients.gradients(y, x) +class ResourceCondTest(test_util.TensorFlowTestCase): + + def testBasic(self): + gamma = resource_variable_ops.ResourceVariable( + np.random.random((3,)), + dtype="float32", name="gamma") + + inputs = array_ops.ones(shape=(3,), dtype="float32") + + def TestFn(): + output = inputs + gamma + return output + + training = 
array_ops.placeholder_with_default(True, shape=()) + output = control_flow_ops.cond( + training, TestFn, lambda: inputs) + + loss = output + + grads = gradients.gradients( + loss, [gamma]) + self.assertTrue(None not in grads) + + if __name__ == "__main__": googletest.main() diff --git a/tensorflow/python/ops/hidden_ops.txt b/tensorflow/python/ops/hidden_ops.txt new file mode 100644 index 0000000000000000000000000000000000000000..e1217e984c8f6723ca171b0fdaa9fa8aed43d75a --- /dev/null +++ b/tensorflow/python/ops/hidden_ops.txt @@ -0,0 +1,395 @@ +# array_ops +BatchToSpace +BroadcastArgs +BroadcastGradientArgs +ConcatOffset +Concat +ConcatV2 +ConjugateTranspose +Const +DebugGradientIdentity +DebugGradientRefIdentity +EditDistance +ExpandDims +ListDiff +MirrorPad +MirrorPadGrad +OneHot +Pack +Pad +PadV2 +ParallelConcat +Placeholder +RefIdentity +Reverse +Snapshot +SpaceToBatch +Split +SplitV +Squeeze +Slice +TileGrad # Exported through array_grad instead of array_ops. +ZerosLike # TODO(josh11b): Use this instead of the Python version. 
+Unique +UniqueV2 +UniqueWithCounts +UniqueWithCountsV2 +Unpack + +# candidate_sampling_ops +AllCandidateSampler +ComputeAccidentalHits +FixedUnigramCandidateSampler +LearnedUnigramCandidateSampler +LogUniformCandidateSampler +ThreadUnsafeUnigramCandidateSampler +UniformCandidateSampler + +# checkpoint_ops +GenerateVocabRemapping +LoadAndRemapMatrix + + +# control_flow_ops +Switch +Merge +RefMerge +Exit +RefExit + +# ctc_ops +CTCLoss +CTCGreedyDecoder +CTCBeamSearchDecoder + +# data_flow_ops +Barrier +BarrierClose +BarrierIncompleteSize +BarrierInsertMany +BarrierReadySize +BarrierTakeMany +DeleteSessionTensor +FakeQueue +FIFOQueue +FIFOQueueV2 +GetSessionHandle +GetSessionHandleV2 +GetSessionTensor +HashTable +HashTableV2 +InitializeTable +InitializeTableV2 +InitializeTableFromTextFile +InitializeTableFromTextFileV2 +LookupTableExport +LookupTableExportV2 +LookupTableFind +LookupTableFindV2 +LookupTableImport +LookupTableImportV2 +LookupTableInsert +LookupTableInsertV2 +LookupTableSize +LookupTableSizeV2 +MutableDenseHashTable +MutableDenseHashTableV2 +MutableHashTable +MutableHashTableV2 +MutableHashTableOfTensors +MutableHashTableOfTensorsV2 +Mutex +MutexAcquire +MutexRelease +PaddingFIFOQueue +PaddingFIFOQueueV2 +PriorityQueue +PriorityQueueV2 +QueueClose +QueueCloseV2 +QueueDequeue +QueueDequeueV2 +QueueDequeueMany +QueueDequeueManyV2 +QueueDequeueUpTo +QueueDequeueUpToV2 +QueueEnqueue +QueueEnqueueV2 +QueueEnqueueMany +QueueEnqueueManyV2 +QueueSize +QueueSizeV2 +RandomShuffleQueue +RandomShuffleQueueV2 +Stack +StackClose +StackPop +StackPush +StackV2 +StackCloseV2 +StackPopV2 +StackPushV2 +TensorArray +TensorArrayClose +TensorArrayCloseV2 +TensorArrayConcat +TensorArrayConcatV2 +TensorArrayGather +TensorArrayGatherV2 +TensorArrayGrad +TensorArrayGradV2 +TensorArrayPack +TensorArrayPackV2 +TensorArrayRead +TensorArrayReadV2 +TensorArrayScatter +TensorArrayScatterV2 +TensorArraySize +TensorArraySizeV2 +TensorArraySplit +TensorArraySplitV2 +TensorArrayUnpack 
+TensorArrayUnpackV2 +TensorArrayV2 +TensorArrayWrite +TensorArrayWriteV2 +TensorArrayV3 +TensorArrayCloseV3 +TensorArrayConcatV3 +TensorArrayGatherV3 +TensorArrayGradV3 +TensorArrayReadV3 +TensorArrayPackV3 +TensorArrayScatterV3 +TensorArraySizeV3 +TensorArraySplitV3 +TensorArrayUnpackV3 +TensorArrayWriteV3 + +# functional_ops +SymbolicGradient + +# image_ops +AdjustContrastv2 +NonMaxSuppression +NonMaxSuppressionV2 +RandomCrop +ResizeBilinearGrad +ResizeBicubicGrad +ResizeNearestNeighborGrad +SampleDistortedBoundingBox +SampleDistortedBoundingBoxV2 +ScaleImageGrad + +# io_ops +FixedLengthRecordReader +IdentityReader +ReaderNumRecordsProduced +ReaderNumWorkUnitsCompleted +ReaderRead +ReaderReadUpTo +ReaderReset +ReaderRestoreState +ReaderSerializeState +ReaderWorkQueueLength +FixedLengthRecordReaderV2 +IdentityReaderV2 +ReaderNumRecordsProducedV2 +ReaderNumWorkUnitsCompletedV2 +ReaderReadV2 +ReaderReadUpToV2 +ReaderResetV2 +ReaderRestoreStateV2 +ReaderSerializeStateV2 +ReaderWorkQueueLengthV2 +Restore +RestoreSlice +Save +SaveSlices +ShardedFilename +ShardedFilespec +TextLineReader +TFRecordReader +WholeFileReader +TextLineReaderV2 +TFRecordReaderV2 +WholeFileReaderV2 +LMDBReader +DecodeCSV + +# linalg_ops +BatchCholesky +BatchCholeskyGrad +BatchMatrixDeterminant +BatchMatrixInverse +BatchMatrixSolve +BatchMatrixSolveLs +BatchMatrixTriangularSolve +BatchSelfAdjointEig +BatchSelfAdjointEigV2 +BatchSvd +LogMatrixDeterminant +MatrixExponential +MatrixLogarithm +MatrixSolveLs +SelfAdjointEig +SelfAdjointEigV2 +Svd + +# logging_ops +Assert +AudioSummary +AudioSummaryV2 +HistogramSummary +ImageSummary +MergeSummary +Print +ScalarSummary +TensorSummary +TensorSummaryV2 + +# math_ops +Abs +AccumulateNV2 +AddN +AddV2 +All +Any +BatchMatMul +BatchFFT +BatchFFT2D +BatchFFT3D +BatchIFFT +BatchIFFT2D +BatchIFFT3D +Bucketize +ClipByValue +Complex +ComplexAbs +Conj +FloorDiv +FloorMod +HistogramFixedWidth +Max +Mean +Min +Mul +Neg +Pow +Prod +Range +RealDiv +Select +SparseMatMul 
+Sub +Sum +MatMul +Sigmoid +Tanh +SigmoidGrad +TanhGrad +InvGrad +ReciprocalGrad +SqrtGrad +RsqrtGrad +TruncateDiv +TruncateMod + +# nn_ops +AvgPoolGrad # "*Grad" accessible through nn_grad instead of nn_ops. +AvgPool3DGrad +BatchNormWithGlobalNormalization +BatchNormWithGlobalNormalizationGrad +FusedBatchNorm +FusedBatchNormV2 +SoftmaxCrossEntropyWithLogits +SparseSoftmaxCrossEntropyWithLogits +LRNGrad +MaxPoolGrad +MaxPoolGradWithArgmax +MaxPoolGradGrad +MaxPoolGradGradWithArgmax +MaxPool3DGrad +MaxPool3DGradGrad +ReluGrad +Relu6Grad +EluGrad +SeluGrad +SoftplusGrad +SoftsignGrad +TopK +TopKV2 +BiasAdd +BiasAddV1 +Relu6 +AvgPool +MaxPool +MaxPoolV2 +Softmax +LogSoftmax +FractionalAvgPoolGrad +FractionalMaxPoolGrad +InTopK +InTopKV2 + +# parsing_ops +ParseExample +ParseSingleSequenceExample + +# random_ops +RandomGamma +RandomPoisson +RandomUniform +RandomUniformInt +RandomShuffle +RandomStandardNormal +ParameterizedTruncatedNormal +TruncatedNormal + +# script_ops +PyFunc +PyFuncStateless +EagerPyFunc + +# sdca_ops + +# state_ops +Variable +VariableV2 +TemporaryVariable +DestroyTemporaryVariable + +# sparse_ops +AddSparseToTensorsMap +AddManySparseToTensorsMap +TakeManySparseFromTensorsMap +DeserializeManySparse +DeserializeSparse +SerializeManySparse +SerializeSparse +SparseAdd +SparseAddGrad +SparseConcat +SparseCross +SparseFillEmptyRows +SparseFillEmptyRowsGrad +SparseSplit +SparseSelectLastK +SparseReorder +SparseReshape +SparseToDense +SparseTensorDenseAdd +SparseTensorDenseMatMul + +# string_ops +StringSplit + +# user_ops +Fact + +# training_ops +# (None) + +# word2vec deprecated ops +NegTrain +Skipgram diff --git a/tensorflow/python/ops/histogram_ops.py b/tensorflow/python/ops/histogram_ops.py index 4a1ef54fb50013881aa832f83674ac66ecccd9bc..ec38d89a0ec04469e14e2f1f99b98a51d0a9f513 100644 --- a/tensorflow/python/ops/histogram_ops.py +++ b/tensorflow/python/ops/histogram_ops.py @@ -32,7 +32,6 @@ from tensorflow.python.ops import clip_ops from 
tensorflow.python.ops import gen_math_ops from tensorflow.python.ops import math_ops from tensorflow.python.util.tf_export import tf_export -from tensorflow.python.util.tf_export import tf_export @tf_export('histogram_fixed_width_bins') diff --git a/tensorflow/python/ops/image_ops_impl.py b/tensorflow/python/ops/image_ops_impl.py index 3369fe3c9b37ca05311c5548dbfa3228ba04ee80..bd5b2ae83b5dd16e15dcf87de9714c082fa10ef9 100644 --- a/tensorflow/python/ops/image_ops_impl.py +++ b/tensorflow/python/ops/image_ops_impl.py @@ -269,17 +269,7 @@ def random_flip_up_down(image, seed=None): Raises: ValueError: if the shape of `image` not supported. """ - with ops.name_scope(None, 'random_flip_up_down', [image]) as scope: - image = ops.convert_to_tensor(image, name='image') - image = _Assert3DImage(image) - uniform_random = random_ops.random_uniform([], 0, 1.0, seed=seed) - mirror_cond = math_ops.less(uniform_random, .5) - result = control_flow_ops.cond( - mirror_cond, - lambda: array_ops.reverse(image, [0]), - lambda: image, - name=scope) - return fix_image_flip_shape(image, result) + return _random_flip(image, 0, seed, 'random_flip_up_down') @tf_export('image.random_flip_left_right') @@ -301,14 +291,34 @@ def random_flip_left_right(image, seed=None): Raises: ValueError: if the shape of `image` not supported. """ - with ops.name_scope(None, 'random_flip_left_right', [image]) as scope: + return _random_flip(image, 1, seed, 'random_flip_left_right') + + +def _random_flip(image, flip_index, seed, scope_name): + """Randomly (50% chance) flip an image along axis `flip_index`. + Args: + image: A 3-D tensor of shape `[height, width, channels].` + flip_index: The dimension along which to flip the image. + Vertical: 0, Horizontal: 1 + seed: A Python integer. Used to create a random seed. See + @{tf.set_random_seed} + for behavior. + scope_name: Name of the scope in which the ops are added. + + Returns: + A 3-D tensor of the same type and shape as `image`. 
+ + Raises: + ValueError: if the shape of `image` not supported. + """ + with ops.name_scope(None, scope_name, [image]) as scope: image = ops.convert_to_tensor(image, name='image') image = _Assert3DImage(image) uniform_random = random_ops.random_uniform([], 0, 1.0, seed=seed) mirror_cond = math_ops.less(uniform_random, .5) result = control_flow_ops.cond( mirror_cond, - lambda: array_ops.reverse(image, [1]), + lambda: array_ops.reverse(image, [flip_index]), lambda: image, name=scope) return fix_image_flip_shape(image, result) @@ -332,16 +342,7 @@ def flip_left_right(image): Raises: ValueError: if the shape of `image` not supported. """ - with ops.name_scope(None, 'flip_left_right', [image]): - image = ops.convert_to_tensor(image, name='image') - image = _AssertAtLeast3DImage(image) - shape = image.get_shape() - if shape.ndims == 3 or shape.ndims is None: - return fix_image_flip_shape(image, array_ops.reverse(image, [1])) - elif shape.ndims == 4: - return array_ops.reverse(image, [2]) - else: - raise ValueError('\'image\' must have either 3 or 4 dimensions.') + return _flip(image, 1, 'flip_left_right') @tf_export('image.flip_up_down') @@ -362,14 +363,35 @@ def flip_up_down(image): Raises: ValueError: if the shape of `image` not supported. """ - with ops.name_scope(None, 'flip_up_down', [image]): + return _flip(image, 0, 'flip_up_down') + + +def _flip(image, flip_index, scope_name): + """Flip an image either horizontally or vertically. + + Outputs the contents of `image` flipped along the dimension `flip_index`. + + See also `reverse()`. + + Args: + image: 4-D Tensor of shape `[batch, height, width, channels]` or + 3-D Tensor of shape `[height, width, channels]`. + flip_index: 0 For vertical, 1 for horizontal. + + Returns: + A tensor of the same type and shape as `image`. + + Raises: + ValueError: if the shape of `image` not supported. 
+ """ + with ops.name_scope(None, scope_name, [image]): image = ops.convert_to_tensor(image, name='image') image = _AssertAtLeast3DImage(image) shape = image.get_shape() if shape.ndims == 3 or shape.ndims is None: - return fix_image_flip_shape(image, array_ops.reverse(image, [0])) + return fix_image_flip_shape(image, array_ops.reverse(image, [flip_index])) elif shape.ndims == 4: - return array_ops.reverse(image, [1]) + return array_ops.reverse(image, [flip_index+1]) else: raise ValueError('\'image\' must have either 3 or 4 dimensions.') @@ -630,7 +652,7 @@ def pad_to_bounding_box(image, offset_height, offset_width, target_height, padded.set_shape(padded_shape) if not is_batch: - padded = array_ops.squeeze(padded, squeeze_dims=[0]) + padded = array_ops.squeeze(padded, axis=[0]) return padded @@ -710,7 +732,7 @@ def crop_to_bounding_box(image, offset_height, offset_width, target_height, cropped.set_shape(cropped_shape) if not is_batch: - cropped = array_ops.squeeze(cropped, squeeze_dims=[0]) + cropped = array_ops.squeeze(cropped, axis=[0]) return cropped @@ -827,7 +849,7 @@ def resize_image_with_crop_or_pad(image, target_height, target_width): resized = control_flow_ops.with_dependencies(assert_ops, resized) if not is_batch: - resized = array_ops.squeeze(resized, squeeze_dims=[0]) + resized = array_ops.squeeze(resized, axis=[0]) return resized @@ -920,7 +942,7 @@ def resize_images(images, for x in [new_width_const, width, new_height_const, height]) and ( width == new_width_const and height == new_height_const): if not is_batch: - images = array_ops.squeeze(images, squeeze_dims=[0]) + images = array_ops.squeeze(images, axis=[0]) return images if method == ResizeMethod.BILINEAR: @@ -943,7 +965,7 @@ def resize_images(images, images.set_shape([None, new_height_const, new_width_const, None]) if not is_batch: - images = array_ops.squeeze(images, squeeze_dims=[0]) + images = array_ops.squeeze(images, axis=[0]) return images diff --git a/tensorflow/python/ops/init_ops.py 
b/tensorflow/python/ops/init_ops.py index 9dfe5ffbf42bcfc9657739b6fc6ad1f3c4823a7d..f93bf0a17f31b4451cad9c5bc525d9f237f97bef 100644 --- a/tensorflow/python/ops/init_ops.py +++ b/tensorflow/python/ops/init_ops.py @@ -39,10 +39,10 @@ import numpy as np from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.ops import array_ops -from tensorflow.python.ops import linalg_ops +from tensorflow.python.ops import linalg_ops_impl +from tensorflow.python.ops import gen_linalg_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import random_ops -from tensorflow.python.ops import random_ops from tensorflow.python.util.deprecation import deprecated from tensorflow.python.util.tf_export import tf_export @@ -499,10 +499,10 @@ class Orthogonal(Initializer): Args: gain: multiplicative factor to apply to the orthogonal matrix - dtype: The type of the output. seed: A Python integer. Used to create random seeds. See @{tf.set_random_seed} for behavior. + dtype: The data type. """ def __init__(self, gain=1.0, seed=None, dtype=dtypes.float32): @@ -529,7 +529,7 @@ class Orthogonal(Initializer): # Generate a random matrix a = random_ops.random_normal(flat_shape, dtype=dtype, seed=self.seed) # Compute the qr factorization - q, r = linalg_ops.qr(a, full_matrices=False) + q, r = gen_linalg_ops.qr(a, full_matrices=False) # Make Q uniform d = array_ops.diag_part(r) q *= math_ops.sign(d) @@ -549,13 +549,12 @@ class ConvolutionDeltaOrthogonal(Initializer): tensor form an orthogonal matrix. Other pixels are set to be zero. Args: - gain: multiplicative factor to apply to the orthogonal matrix. Default is 1. + gain: Multiplicative factor to apply to the orthogonal matrix. Default is 1. The 2-norm of an input is multiplied by a factor of 'sqrt(gain)' after applying this convolution. - dtype: The type of the output. seed: A Python integer. Used to create random seeds. See - @{tf.set_random_seed} - for behavior. 
+ @{tf.set_random_seed} for behavior. + dtype: The data type. """ def __init__(self, gain=1.0, seed=None, dtype=dtypes.float32): @@ -578,10 +577,9 @@ class ConvolutionDeltaOrthogonal(Initializer): a = random_ops.random_normal([shape[-1], shape[-1]], dtype=dtype, seed=self.seed) # Compute the qr factorization - q, r = linalg_ops.qr(a, full_matrices=False) + q, r = gen_linalg_ops.qr(a, full_matrices=False) # Make Q uniform d = array_ops.diag_part(r) - # ph = d / math_ops.abs(d) q *= math_ops.sign(d) q = q[:shape[-2], :] q *= math_ops.sqrt(math_ops.cast(self.gain, dtype=dtype)) @@ -601,6 +599,469 @@ class ConvolutionDeltaOrthogonal(Initializer): return {"gain": self.gain, "seed": self.seed, "dtype": self.dtype.name} +class ConvolutionOrthogonal(Initializer): + """Initializer that generates orthogonal kernel for ConvNets. + + Base class used to construct 1D, 2D and 3D orthogonal kernels for convolution. + + Args: + gain: multiplicative factor to apply to the orthogonal matrix. Default is 1. + The 2-norm of an input is multiplied by a factor of 'sqrt(gain)' after + applying this convolution. + seed: A Python integer. Used to create random seeds. See + @{tf.set_random_seed} for behavior. + dtype: The data type. + """ + + def __init__(self, gain=1.0, seed=None, dtype=dtypes.float32): + self.gain = gain + self.dtype = _assert_float_dtype(dtypes.as_dtype(dtype)) + self.seed = seed + + def __call__(self, shape, dtype=None, partition_info=None): + raise NotImplementedError + + def get_config(self): + return {"gain": self.gain, "seed": self.seed, "dtype": self.dtype.name} + + # Helper functions. + def _orthogonal_matrix(self, n): + """Construct an n x n orthogonal matrix. + + Args: + n: Dimension. + Returns: + A n x n orthogonal matrix. 
+ """ + a = random_ops.random_normal([n, n], dtype=self.dtype, seed=self.seed) + if self.seed: + self.seed += 1 + q, r = gen_linalg_ops.qr(a) + d = array_ops.diag_part(r) + # make q uniform + q *= math_ops.sign(d) + return q + + def _symmetric_projection(self, n): + """Compute a n x n symmetric projection matrix. + + Args: + n: Dimension. + Returns: + A n x n symmetric projection matrix, i.e. a matrix P s.t. P=P*P, P=P^T. + """ + q = self._orthogonal_matrix(n) + # randomly zeroing out some columns + mask = math_ops.cast(random_ops.random_normal([n], seed=self.seed) > 0, + self.dtype) + if self.seed: + self.seed += 1 + c = math_ops.multiply(q, mask) + return math_ops.matmul(c, array_ops.matrix_transpose(c)) + + +class ConvolutionOrthogonal2D(ConvolutionOrthogonal): + """Initializer that generates a 2D orthogonal kernel for ConvNets. + + The shape of the tensor must have length 4. The number of input + filters must not exceed the number of output filters. + The orthogonality(==isometry) is exact when the inputs are circular padded. + There are finite-width effects with non-circular padding (e.g. zero padding). + + Args: + gain: Multiplicative factor to apply to the orthogonal matrix. Default is 1. + This has the effect of scaling the output 2-norm by a factor of + `sqrt(gain)`. + seed: A Python integer. Used to create random seeds. See + @{tf.set_random_seed} for behavior. + dtype: The data type. 
+ """ + + def __call__(self, shape, dtype=None, partition_info=None): + if dtype is None: + dtype = self.dtype + if len(shape) != 4: + raise ValueError("The tensor to initialize must be four-dimensional") + + if shape[-2] > shape[-1]: + raise ValueError("In_filters cannot be greater than out_filters.") + + if shape[0] != shape[1]: + raise ValueError("Kernel sizes must be equal.") + + kernel = self._orthogonal_kernel(shape[0], shape[2], shape[3]) + kernel *= math_ops.sqrt(math_ops.cast(self.gain, dtype=dtype)) + return kernel + + def _dict_to_tensor(self, x, k1, k2): + """Convert a dictionary to a tensor. + + Args: + x: A k1 * k2 dictionary. + k1: First dimension of x. + k2: Second dimension of x. + Returns: + A k1 * k2 tensor. + """ + + return array_ops.stack([array_ops.stack([x[i, j] for j in range(k2)]) + for i in range(k1)]) + + def _block_orth(self, p1, p2): + """Construct a 2 x 2 kernel. Used to construct orthogonal kernel. + + Args: + p1: A symmetric projection matrix. + p2: A symmetric projection matrix. + Returns: + A 2 x 2 kernel [[p1p2, p1(1-p2)], + [(1-p1)p2, (1-p1)(1-p2)]]. + Raises: + ValueError: If the dimensions of p1 and p2 are different. + """ + if p1.shape.as_list() != p2.shape.as_list(): + raise ValueError("The dimension of the matrices must be the same.") + n = p1.shape.as_list()[0] + kernel2x2 = {} + eye = linalg_ops_impl.eye(n, dtype=self.dtype) + kernel2x2[0, 0] = math_ops.matmul(p1, p2) + kernel2x2[0, 1] = math_ops.matmul(p1, (eye - p2)) + kernel2x2[1, 0] = math_ops.matmul((eye - p1), p2) + kernel2x2[1, 1] = math_ops.matmul((eye - p1), (eye - p2)) + + return kernel2x2 + + def _matrix_conv(self, m1, m2): + """Matrix convolution. + + Args: + m1: A k x k dictionary, each element is a n x n matrix. + m2: A l x l dictionary, each element is a n x n matrix. + + Returns: + (k + l - 1) * (k + l - 1) dictionary each element is a n x n matrix. + Raises: + ValueError: if the entries of m1 and m2 are of different dimensions. 
+ """ + + n = (m1[0, 0]).shape.as_list()[0] + if n != (m2[0, 0]).shape.as_list()[0]: + raise ValueError("The entries in matrices m1 and m2 " + "must have the same dimensions!") + k = int(np.sqrt(len(m1))) + l = int(np.sqrt(len(m2))) + result = {} + size = k + l - 1 + # Compute matrix convolution between m1 and m2. + for i in range(size): + for j in range(size): + result[i, j] = array_ops.zeros([n, n], self.dtype) + for index1 in range(min(k, i + 1)): + for index2 in range(min(k, j + 1)): + if (i - index1) < l and (j - index2) < l: + result[i, j] += math_ops.matmul(m1[index1, index2], + m2[i - index1, j - index2]) + return result + + def _orthogonal_kernel(self, ksize, cin, cout): + """Construct orthogonal kernel for convolution. + + Args: + ksize: Kernel size. + cin: Number of input channels. + cout: Number of output channels. + Returns: + An [ksize, ksize, cin, cout] orthogonal kernel. + Raises: + ValueError: If cin > cout. + """ + if cin > cout: + raise ValueError("The number of input channels cannot exceed " + "the number of output channels.") + orth = self._orthogonal_matrix(cout)[0:cin, :] + if ksize == 1: + return array_ops.expand_dims(array_ops.expand_dims(orth, 0), 0) + + p = self._block_orth(self._symmetric_projection(cout), + self._symmetric_projection(cout)) + for _ in range(ksize - 2): + temp = self._block_orth(self._symmetric_projection(cout), + self._symmetric_projection(cout)) + p = self._matrix_conv(p, temp) + for i in range(ksize): + for j in range(ksize): + p[i, j] = math_ops.matmul(orth, p[i, j]) + + return self._dict_to_tensor(p, ksize, ksize) + + +class ConvolutionOrthogonal1D(ConvolutionOrthogonal): + """Initializer that generates a 1D orthogonal kernel for ConvNets. + + The shape of the tensor must have length 3. The number of input + filters must not exceed the number of output filters. + The orthogonality(==isometry) is exact when the inputs are circular padded. + There are finite-width effects with non-circular padding (e.g. zero padding). 
+ + Args: + gain: Multiplicative factor to apply to the orthogonal matrix. Default is 1. + The 2-norm of an input is multiplied by a factor of 'sqrt(gain)' after + applying this convolution. + seed: A Python integer. Used to create random seeds. See + @{tf.set_random_seed} + for behavior. + dtype: The data type. + """ + + def __call__(self, shape, dtype=None, partition_info=None): + if dtype is None: + dtype = self.dtype + if len(shape) != 3: + raise ValueError("The tensor to initialize must be three-dimensional") + + if shape[-2] > shape[-1]: + raise ValueError("In_filters cannot be greater than out_filters.") + + kernel = self._orthogonal_kernel(shape[0], shape[-2], shape[-1]) + kernel *= math_ops.sqrt(math_ops.cast(self.gain, dtype=dtype)) + return kernel + + def _dict_to_tensor(self, x, k): + """Convert a dictionary to a tensor. + + Args: + x: A dictionary of length k. + k: Dimension of x. + Returns: + A tensor with the same dimension. + """ + + return array_ops.stack([x[i] for i in range(k)]) + + def _block_orth(self, projection_matrix): + """Construct a kernel. Used to construct orthogonal kernel. + + Args: + projection_matrix: A symmetric projection matrix of size n x n. + Returns: + [projection_matrix, (1 - projection_matrix)]. + """ + n = projection_matrix.shape.as_list()[0] + kernel = {} + eye = linalg_ops_impl.eye(n, dtype=self.dtype) + kernel[0] = projection_matrix + kernel[1] = eye - projection_matrix + return kernel + + def _matrix_conv(self, m1, m2): + """Matrix convolution. + + Args: + m1: A dictionary of length k, each element is a n x n matrix. + m2: A dictionary of length l, each element is a n x n matrix. + + Returns: + (k + l - 1) dictionary each element is a n x n matrix. + Raises: + ValueError: If the entries of m1 and m2 are of different dimensions. 
+ """ + + n = (m1[0]).shape.as_list()[0] + if n != (m2[0]).shape.as_list()[0]: + raise ValueError("The entries in matrices m1 and m2 " + "must have the same dimensions!") + k = len(m1) + l = len(m2) + result = {} + size = k + l - 1 + # Compute matrix convolution between m1 and m2. + for i in range(size): + result[i] = array_ops.zeros([n, n], self.dtype) + for index in range(min(k, i + 1)): + if (i - index) < l: + result[i] += math_ops.matmul(m1[index], m2[i - index]) + return result + + def _orthogonal_kernel(self, ksize, cin, cout): + """Construct orthogonal kernel for convolution. + + Args: + ksize: Kernel size. + cin: Number of input channels. + cout: Number of output channels. + Returns: + A [ksize, cin, cout] orthogonal kernel. + Raises: + ValueError: If cin > cout. + """ + if cin > cout: + raise ValueError("The number of input channels cannot exceed " + "the number of output channels.") + orth = self._orthogonal_matrix(cout)[0:cin, :] + if ksize == 1: + return array_ops.expand_dims(orth, 0) + + p = self._block_orth(self._symmetric_projection(cout)) + for _ in range(ksize - 2): + temp = self._block_orth(self._symmetric_projection(cout)) + p = self._matrix_conv(p, temp) + for i in range(ksize): + p[i] = math_ops.matmul(orth, p[i]) + + return self._dict_to_tensor(p, ksize) + + +class ConvolutionOrthogonal3D(ConvolutionOrthogonal): + """Initializer that generates a 3D orthogonal kernel for ConvNets. + + The shape of the tensor must have length 5. The number of input + filters must not exceed the number of output filters. + The orthogonality(==isometry) is exact when the inputs are circular padded. + There are finite-width effects with non-circular padding (e.g. zero padding). + + Args: + gain: Multiplicative factor to apply to the orthogonal matrix. Default is 1. + The 2-norm of an input is multiplied by a factor of 'sqrt(gain)' after + applying this convolution. + seed: A Python integer. Used to create random seeds. 
See + @{tf.set_random_seed} for behavior. + dtype: The data type. + """ + + def __call__(self, shape, dtype=None, partition_info=None): + if dtype is None: + dtype = self.dtype + if len(shape) != 5: + raise ValueError("The tensor to initialize must be five-dimensional") + + if shape[-2] > shape[-1]: + raise ValueError("In_filters cannot be greater than out_filters.") + + if shape[0] != shape[1] or shape[0] != shape[2]: + raise ValueError("Kernel sizes must be equal.") + + kernel = self._orthogonal_kernel(shape[0], shape[-2], shape[-1]) + kernel *= math_ops.sqrt(math_ops.cast(self.gain, dtype=dtype)) + return kernel + + def _dict_to_tensor(self, x, k1, k2, k3): + """Convert a dictionary to a tensor. + + Args: + x: A k1 * k2 * k3 dictionary. + k1: First dimension of x. + k2: Second dimension of x. + k3: Third dimension of x. + Returns: + A k1 * k2 * k3 tensor. + """ + + return array_ops.stack([array_ops.stack( + [array_ops.stack([x[i, j, k] for k in range(k3)]) + for j in range(k2)]) for i in range(k1)]) + + def _block_orth(self, p1, p2, p3): + """Construct a 2 x 2 x 2 kernel. Used to construct orthogonal kernel. + + Args: + p1: A symmetric projection matrix. + p2: A symmetric projection matrix. + p3: A symmetric projection matrix. + Returns: + A 2 x 2 x 2 kernel. + Raises: + ValueError: If the dimensions of p1, p2 and p3 are different. + """ + p1_shape = p1.shape.as_list() + if p1_shape != p2.shape.as_list() or p1_shape != p3.shape.as_list(): + raise ValueError("The dimension of the matrices must be the same.") + n = p1_shape[0] + eye = linalg_ops_impl.eye(n, dtype=self.dtype) + kernel2x2x2 = {} + def matmul(p1, p2, p3): + return math_ops.matmul(math_ops.matmul(p1, p2), p3) + def cast(i, p): + """Return p or (1-p).""" + return i * p + (1-i) * (eye - p) + for i in [0, 1]: + for j in [0, 1]: + for k in [0, 1]: + kernel2x2x2[i, j, k] = matmul(cast(i, p1), cast(j, p2), cast(k, p3)) + return kernel2x2x2 + + def _matrix_conv(self, m1, m2): + """Matrix convolution. 
+ + Args: + m1: is a k x k x k dictionary, each element is a n x n matrix. + m2: is a l x l x l dictionary, each element is a n x n matrix. + + Returns: + (k + l - 1) x (k + l - 1) x (k + l - 1) dictionary each + element is a n x n matrix. + Raises: + ValueError: if the entries of m1 and m2 are of different dimensions. + """ + + n = (m1[0, 0, 0]).shape.as_list()[0] + if n != (m2[0, 0, 0]).shape.as_list()[0]: + raise ValueError("The entries in matrices m1 and m2 " + "must have the same dimensions!") + k = int(np.cbrt(len(m1))) + l = int(np.cbrt(len(m2))) + result = {} + size = k + l - 1 + # Compute matrix convolution between m1 and m2. + for i in range(size): + for j in range(size): + for r in range(size): + result[i, j, r] = array_ops.zeros([n, n], self.dtype) + for index1 in range(min(k, i + 1)): + for index2 in range(min(k, j + 1)): + for index3 in range(min(k, r + 1)): + if (i - index1) < l and (j - index2) < l and (r - index3) < l: + result[i, j, r] += math_ops.matmul(m1[index1, index2, index3], + m2[i - index1, j - index2, + r - index3]) + return result + + def _orthogonal_kernel(self, ksize, cin, cout): + """Construct orthogonal kernel for convolution. + + Args: + ksize: Kernel size. + cin: Number of input channels. + cout: Number of output channels. + Returns: + An [ksize, ksize, ksize, cin, cout] orthogonal kernel. + Raises: + ValueError: If cin > cout. 
+ """ + if cin > cout: + raise ValueError("The number of input channels cannot exceed " + "the number of output channels.") + orth = self._orthogonal_matrix(cout)[0:cin, :] + if ksize == 1: + return array_ops.expand_dims( + array_ops.expand_dims( + array_ops.expand_dims(orth, 0), 0), 0) + + p = self._block_orth(self._symmetric_projection(cout), + self._symmetric_projection(cout), + self._symmetric_projection(cout)) + for _ in range(ksize - 2): + temp = self._block_orth(self._symmetric_projection(cout), + self._symmetric_projection(cout), + self._symmetric_projection(cout)) + p = self._matrix_conv(p, temp) + for i in range(ksize): + for j in range(ksize): + for k in range(ksize): + p[i, j, k] = math_ops.matmul(orth, p[i, j, k]) + + return self._dict_to_tensor(p, ksize, ksize, ksize) + + @tf_export("keras.initializers.Identity", "initializers.identity") class Identity(Initializer): """Initializer that generates the identity matrix. @@ -623,7 +1084,7 @@ class Identity(Initializer): "Identity matrix initializer can only be used for 2D matrices.") if dtype is None: dtype = self.dtype - initializer = linalg_ops.eye(*full_shape, dtype=dtype) + initializer = linalg_ops_impl.eye(*full_shape, dtype=dtype) if partition_info is not None: initializer = array_ops.slice(initializer, partition_info.var_offset, shape) @@ -646,6 +1107,9 @@ variance_scaling_initializer = VarianceScaling orthogonal_initializer = Orthogonal identity_initializer = Identity convolutional_delta_orthogonal = ConvolutionDeltaOrthogonal +convolutional_orthogonal_1d = ConvolutionOrthogonal1D +convolutional_orthogonal_2d = ConvolutionOrthogonal2D +convolutional_orthogonal_3d = ConvolutionOrthogonal3D # pylint: enable=invalid-name diff --git a/tensorflow/python/ops/inplace_ops.py b/tensorflow/python/ops/inplace_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..e5b000086b05219d23cd88935948f88f2cc718bf --- /dev/null +++ b/tensorflow/python/ops/inplace_ops.py @@ -0,0 +1,227 @@ +# 
Copyright 2015 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Inplace operations. +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gen_array_ops +from tensorflow.python.ops import math_ops + + +def _inplace_helper(x, i, v, op): + """Applies an inplace op on (x, i, v). + + op is one of gen_array_ops.alias_inplace_update, + gen_array_ops.alias_inplace_add, or gen_array_ops.alias_inplace_sub. + + If i is None, x and v must be the same shape. Computes + x op v; + If i is a scalar, x has a rank 1 higher than v's. Computes + x[i, :] op v; + Otherwise, x and v must have the same rank. Computes + x[i, :] op v; + + Args: + x: A Tensor. + i: None, a scalar or a vector. + v: A Tensor. + op: alias_inplace_update, alias_inplace_add, or alias_inplace_sub. + + Returns: + Returns x. + + """ + x = ops.convert_to_tensor(x) + v = ops.convert_to_tensor(v, x.dtype) + if i is None: + # Full tensor. + return array_ops.reshape( + op(array_ops.reshape(x, [1, -1]), [0], array_ops.reshape(v, [1, -1])), + array_ops.shape(x)) + i = math_ops.to_int32(i) + if i.get_shape().ndims == 0: + # Single 0-dim update. 
+ return op(x, array_ops.reshape(i, [1]), array_ops.expand_dims(v, 0)) + return op(x, i, v) + + +def alias_inplace_update(x, i, v): + """Applies an inplace update on input x at index i with value v. Aliases x. + + If i is None, x and v must be the same shape. Computes + x = v; + If i is a scalar, x has a rank 1 higher than v's. Computes + x[i, :] = v; + Otherwise, x and v must have the same rank. Computes + x[i, :] = v; + + Args: + x: A Tensor. + i: None, a scalar or a vector. + v: A Tensor. + + Returns: + Returns x. + + """ + return _inplace_helper(x, i, v, gen_array_ops.inplace_update) + + +def alias_inplace_add(x, i, v): + """Applies an inplace add on input x at index i with value v. Aliases x. + + If i is None, x and v must be the same shape. Computes + x += v; + If i is a scalar, x has a rank 1 higher than v's. Computes + x[i, :] += v; + Otherwise, x and v must have the same rank. Computes + x[i, :] += v; + + Args: + x: A Tensor. + i: None, a scalar or a vector. + v: A Tensor. + + Returns: + Returns x. + + """ + return _inplace_helper(x, i, v, gen_array_ops.inplace_add) + + +def alias_inplace_sub(x, i, v): + """Applies an inplace sub on input x at index i with value v. Aliases x. + + If i is None, x and v must be the same shape. Computes + x -= v; + If i is a scalar, x has a rank 1 higher than v's. Computes + x[i, :] -= v; + Otherwise, x and v must have the same rank. Computes + x[i, :] -= v; + + Args: + x: A Tensor. + i: None, a scalar or a vector. + v: A Tensor. + + Returns: + Returns x. + + """ + return _inplace_helper(x, i, v, gen_array_ops.inplace_sub) + + +def empty_like(x, init=None): + """Returns a non-initialized tensor with the same shape and dtype as x. + + Args: + x: A Tensor. + init: Initialize the returned tensor with the default value of + x.dtype(), if True. Otherwise, do not initialize. Defaults to + None. + + Returns: + A tensor y, whose dtype and shape are the same as those of x. + y is guaranteed not to be an alias of x. 
Upon return, y may contain + arbitrary data. + + """ + x = ops.convert_to_tensor(x) + return gen_array_ops.empty(array_ops.shape(x), x.dtype, init=init) + + +def inplace_update(x, i, v): + """Applies an inplace update on input x at index i with value v. + + Note that this function is not actually inplace - it allocates + a copy of x. The utility is not avoiding memory copies but rather + specifying a sparse update. + + If i is None, x and v must be the same shape. Computes + y = x; y = v; + If i is a scalar, x has a rank 1 higher than v's. Computes + y = x; y[i, :] = v; + Otherwise, x and v must have the same rank. Computes + y = x; y[i, :] = v; + + Args: + x: A Tensor. + i: None, a scalar or a vector. + v: A Tensor. + + Returns: + Returns y, which is guaranteed not to be an alias of x. + + """ + return alias_inplace_update(gen_array_ops.deep_copy(x), i, v) + + +def inplace_add(x, i, v): + """Applies an inplace add on input x at index i with value v. + + Note that this function is not actually inplace - it allocates + a copy of x. The utility is not avoiding memory copies but rather + specifying a sparse update. + + If i is None, x and v must be the same shape. Computes + y = x; y += v; + If i is a scalar, x has a rank 1 higher than v's. Computes + y = x; y[i, :] += v; + Otherwise, x and v must have the same rank. Computes + y = x; y[i, :] += v; + + Args: + x: A Tensor. + i: None, a scalar or a vector. + v: A Tensor. + + Returns: + Returns y, which is guaranteed not to be an alias of x. + + """ + return alias_inplace_add(gen_array_ops.deep_copy(x), i, v) + + +def inplace_sub(x, i, v): + """Applies an inplace sub on input x at index i with value v. + + Note that this function is not actually inplace - it allocates + a copy of x. The utility is not avoiding memory copies but rather + specifying a sparse update. + + If i is None, x and v must be the same shape. Computes + y = x; y -= v; + If i is a scalar, x has a rank 1 higher than v's. 
Computes + y = x; y[i, :] -= v; + Otherwise, x and v must have the same rank. Computes + y = x; y[i, :] -= v; + + Args: + x: A Tensor. + i: None, a scalar or a vector. + v: A Tensor. + + Returns: + Returns y, which is guaranteed not to be an alias of x. + + """ + return alias_inplace_sub(gen_array_ops.deep_copy(x), i, v) + +empty = gen_array_ops.empty diff --git a/tensorflow/python/ops/linalg/linear_operator.py b/tensorflow/python/ops/linalg/linear_operator.py index 193c787baa2ac68feec7e5d8bb03b251fc78d781..8cfe964b1c0a572f43a14c66885e74ea105b0916 100644 --- a/tensorflow/python/ops/linalg/linear_operator.py +++ b/tensorflow/python/ops/linalg/linear_operator.py @@ -699,9 +699,10 @@ class LinearOperator(object): " Requires conversion to a dense matrix and O(N^3) operations.") rhs = linalg.adjoint(rhs) if adjoint_arg else rhs if self._can_use_cholesky(): - return linalg_ops.cholesky_solve( + return linear_operator_util.cholesky_solve_with_broadcast( linalg_ops.cholesky(self.to_dense()), rhs) - return linalg_ops.matrix_solve(self.to_dense(), rhs, adjoint=adjoint) + return linear_operator_util.matrix_solve_with_broadcast( + self.to_dense(), rhs, adjoint=adjoint) def solve(self, rhs, adjoint=False, adjoint_arg=False, name="solve"): """Solve (exact or approx) `R` (batch) systems of equations: `A X = rhs`. 
diff --git a/tensorflow/python/ops/linalg/linear_operator_full_matrix.py b/tensorflow/python/ops/linalg/linear_operator_full_matrix.py index 5ba3b090ae9decaba239b31226db84c2d7b254bd..746da8df1ce957e86bc2e730b5709a699adbf612 100644 --- a/tensorflow/python/ops/linalg/linear_operator_full_matrix.py +++ b/tensorflow/python/ops/linalg/linear_operator_full_matrix.py @@ -21,8 +21,8 @@ from __future__ import print_function from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops -from tensorflow.python.ops import math_ops from tensorflow.python.ops.linalg import linear_operator +from tensorflow.python.ops.linalg import linear_operator_util from tensorflow.python.util.tf_export import tf_export __all__ = ["LinearOperatorFullMatrix"] @@ -176,7 +176,7 @@ class LinearOperatorFullMatrix(linear_operator.LinearOperator): return array_ops.shape(self._matrix) def _matmul(self, x, adjoint=False, adjoint_arg=False): - return math_ops.matmul( + return linear_operator_util.matmul_with_broadcast( self._matrix, x, adjoint_a=adjoint, adjoint_b=adjoint_arg) def _to_dense(self): diff --git a/tensorflow/python/ops/linalg/linear_operator_low_rank_update.py b/tensorflow/python/ops/linalg/linear_operator_low_rank_update.py index be911029095920d424ac90b406e7b85b73884b3b..08e5896e1034fb1782beacfb18fef16da083bded 100644 --- a/tensorflow/python/ops/linalg/linear_operator_low_rank_update.py +++ b/tensorflow/python/ops/linalg/linear_operator_low_rank_update.py @@ -27,6 +27,7 @@ from tensorflow.python.ops import math_ops from tensorflow.python.ops.linalg import linear_operator from tensorflow.python.ops.linalg import linear_operator_diag from tensorflow.python.ops.linalg import linear_operator_identity +from tensorflow.python.ops.linalg import linear_operator_util from tensorflow.python.util.tf_export import tf_export __all__ = [ @@ -365,14 +366,17 @@ class LinearOperatorLowRankUpdate(linear_operator.LinearOperator): 
leading_term = l.matmul(x, adjoint=adjoint, adjoint_arg=adjoint_arg) if adjoint: - uh_x = math_ops.matmul(u, x, adjoint_a=True, adjoint_b=adjoint_arg) + uh_x = linear_operator_util.matmul_with_broadcast( + u, x, adjoint_a=True, adjoint_b=adjoint_arg) d_uh_x = d.matmul(uh_x, adjoint=adjoint) - v_d_uh_x = math_ops.matmul(v, d_uh_x) + v_d_uh_x = linear_operator_util.matmul_with_broadcast( + v, d_uh_x) return leading_term + v_d_uh_x else: - vh_x = math_ops.matmul(v, x, adjoint_a=True, adjoint_b=adjoint_arg) + vh_x = linear_operator_util.matmul_with_broadcast( + v, x, adjoint_a=True, adjoint_b=adjoint_arg) d_vh_x = d.matmul(vh_x, adjoint=adjoint) - u_d_vh_x = math_ops.matmul(u, d_vh_x) + u_d_vh_x = linear_operator_util.matmul_with_broadcast(u, d_vh_x) return leading_term + u_d_vh_x def _determinant(self): @@ -431,16 +435,18 @@ class LinearOperatorLowRankUpdate(linear_operator.LinearOperator): # L^{-1} rhs linv_rhs = l.solve(rhs, adjoint=adjoint, adjoint_arg=adjoint_arg) # V^H L^{-1} rhs - vh_linv_rhs = math_ops.matmul(v, linv_rhs, adjoint_a=True) + vh_linv_rhs = linear_operator_util.matmul_with_broadcast( + v, linv_rhs, adjoint_a=True) # C^{-1} V^H L^{-1} rhs if self._use_cholesky: - capinv_vh_linv_rhs = linalg_ops.cholesky_solve( + capinv_vh_linv_rhs = linear_operator_util.cholesky_solve_with_broadcast( self._chol_capacitance, vh_linv_rhs) else: - capinv_vh_linv_rhs = linalg_ops.matrix_solve( + capinv_vh_linv_rhs = linear_operator_util.matrix_solve_with_broadcast( self._capacitance, vh_linv_rhs, adjoint=adjoint) # U C^{-1} V^H M^{-1} rhs - u_capinv_vh_linv_rhs = math_ops.matmul(u, capinv_vh_linv_rhs) + u_capinv_vh_linv_rhs = linear_operator_util.matmul_with_broadcast( + u, capinv_vh_linv_rhs) # L^{-1} U C^{-1} V^H L^{-1} rhs linv_u_capinv_vh_linv_rhs = l.solve(u_capinv_vh_linv_rhs, adjoint=adjoint) @@ -454,7 +460,8 @@ class LinearOperatorLowRankUpdate(linear_operator.LinearOperator): # L^{-1} U linv_u = self.base_operator.solve(self.u) # V^H L^{-1} U - vh_linv_u = 
math_ops.matmul(self.v, linv_u, adjoint_a=True) + vh_linv_u = linear_operator_util.matmul_with_broadcast( + self.v, linv_u, adjoint_a=True) # D^{-1} + V^H L^{-1} V capacitance = self._diag_inv_operator.add_to_tensor(vh_linv_u) diff --git a/tensorflow/python/ops/linalg/linear_operator_lower_triangular.py b/tensorflow/python/ops/linalg/linear_operator_lower_triangular.py index c4d386ccb4efc7dede8310243e517fe2f6b45bd9..fb1eb2fedba5b47ce38f9635527b91e18d894a8f 100644 --- a/tensorflow/python/ops/linalg/linear_operator_lower_triangular.py +++ b/tensorflow/python/ops/linalg/linear_operator_lower_triangular.py @@ -21,7 +21,6 @@ from __future__ import print_function from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops -from tensorflow.python.ops import linalg_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops.linalg import linalg_impl as linalg from tensorflow.python.ops.linalg import linear_operator @@ -194,7 +193,7 @@ class LinearOperatorLowerTriangular(linear_operator.LinearOperator): message="Singular operator: Diagonal contained zero values.") def _matmul(self, x, adjoint=False, adjoint_arg=False): - return math_ops.matmul( + return linear_operator_util.matmul_with_broadcast( self._tril, x, adjoint_a=adjoint, adjoint_b=adjoint_arg) def _determinant(self): @@ -206,7 +205,7 @@ class LinearOperatorLowerTriangular(linear_operator.LinearOperator): def _solve(self, rhs, adjoint=False, adjoint_arg=False): rhs = linalg.adjoint(rhs) if adjoint_arg else rhs - return linalg_ops.matrix_triangular_solve( + return linear_operator_util.matrix_triangular_solve_with_broadcast( self._tril, rhs, lower=True, adjoint=adjoint) def _to_dense(self): diff --git a/tensorflow/python/ops/linalg/linear_operator_test_util.py b/tensorflow/python/ops/linalg/linear_operator_test_util.py index ce1a112ad584a14298be6e471578858ef31573d5..7e4fb6a6fc31960d570c78398ae211ba45a4a4a7 100644 --- 
a/tensorflow/python/ops/linalg/linear_operator_test_util.py +++ b/tensorflow/python/ops/linalg/linear_operator_test_util.py @@ -32,6 +32,7 @@ from tensorflow.python.ops import linalg_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import random_ops from tensorflow.python.ops.linalg import linalg_impl as linalg +from tensorflow.python.ops.linalg import linear_operator_util from tensorflow.python.platform import test @@ -126,13 +127,16 @@ class LinearOperatorDerivedClassTest(test.TestCase): raise NotImplementedError("Not implemented yet.") @abc.abstractmethod - def _make_rhs(self, operator, adjoint): + def _make_rhs(self, operator, adjoint, with_batch=True): """Make a rhs appropriate for calling operator.solve(rhs). Args: operator: A `LinearOperator` adjoint: Python `bool`. If `True`, we are making a 'rhs' value for the adjoint operator. + with_batch: Python `bool`. If `True`, create `rhs` with the same batch + shape as operator, and otherwise create a matrix without any batch + shape. Returns: A `Tensor` @@ -140,13 +144,15 @@ class LinearOperatorDerivedClassTest(test.TestCase): raise NotImplementedError("_make_rhs is not defined.") @abc.abstractmethod - def _make_x(self, operator, adjoint): + def _make_x(self, operator, adjoint, with_batch=True): """Make an 'x' appropriate for calling operator.matmul(x). Args: operator: A `LinearOperator` adjoint: Python `bool`. If `True`, we are making an 'x' value for the adjoint operator. + with_batch: Python `bool`. If `True`, create `x` with the same batch shape + as operator, and otherwise create a matrix without any batch shape. 
Returns: A `Tensor` @@ -224,10 +230,15 @@ class LinearOperatorDerivedClassTest(test.TestCase): [op_log_abs_det, mat_log_abs_det], feed_dict=feed_dict) self.assertAC(op_log_abs_det_v, mat_log_abs_det_v) - def test_matmul(self): - self._skip_if_tests_to_skip_contains("matmul") + def _test_matmul(self, with_batch): for use_placeholder in self._use_placeholder_options: for build_info in self._operator_build_infos: + # If batch dimensions are omitted, but there are + # no batch dimensions for the linear operator, then + # skip the test case. This is already checked with + # with_batch=True. + if not with_batch and len(build_info.shape) <= 2: + continue for dtype in self._dtypes_to_test: for adjoint in self._adjoint_options: for adjoint_arg in self._adjoint_arg_options: @@ -235,7 +246,8 @@ class LinearOperatorDerivedClassTest(test.TestCase): sess.graph.seed = random_seed.DEFAULT_GRAPH_SEED operator, mat, feed_dict = self._operator_and_mat_and_feed_dict( build_info, dtype, use_placeholder=use_placeholder) - x = self._make_x(operator, adjoint=adjoint) + x = self._make_x( + operator, adjoint=adjoint, with_batch=with_batch) # If adjoint_arg, compute A X^H^H = A X. 
if adjoint_arg: op_matmul = operator.matmul( @@ -244,7 +256,8 @@ class LinearOperatorDerivedClassTest(test.TestCase): adjoint_arg=adjoint_arg) else: op_matmul = operator.matmul(x, adjoint=adjoint) - mat_matmul = math_ops.matmul(mat, x, adjoint_a=adjoint) + mat_matmul = linear_operator_util.matmul_with_broadcast( + mat, x, adjoint_a=adjoint) if not use_placeholder: self.assertAllEqual(op_matmul.get_shape(), mat_matmul.get_shape()) @@ -252,10 +265,23 @@ class LinearOperatorDerivedClassTest(test.TestCase): [op_matmul, mat_matmul], feed_dict=feed_dict) self.assertAC(op_matmul_v, mat_matmul_v) - def test_solve(self): - self._skip_if_tests_to_skip_contains("solve") + def test_matmul(self): + self._skip_if_tests_to_skip_contains("matmul") + self._test_matmul(with_batch=True) + + def test_matmul_with_broadcast(self): + self._skip_if_tests_to_skip_contains("matmul_with_broadcast") + self._test_matmul(with_batch=False) + + def _test_solve(self, with_batch): for use_placeholder in self._use_placeholder_options: for build_info in self._operator_build_infos: + # If batch dimensions are omitted, but there are + # no batch dimensions for the linear operator, then + # skip the test case. This is already checked with + # with_batch=True. + if not with_batch and len(build_info.shape) <= 2: + continue for dtype in self._dtypes_to_test: for adjoint in self._adjoint_options: for adjoint_arg in self._adjoint_arg_options: @@ -263,7 +289,8 @@ class LinearOperatorDerivedClassTest(test.TestCase): sess.graph.seed = random_seed.DEFAULT_GRAPH_SEED operator, mat, feed_dict = self._operator_and_mat_and_feed_dict( build_info, dtype, use_placeholder=use_placeholder) - rhs = self._make_rhs(operator, adjoint=adjoint) + rhs = self._make_rhs( + operator, adjoint=adjoint, with_batch=with_batch) # If adjoint_arg, solve A X = (rhs^H)^H = rhs. 
if adjoint_arg: op_solve = operator.solve( @@ -273,7 +300,8 @@ class LinearOperatorDerivedClassTest(test.TestCase): else: op_solve = operator.solve( rhs, adjoint=adjoint, adjoint_arg=adjoint_arg) - mat_solve = linalg_ops.matrix_solve(mat, rhs, adjoint=adjoint) + mat_solve = linear_operator_util.matrix_solve_with_broadcast( + mat, rhs, adjoint=adjoint) if not use_placeholder: self.assertAllEqual(op_solve.get_shape(), mat_solve.get_shape()) @@ -281,6 +309,14 @@ class LinearOperatorDerivedClassTest(test.TestCase): [op_solve, mat_solve], feed_dict=feed_dict) self.assertAC(op_solve_v, mat_solve_v) + def test_solve(self): + self._skip_if_tests_to_skip_contains("solve") + self._test_solve(with_batch=True) + + def test_solve_with_broadcast(self): + self._skip_if_tests_to_skip_contains("solve_with_broadcast") + self._test_solve(with_batch=False) + def test_trace(self): self._skip_if_tests_to_skip_contains("trace") for use_placeholder in self._use_placeholder_options: @@ -358,13 +394,13 @@ class SquareLinearOperatorDerivedClassTest(LinearOperatorDerivedClassTest): build_info((3, 4, 4)), build_info((2, 1, 4, 4))] - def _make_rhs(self, operator, adjoint): + def _make_rhs(self, operator, adjoint, with_batch=True): # This operator is square, so rhs and x will have same shape. # adjoint value makes no difference because the operator shape doesn't # change since it is square, but be pedantic. - return self._make_x(operator, adjoint=not adjoint) + return self._make_x(operator, adjoint=not adjoint, with_batch=with_batch) - def _make_x(self, operator, adjoint): + def _make_x(self, operator, adjoint, with_batch=True): # Value of adjoint makes no difference because the operator is square. # Return the number of systems to solve, R, equal to 1 or 2. 
r = self._get_num_systems(operator) @@ -373,11 +409,17 @@ class SquareLinearOperatorDerivedClassTest(LinearOperatorDerivedClassTest): if operator.shape.is_fully_defined(): batch_shape = operator.batch_shape.as_list() n = operator.domain_dimension.value - x_shape = batch_shape + [n, r] + if with_batch: + x_shape = batch_shape + [n, r] + else: + x_shape = [n, r] else: batch_shape = operator.batch_shape_tensor() n = operator.domain_dimension_tensor() - x_shape = array_ops.concat((batch_shape, [n, r]), 0) + if with_batch: + x_shape = array_ops.concat((batch_shape, [n, r]), 0) + else: + x_shape = [n, r] return random_normal(x_shape, dtype=operator.dtype) @@ -404,7 +446,7 @@ class NonSquareLinearOperatorDerivedClassTest(LinearOperatorDerivedClassTest): @property def _tests_to_skip(self): """List of test names to skip.""" - return ["solve", "det", "log_abs_det"] + return ["solve", "solve_with_broadcast", "det", "log_abs_det"] @property def _operator_build_infos(self): @@ -417,12 +459,12 @@ class NonSquareLinearOperatorDerivedClassTest(LinearOperatorDerivedClassTest): build_info((3, 3, 4)), build_info((2, 1, 2, 4))] - def _make_rhs(self, operator, adjoint): + def _make_rhs(self, operator, adjoint, with_batch=True): # TODO(langmore) Add once we're testing solve_ls. 
raise NotImplementedError( "_make_rhs not implemented because we don't test solve") - def _make_x(self, operator, adjoint): + def _make_x(self, operator, adjoint, with_batch=True): # Return the number of systems for the argument 'x' for .matmul(x) r = self._get_num_systems(operator) # If operator.shape = [B1,...,Bb, M, N] this returns a random matrix of @@ -433,14 +475,20 @@ class NonSquareLinearOperatorDerivedClassTest(LinearOperatorDerivedClassTest): n = operator.range_dimension.value else: n = operator.domain_dimension.value - x_shape = batch_shape + [n, r] + if with_batch: + x_shape = batch_shape + [n, r] + else: + x_shape = [n, r] else: batch_shape = operator.batch_shape_tensor() if adjoint: n = operator.range_dimension_tensor() else: n = operator.domain_dimension_tensor() - x_shape = array_ops.concat((batch_shape, [n, r]), 0) + if with_batch: + x_shape = array_ops.concat((batch_shape, [n, r]), 0) + else: + x_shape = [n, r] return random_normal(x_shape, dtype=operator.dtype) diff --git a/tensorflow/python/ops/linalg/linear_operator_util.py b/tensorflow/python/ops/linalg/linear_operator_util.py index 427bd1e890305618264a6a588be4e4ffade33c01..9dd40765c20222c6998260547b7e8fa341e65437 100644 --- a/tensorflow/python/ops/linalg/linear_operator_util.py +++ b/tensorflow/python/ops/linalg/linear_operator_util.py @@ -23,6 +23,7 @@ from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import check_ops from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import linalg_ops from tensorflow.python.ops import math_ops @@ -102,6 +103,22 @@ def assert_is_batch_matrix(tensor): "%s" % tensor) +def shape_tensor(shape, name=None): + """Convert Tensor using default type, unless empty list or tuple.""" + # Works just like random_ops._ShapeTensor. 
+ if isinstance(shape, (tuple, list)) and not shape: + dtype = dtypes.int32 + else: + dtype = None + return ops.convert_to_tensor(shape, dtype=dtype, name=name) + + +################################################################################ +# Broadcasting versions of common linear algebra functions. +# TODO(b/77519145) Do this more efficiently in some special cases. +################################################################################ + + def broadcast_matrix_batch_dims(batch_matrices, name=None): """Broadcast leading dimensions of zero or more [batch] matrices. @@ -170,7 +187,8 @@ def broadcast_matrix_batch_dims(batch_matrices, name=None): bcast_batch_shape = batch_matrices[0].get_shape()[:-2] for mat in batch_matrices[1:]: bcast_batch_shape = array_ops.broadcast_static_shape( - bcast_batch_shape, mat.get_shape()[:-2]) + bcast_batch_shape, + mat.get_shape()[:-2]) if bcast_batch_shape.is_fully_defined(): # The [1, 1] at the end will broadcast with anything. bcast_shape = bcast_batch_shape.concatenate([1, 1]) @@ -183,7 +201,8 @@ def broadcast_matrix_batch_dims(batch_matrices, name=None): bcast_batch_shape = array_ops.shape(batch_matrices[0])[:-2] for mat in batch_matrices[1:]: bcast_batch_shape = array_ops.broadcast_dynamic_shape( - bcast_batch_shape, array_ops.shape(mat)[:-2]) + bcast_batch_shape, + array_ops.shape(mat)[:-2]) bcast_shape = array_ops.concat([bcast_batch_shape, [1, 1]], axis=0) for i, mat in enumerate(batch_matrices): batch_matrices[i] = _broadcast_to_shape(mat, bcast_shape) @@ -195,6 +214,13 @@ def _broadcast_to_shape(x, shape): return x + array_ops.zeros(shape=shape, dtype=x.dtype) +def cholesky_solve_with_broadcast(chol, rhs, name=None): + """Solve systems of linear equations.""" + with ops.name_scope(name, "CholeskySolveWithBroadcast", [chol, rhs]): + chol, rhs = broadcast_matrix_batch_dims([chol, rhs]) + return linalg_ops.cholesky_solve(chol, rhs) + + def matmul_with_broadcast(a, b, transpose_a=False, @@ -206,6 +232,11 @@ def 
matmul_with_broadcast(a, name=None): """Multiplies matrix `a` by matrix `b`, producing `a @ b`. + Works identically to `tf.matmul`, but broadcasts batch dims + of `a` and `b` (by replicating) if they are determined statically to be + different, or if static shapes are not fully defined. Thus, this may result + in an inefficient replication of data. + The inputs must be matrices (or tensors of rank > 2, representing batches of matrices). @@ -276,7 +307,7 @@ def matmul_with_broadcast(a, ValueError: If transpose_a and adjoint_a, or transpose_b and adjoint_b are both set to True. """ - with ops.name_scope(name, "MatMulWithBroadcast", [a, b]) as name: + with ops.name_scope(name, "MatMulWithBroadcast", [a, b]): a, b = broadcast_matrix_batch_dims([a, b]) return math_ops.matmul( a, @@ -289,11 +320,43 @@ def matmul_with_broadcast(a, b_is_sparse=b_is_sparse) -def shape_tensor(shape, name=None): - """Convert Tensor using default type, unless empty list or tuple.""" - # Works just like random_ops._ShapeTensor. - if isinstance(shape, (tuple, list)) and not shape: - dtype = dtypes.int32 - else: - dtype = None - return ops.convert_to_tensor(shape, dtype=dtype, name=name) +def matrix_solve_with_broadcast(matrix, rhs, adjoint=False, name=None): + """Solve systems of linear equations.""" + with ops.name_scope(name, "MatrixSolveWithBroadcast", [matrix, rhs]): + matrix, rhs = broadcast_matrix_batch_dims([matrix, rhs]) + return linalg_ops.matrix_solve(matrix, rhs, adjoint=adjoint) + + +def matrix_triangular_solve_with_broadcast(matrix, + rhs, + lower=True, + adjoint=False, + name=None): + """Solves triangular systems of linear equations with by backsubstitution. + + Works identically to `tf.matrix_triangular_solve`, but broadcasts batch dims + of `matrix` and `rhs` (by replicating) if they are determined statically to be + different, or if static shapes are not fully defined. Thus, this may result + in an inefficient replication of data. + + Args: + matrix: A Tensor. 
Must be one of the following types: + `float64`, `float32`, `complex64`, `complex128`. Shape is `[..., M, M]`. + rhs: A `Tensor`. Must have the same `dtype` as `matrix`. + Shape is `[..., M, K]`. + lower: An optional `bool`. Defaults to `True`. Indicates whether the + innermost matrices in `matrix` are lower or upper triangular. + adjoint: An optional `bool`. Defaults to `False`. Indicates whether to solve + with matrix or its (block-wise) adjoint. + name: A name for the operation (optional). + + Returns: + `Tensor` with same `dtype` as `matrix` and shape `[..., M, K]`. + """ + with ops.name_scope(name, "MatrixTriangularSolve", [matrix, rhs]): + matrix, rhs = broadcast_matrix_batch_dims([matrix, rhs]) + return linalg_ops.matrix_triangular_solve( + matrix, + rhs, + lower=lower, + adjoint=adjoint) diff --git a/tensorflow/python/ops/linalg_ops.py b/tensorflow/python/ops/linalg_ops.py index 170861b43fd980ab0e107fc0b2e3d6f02339ed34..a0dfa543f9b3aee15f11b073dc683b1d2d14388f 100644 --- a/tensorflow/python/ops/linalg_ops.py +++ b/tensorflow/python/ops/linalg_ops.py @@ -24,12 +24,13 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import functional_ops from tensorflow.python.ops import gen_linalg_ops +from tensorflow.python.ops import linalg_ops_impl from tensorflow.python.ops import math_ops # pylint: disable=wildcard-import from tensorflow.python.ops.gen_linalg_ops import * # pylint: enable=wildcard-import -from tensorflow.python.util import compat from tensorflow.python.util import deprecation from tensorflow.python.util.tf_export import tf_export @@ -159,36 +160,11 @@ def eye(num_rows, Returns: A `Tensor` of shape `batch_shape + [num_rows, num_columns]` """ - with ops.name_scope( - name, default_name='eye', values=[num_rows, num_columns, batch_shape]): - is_square = num_columns is None - batch_shape = [] 
if batch_shape is None else batch_shape - num_columns = num_rows if num_columns is None else num_columns - if isinstance(num_rows, ops.Tensor) or isinstance( - num_columns, ops.Tensor) or isinstance(batch_shape, ops.Tensor): - batch_shape = ops.convert_to_tensor( - batch_shape, name='shape', dtype=dtypes.int32) - diag_size = math_ops.minimum(num_rows, num_columns) - diag_shape = array_ops.concat((batch_shape, [diag_size]), 0) - if not is_square: - shape = array_ops.concat((batch_shape, [num_rows, num_columns]), 0) - else: - if not isinstance(num_rows, compat.integral_types) or not isinstance( - num_columns, compat.integral_types): - raise TypeError( - 'num_rows and num_columns must be positive integer values.') - batch_shape = [dim for dim in batch_shape] - is_square = num_rows == num_columns - diag_shape = batch_shape + [np.minimum(num_rows, num_columns)] - if not is_square: - shape = batch_shape + [num_rows, num_columns] - - diag_ones = array_ops.ones(diag_shape, dtype=dtype) - if is_square: - return array_ops.matrix_diag(diag_ones) - else: - zero_matrix = array_ops.zeros(shape, dtype=dtype) - return array_ops.matrix_set_diag(zero_matrix, diag_ones) + return linalg_ops_impl.eye(num_rows, + num_columns=num_columns, + batch_shape=batch_shape, + dtype=dtype, + name=name) @tf_export('matrix_solve_ls', 'linalg.lstsq') @@ -454,7 +430,7 @@ def norm(tensor, This function can compute several different vector norms (the 1-norm, the Euclidean or 2-norm, the inf-norm, and in general the p-norm for p > 0) and - matrix norms (Frobenius, 1-norm, and inf-norm). + matrix norms (Frobenius, 1-norm, 2-norm and inf-norm). Args: tensor: `Tensor` of types `float32`, `float64`, `complex64`, `complex128` @@ -465,7 +441,7 @@ def norm(tensor, Some restrictions apply: a) The Frobenius norm `fro` is not defined for vectors, b) If axis is a 2-tuple (matrix norm), only 'euclidean', 'fro', `1`, - `np.inf` are supported. + `2`, `np.inf` are supported. 
See the description of `axis` on how to compute norms for a batch of vectors or matrices stored in a tensor. axis: If `axis` is `None` (the default), the input is considered a vector @@ -521,8 +497,7 @@ def norm(tensor, axis[0] == axis[1]): raise ValueError( "'axis' must be None, an integer, or a tuple of 2 unique integers") - # TODO(rmlarsen): Implement matrix 2-norm using tf.svd(). - supported_matrix_norms = ['euclidean', 'fro', 1, np.inf] + supported_matrix_norms = ['euclidean', 'fro', 1, 2, np.inf] if ord not in supported_matrix_norms: raise ValueError("'ord' must be a supported matrix norm in %s, got %s" % (supported_matrix_norms, ord)) @@ -539,12 +514,34 @@ def norm(tensor, with ops.name_scope(name, 'norm', [tensor]): tensor = ops.convert_to_tensor(tensor) + if ord in ['fro', 'euclidean', 2, 2.0]: - # TODO(rmlarsen): Move 2-norm to a separate clause once we support it for - # matrices. - result = math_ops.sqrt( - math_ops.reduce_sum( - tensor * math_ops.conj(tensor), axis, keepdims=True)) + if is_matrix_norm and ord in [2, 2.0]: + rank = array_ops.rank(tensor) + positive_axis = functional_ops.map_fn( + lambda i: control_flow_ops.cond(i >= 0, lambda: i, lambda: i + rank), + ops.convert_to_tensor(axis)) + axes = math_ops.range(rank) + perm_before = array_ops.concat( + [array_ops.setdiff1d(axes, positive_axis)[0], positive_axis], + axis=0) + perm_after = functional_ops.map_fn( + lambda i: math_ops.cast( + array_ops.squeeze( + array_ops.where(math_ops.equal(perm_before, i))), + dtype=dtypes.int32), axes) + permed = array_ops.transpose(tensor, perm=perm_before) + matrix_2_norm = array_ops.expand_dims( + math_ops.reduce_max( + math_ops.abs(gen_linalg_ops.svd(permed, compute_uv=False)[0]), + axis=-1, + keepdims=True), + axis=-1) + result = array_ops.transpose(matrix_2_norm, perm=perm_after) + else: + result = math_ops.sqrt( + math_ops.reduce_sum( + tensor * math_ops.conj(tensor), axis, keepdims=True)) else: result = math_ops.abs(tensor) if ord == 1: diff --git 
a/tensorflow/python/ops/linalg_ops_impl.py b/tensorflow/python/ops/linalg_ops_impl.py new file mode 100644 index 0000000000000000000000000000000000000000..e7c89f6ae3e9c517920e6c9afce99a8b192be164 --- /dev/null +++ b/tensorflow/python/ops/linalg_ops_impl.py @@ -0,0 +1,73 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Operations for linear algebra.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.util import compat + +# Names below are lower_case. +# pylint: disable=invalid-name + + +def eye(num_rows, + num_columns=None, + batch_shape=None, + dtype=dtypes.float32, + name=None): + """Construct an identity matrix, or a batch of matrices. + + See `linalg_ops.eye`. 
+ """ + with ops.name_scope( + name, default_name='eye', values=[num_rows, num_columns, batch_shape]): + is_square = num_columns is None + batch_shape = [] if batch_shape is None else batch_shape + num_columns = num_rows if num_columns is None else num_columns + if isinstance(num_rows, ops.Tensor) or isinstance( + num_columns, ops.Tensor) or isinstance(batch_shape, ops.Tensor): + batch_shape = ops.convert_to_tensor( + batch_shape, name='shape', dtype=dtypes.int32) + diag_size = math_ops.minimum(num_rows, num_columns) + diag_shape = array_ops.concat((batch_shape, [diag_size]), 0) + if not is_square: + shape = array_ops.concat((batch_shape, [num_rows, num_columns]), 0) + else: + if not isinstance(num_rows, compat.integral_types) or not isinstance( + num_columns, compat.integral_types): + raise TypeError( + 'num_rows and num_columns must be positive integer values.') + batch_shape = [dim for dim in batch_shape] + is_square = num_rows == num_columns + diag_shape = batch_shape + [np.minimum(num_rows, num_columns)] + if not is_square: + shape = batch_shape + [num_rows, num_columns] + + diag_ones = array_ops.ones(diag_shape, dtype=dtype) + if is_square: + return array_ops.matrix_diag(diag_ones) + else: + zero_matrix = array_ops.zeros(shape, dtype=dtype) + return array_ops.matrix_set_diag(zero_matrix, diag_ones) + +# pylint: enable=invalid-name,redefined-builtin diff --git a/tensorflow/python/ops/list_ops.py b/tensorflow/python/ops/list_ops.py index bba59ebcef9c7caf1a53d724767999ae7ac079e5..d9ede875301c52219cc1e3f05a892ee887a70e67 100644 --- a/tensorflow/python/ops/list_ops.py +++ b/tensorflow/python/ops/list_ops.py @@ -29,6 +29,10 @@ from tensorflow.python.ops.gen_list_ops import * # pylint: enable=wildcard-import +ops.NotDifferentiable("TensorListConcat") +ops.NotDifferentiable("TensorListPushBackBatch") + + @ops.RegisterGradient("TensorListPushBack") def _PushBackGrad(op, dresult): return gen_list_ops.tensor_list_pop_back( @@ -54,8 +58,8 @@ def 
_TensorListStackGrad(unused_op, dtensor): @ops.RegisterGradient("TensorListFromTensor") def _TensorListFromTensorGrad(op, dlist): """Gradient for TensorListFromTensor.""" - if op.inputs[0].shape[0] is not None: - num_elements = op.inputs[0].shape[0] + if op.inputs[0].shape[0].value is not None: + num_elements = op.inputs[0].shape[0].value else: num_elements = None if dlist is None: @@ -63,9 +67,10 @@ def _TensorListFromTensorGrad(op, dlist): element_dtype=op.inputs[0].dtype, element_shape=gen_list_ops.tensor_list_element_shape( op.outputs[0], shape_type=dtypes.int32)) - return gen_list_ops.tensor_list_stack( - dlist, element_dtype=op.inputs[0].dtype, - num_elements=num_elements) + tensor_grad = gen_list_ops.tensor_list_stack( + dlist, element_dtype=op.inputs[0].dtype, num_elements=num_elements) + shape_grad = None + return tensor_grad, shape_grad @ops.RegisterGradient("TensorListGetItem") diff --git a/tensorflow/python/ops/losses/losses_impl.py b/tensorflow/python/ops/losses/losses_impl.py index 34ca1adc3e13dc67560fb21d70c16cd42dc40552..9fc545c9678e7eb33a7ad35e2a84f890885e09af 100644 --- a/tensorflow/python/ops/losses/losses_impl.py +++ b/tensorflow/python/ops/losses/losses_impl.py @@ -29,6 +29,7 @@ from tensorflow.python.ops import nn_ops from tensorflow.python.ops import weights_broadcast_ops from tensorflow.python.ops.losses import util from tensorflow.python.util.deprecation import deprecated_args +from tensorflow.python.util.deprecation import deprecated_argument_lookup from tensorflow.python.util.tf_export import tf_export @@ -306,11 +307,8 @@ def cosine_distance( ValueError: If `predictions` shape doesn't match `labels` shape, or `axis`, `labels`, `predictions` or `weights` is `None`. 
""" - if dim is not None: - if axis is not None: - raise ValueError("Cannot specify both 'axis' and 'dim'") - axis = dim - if axis is None and dim is None: + axis = deprecated_argument_lookup("axis", axis, "dim", dim) + if axis is None: raise ValueError("You must specify 'axis'.") if labels is None: raise ValueError("labels must not be None.") @@ -696,7 +694,7 @@ def softmax_cross_entropy( onehot_labels, logits, weights=1.0, label_smoothing=0, scope=None, loss_collection=ops.GraphKeys.LOSSES, reduction=Reduction.SUM_BY_NONZERO_WEIGHTS): - """Creates a cross-entropy loss using tf.nn.softmax_cross_entropy_with_logits. + """Creates a cross-entropy loss using tf.nn.softmax_cross_entropy_with_logits_v2. `weights` acts as a coefficient for the loss. If a scalar is provided, then the loss is simply scaled by the given value. If `weights` is a @@ -707,11 +705,16 @@ def softmax_cross_entropy( new_onehot_labels = onehot_labels * (1 - label_smoothing) + label_smoothing / num_classes + Note that `onehot_labels` and `logits` must have the same shape, + e.g. `[batch_size, num_classes]`. The shape of `weights` must be + broadcastable to loss, whose shape is decided by the shape of `logits`. + In case the shape of `logits` is `[batch_size, num_classes]`, loss is + a `Tensor` of shape `[batch_size]`. + Args: - onehot_labels: `[batch_size, num_classes]` target one-hot-encoded labels. - logits: `[batch_size, num_classes]` logits outputs of the network . - weights: Optional `Tensor` whose rank is either 0, or rank 1 and is - broadcastable to the loss which is a `Tensor` of shape `[batch_size]`. + onehot_labels: One-hot-encoded labels. + logits: Logits outputs of the network. + weights: Optional `Tensor` that is broadcastable to loss. label_smoothing: If greater than 0 then smooth the labels. scope: the scope for the operations performed in computing the loss. loss_collection: collection to which the loss will be added. 
diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py index 276897ab99e5e8770b72cb1eb27d07fb8dbc08bb..30ac001c2510dd3c8cc8afd9a8c29b1af22f6c3a 100644 --- a/tensorflow/python/ops/math_ops.py +++ b/tensorflow/python/ops/math_ops.py @@ -174,6 +174,7 @@ from tensorflow.python.ops.gen_math_ops import * # pylint: enable=wildcard-import from tensorflow.python.util import compat from tensorflow.python.util import deprecation +from tensorflow.python.util import nest from tensorflow.python.util.tf_export import tf_export # Aliases for some automatically-generated names. @@ -184,7 +185,6 @@ arg_min = deprecation.deprecated(None, "Use `argmin` instead")(arg_min) # pylin tf_export("arg_max")(arg_max) tf_export("arg_min")(arg_min) - # This is set by resource_variable_ops.py. It is included in this way since # there is a circular dependency between math_ops and resource_variable_ops _resource_variable_type = None @@ -211,11 +211,9 @@ def argmax(input, name=None, dimension=None, output_type=dtypes.int64): - if dimension is not None: - if axis is not None: - raise ValueError("Cannot specify both 'axis' and 'dimension'") - axis = dimension - elif axis is None: + axis = deprecation.deprecated_argument_lookup( + "axis", axis, "dimension", dimension) + if axis is None: axis = 0 return gen_math_ops.arg_max(input, axis, name=name, output_type=output_type) @@ -231,11 +229,9 @@ def argmin(input, name=None, dimension=None, output_type=dtypes.int64): - if dimension is not None: - if axis is not None: - raise ValueError("Cannot specify both 'axis' and 'dimension'") - axis = dimension - elif axis is None: + axis = deprecation.deprecated_argument_lookup( + "axis", axis, "dimension", dimension) + if axis is None: axis = 0 return gen_math_ops.arg_min(input, axis, name=name, output_type=output_type) @@ -761,13 +757,25 @@ def cast(x, dtype, name=None): tf.cast(x, tf.int32) # [1, 2], dtype=tf.int32 ``` + The operation supports data types (for `x` and `dtype`) of + 
`uint8`, `int8`, `uint16`, `int16`, `int32`, `int64`, `float16`, `float32`, + `float64`, `complex64`, `complex128`, `bfloat16`. In case of casting from + complex types (`complex64`, `complex128`) to real types, only the real part + of `x` is returned. In case of casting from real types to complex types + (`complex64`, `complex128`), the imaginary part of the returned value is set + to `0`. The handling of complex types here matches the behavior of numpy. + Args: - x: A `Tensor` or `SparseTensor`. - dtype: The destination type. + x: A `Tensor` or `SparseTensor` of numeric type. It could be + `uint8`, `int8`, `uint16`, `int16`, `int32`, `int64`, + `float16`, `float32`, `float64`, `complex64`, `complex128`, `bfloat16`. + dtype: The destination type. The list of supported dtypes is the same + as `x`. name: A name for the operation (optional). Returns: - A `Tensor` or `SparseTensor` with same shape as `x`. + A `Tensor` or `SparseTensor` with same shape as `x` and + same type as `dtype`. Raises: TypeError: If `x` cannot be cast to the `dtype`. @@ -1343,8 +1351,7 @@ def _ReductionDims(x, axis, reduction_indices): else: # Fast path: avoid creating Rank and Range ops if ndims is known. if isinstance(x, ops.Tensor) and x._rank() is not None: # pylint: disable=protected-access - return constant_op.constant( - np.arange(x._rank()), dtype=dtypes.int32) # pylint: disable=protected-access + return constant_op.constant(np.arange(x._rank()), dtype=dtypes.int32) # pylint: disable=protected-access if (isinstance(x, sparse_tensor.SparseTensor) and x.dense_shape.get_shape().is_fully_defined()): rank = x.dense_shape.get_shape()[0].value # sparse.dense_shape is 1-D. @@ -1403,10 +1410,11 @@ def reduce_sum(input_tensor, keep_dims: Deprecated alias for `keepdims`. Returns: - The reduced tensor. + The reduced tensor, of the same dtype as the input_tensor. 
@compatibility(numpy) - Equivalent to np.sum + Equivalent to np.sum apart from the fact that numpy upcasts uint8 and int32 + to int64 while tensorflow returns the same dtype as the input. @end_compatibility """ keepdims = deprecation.deprecated_argument_lookup("keepdims", keepdims, @@ -1458,8 +1466,18 @@ def count_nonzero(input_tensor, tf.count_nonzero(x, [0, 1]) # 3 ``` + **NOTE** Strings are compared against zero-length empty string `""`. Any + string with a size greater than zero is already considered as nonzero. + + For example: + ```python + x = tf.constant(["", "a", " ", "b", ""]) + tf.count_nonzero(x) # 3, with "a", " ", and "b" as nonzero strings. + ``` + Args: - input_tensor: The tensor to reduce. Should be of numeric type, or `bool`. + input_tensor: The tensor to reduce. Should be of numeric type, `bool`, + or `string`. axis: The dimensions to reduce. If `None` (the default), reduces all dimensions. Must be in the range `[-rank(input_tensor), rank(input_tensor))`. @@ -1479,7 +1497,8 @@ def count_nonzero(input_tensor, with ops.name_scope(name, "count_nonzero", [input_tensor]): input_tensor = ops.convert_to_tensor(input_tensor, name="input_tensor") - zero = input_tensor.dtype.as_numpy_dtype() + # A scalar of 'zero' is enough as `not_equal` will broadcast. + zero = array_ops.zeros([], dtype=input_tensor.dtype) return cast( reduce_sum( # int64 reduction happens on GPU @@ -1522,7 +1541,7 @@ def reduce_mean(input_tensor, input_tensor: The tensor to reduce. Should have numeric type. axis: The dimensions to reduce. If `None` (the default), reduces all dimensions. Must be in the range - `[-rank(input_tensor), rank(input_tensor)]`. + `[-rank(input_tensor), rank(input_tensor))`. keepdims: If true, retains reduced dimensions with length 1. name: A name for the operation (optional). reduction_indices: The old (deprecated) name for axis. @@ -1632,7 +1651,7 @@ def reduce_min(input_tensor, tensor with a single element is returned. Args: - input_tensor: The tensor to reduce.
Should have numeric type. + input_tensor: The tensor to reduce. Should have real numeric type. axis: The dimensions to reduce. If `None` (the default), reduces all dimensions. Must be in the range `[-rank(input_tensor), rank(input_tensor))`. @@ -1681,7 +1700,7 @@ def reduce_max(input_tensor, tensor with a single element is returned. Args: - input_tensor: The tensor to reduce. Should have numeric type. + input_tensor: The tensor to reduce. Should have real numeric type. axis: The dimensions to reduce. If `None` (the default), reduces all dimensions. Must be in the range `[-rank(input_tensor), rank(input_tensor))`. @@ -2273,10 +2292,11 @@ def accumulate_n(inputs, shape=None, tensor_dtype=None, name=None): ValueError: If `inputs` don't all have same shape and dtype or the shape cannot be inferred. """ + def _input_error(): - return ValueError( - "inputs must be a list of at least one Tensor with the " - "same dtype and shape") + return ValueError("inputs must be a list of at least one Tensor with the " + "same dtype and shape") + if not inputs or not isinstance(inputs, (list, tuple)): raise _input_error() inputs = ops.convert_n_to_tensor_or_indexed_slices(inputs) @@ -2294,8 +2314,8 @@ def accumulate_n(inputs, shape=None, tensor_dtype=None, name=None): # tensor_dtype is for safety only; operator's output type computed in C++ if tensor_dtype is not None and tensor_dtype != inputs[0].dtype: - raise TypeError("tensor_dtype is {}, but input is of type {}" - .format(tensor_dtype, inputs[0].dtype)) + raise TypeError("tensor_dtype is {}, but input is of type {}".format( + tensor_dtype, inputs[0].dtype)) if len(inputs) == 1 and name is None: return inputs[0] @@ -2761,14 +2781,14 @@ def sparse_segment_sum(data, indices, segment_ids, name=None, name=name) else: return gen_math_ops.sparse_segment_sum( - data=data, - indices=indices, - segment_ids=segment_ids, - name=name) + data=data, indices=indices, segment_ids=segment_ids, name=name) @tf_export("sparse_segment_mean") -def 
sparse_segment_mean(data, indices, segment_ids, name=None, +def sparse_segment_mean(data, + indices, + segment_ids, + name=None, num_segments=None): r"""Computes the mean along sparse segments of a tensor. @@ -2805,14 +2825,14 @@ def sparse_segment_mean(data, indices, segment_ids, name=None, name=name) else: return gen_math_ops.sparse_segment_mean( - data=data, - indices=indices, - segment_ids=segment_ids, - name=name) + data=data, indices=indices, segment_ids=segment_ids, name=name) @tf_export("sparse_segment_sqrt_n") -def sparse_segment_sqrt_n(data, indices, segment_ids, name=None, +def sparse_segment_sqrt_n(data, + indices, + segment_ids, + name=None, num_segments=None): r"""Computes the sum along sparse segments of a tensor divided by the sqrt(N). @@ -2842,10 +2862,7 @@ def sparse_segment_sqrt_n(data, indices, segment_ids, name=None, name=name) else: return gen_math_ops.sparse_segment_sqrt_n( - data=data, - indices=indices, - segment_ids=segment_ids, - name=name) + data=data, indices=indices, segment_ids=segment_ids, name=name) @tf_export("tensordot", "linalg.tensordot") @@ -3016,6 +3033,47 @@ def tensordot(a, b, axes, name=None): return product +@tf_export("math.polyval") +def polyval(coeffs, x, name=None): + r"""Computes the elementwise value of a polynomial. + + If `x` is a tensor and `coeffs` is a list of n + 1 tensors, this function + returns the value of the n-th order polynomial + + p(x) = coeffs[n] + coeffs[n-1] * x + ... + coeffs[0] * x**n + + evaluated using Horner's method, i.e. + + p(x) = coeffs[n] + x * (coeffs[n-1] + ... + x * (coeffs[1] + + x * coeffs[0])) + + Args: + coeffs: A list of `Tensor` representing the coefficients of the polynomial. + x: A `Tensor` representing the variable of the polynomial. + name: A name for the operation (optional). + + Returns: + A `Tensor` of the same shape as the expression p(x) with usual broadcasting rules + for element-wise addition and multiplication applied.
+ + @compatibility(numpy) + Equivalent to numpy.polyval. + @end_compatibility + """ + + with ops.name_scope(name, "polyval", nest.flatten(coeffs) + [x]) as name: + x = ops.convert_to_tensor(x, name="x") + if len(coeffs) < 1: + return array_ops.zeros_like(x, name=name) + coeffs = [ + ops.convert_to_tensor(coeff, name=("coeff_%d" % index)) + for index, coeff in enumerate(coeffs) + ] + p = coeffs[0] + for c in coeffs[1:]: + p = c + p * x + return p + # FFT ops were moved to tf.spectral. tf.fft symbols were part of the TensorFlow # 1.0 API so we leave these here for backwards compatibility. fft = gen_spectral_ops.fft diff --git a/tensorflow/python/ops/math_ops_test.py b/tensorflow/python/ops/math_ops_test.py index 9f85188b3513563a7444f7a0e908f11af985498b..05bcee8801259e4bc6c20c3f61cf20025ba5ea33 100644 --- a/tensorflow/python/ops/math_ops_test.py +++ b/tensorflow/python/ops/math_ops_test.py @@ -155,9 +155,7 @@ class RoundTest(test_util.TensorFlowTestCase): @test_util.run_in_graph_and_eager_modes() def testRounding(self): - x = [0.49, 0.7, -0.3, -0.8] - # TODO(nolivia): Remove this when RoundOp is forwards compatible - # x = np.arange(-5.0, 5.0, .25) + x = np.arange(-5.0, 5.0, .25) for dtype in [np.float32, np.double, np.int32]: x_np = np.array(x, dtype=dtype) with test_util.device(use_gpu=True): diff --git a/tensorflow/python/ops/metrics_impl.py b/tensorflow/python/ops/metrics_impl.py index 9ec49545796cfa7a603b31c23bfd0d495639898d..47eea6ef6b58abd4819544e29783048964104922 100644 --- a/tensorflow/python/ops/metrics_impl.py +++ b/tensorflow/python/ops/metrics_impl.py @@ -33,6 +33,7 @@ from tensorflow.python.ops import sparse_ops from tensorflow.python.ops import state_ops from tensorflow.python.ops import variable_scope from tensorflow.python.ops import weights_broadcast_ops +from tensorflow.python.platform import tf_logging as logging from tensorflow.python.util.deprecation import deprecated from tensorflow.python.util.tf_export import tf_export @@ -626,10 +627,16 @@ 
def auc(labels, curve: Specifies the name of the curve to be computed, 'ROC' [default] or 'PR' for the Precision-Recall-curve. name: An optional variable_scope name. - summation_method: Specifies the Riemann summation method used, 'trapezoidal' - [default] that applies the trapezoidal rule, 'minoring' that applies - left summation for increasing intervals and right summation for decreasing - intervals or 'majoring' that applies the opposite. + summation_method: Specifies the Riemann summation method used + (https://en.wikipedia.org/wiki/Riemann_sum): 'trapezoidal' [default] that + applies the trapezoidal rule; 'careful_interpolation', a variant of it + differing only by a more correct interpolation scheme for PR-AUC - + interpolating (true/false) positives but not the ratio that is precision; + 'minoring' that applies left summation for increasing intervals and right + summation for decreasing intervals; 'majoring' that does the opposite. + Note that 'careful_interpolation' is strictly preferred to 'trapezoidal' + (to be deprecated soon) as it applies the same method for ROC, and a + better one (see Davis & Goadrich 2006 for details) for the PR curve. Returns: auc: A scalar `Tensor` representing the current area-under-curve. @@ -664,8 +671,62 @@ def auc(labels, # Add epsilons to avoid dividing by 0. epsilon = 1.0e-6 + def interpolate_pr_auc(tp, fp, fn): + """Interpolation formula inspired by section 4 of Davis & Goadrich 2006. + + Note here we derive & use a closed formula not present in the paper + - as follows: + Modeling all of TP (true positive weight), + FP (false positive weight) and their sum P = TP + FP (positive weight) + as varying linearly within each interval [A, B] between successive + thresholds, we get + Precision = (TP_A + slope * (P - P_A)) / P + with slope = dTP / dP = (TP_B - TP_A) / (P_B - P_A). 
+ The area within the interval is thus (slope / total_pos_weight) times + int_A^B{Precision.dP} = int_A^B{(TP_A + slope * (P - P_A)) * dP / P} + int_A^B{Precision.dP} = int_A^B{slope * dP + intercept * dP / P} + where intercept = TP_A - slope * P_A = TP_B - slope * P_B, resulting in + int_A^B{Precision.dP} = TP_B - TP_A + intercept * log(P_B / P_A) + Bringing back the factor (slope / total_pos_weight) we'd put aside, we get + slope * [dTP + intercept * log(P_B / P_A)] / total_pos_weight + where dTP == TP_B - TP_A. + Note that when P_A == 0 the above calculation simplifies into + int_A^B{Precision.dTP} = int_A^B{slope * dTP} = slope * (TP_B - TP_A) + which is really equivalent to imputing constant precision throughout the + first bucket having >0 true positives. + + Args: + tp: true positive counts + fp: false positive counts + fn: false negative counts + Returns: + pr_auc: an approximation of the area under the P-R curve. + """ + dtp = tp[:num_thresholds - 1] - tp[1:] + p = tp + fp + prec_slope = _safe_div(dtp, p[:num_thresholds - 1] - p[1:], 'prec_slope') + intercept = tp[1:] - math_ops.multiply(prec_slope, p[1:]) + safe_p_ratio = array_ops.where( + math_ops.logical_and(p[:num_thresholds - 1] > 0, p[1:] > 0), + _safe_div(p[:num_thresholds - 1], p[1:], 'recall_relative_ratio'), + array_ops.ones_like(p[1:])) + return math_ops.reduce_sum( + _safe_div( + prec_slope * (dtp + intercept * math_ops.log(safe_p_ratio)), + tp[1:] + fn[1:], + name='pr_auc_increment'), + name='interpolate_pr_auc') + def compute_auc(tp, fn, tn, fp, name): """Computes the roc-auc or pr-auc based on confusion counts.""" + if curve == 'PR': + if summation_method == 'trapezoidal': + logging.warning( + 'Trapezoidal rule is known to produce incorrect PR-AUCs; ' + 'please switch to "careful_interpolation" instead.') + elif summation_method == 'careful_interpolation': + # This one is a bit tricky and is handled separately. 
+ return interpolate_pr_auc(tp, fp, fn) rec = math_ops.div(tp + epsilon, tp + fn + epsilon) if curve == 'ROC': fp_rate = math_ops.div(fp, fp + tn + epsilon) @@ -675,7 +736,9 @@ def auc(labels, prec = math_ops.div(tp + epsilon, tp + fp + epsilon) x = rec y = prec - if summation_method == 'trapezoidal': + if summation_method in ('trapezoidal', 'careful_interpolation'): + # Note that the case ('PR', 'careful_interpolation') has been handled + # above. return math_ops.reduce_sum( math_ops.multiply(x[:num_thresholds - 1] - x[1:], (y[:num_thresholds - 1] + y[1:]) / 2.), @@ -923,8 +986,8 @@ def mean_per_class_accuracy(labels, weights = array_ops.reshape(weights, [-1]) weights = math_ops.to_float(weights) - is_correct = is_correct * weights - ones = ones * weights + is_correct *= weights + ones *= weights update_total_op = state_ops.scatter_add(total, labels, ones) update_count_op = state_ops.scatter_add(count, labels, is_correct) diff --git a/tensorflow/python/ops/nn.py b/tensorflow/python/ops/nn.py index ee1a00623a734e18d4aebe6c84f77ba53ee1050c..1d0d9a52a125b32c8fba65f8b2c9907331296836 100644 --- a/tensorflow/python/ops/nn.py +++ b/tensorflow/python/ops/nn.py @@ -98,6 +98,7 @@ See the @{$python/nn} guide. 
@@fixed_unigram_candidate_sampler @@compute_accidental_hits @@quantized_conv2d +@@quantized_relu @@quantized_relu_x @@quantized_max_pool @@quantized_avg_pool @@ -126,8 +127,6 @@ from tensorflow.python.ops.nn_impl import * from tensorflow.python.ops.nn_ops import * from tensorflow.python.ops.candidate_sampling_ops import * from tensorflow.python.ops.embedding_ops import * -from tensorflow.python.ops.rnn import * -from tensorflow.python.ops import rnn_cell # pylint: enable=wildcard-import,unused-import diff --git a/tensorflow/python/ops/nn_grad.py b/tensorflow/python/ops/nn_grad.py index 4af5bd26dd80b984b1c898411c2a23827bed1b4b..3a41391340edbe25bd97cfadc58587d91bef9de2 100644 --- a/tensorflow/python/ops/nn_grad.py +++ b/tensorflow/python/ops/nn_grad.py @@ -94,6 +94,7 @@ def _Conv3DGrad(op, grad): array_ops.shape(op.inputs[0]), op.inputs[1], grad, + dilations=op.get_attr("dilations"), strides=op.get_attr("strides"), padding=op.get_attr("padding"), data_format=data_format), @@ -101,6 +102,7 @@ def _Conv3DGrad(op, grad): op.inputs[0], array_ops.shape(op.inputs[1]), grad, + dilations=op.get_attr("dilations"), strides=op.get_attr("strides"), padding=op.get_attr("padding"), data_format=data_format) @@ -116,12 +118,14 @@ def _Conv3DBackpropInputGrad(op, grad): grad, array_ops.shape(op.inputs[1]), op.inputs[2], + dilations=op.get_attr("dilations"), strides=op.get_attr("strides"), padding=op.get_attr("padding"), data_format=data_format), nn_ops.conv3d( grad, op.inputs[1], + dilations=op.get_attr("dilations"), strides=op.get_attr("strides"), padding=op.get_attr("padding"), data_format=data_format) @@ -136,12 +140,14 @@ def _Conv3DBackpropFilterGrad(op, grad): array_ops.shape(op.inputs[0]), grad, op.inputs[2], + dilations=op.get_attr("dilations"), strides=op.get_attr("strides"), padding=op.get_attr("padding"), data_format=data_format), None, nn_ops.conv3d( op.inputs[0], grad, + dilations=op.get_attr("dilations"), strides=op.get_attr("strides"), padding=op.get_attr("padding"), 
data_format=data_format) diff --git a/tensorflow/python/ops/nn_impl.py b/tensorflow/python/ops/nn_impl.py index 47cc4da7f2abd1f5b00e193a76c8391be94ca27d..576627e78ed10d832f82477487140e470e70cfb2 100644 --- a/tensorflow/python/ops/nn_impl.py +++ b/tensorflow/python/ops/nn_impl.py @@ -765,9 +765,9 @@ def weighted_moments(x, axes, frequency_weights, name=None, keep_dims=False): weighted_variance = math_ops.multiply(weighted_distsq, divisor) if not keep_dims: - weighted_mean = array_ops.squeeze(weighted_mean, squeeze_dims=axes) + weighted_mean = array_ops.squeeze(weighted_mean, axis=axes) weighted_variance = array_ops.squeeze( - weighted_variance, squeeze_dims=axes) + weighted_variance, axis=axes) if needs_cast: weighted_mean = math_ops.cast(weighted_mean, dtypes.float16) @@ -987,7 +987,7 @@ def _compute_sampled_logits(weights, class biases. labels: A `Tensor` of type `int64` and shape `[batch_size, num_true]`. The target classes. Note that this format differs from - the `labels` argument of `nn.softmax_cross_entropy_with_logits`. + the `labels` argument of `nn.softmax_cross_entropy_with_logits_v2`. inputs: A `Tensor` of shape `[batch_size, dim]`. The forward activations of the input network. num_sampled: An `int`. The number of classes to randomly sample per batch. @@ -1012,7 +1012,7 @@ def _compute_sampled_logits(weights, out_logits: `Tensor` object with shape `[batch_size, num_true + num_sampled]`, for passing to either `nn.sigmoid_cross_entropy_with_logits` (NCE) or - `nn.softmax_cross_entropy_with_logits` (sampled softmax). + `nn.softmax_cross_entropy_with_logits_v2` (sampled softmax). out_labels: A Tensor object with the same shape as `out_logits`. 
""" @@ -1285,7 +1285,7 @@ def sampled_softmax_loss(weights, logits = tf.matmul(inputs, tf.transpose(weights)) logits = tf.nn.bias_add(logits, biases) labels_one_hot = tf.one_hot(labels, n_classes) - loss = tf.nn.softmax_cross_entropy_with_logits( + loss = tf.nn.softmax_cross_entropy_with_logits_v2( labels=labels_one_hot, logits=logits) ``` @@ -1303,7 +1303,7 @@ def sampled_softmax_loss(weights, biases: A `Tensor` of shape `[num_classes]`. The class biases. labels: A `Tensor` of type `int64` and shape `[batch_size, num_true]`. The target classes. Note that this format differs from - the `labels` argument of `nn.softmax_cross_entropy_with_logits`. + the `labels` argument of `nn.softmax_cross_entropy_with_logits_v2`. inputs: A `Tensor` of shape `[batch_size, dim]`. The forward activations of the input network. num_sampled: An `int`. The number of classes to randomly sample per batch. @@ -1340,7 +1340,8 @@ def sampled_softmax_loss(weights, partition_strategy=partition_strategy, name=name, seed=seed) - sampled_losses = nn_ops.softmax_cross_entropy_with_logits( + labels = array_ops.stop_gradient(labels, name="labels_stop_gradient") + sampled_losses = nn_ops.softmax_cross_entropy_with_logits_v2( labels=labels, logits=logits) # sampled_losses is a [batch_size] tensor. return sampled_losses diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py index 0c5538624105a5c0a37d6ca1e6da60ba8eda8000..cd07550d2ee31e9c830afa8e9c5e17deb35d1135 100644 --- a/tensorflow/python/ops/nn_ops.py +++ b/tensorflow/python/ops/nn_ops.py @@ -1155,7 +1155,7 @@ def atrous_conv2d(value, filters, rate, padding, name=None): Returns: A `Tensor` with the same type as `value`. - Output shape with `'VALID`` padding is: + Output shape with `'VALID'` padding is: [batch, height - 2 * (filter_width - 1), width - 2 * (filter_height - 1), out_channels]. 
@@ -1458,10 +1458,10 @@ def conv3d_transpose( if isinstance(output_shape, (list, np.ndarray)): # output_shape's shape should be == [5] if reached this point. - if not filter.get_shape()[3].is_compatible_with(output_shape[4]): + if not filter.get_shape()[3].is_compatible_with(output_shape[axis]): raise ValueError( "output_shape does not match filter's output channels, " - "{} != {}".format(output_shape[4], + "{} != {}".format(output_shape[axis], filter.get_shape()[3])) if padding != "VALID" and padding != "SAME": @@ -1803,12 +1803,15 @@ def softmax_cross_entropy_with_logits_v2( on `logits` internally for efficiency. Do not call this op with the output of `softmax`, as it will produce incorrect results. - `logits` and `labels` must have the same shape, e.g. - `[batch_size, num_classes]` and the same dtype (either `float16`, `float32`, + A common use case is to have logits and labels of shape + `[batch_size, num_classes]`, but higher dimensions are supported, with + the `dim` argument specifying the class dimension. + + `logits` and `labels` must have the same dtype (either `float16`, `float32`, or `float64`). Backpropagation will happen into both `logits` and `labels`. To disallow - backpropagation into `labels`, pass label tensors through a `stop_gradients` + backpropagation into `labels`, pass label tensors through @{tf.stop_gradient} before feeding it to this function. **Note that to avoid confusion, it is required to pass only named arguments to @@ -1816,14 +1819,17 @@ def softmax_cross_entropy_with_logits_v2( Args: _sentinel: Used to prevent positional parameters. Internal, do not use. - labels: Each row `labels[i]` must be a valid probability distribution. + labels: Each vector along the class dimension should hold a valid + probability distribution e.g. for the case in which labels are of shape + `[batch_size, num_classes]`, each row of `labels[i]` must be a valid + probability distribution. logits: Unscaled log probabilities. dim: The class dimension. 
Defaulted to -1 which is the last dimension. name: A name for the operation (optional). Returns: - A 1-D `Tensor` of length `batch_size` of the same type as `logits` with the - softmax cross entropy loss. + A `Tensor` of the same type as `logits` holding the softmax cross entropy + loss; its shape is that of `labels` with the class dimension removed. """ _ensure_xent_args("softmax_cross_entropy_with_logits", _sentinel, labels, logits) @@ -1895,7 +1901,7 @@ _XENT_DEPRECATION = """ Future major versions of TensorFlow will allow gradients to flow into the labels input on backprop by default. -See tf.nn.softmax_cross_entropy_with_logits_v2. +See @{tf.nn.softmax_cross_entropy_with_logits_v2}. """ @@ -1926,9 +1932,9 @@ def softmax_cross_entropy_with_logits( on `logits` internally for efficiency. Do not call this op with the output of `softmax`, as it will produce incorrect results. - `logits` and `labels` must have the same shape, e.g. - `[batch_size, num_classes]` and the same dtype (either `float16`, `float32`, - or `float64`). + A common use case is to have logits and labels of shape + `[batch_size, num_classes]`, but higher dimensions are supported, with + the `dim` argument specifying the class dimension. Backpropagation will happen only into `logits`. To calculate a cross entropy loss that allows backpropagation into both `logits` and `labels`, see @@ -1939,14 +1945,17 @@ def softmax_cross_entropy_with_logits( Args: _sentinel: Used to prevent positional parameters. Internal, do not use. - labels: Each row `labels[i]` must be a valid probability distribution. + labels: Each vector along the class dimension should hold a valid + probability distribution e.g. for the case in which labels are of shape + `[batch_size, num_classes]`, each row of `labels[i]` must be a valid + probability distribution. logits: Unscaled log probabilities. dim: The class dimension. Defaulted to -1 which is the last dimension. name: A name for the operation (optional). 
Returns: - A 1-D `Tensor` of length `batch_size` of the same type as `logits` with the - softmax cross entropy loss. + A `Tensor` of the same type as `logits` holding the softmax cross entropy + loss; its shape is that of `labels` with the class dimension removed. """ _ensure_xent_args("softmax_cross_entropy_with_logits", _sentinel, labels, logits) @@ -1977,14 +1986,17 @@ def sparse_softmax_cross_entropy_with_logits( must provide a single specific index for the true class for each row of `logits` (each minibatch entry). For soft softmax classification with a probability distribution for each entry, see - `softmax_cross_entropy_with_logits`. + `softmax_cross_entropy_with_logits_v2`. **WARNING:** This op expects unscaled logits, since it performs a `softmax` on `logits` internally for efficiency. Do not call this op with the output of `softmax`, as it will produce incorrect results. - A common use case is to have logits of shape `[batch_size, num_classes]` and - labels of shape `[batch_size]`. But higher dimensions are supported. + A common use case is to have logits of shape `[batch_size, num_classes]` and + labels of shape `[batch_size]`, but higher dimensions are supported, in which + case the `dim`-th dimension is assumed to be of size `num_classes`. + `logits` must have the dtype of `float16`, `float32`, or `float64`, and + `labels` must have the dtype of `int32` or `int64`. 
**Note that to avoid confusion, it is required to pass only named arguments to this function.** diff --git a/tensorflow/python/ops/nn_test.py b/tensorflow/python/ops/nn_test.py index da86d5f6ca0800175ec350945d856a50e9390a76..46a5f4fae6b15766c21011ebeae5437262192df7 100644 --- a/tensorflow/python/ops/nn_test.py +++ b/tensorflow/python/ops/nn_test.py @@ -1081,6 +1081,42 @@ class DataFormatDimMapTest(test_lib.TestCase): self._test([1, -3, -2], [2, 2, 3]) self._test([[1, -3], [1, -1]], [[2, 2], [2, 1]]) + def testNHWCtoNCHW(self): + x_val = [1, -3, -2] + y_val_expected = [2, 2, 3] + x = constant_op.constant(x_val) + y = nn_ops.data_format_dim_map(x, src_format="NHWC", dst_format="NCHW") + with self.test_session(use_gpu=test_lib.is_gpu_available()) as sess: + y_val = sess.run(y) + self.assertAllEqual(y_val, y_val_expected) + + def testNHWCtoHWNC(self): + x_val = [-4, -3, -2, -1, 0, 1, 2, 3] + y_val_expected = [2, 0, 1, 3, 2, 0, 1, 3] + x = constant_op.constant(x_val) + y = nn_ops.data_format_dim_map(x, src_format="NHWC", dst_format="HWNC") + with self.test_session(use_gpu=test_lib.is_gpu_available()) as sess: + y_val = sess.run(y) + self.assertAllEqual(y_val, y_val_expected) + + def testNHWCtoWHCN(self): + x_val = [-4, -3, -2, -1, 0, 1, 2, 3] + y_val_expected = [3, 1, 0, 2, 3, 1, 0, 2] + x = constant_op.constant(x_val) + y = nn_ops.data_format_dim_map(x, src_format="NHWC", dst_format="WHCN") + with self.test_session(use_gpu=test_lib.is_gpu_available()) as sess: + y_val = sess.run(y) + self.assertAllEqual(y_val, y_val_expected) + + def testArbitraryASCII(self): + x_val = [-4, -3, -2, -1, 0, 1, 2, 3] + y_val_expected = [3, 2, 1, 0, 3, 2, 1, 0] + x = constant_op.constant(x_val) + y = nn_ops.data_format_dim_map(x, src_format="qwer", dst_format="rewq") + with self.test_session(use_gpu=test_lib.is_gpu_available()) as sess: + y_val = sess.run(y) + self.assertAllEqual(y_val, y_val_expected) + class DataFormatVectorPermuteTest(test_lib.TestCase): diff --git 
a/tensorflow/python/ops/parsing_ops.py b/tensorflow/python/ops/parsing_ops.py index 075b38d743d13329e646c0b268e938b5c5704e47..d8d9af545f17fe3e0133b51b1eab82f7732dc299 100644 --- a/tensorflow/python/ops/parsing_ops.py +++ b/tensorflow/python/ops/parsing_ops.py @@ -1176,8 +1176,13 @@ def _parse_single_sequence_example_raw(serialized, # Swap `name` and `na_value` for backward compatibility. @tf_export("decode_csv") -def decode_csv(records, record_defaults, field_delim=",", - use_quote_delim=True, name=None, na_value=""): +def decode_csv(records, + record_defaults, + field_delim=",", + use_quote_delim=True, + name=None, + na_value="", + select_cols=None): """Convert CSV records to tensors. Each column maps to one tensor. RFC 4180 format is expected for the CSV records. @@ -1200,19 +1205,32 @@ def decode_csv(records, record_defaults, field_delim=",", Bullet 5). name: A name for the operation (optional). na_value: Additional string to recognize as NA/NaN. + select_cols: Optional sorted list of column indices to select. If specified, + only this subset of columns will be parsed and returned. Returns: A list of `Tensor` objects. Has the same type as `record_defaults`. Each tensor will have the same shape as records. + + Raises: + ValueError: If any of the arguments is malformed. """ - # TODO(martinwicke), remove the wrapper when new Python API generator is done. 
+ if select_cols is not None and any(select_cols[i] >= select_cols[i + 1] + for i in range(len(select_cols) - 1)): + raise ValueError("select_cols is not strictly increasing.") + if select_cols is not None and select_cols[0] < 0: + raise ValueError("select_cols contains negative values.") + if select_cols is not None and len(select_cols) != len(record_defaults): + raise ValueError("Length of select_cols and record_defaults do not match.") return gen_parsing_ops.decode_csv( records=records, record_defaults=record_defaults, field_delim=field_delim, use_quote_delim=use_quote_delim, na_value=na_value, - name=name) + name=name, + select_cols=select_cols, + ) # TODO(b/70890287): Combine the implementation of this op and diff --git a/tensorflow/python/ops/resource_variable_ops.py b/tensorflow/python/ops/resource_variable_ops.py index 2f39ea2e7d6044e727e096f1c3b6410b623e65ab..4d26b2f46e3f7982305b1163c7f3762660ccef2f 100644 --- a/tensorflow/python/ops/resource_variable_ops.py +++ b/tensorflow/python/ops/resource_variable_ops.py @@ -24,6 +24,8 @@ from tensorflow.core.framework import variable_pb2 from tensorflow.python import pywrap_tensorflow from tensorflow.python.eager import context from tensorflow.python.eager import tape +from tensorflow.python.framework import c_api_util +from tensorflow.python.framework import cpp_shape_inference_pb2 from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape @@ -41,6 +43,19 @@ from tensorflow.python.training import checkpointable from tensorflow.python.util import compat +def get_resource_handle_data(graph_op): + assert ops._USE_C_SHAPES # pylint: disable=protected-access + assert type(graph_op) == ops.Tensor # pylint: disable=unidiomatic-typecheck + + with c_api_util.tf_buffer() as buf: + pywrap_tensorflow.TFE_GetResourceHandleShapeAndType( + graph_op.graph._c_graph, graph_op._as_tf_output(), buf) # pylint: disable=protected-access + data = 
pywrap_tensorflow.TF_GetBuffer(buf) + + return cpp_shape_inference_pb2.CppShapeInferenceResult.HandleData.FromString( + compat.as_bytes(data)) + + def _eager_safe_variable_handle(shape, dtype, shared_name, name, graph_mode): """Creates a variable handle with information to do shape inference.""" container = ops.get_default_graph()._container # pylint: disable=protected-access @@ -72,7 +87,15 @@ def _eager_safe_variable_handle(shape, dtype, shared_name, name, graph_mode): # know the shape and dtype of the variable pointed to by a handle. Since # shape inference doesn't run in eager mode we copy this data here for when # the handle is captured by an eager mode function. - handle._handle_data = h._handle_data # pylint: disable=protected-access + # pylint: disable=protected-access + if ops._USE_C_SHAPES: + handle._handle_data = get_resource_handle_data(h) + else: + if h._handle_data is None: + ops.set_shape_and_handle_data_for_outputs(h.op) + handle._handle_data = h._handle_data + # pylint: enable=protected-access + # Clean up our reference cycles to avoid making the garbage collector run. # pylint: disable=protected-access # OrderedDict, constructed on Graph creation, makes a simple reference loop @@ -171,7 +194,9 @@ class ResourceVariable(variables.Variable): to see all modifications to the value of the variable which happen in any operation on which the read_value depends on (either directly, indirectly, or via a control dependency) and guaranteed to not see any modification to the - value of the variable on which the read_value operation does not depend on. + value of the variable from operations that depend on the read_value operation. + Updates from operations that have no dependency relationship to the read_value + operation might or might not be visible to read_value. 
For example, if there is more than one assignment to a ResourceVariable in a single session.run call there is a well-defined value for each operation @@ -515,11 +540,19 @@ class ResourceVariable(variables.Variable): else: self._initial_value = None if variable_def.snapshot_name: - self._cached_value = g.as_graph_element( + snapshot = g.as_graph_element( ops.prepend_name_scope( variable_def.snapshot_name, import_scope=import_scope)) + self._cached_value = snapshot + while snapshot.op.type != "ReadVariableOp": + snapshot = snapshot.op.inputs[0] + self._graph_element = snapshot else: self._cached_value = None + # Legacy case for protos without the snapshot name; assume it's the + # following. + self._graph_element = g.get_tensor_by_name( + self._handle.op.name + "/Read/ReadVariableOp:0") if variable_def.HasField("save_slice_info_def"): self._save_slice_info = variables.Variable.SaveSliceInfo( save_slice_info_def=variable_def.save_slice_info_def, @@ -528,8 +561,6 @@ class ResourceVariable(variables.Variable): self._save_slice_info = None self._caching_device = None self._dtype = dtypes.as_dtype(self._handle.op.get_attr("dtype")) - self._graph_element = g.get_tensor_by_name( - self._handle.op.name + "/Read/ReadVariableOp:0") self._constraint = None self._cached_shape_as_list = None @@ -738,6 +769,10 @@ class ResourceVariable(variables.Variable): if self._cached_value is not None: var_def.snapshot_name = ops.strip_name_scope(self._cached_value.name, export_scope) + else: + # Store the graph_element here + var_def.snapshot_name = ops.strip_name_scope(self._graph_element.name, + export_scope) var_def.is_resource = True if self._save_slice_info: var_def.save_slice_info_def.MergeFrom( @@ -903,7 +938,6 @@ class ResourceVariable(variables.Variable): def _dense_var_to_tensor(self, dtype=None, name=None, as_ref=False): del name if dtype is not None and dtype != self.dtype: - print("trying to switch the dtype to ", dtype, " from ", self.dtype) return NotImplemented if as_ref: 
return self.read_value().op.inputs[0] diff --git a/tensorflow/python/ops/rnn_cell_impl.py b/tensorflow/python/ops/rnn_cell_impl.py index fe380c44dafdad6dc25d50102bacba610132674d..86dc053c0fb8d0d91eda87c40f41076391b3d9b8 100644 --- a/tensorflow/python/ops/rnn_cell_impl.py +++ b/tensorflow/python/ops/rnn_cell_impl.py @@ -352,10 +352,17 @@ class BasicRNNCell(LayerRNNCell): name: String, the name of the layer. Layers with the same name will share weights, but to avoid mistakes we require reuse=True in such cases. + dtype: Default dtype of the layer (default of `None` means use the type + of the first input). Required when `build` is called before `call`. """ - def __init__(self, num_units, activation=None, reuse=None, name=None): - super(BasicRNNCell, self).__init__(_reuse=reuse, name=name) + def __init__(self, + num_units, + activation=None, + reuse=None, + name=None, + dtype=None): + super(BasicRNNCell, self).__init__(_reuse=reuse, name=name, dtype=dtype) # Inputs must be 2-dimensional. self.input_spec = base_layer.InputSpec(ndim=2) @@ -413,6 +420,8 @@ class GRUCell(LayerRNNCell): name: String, the name of the layer. Layers with the same name will share weights, but to avoid mistakes we require reuse=True in such cases. + dtype: Default dtype of the layer (default of `None` means use the type + of the first input). Required when `build` is called before `call`. """ def __init__(self, @@ -421,8 +430,9 @@ class GRUCell(LayerRNNCell): reuse=None, kernel_initializer=None, bias_initializer=None, - name=None): - super(GRUCell, self).__init__(_reuse=reuse, name=name) + name=None, + dtype=None): + super(GRUCell, self).__init__(_reuse=reuse, name=name, dtype=dtype) # Inputs must be 2-dimensional. self.input_spec = base_layer.InputSpec(ndim=2) @@ -531,8 +541,14 @@ class BasicLSTMCell(LayerRNNCell): that follows. 
""" - def __init__(self, num_units, forget_bias=1.0, - state_is_tuple=True, activation=None, reuse=None, name=None): + def __init__(self, + num_units, + forget_bias=1.0, + state_is_tuple=True, + activation=None, + reuse=None, + name=None, + dtype=None): """Initialize the basic LSTM cell. Args: @@ -550,11 +566,13 @@ class BasicLSTMCell(LayerRNNCell): name: String, the name of the layer. Layers with the same name will share weights, but to avoid mistakes we require reuse=True in such cases. + dtype: Default dtype of the layer (default of `None` means use the type + of the first input). Required when `build` is called before `call`. When restoring from CudnnLSTM-trained checkpoints, must use `CudnnCompatibleLSTMCell` instead. """ - super(BasicLSTMCell, self).__init__(_reuse=reuse, name=name) + super(BasicLSTMCell, self).__init__(_reuse=reuse, name=name, dtype=dtype) if not state_is_tuple: logging.warn("%s: Using a concatenated state is slower and will soon be " "deprecated. Use state_is_tuple=True.", self) @@ -599,9 +617,9 @@ class BasicLSTMCell(LayerRNNCell): Args: inputs: `2-D` tensor with shape `[batch_size, input_size]`. state: An `LSTMStateTuple` of state tensors, each shaped - `[batch_size, self.state_size]`, if `state_is_tuple` has been set to + `[batch_size, num_units]`, if `state_is_tuple` has been set to `True`. Otherwise, a `Tensor` shaped - `[batch_size, 2 * self.state_size]`. + `[batch_size, 2 * num_units]`. Returns: A pair containing the new hidden state, and the new state (either a @@ -668,7 +686,7 @@ class LSTMCell(LayerRNNCell): initializer=None, num_proj=None, proj_clip=None, num_unit_shards=None, num_proj_shards=None, forget_bias=1.0, state_is_tuple=True, - activation=None, reuse=None, name=None): + activation=None, reuse=None, name=None, dtype=None): """Initialize the parameters for an LSTM cell. Args: @@ -701,11 +719,13 @@ class LSTMCell(LayerRNNCell): name: String, the name of the layer. 
Layers with the same name will share weights, but to avoid mistakes we require reuse=True in such cases. + dtype: Default dtype of the layer (default of `None` means use the type + of the first input). Required when `build` is called before `call`. When restoring from CudnnLSTM-trained checkpoints, use `CudnnCompatibleLSTMCell` instead. """ - super(LSTMCell, self).__init__(_reuse=reuse, name=name) + super(LSTMCell, self).__init__(_reuse=reuse, name=name, dtype=dtype) if not state_is_tuple: logging.warn("%s: Using a concatenated state is slower and will soon be " "deprecated. Use state_is_tuple=True.", self) @@ -1206,7 +1226,16 @@ class DeviceWrapper(RNNCell): @tf_export("nn.rnn_cell.MultiRNNCell") class MultiRNNCell(RNNCell): - """RNN cell composed sequentially of multiple simple cells.""" + """RNN cell composed sequentially of multiple simple cells. + + Example: + + ```python + num_units = [128, 64] + cells = [BasicLSTMCell(num_units=n) for n in num_units] + stacked_rnn_cell = MultiRNNCell(cells) + ``` + """ def __init__(self, cells, state_is_tuple=True): """Create a RNN cell composed sequentially of a number of RNNCells. diff --git a/tensorflow/python/ops/standard_ops.py b/tensorflow/python/ops/standard_ops.py index e90ff0746a8e86b4b462b71028fd677632c9075d..f71f98aa12c458b74228cfac53a00bd4f6d9a013 100644 --- a/tensorflow/python/ops/standard_ops.py +++ b/tensorflow/python/ops/standard_ops.py @@ -22,12 +22,13 @@ from __future__ import print_function import sys as _sys +# pylint: disable=g-bad-import-order # Imports the following modules so that @RegisterGradient get executed. 
from tensorflow.python.ops import array_grad +from tensorflow.python.ops import cudnn_rnn_grad from tensorflow.python.ops import data_flow_grad from tensorflow.python.ops import manip_grad from tensorflow.python.ops import math_grad -from tensorflow.python.ops import manip_grad from tensorflow.python.ops import sparse_grad from tensorflow.python.ops import spectral_grad from tensorflow.python.ops import state_grad @@ -96,6 +97,7 @@ from tensorflow.python.ops.tensor_array_ops import * from tensorflow.python.ops.variable_scope import * from tensorflow.python.ops.variables import * # pylint: enable=wildcard-import +# pylint: enable=g-bad-import-order #### For use in remove_undocumented below: from tensorflow.python.framework import constant_op as _constant_op diff --git a/tensorflow/python/ops/state_ops.py b/tensorflow/python/ops/state_ops.py index 01fc3182bc6f7b4f85d0df540bb26308d9fec72f..f6a11ca625b46cd088c3764039a10bc72619d1f8 100644 --- a/tensorflow/python/ops/state_ops.py +++ b/tensorflow/python/ops/state_ops.py @@ -423,3 +423,55 @@ def scatter_nd_update(ref, indices, updates, use_locking=True, name=None): ref.handle, indices, ops.convert_to_tensor(updates, dtype=ref.dtype), use_locking, name)]): return ref.read_value() + + +@tf_export("scatter_add") +def scatter_add(ref, indices, updates, use_locking=False, name=None): + # pylint: disable=line-too-long + r"""Adds sparse updates to the variable referenced by `ref`. + + This operation computes + + ```python + # Scalar indices + ref[indices, ...] += updates[...] + + # Vector indices (for each i) + ref[indices[i], ...] += updates[i, ...] + + # High rank indices (for each i, ..., j) + ref[indices[i, ..., j], ...] += updates[i, ..., j, ...] + ``` + + This operation outputs `ref` after the update is done. + This makes it easier to chain operations that need to use the updated value. + Duplicate entries are handled correctly: if multiple `indices` reference + the same location, their contributions add. 
+ + Requires `updates.shape = indices.shape + ref.shape[1:]`. + +
+ +
+ + Args: + ref: A `Variable`. + indices: A `Tensor`. Must be one of the following types: `int32`, `int64`. + A tensor of indices into the first dimension of `ref`. + updates: A `Tensor`. Must have the same type as `ref`. + A tensor of values to add to `ref`. + use_locking: An optional `bool`. Defaults to `False`. + If True, the assignment will be protected by a lock; + otherwise the behavior is undefined, but may exhibit less contention. + name: A name for the operation (optional). + + Returns: + Same as `ref`. Returned as a convenience for operations that want + to use the updated values after the update is done. + """ + if ref.dtype._is_ref_dtype: + return gen_state_ops.scatter_add(ref, indices, updates, + use_locking=use_locking, name=name) + return ref._lazy_read(gen_resource_variable_ops.resource_scatter_add( # pylint: disable=protected-access + ref.handle, indices, ops.convert_to_tensor(updates, ref.dtype), + name=name)) diff --git a/tensorflow/contrib/summary/summary_ops.py b/tensorflow/python/ops/summary_ops_v2.py similarity index 90% rename from tensorflow/contrib/summary/summary_ops.py rename to tensorflow/python/ops/summary_ops_v2.py index bc763fe655edc455e2538e536d6efab314c8228c..12f361c513fcebf8ce4b9c367d101b11ab10260b 100644 --- a/tensorflow/contrib/summary/summary_ops.py +++ b/tensorflow/python/ops/summary_ops_v2.py @@ -31,7 +31,7 @@ from tensorflow.python.eager import context from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops -from tensorflow.python.layers import utils +from tensorflow.python.framework import smart_cond from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import gen_summary_ops @@ -108,8 +108,10 @@ class SummaryWriter(object): - @{tf.contrib.summary.create_db_writer} """ - def __init__(self, resource): + def __init__(self, resource, init_op_fn): self._resource = 
resource + # TODO(nickfelt): cache constructed ops in graph mode + self._init_op_fn = init_op_fn if context.executing_eagerly() and self._resource is not None: self._resource_deleter = resource_variable_ops.EagerResourceDeleter( handle=self._resource, handle_device="cpu:0") @@ -129,10 +131,32 @@ class SummaryWriter(object): yield self # Flushes the summary writer in eager mode or in graph functions, but not # in legacy graph mode (you're on your own there). - with ops.device("cpu:0"): - gen_summary_ops.flush_summary_writer(self._resource) + self.flush() context.context().summary_writer_resource = old + def init(self): + """Operation to initialize the summary writer resource.""" + if self._resource is not None: + return self._init_op_fn() + + def _flush(self): + return _flush_fn(writer=self) + + def flush(self): + """Operation to force the summary writer to flush any buffered data.""" + if self._resource is not None: + return self._flush() + + def _close(self): + with ops.control_dependencies([self.flush()]): + with ops.device("cpu:0"): + return gen_summary_ops.close_summary_writer(self._resource) + + def close(self): + """Operation to flush and close the summary writer resource.""" + if self._resource is not None: + return self._close() + def initialize( graph=None, # pylint: disable=redefined-outer-name @@ -178,7 +202,7 @@ def create_file_writer(logdir, flush_millis=None, filename_suffix=None, name=None): - """Creates a summary file writer in the current context. + """Creates a summary file writer in the current context under the given name. Args: logdir: a string, or None. If a string, creates a summary file writer @@ -186,18 +210,20 @@ def create_file_writer(logdir, a mock object which acts like a summary writer but does nothing, useful to use as a context manager. max_queue: the largest number of summaries to keep in a queue; will - flush once the queue gets bigger than this. - flush_millis: the largest interval between flushes. 
- filename_suffix: optional suffix for the event file name. + flush once the queue gets bigger than this. Defaults to 10. + flush_millis: the largest interval between flushes. Defaults to 120,000. + filename_suffix: optional suffix for the event file name. Defaults to `.v2`. name: Shared name for this SummaryWriter resource stored to default - Graph. + Graph. Defaults to the provided logdir prefixed with `logdir:`. Note: if a + summary writer resource with this shared name already exists, the returned + SummaryWriter wraps that resource and the other arguments have no effect. Returns: Either a summary writer or an empty object which can be used as a summary writer. """ if logdir is None: - return SummaryWriter(None) + return SummaryWriter(None, None) with ops.device("cpu:0"): if max_queue is None: max_queue = constant_op.constant(10) @@ -205,6 +231,8 @@ def create_file_writer(logdir, flush_millis = constant_op.constant(2 * 60 * 1000) if filename_suffix is None: filename_suffix = constant_op.constant(".v2") + if name is None: + name = "logdir:" + logdir return _make_summary_writer( name, gen_summary_ops.create_summary_file_writer, @@ -267,13 +295,12 @@ def create_db_writer(db_uri, def _make_summary_writer(name, factory, **kwargs): resource = gen_summary_ops.summary_writer(shared_name=name) + init_op_fn = lambda: factory(resource, **kwargs) # TODO(apassos): Consider doing this instead. 
- # node = factory(resource, **kwargs) # if not context.executing_eagerly(): - # ops.get_default_session().run(node) - ops.add_to_collection(_SUMMARY_WRITER_INIT_COLLECTION_NAME, - factory(resource, **kwargs)) - return SummaryWriter(resource) + # ops.get_default_session().run(init_op) + ops.add_to_collection(_SUMMARY_WRITER_INIT_COLLECTION_NAME, init_op_fn()) + return SummaryWriter(resource, init_op_fn) def _cleanse_string(name, pattern, value): @@ -341,7 +368,7 @@ def summary_writer_function(name, tensor, function, family=None): if context.context().summary_writer_resource is None: return control_flow_ops.no_op() with ops.device("cpu:0"): - op = utils.smart_cond( + op = smart_cond.smart_cond( should_record_summaries(), record, _nothing, name="") ops.add_to_collection(ops.GraphKeys._SUMMARY_COLLECTION, op) # pylint: disable=protected-access return op @@ -538,7 +565,14 @@ def flush(writer=None, name=None): writer = context.context().summary_writer_resource if writer is None: return control_flow_ops.no_op() - return gen_summary_ops.flush_summary_writer(writer, name=name) + else: + if isinstance(writer, SummaryWriter): + writer = writer._resource # pylint: disable=protected-access + with ops.device("cpu:0"): + return gen_summary_ops.flush_summary_writer(writer, name=name) + + +_flush_fn = flush # for within SummaryWriter.flush() def eval_dir(model_dir, name=None): diff --git a/tensorflow/python/ops/template.py b/tensorflow/python/ops/template.py index 0294ecee548d1e7f507a5e4195e4ee320a0b9918..9b6b8c508fcd7e73e6259d14b466892544ec65d7 100644 --- a/tensorflow/python/ops/template.py +++ b/tensorflow/python/ops/template.py @@ -452,8 +452,7 @@ class Template(checkpointable.CheckpointableBase): # Only reuse variables if they were already created. 
with variable_scope.variable_scope( self._variable_scope, reuse=self._variables_created): - result = self._call_func(args, kwargs) - return result + return self._call_func(args, kwargs) else: # The scope was not created at construction time, so create it here. # Subsequent calls should reuse variables. @@ -461,8 +460,7 @@ class Template(checkpointable.CheckpointableBase): self._unique_name, self._name, custom_getter=self._custom_getter) as vs: self._variable_scope = vs - result = self._call_func(args, kwargs) - return result + return self._call_func(args, kwargs) @property def name(self): @@ -730,8 +728,7 @@ class EagerTemplate(Template): self._variable_scope, reuse=variable_scope.AUTO_REUSE) with self._variable_scope_context_manager: with self._template_store.as_default(): - result = self._call_func(args, kwargs) - return result + return self._call_func(args, kwargs) else: # The scope was not created at construction time, so create it here. # Subsequent calls should reuse variables. @@ -743,8 +740,7 @@ class EagerTemplate(Template): # store's variable scope name is unset; set it here. self._template_store.set_variable_scope_name(vs.name) with self._template_store.as_default(): - result = self._call_func(args, kwargs) - return result + return self._call_func(args, kwargs) @property def name(self): diff --git a/tensorflow/python/ops/variable_scope.py b/tensorflow/python/ops/variable_scope.py index c35735ca656b21d43f758830e68e5777d654f271..ba213ef884165f7f72094d27932913e39c9a5901 100644 --- a/tensorflow/python/ops/variable_scope.py +++ b/tensorflow/python/ops/variable_scope.py @@ -307,6 +307,17 @@ class _VariableStore(object): raise ValueError( "Passed a custom_getter which is not callable: %s" % custom_getter) + with ops.init_scope(): + if context.executing_eagerly(): + # Variable creation and initialization takes place in `init_scope`s; + # as such, if an `init_scope` lifts us into the eager context, then we + # need to use `ResourceVariable`s. 
+ use_resource = True + + # Note that it's fine to reuse eager variables whose initialization was + # lifted from a function-building graph into the eager context (that's why + # the following clause is not wrapped in an `init_scope`); lifted variables + # are tracked by the graph's `VariableStore`. if context.executing_eagerly(): if not self._store_eager_variables and reuse: raise RuntimeError( @@ -315,7 +326,6 @@ class _VariableStore(object): " EagerVariableStore for example usage.") if self._store_eager_variables: reuse = AUTO_REUSE - use_resource = True # If a *_ref type is passed in an error would be triggered further down the # stack. We prevent this using base_dtype to get a non-ref version of the @@ -1164,7 +1174,7 @@ class _VariableScopeStore(threading.local): self.variable_scopes_count[scope_name] = 1 def close_variable_subscopes(self, scope_name): - for k in self.variable_scopes_count: + for k in list(self.variable_scopes_count.keys()): if not scope_name or k.startswith(scope_name + "/"): self.variable_scopes_count[k] = 0 diff --git a/tensorflow/python/platform/base.i b/tensorflow/python/platform/base.i index dbefca2be9615b18418a92f4cbe0b1a0b2917449..478dd46f7e6965f8727e5741f2ccdfdc69247980 100644 --- a/tensorflow/python/platform/base.i +++ b/tensorflow/python/platform/base.i @@ -229,3 +229,25 @@ _COPY_TYPEMAPS(unsigned int, mode_t); %define final %enddef %define override %enddef #endif + +// Typemaps to automatically raise a Python exception from bad output TF_Status. +// TODO(b/77295559): expand this to all TF_Status* output params and deprecate +// raise_exception_on_not_ok_status (currently it only affects the C API). 
+%typemap(in, numinputs=0) TF_Status* status (TF_Status* status) { + $1 = TF_NewStatus(); +} + +%typemap(freearg) (TF_Status* status) { + TF_DeleteStatus($1); +} + +%typemap(argout) TF_Status* status { + TF_Code code = TF_GetCode($1); + if (code != TF_OK) { + PyObject* exc = tensorflow::PyExceptionRegistry::Lookup(code); + // Arguments to OpError. + PyObject* exc_args = Py_BuildValue("sss", nullptr, nullptr, TF_Message($1)); + SWIG_SetErrorObj(exc, exc_args); + SWIG_fail; + } +} diff --git a/tensorflow/python/profiler/tfprof_logger_test.py b/tensorflow/python/profiler/tfprof_logger_test.py index 141144f98776f3aa7c95b9ef743022aeca5084e1..caf3869f56d94a272218a0e318f2fdf5cb238795 100644 --- a/tensorflow/python/profiler/tfprof_logger_test.py +++ b/tensorflow/python/profiler/tfprof_logger_test.py @@ -38,7 +38,7 @@ class TFProfLoggerTest(test.TestCase): return math_ops.matmul(a, b) # pylint: disable=pointless-string-statement - """# TODO(xpan): This this out of core so it doesn't depend on contrib. + """# TODO(xpan): This out of core so it doesn't depend on contrib. def testFillMissingShape(self): a, b, y = self._BuildSmallPlaceholderlModel() run_options = config_pb2.RunOptions( diff --git a/tensorflow/python/pywrap_tfe.i b/tensorflow/python/pywrap_tfe.i index 39fabb9c1bc646a09557293c1f645a8b97f5bbdd..0982a67deebb2fbf086df8352069e498b2972b72 100644 --- a/tensorflow/python/pywrap_tfe.i +++ b/tensorflow/python/pywrap_tfe.i @@ -13,6 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +%include "tensorflow/python/platform/base.i" + %ignore ""; %rename("%s") TFE_NewContext; @@ -57,6 +59,8 @@ limitations under the License. 
%rename("%s") TFE_ContextOptionsSetAsync; %rename("%s") TFE_DeleteContextOptions; %rename("%s") TFE_Py_TensorShapeSlice; +%rename("%s") TFE_GetResourceHandleShapeAndType; +%rename("%s") TFE_SetResourceHandleShapeAndType; %{ #include "tensorflow/python/eager/pywrap_tfe.h" @@ -118,9 +122,9 @@ limitations under the License. } %typemap(out) (TFE_Context*) { - if ($1 == nullptr) { - SWIG_fail; - } else { + // When the TFE_Context* returned is a nullptr, we expect the status is not + // OK. This will raise an error (happens in another typemap). + if ($1 != nullptr) { $result = PyCapsule_New($1, nullptr, TFE_DeleteContextCapsule); } } diff --git a/tensorflow/python/summary/writer/event_file_writer_v2.py b/tensorflow/python/summary/writer/event_file_writer_v2.py new file mode 100644 index 0000000000000000000000000000000000000000..5c66c0f7a8fd4c65d3539d257b4e4fa89f839a98 --- /dev/null +++ b/tensorflow/python/summary/writer/event_file_writer_v2.py @@ -0,0 +1,140 @@ +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Writes events to disk in a logdir.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import summary_ops_v2 +from tensorflow.python.platform import gfile + + +class EventFileWriterV2(object): + """Writes `Event` protocol buffers to an event file via the graph. + + The `EventFileWriterV2` class is backed by the summary file writer in the v2 + summary API (currently in tf.contrib.summary), so it uses a shared summary + writer resource and graph ops to write events. + + As with the original EventFileWriter, this class will asynchronously write + Event protocol buffers to the backing file. The Event file is encoded using + the tfrecord format, which is similar to RecordIO. + """ + + def __init__(self, session, logdir, max_queue=10, flush_secs=120, + filename_suffix=''): + """Creates an `EventFileWriterV2` and an event file to write to. + + On construction, this calls `tf.contrib.summary.create_file_writer` within + the graph from `session.graph` to look up a shared summary writer resource + for `logdir` if one exists, and create one if not. Creating the summary + writer resource in turn creates a new event file in `logdir` to be filled + with `Event` protocol buffers passed to `add_event`. Graph ops to control + this writer resource are added to `session.graph` during this init call; + stateful methods on this class will call `session.run()` on these ops. + + Note that because the underlying resource is shared, it is possible that + other parts of the code using the same session may interact independently + with the resource, e.g. by flushing or even closing it. 
It is the caller's + responsibility to avoid any undesirable sharing in this regard. + + The remaining arguments to the constructor (`flush_secs`, `max_queue`, and + `filename_suffix`) control the construction of the shared writer resource + if one is created. If an existing resource is reused, these arguments have + no effect. See `tf.contrib.summary.create_file_writer` for details. + + Args: + session: A `tf.Session`. Session that will hold shared writer resource. + The writer ops will be added to session.graph during this init call. + logdir: A string. Directory where event file will be written. + max_queue: Integer. Size of the queue for pending events and summaries. + flush_secs: Number. How often, in seconds, to flush the + pending events and summaries to disk. + filename_suffix: A string. Every event file's name is suffixed with + `filename_suffix`. + """ + self._session = session + self._logdir = logdir + self._closed = False + if not gfile.IsDirectory(self._logdir): + gfile.MakeDirs(self._logdir) + + with self._session.graph.as_default(): + with ops.name_scope('filewriter'): + file_writer = summary_ops_v2.create_file_writer( + logdir=self._logdir, + max_queue=max_queue, + flush_millis=flush_secs * 1000, + filename_suffix=filename_suffix) + with summary_ops_v2.always_record_summaries(), file_writer.as_default(): + self._event_placeholder = array_ops.placeholder_with_default( + constant_op.constant('unused', dtypes.string), + shape=[]) + self._add_event_op = summary_ops_v2.import_event( + self._event_placeholder) + self._init_op = file_writer.init() + self._flush_op = file_writer.flush() + self._close_op = file_writer.close() + self._session.run(self._init_op) + + def get_logdir(self): + """Returns the directory where event file will be written.""" + return self._logdir + + def reopen(self): + """Reopens the EventFileWriter. + + Can be called after `close()` to add more events in the same directory. + The events will go into a new events file. 
+ + Does nothing if the EventFileWriter was not closed. + """ + if self._closed: + self._closed = False + self._session.run(self._init_op) + + def add_event(self, event): + """Adds an event to the event file. + + Args: + event: An `Event` protocol buffer. + """ + if not self._closed: + event_pb = event.SerializeToString() + self._session.run( + self._add_event_op, feed_dict={self._event_placeholder: event_pb}) + + def flush(self): + """Flushes the event file to disk. + + Call this method to make sure that all pending events have been written to + disk. + """ + self._session.run(self._flush_op) + + def close(self): + """Flushes the event file to disk and close the file. + + Call this method when you do not need the summary writer anymore. + """ + if not self._closed: + self.flush() + self._session.run(self._close_op) + self._closed = True diff --git a/tensorflow/python/summary/writer/writer.py b/tensorflow/python/summary/writer/writer.py index 57f78c156b1334a5486b29f2ddec957e49156e73..aca084fc9168e710316e4c988594cff69e54ebab 100644 --- a/tensorflow/python/summary/writer/writer.py +++ b/tensorflow/python/summary/writer/writer.py @@ -32,6 +32,7 @@ from tensorflow.python.platform import gfile from tensorflow.python.platform import tf_logging as logging from tensorflow.python.summary import plugin_asset from tensorflow.python.summary.writer.event_file_writer import EventFileWriter +from tensorflow.python.summary.writer.event_file_writer_v2 import EventFileWriterV2 from tensorflow.python.util.tf_export import tf_export _PLUGINS_DIR = "plugins" @@ -286,6 +287,11 @@ class FileWriter(SummaryToEventTransformer): file contents asynchronously. This allows a training program to call methods to add data to the file directly from the training loop, without slowing down training. 
+ + When constructed with a `tf.Session` parameter, a `FileWriter` instead forms + a compatibility layer over new graph-based summaries (`tf.contrib.summary`) + to facilitate the use of new summary writing with pre-existing code that + expects a `FileWriter` instance. """ def __init__(self, @@ -294,10 +300,11 @@ class FileWriter(SummaryToEventTransformer): max_queue=10, flush_secs=120, graph_def=None, - filename_suffix=None): - """Creates a `FileWriter` and an event file. + filename_suffix=None, + session=None): + """Creates a `FileWriter`, optionally shared within the given session. - On construction the summary writer creates a new event file in `logdir`. + Typically, constructing a file writer creates a new event file in `logdir`. This event file will contain `Event` protocol buffers constructed when you call one of the following functions: `add_summary()`, `add_session_log()`, `add_event()`, or `add_graph()`. @@ -317,13 +324,16 @@ class FileWriter(SummaryToEventTransformer): writer = tf.summary.FileWriter(, sess.graph) ``` - The other arguments to the constructor control the asynchronous writes to - the event file: - - * `flush_secs`: How often, in seconds, to flush the added summaries - and events to disk. - * `max_queue`: Maximum number of summaries or events pending to be - written to disk before one of the 'add' calls block. + The `session` argument to the constructor makes the returned `FileWriter` a + compatibility layer over new graph-based summaries (`tf.contrib.summary`). + Crucially, this means the underlying writer resource and events file will + be shared with any other `FileWriter` using the same `session` and `logdir`, + and with any `tf.contrib.summary.SummaryWriter` in this session using the + same shared resource name (which by default is scoped to the logdir). If + no such resource exists, one will be created using the remaining arguments + to this constructor, but if one already exists those arguments are ignored.
+ In either case, ops will be added to `session.graph` to control the + underlying file writer resource. See `tf.contrib.summary` for more details. Args: logdir: A string. Directory where event file will be written. @@ -334,6 +344,7 @@ class FileWriter(SummaryToEventTransformer): graph_def: DEPRECATED: Use the `graph` argument instead. filename_suffix: A string. Every event file's name is suffixed with `suffix`. + session: A `tf.Session` object. See details above. Raises: RuntimeError: If called with eager execution enabled. @@ -347,9 +358,12 @@ class FileWriter(SummaryToEventTransformer): raise RuntimeError( "tf.summary.FileWriter is not compatible with eager execution. " "Use tf.contrib.summary instead.") - - event_writer = EventFileWriter(logdir, max_queue, flush_secs, - filename_suffix) + if session is not None: + event_writer = EventFileWriterV2( + session, logdir, max_queue, flush_secs, filename_suffix) + else: + event_writer = EventFileWriter(logdir, max_queue, flush_secs, + filename_suffix) super(FileWriter, self).__init__(event_writer, graph, graph_def) def __enter__(self): diff --git a/tensorflow/python/summary/writer/writer_test.py b/tensorflow/python/summary/writer/writer_test.py index 88ade0aac33f1cd8f9d8cb30344aabca76a13511..dc990c2602427049ecdb7588ff217207a69cbcd2 100644 --- a/tensorflow/python/summary/writer/writer_test.py +++ b/tensorflow/python/summary/writer/writer_test.py @@ -29,10 +29,12 @@ from tensorflow.core.protobuf import config_pb2 from tensorflow.core.protobuf import meta_graph_pb2 from tensorflow.core.util import event_pb2 from tensorflow.core.util.event_pb2 import SessionLog +from tensorflow.python.client import session from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import meta_graph from tensorflow.python.framework import ops +from tensorflow.python.ops import summary_ops_v2 from tensorflow.python.platform import gfile from tensorflow.python.platform 
import test from tensorflow.python.summary import plugin_asset @@ -42,7 +44,10 @@ from tensorflow.python.summary.writer import writer_cache from tensorflow.python.util import compat -class SummaryWriterTestCase(test.TestCase): +class FileWriterTestCase(test.TestCase): + + def _FileWriter(self, *args, **kwargs): + return writer.FileWriter(*args, **kwargs) def _TestDir(self, test_name): test_dir = os.path.join(self.get_temp_dir(), test_name) @@ -96,7 +101,7 @@ class SummaryWriterTestCase(test.TestCase): def testAddingSummaryGraphAndRunMetadata(self): test_dir = self._CleanTestDir("basics") - sw = writer.FileWriter(test_dir) + sw = self._FileWriter(test_dir) sw.add_session_log(event_pb2.SessionLog(status=SessionLog.START), 1) sw.add_summary( @@ -171,7 +176,7 @@ class SummaryWriterTestCase(test.TestCase): test_dir = self._CleanTestDir("basics_named_graph") with ops.Graph().as_default() as g: constant_op.constant([12], name="douze") - sw = writer.FileWriter(test_dir, graph=g) + sw = self._FileWriter(test_dir, graph=g) sw.close() self._assertEventsWithGraph(test_dir, g, True) @@ -179,7 +184,7 @@ class SummaryWriterTestCase(test.TestCase): test_dir = self._CleanTestDir("basics_positional_graph") with ops.Graph().as_default() as g: constant_op.constant([12], name="douze") - sw = writer.FileWriter(test_dir, g) + sw = self._FileWriter(test_dir, g) sw.close() self._assertEventsWithGraph(test_dir, g, True) @@ -188,7 +193,7 @@ class SummaryWriterTestCase(test.TestCase): with ops.Graph().as_default() as g: constant_op.constant([12], name="douze") gd = g.as_graph_def() - sw = writer.FileWriter(test_dir, graph_def=gd) + sw = self._FileWriter(test_dir, graph_def=gd) sw.close() self._assertEventsWithGraph(test_dir, g, False) @@ -197,7 +202,7 @@ class SummaryWriterTestCase(test.TestCase): with ops.Graph().as_default() as g: constant_op.constant([12], name="douze") gd = g.as_graph_def() - sw = writer.FileWriter(test_dir, gd) + sw = self._FileWriter(test_dir, gd) sw.close() 
self._assertEventsWithGraph(test_dir, g, False) @@ -207,18 +212,18 @@ class SummaryWriterTestCase(test.TestCase): with ops.Graph().as_default() as g: constant_op.constant([12], name="douze") gd = g.as_graph_def() - sw = writer.FileWriter(test_dir, graph=g, graph_def=gd) + sw = self._FileWriter(test_dir, graph=g, graph_def=gd) sw.close() def testNeitherGraphNorGraphDef(self): with self.assertRaises(TypeError): test_dir = self._CleanTestDir("basics_string_instead_of_graph") - sw = writer.FileWriter(test_dir, "string instead of graph object") + sw = self._FileWriter(test_dir, "string instead of graph object") sw.close() def testCloseAndReopen(self): test_dir = self._CleanTestDir("close_and_reopen") - sw = writer.FileWriter(test_dir) + sw = self._FileWriter(test_dir) sw.add_session_log(event_pb2.SessionLog(status=SessionLog.START), 1) sw.close() # Sleep at least one second to make sure we get a new event file name. @@ -261,7 +266,7 @@ class SummaryWriterTestCase(test.TestCase): def testNonBlockingClose(self): test_dir = self._CleanTestDir("non_blocking_close") - sw = writer.FileWriter(test_dir) + sw = self._FileWriter(test_dir) # Sleep 1.2 seconds to make sure event queue is empty. time.sleep(1.2) time_before_close = time.time() @@ -270,7 +275,7 @@ class SummaryWriterTestCase(test.TestCase): def testWithStatement(self): test_dir = self._CleanTestDir("with_statement") - with writer.FileWriter(test_dir) as sw: + with self._FileWriter(test_dir) as sw: sw.add_session_log(event_pb2.SessionLog(status=SessionLog.START), 1) event_paths = sorted(glob.glob(os.path.join(test_dir, "event*"))) self.assertEquals(1, len(event_paths)) @@ -280,7 +285,7 @@ class SummaryWriterTestCase(test.TestCase): # protocol buffers correctly. 
def testAddingSummariesFromSessionRunCalls(self): test_dir = self._CleanTestDir("global_step") - sw = writer.FileWriter(test_dir) + sw = self._FileWriter(test_dir) with self.test_session(): i = constant_op.constant(1, dtype=dtypes.int32, shape=[]) l = constant_op.constant(2, dtype=dtypes.int64, shape=[]) @@ -327,7 +332,7 @@ class SummaryWriterTestCase(test.TestCase): def testPluginMetadataStrippedFromSubsequentEvents(self): test_dir = self._CleanTestDir("basics") - sw = writer.FileWriter(test_dir) + sw = self._FileWriter(test_dir) sw.add_session_log(event_pb2.SessionLog(status=SessionLog.START), 1) @@ -386,7 +391,7 @@ class SummaryWriterTestCase(test.TestCase): def testFileWriterWithSuffix(self): test_dir = self._CleanTestDir("test_suffix") - sw = writer.FileWriter(test_dir, filename_suffix="_test_suffix") + sw = self._FileWriter(test_dir, filename_suffix="_test_suffix") for _ in range(10): sw.add_summary( summary_pb2.Summary(value=[ @@ -400,9 +405,178 @@ class SummaryWriterTestCase(test.TestCase): for filename in event_filenames: self.assertTrue(filename.endswith("_test_suffix")) + def testPluginAssetSerialized(self): + class ExamplePluginAsset(plugin_asset.PluginAsset): + plugin_name = "example" + + def assets(self): + return {"foo.txt": "foo!", "bar.txt": "bar!"} + + with ops.Graph().as_default() as g: + plugin_asset.get_plugin_asset(ExamplePluginAsset) + + logdir = self.get_temp_dir() + fw = self._FileWriter(logdir) + fw.add_graph(g) + plugin_dir = os.path.join(logdir, writer._PLUGINS_DIR, "example") + + with gfile.Open(os.path.join(plugin_dir, "foo.txt"), "r") as f: + content = f.read() + self.assertEqual(content, "foo!") + + with gfile.Open(os.path.join(plugin_dir, "bar.txt"), "r") as f: + content = f.read() + self.assertEqual(content, "bar!") -class SummaryWriterCacheTest(test.TestCase): - """SummaryWriterCache tests.""" + +class SessionBasedFileWriterTestCase(FileWriterTestCase): + """Tests for FileWriter behavior when passed a Session argument.""" + + def 
_FileWriter(self, *args, **kwargs): + if "session" not in kwargs: + # Pass in test_session() as the session. It will be cached during this + # test method invocation so that any other use of test_session() with no + # graph should result in re-using the same underlying Session. + with self.test_session() as sess: + kwargs["session"] = sess + return writer.FileWriter(*args, **kwargs) + return writer.FileWriter(*args, **kwargs) + + def _createTaggedSummary(self, tag): + summary = summary_pb2.Summary() + summary.value.add(tag=tag) + return summary + + def testSharing_withOtherSessionBasedFileWriters(self): + logdir = self.get_temp_dir() + with session.Session() as sess: + # Initial file writer + writer1 = writer.FileWriter(session=sess, logdir=logdir) + writer1.add_summary(self._createTaggedSummary("one"), 1) + writer1.flush() + + # File writer, should share file with writer1 + writer2 = writer.FileWriter(session=sess, logdir=logdir) + writer2.add_summary(self._createTaggedSummary("two"), 2) + writer2.flush() + + # File writer with different logdir (shouldn't be in this logdir at all) + writer3 = writer.FileWriter(session=sess, logdir=logdir + "-other") + writer3.add_summary(self._createTaggedSummary("three"), 3) + writer3.flush() + + # File writer in a different session (should be in separate file) + time.sleep(1.1) # Ensure filename has a different timestamp + with session.Session() as other_sess: + writer4 = writer.FileWriter(session=other_sess, logdir=logdir) + writer4.add_summary(self._createTaggedSummary("four"), 4) + writer4.flush() + + # One more file writer, should share file with writer1 + writer5 = writer.FileWriter(session=sess, logdir=logdir) + writer5.add_summary(self._createTaggedSummary("five"), 5) + writer5.flush() + + event_paths = iter(sorted(glob.glob(os.path.join(logdir, "event*")))) + + # First file should have tags "one", "two", and "five" + events = summary_iterator.summary_iterator(next(event_paths)) + self.assertEqual("brain.Event:2", 
next(events).file_version) + self.assertEqual("one", next(events).summary.value[0].tag) + self.assertEqual("two", next(events).summary.value[0].tag) + self.assertEqual("five", next(events).summary.value[0].tag) + self.assertRaises(StopIteration, lambda: next(events)) + + # Second file should have just "four" + events = summary_iterator.summary_iterator(next(event_paths)) + self.assertEqual("brain.Event:2", next(events).file_version) + self.assertEqual("four", next(events).summary.value[0].tag) + self.assertRaises(StopIteration, lambda: next(events)) + + # No more files + self.assertRaises(StopIteration, lambda: next(event_paths)) + + # Just check that the other logdir file exists to be sure we wrote it + self.assertTrue(glob.glob(os.path.join(logdir + "-other", "event*"))) + + def testSharing_withExplicitSummaryFileWriters(self): + logdir = self.get_temp_dir() + with session.Session() as sess: + # Initial file writer via FileWriter(session=?) + writer1 = writer.FileWriter(session=sess, logdir=logdir) + writer1.add_summary(self._createTaggedSummary("one"), 1) + writer1.flush() + + # Next one via create_file_writer(), should use same file + writer2 = summary_ops_v2.create_file_writer(logdir=logdir) + with summary_ops_v2.always_record_summaries(), writer2.as_default(): + summary2 = summary_ops_v2.scalar("two", 2.0, step=2) + sess.run(writer2.init()) + sess.run(summary2) + sess.run(writer2.flush()) + + # Next has different shared name, should be in separate file + time.sleep(1.1) # Ensure filename has a different timestamp + writer3 = summary_ops_v2.create_file_writer(logdir=logdir, name="other") + with summary_ops_v2.always_record_summaries(), writer3.as_default(): + summary3 = summary_ops_v2.scalar("three", 3.0, step=3) + sess.run(writer3.init()) + sess.run(summary3) + sess.run(writer3.flush()) + + # Next uses a second session, should be in separate file + time.sleep(1.1) # Ensure filename has a different timestamp + with session.Session() as other_sess: + writer4 = 
summary_ops_v2.create_file_writer(logdir=logdir) + with summary_ops_v2.always_record_summaries(), writer4.as_default(): + summary4 = summary_ops_v2.scalar("four", 4.0, step=4) + other_sess.run(writer4.init()) + other_sess.run(summary4) + other_sess.run(writer4.flush()) + + # Next via FileWriter(session=?) uses same second session, should be in + # same separate file. (This checks sharing in the other direction) + writer5 = writer.FileWriter(session=other_sess, logdir=logdir) + writer5.add_summary(self._createTaggedSummary("five"), 5) + writer5.flush() + + # One more via create_file_writer(), should use same file + writer6 = summary_ops_v2.create_file_writer(logdir=logdir) + with summary_ops_v2.always_record_summaries(), writer6.as_default(): + summary6 = summary_ops_v2.scalar("six", 6.0, step=6) + sess.run(writer6.init()) + sess.run(summary6) + sess.run(writer6.flush()) + + event_paths = iter(sorted(glob.glob(os.path.join(logdir, "event*")))) + + # First file should have tags "one", "two", and "six" + events = summary_iterator.summary_iterator(next(event_paths)) + self.assertEqual("brain.Event:2", next(events).file_version) + self.assertEqual("one", next(events).summary.value[0].tag) + self.assertEqual("two", next(events).summary.value[0].tag) + self.assertEqual("six", next(events).summary.value[0].tag) + self.assertRaises(StopIteration, lambda: next(events)) + + # Second file should have just "three" + events = summary_iterator.summary_iterator(next(event_paths)) + self.assertEqual("brain.Event:2", next(events).file_version) + self.assertEqual("three", next(events).summary.value[0].tag) + self.assertRaises(StopIteration, lambda: next(events)) + + # Third file should have "four" and "five" + events = summary_iterator.summary_iterator(next(event_paths)) + self.assertEqual("brain.Event:2", next(events).file_version) + self.assertEqual("four", next(events).summary.value[0].tag) + self.assertEqual("five", next(events).summary.value[0].tag) + 
self.assertRaises(StopIteration, lambda: next(events)) + + # No more files + self.assertRaises(StopIteration, lambda: next(event_paths)) + + +class FileWriterCacheTest(test.TestCase): + """FileWriterCache tests.""" def _test_dir(self, test_name): """Create an empty dir to use for tests. @@ -448,32 +622,5 @@ class SummaryWriterCacheTest(test.TestCase): self.assertFalse(sw1 == sw2) -class ExamplePluginAsset(plugin_asset.PluginAsset): - plugin_name = "example" - - def assets(self): - return {"foo.txt": "foo!", "bar.txt": "bar!"} - - -class PluginAssetsTest(test.TestCase): - - def testPluginAssetSerialized(self): - with ops.Graph().as_default() as g: - plugin_asset.get_plugin_asset(ExamplePluginAsset) - - logdir = self.get_temp_dir() - fw = writer.FileWriter(logdir) - fw.add_graph(g) - plugin_dir = os.path.join(logdir, writer._PLUGINS_DIR, "example") - - with gfile.Open(os.path.join(plugin_dir, "foo.txt"), "r") as f: - content = f.read() - self.assertEqual(content, "foo!") - - with gfile.Open(os.path.join(plugin_dir, "bar.txt"), "r") as f: - content = f.read() - self.assertEqual(content, "bar!") - - if __name__ == "__main__": test.main() diff --git a/tensorflow/python/tensorflow.i b/tensorflow/python/tensorflow.i index 82b908ac0e95643d1daf5ed062be44a58cfea97f..26e8acd8977734768accb1f9c7e37431c337ee34 100644 --- a/tensorflow/python/tensorflow.i +++ b/tensorflow/python/tensorflow.i @@ -25,6 +25,7 @@ limitations under the License. %include "tensorflow/python/util/tfprof.i" %include "tensorflow/python/lib/core/py_func.i" +%include "tensorflow/python/lib/core/py_exception_registry.i" %include "tensorflow/python/lib/io/py_record_reader.i" %include "tensorflow/python/lib/io/py_record_writer.i" @@ -54,4 +55,3 @@ limitations under the License. 
%include "tensorflow/python/grappler/tf_optimizer.i" %include "tensorflow/python/grappler/cost_analyzer.i" %include "tensorflow/python/grappler/model_analyzer.i" - diff --git a/tensorflow/python/tools/BUILD b/tensorflow/python/tools/BUILD index 6e39ce8c808a1716ff9263982e99a14592472c76..84d20f8e3628ed28fc408e9100b2999973911a33 100644 --- a/tensorflow/python/tools/BUILD +++ b/tensorflow/python/tools/BUILD @@ -28,7 +28,7 @@ py_library( name = "saved_model_utils", srcs = ["saved_model_utils.py"], srcs_version = "PY2AND3", - deps = ["//tensorflow:tensorflow_py"], + deps = ["//tensorflow/contrib/saved_model:reader"], ) py_library( @@ -41,8 +41,10 @@ py_library( "//tensorflow/python", # TODO(b/34059704): remove when fixed "//tensorflow/python:client", "//tensorflow/python:framework", + "//tensorflow/python:parsing_ops", "//tensorflow/python:platform", "//tensorflow/python:training", + "//tensorflow/python/saved_model:loader", "@six_archive//:six", ], ) @@ -52,14 +54,7 @@ py_binary( srcs = ["freeze_graph.py"], srcs_version = "PY2AND3", deps = [ - ":saved_model_utils", - "//tensorflow/core:protos_all_py", - "//tensorflow/python", # TODO(b/34059704): remove when fixed - "//tensorflow/python:client", - "//tensorflow/python:framework", - "//tensorflow/python:platform", - "//tensorflow/python:training", - "@six_archive//:six", + ":freeze_graph_lib", ], ) diff --git a/tensorflow/python/tools/optimize_for_inference.py b/tensorflow/python/tools/optimize_for_inference.py index 902748d55efedf2166cbeb0d8e0fcff0d18ed152..dac6a06a89c7596dd66d0ed7a2e5a59a0ba9b9dd 100644 --- a/tensorflow/python/tools/optimize_for_inference.py +++ b/tensorflow/python/tools/optimize_for_inference.py @@ -87,7 +87,9 @@ def main(unused_args): output_graph_def = optimize_for_inference_lib.optimize_for_inference( input_graph_def, FLAGS.input_names.split(","), - FLAGS.output_names.split(","), FLAGS.placeholder_type_enum) + FLAGS.output_names.split(","), + FLAGS.placeholder_type_enum, + FLAGS.toco_compatible) if 
FLAGS.frozen_graph: f = gfile.FastGFile(FLAGS.output, "w") @@ -138,6 +140,14 @@ def parse_args(): type=int, default=dtypes.float32.as_datatype_enum, help="The AttrValue enum to use for placeholders.") + parser.add_argument( + "--toco_compatible", + type=bool, + default=False, + help="""\ + If true, only use ops compatible with Tensorflow + Lite Optimizing Converter.\ + """) return parser.parse_known_args() diff --git a/tensorflow/python/tools/optimize_for_inference_lib.py b/tensorflow/python/tools/optimize_for_inference_lib.py index 9c1927122252f45ddfa8092045c7589fa0f45532..bb90d1cd6e33aacf4bb7498fb9c9e7ecfb447c04 100644 --- a/tensorflow/python/tools/optimize_for_inference_lib.py +++ b/tensorflow/python/tools/optimize_for_inference_lib.py @@ -87,7 +87,7 @@ EPSILON_ATTR = { def optimize_for_inference(input_graph_def, input_node_names, output_node_names, - placeholder_type_enum): + placeholder_type_enum, toco_compatible=False): """Applies a series of inference optimizations on the input graph. Args: @@ -98,6 +98,8 @@ def optimize_for_inference(input_graph_def, input_node_names, output_node_names, results. placeholder_type_enum: The AttrValue enum for the placeholder data type, or a list that specifies one value per input node name. + toco_compatible: Boolean, if True, only runs optimizations that result in + TOCO compatible graph operations (default=False). Returns: An optimized version of the input graph. 
@@ -110,8 +112,9 @@ def optimize_for_inference(input_graph_def, input_node_names, output_node_names, optimized_graph_def = graph_util.remove_training_nodes( optimized_graph_def, output_node_names) optimized_graph_def = fold_batch_norms(optimized_graph_def) - optimized_graph_def = fuse_resize_and_conv(optimized_graph_def, - output_node_names) + if not toco_compatible: + optimized_graph_def = fuse_resize_and_conv(optimized_graph_def, + output_node_names) ensure_graph_is_valid(optimized_graph_def) return optimized_graph_def diff --git a/tensorflow/python/tools/saved_model_cli.py b/tensorflow/python/tools/saved_model_cli.py index b88be4ae04d5dc7a7641fb8dbd7e56e61035869f..73ea85ab0c4c5896efe6106cc12c62043e738d11 100644 --- a/tensorflow/python/tools/saved_model_cli.py +++ b/tensorflow/python/tools/saved_model_cli.py @@ -41,6 +41,7 @@ from tensorflow.python.debug.wrappers import local_cli_wrapper from tensorflow.python.framework import meta_graph as meta_graph_lib from tensorflow.python.framework import ops as ops_lib from tensorflow.python.platform import app # pylint: disable=unused-import +from tensorflow.python.lib.io import file_io from tensorflow.python.saved_model import loader from tensorflow.python.tools import saved_model_utils @@ -543,7 +544,7 @@ def load_inputs_from_input_arg_string(inputs_str, input_exprs_str, input_examples = preprocess_input_examples_arg_string(input_examples_str) for input_tensor_key, (filename, variable_name) in inputs.items(): - data = np.load(filename) + data = np.load(file_io.FileIO(filename, mode='r')) # When a variable_name key is specified for the input file if variable_name: diff --git a/tensorflow/python/training/adam.py b/tensorflow/python/training/adam.py index 006e360389b404a8edd97c9a8bf4b8876c828004..6fa3ff66583ce07a6ee7b0d8158c851ea578637c 100644 --- a/tensorflow/python/training/adam.py +++ b/tensorflow/python/training/adam.py @@ -43,23 +43,19 @@ class AdamOptimizer(optimizer.Optimizer): Initialization: - ``` - m_0 <- 0 
(Initialize initial 1st moment vector) - v_0 <- 0 (Initialize initial 2nd moment vector) - t <- 0 (Initialize timestep) - ``` + $$m_0 := 0 (Initialize initial 1st moment vector)$$ + $$v_0 := 0 (Initialize initial 2nd moment vector)$$ + $$t := 0 (Initialize timestep)$$ The update rule for `variable` with gradient `g` uses an optimization described at the end of section2 of the paper: - ``` - t <- t + 1 - lr_t <- learning_rate * sqrt(1 - beta2^t) / (1 - beta1^t) + $$t := t + 1$$ + $$lr_t := \text{learning_rate} * \sqrt{1 - beta_2^t} / (1 - beta_1^t)$$ - m_t <- beta1 * m_{t-1} + (1 - beta1) * g - v_t <- beta2 * v_{t-1} + (1 - beta2) * g * g - variable <- variable - lr_t * m_t / (sqrt(v_t) + epsilon) - ``` + $$m_t := beta_1 * m_{t-1} + (1 - beta_1) * g$$ + $$v_t := beta_2 * v_{t-1} + (1 - beta_2) * g * g$$ + $$variable := variable - lr_t * m_t / (\sqrt{v_t} + \epsilon)$$ The default value of 1e-8 for epsilon might not be a good default in general. For example, when training an Inception network on ImageNet a diff --git a/tensorflow/python/training/basic_session_run_hooks.py b/tensorflow/python/training/basic_session_run_hooks.py index aae757b99aa9abb2fca112dcc781fc31e367649d..3651291bdfcc77d6a6c288e1f53a78ef696a99d1 100644 --- a/tensorflow/python/training/basic_session_run_hooks.py +++ b/tensorflow/python/training/basic_session_run_hooks.py @@ -391,7 +391,8 @@ class CheckpointSaverHook(session_run_hook.SessionRunHook): saver=None, checkpoint_basename="model.ckpt", scaffold=None, - listeners=None): + listeners=None, + steps_per_run=1): """Initializes a `CheckpointSaverHook`. Args: @@ -404,6 +405,9 @@ class CheckpointSaverHook(session_run_hook.SessionRunHook): listeners: List of `CheckpointSaverListener` subclass instances. Used for callbacks that run immediately before or after this hook saves the checkpoint. + steps_per_run: `int`, number of steps that occur between each invocation + of the hook.
Primarily used for TPU workloads which run multiple steps + in a while loop in a single Session.run. Raises: ValueError: One of `save_steps` or `save_secs` should be set. @@ -419,6 +423,7 @@ class CheckpointSaverHook(session_run_hook.SessionRunHook): self._timer = SecondOrStepTimer(every_secs=save_secs, every_steps=save_steps) self._listeners = listeners or [] + self._steps_per_run = steps_per_run def begin(self): self._summary_writer = SummaryWriterCache.get(self._checkpoint_dir) @@ -450,7 +455,8 @@ class CheckpointSaverHook(session_run_hook.SessionRunHook): def after_run(self, run_context, run_values): stale_global_step = run_values.results - if self._timer.should_trigger_for_step(stale_global_step+1): + if self._timer.should_trigger_for_step( + stale_global_step + self._steps_per_run): # get the real value after train op. global_step = run_context.session.run(self._global_step_tensor) if self._timer.should_trigger_for_step(global_step): @@ -859,6 +865,7 @@ class ProfilerHook(session_run_hook.SessionRunHook): showing the sizes and lifetimes of tensors. 
""" self._output_file = os.path.join(output_dir, "timeline-{}.json") + self._file_writer = SummaryWriterCache.get(output_dir) self._show_dataflow = show_dataflow self._show_memory = show_memory self._timer = SecondOrStepTimer( @@ -889,6 +896,8 @@ class ProfilerHook(session_run_hook.SessionRunHook): self._save(global_step, self._output_file.format(global_step), run_values.run_metadata.step_stats) + self._file_writer.add_run_metadata(run_values.run_metadata, + "step_%d" % global_step) self._next_step = global_step + 1 diff --git a/tensorflow/python/training/basic_session_run_hooks_test.py b/tensorflow/python/training/basic_session_run_hooks_test.py index 2547661e5250e94136a100aa8c30c9dbb7455018..25962f6bf7abf5f11785ccb6ba0e31c5c85f4ebd 100644 --- a/tensorflow/python/training/basic_session_run_hooks_test.py +++ b/tensorflow/python/training/basic_session_run_hooks_test.py @@ -719,6 +719,99 @@ class CheckpointSaverHookTest(test.TestCase): fake_summary_writer.FakeSummaryWriter.uninstall() +class CheckpointSaverHookMultiStepTest(test.TestCase): + + def setUp(self): + self.model_dir = tempfile.mkdtemp() + self.graph = ops.Graph() + self.steps_per_run = 5 + with self.graph.as_default(): + self.scaffold = monitored_session.Scaffold() + self.global_step = variables.get_or_create_global_step() + self.train_op = training_util._increment_global_step(self.steps_per_run) + + def tearDown(self): + shutil.rmtree(self.model_dir, ignore_errors=True) + + def test_save_steps_saves_in_first_step(self): + with self.graph.as_default(): + hook = basic_session_run_hooks.CheckpointSaverHook( + self.model_dir, + save_steps=2*self.steps_per_run, + scaffold=self.scaffold, + steps_per_run=self.steps_per_run) + hook.begin() + self.scaffold.finalize() + with session_lib.Session() as sess: + sess.run(self.scaffold.init_op) + mon_sess = monitored_session._HookedSession(sess, [hook]) + mon_sess.run(self.train_op) + self.assertEqual(5, + checkpoint_utils.load_variable(self.model_dir, + 
self.global_step.name)) + + def test_save_steps_saves_periodically(self): + with self.graph.as_default(): + hook = basic_session_run_hooks.CheckpointSaverHook( + self.model_dir, + save_steps=2*self.steps_per_run, + scaffold=self.scaffold, + steps_per_run=self.steps_per_run) + hook.begin() + self.scaffold.finalize() + with session_lib.Session() as sess: + sess.run(self.scaffold.init_op) + mon_sess = monitored_session._HookedSession(sess, [hook]) + mon_sess.run(self.train_op) + # Saved (step=5) + self.assertEqual(5, + checkpoint_utils.load_variable(self.model_dir, + self.global_step.name)) + + mon_sess.run(self.train_op) + # Not saved (step=10) + self.assertEqual(5, + checkpoint_utils.load_variable(self.model_dir, + self.global_step.name)) + + mon_sess.run(self.train_op) + # Saved (step=15) + self.assertEqual(15, + checkpoint_utils.load_variable(self.model_dir, + self.global_step.name)) + + mon_sess.run(self.train_op) + # Not saved (step=20) + self.assertEqual(15, + checkpoint_utils.load_variable(self.model_dir, + self.global_step.name)) + + mon_sess.run(self.train_op) + # Saved (step=25) + self.assertEqual(25, + checkpoint_utils.load_variable(self.model_dir, + self.global_step.name)) + + def test_save_steps_saves_at_end(self): + with self.graph.as_default(): + hook = basic_session_run_hooks.CheckpointSaverHook( + self.model_dir, + save_steps=2*self.steps_per_run, + scaffold=self.scaffold, + steps_per_run=self.steps_per_run) + hook.begin() + self.scaffold.finalize() + with session_lib.Session() as sess: + sess.run(self.scaffold.init_op) + mon_sess = monitored_session._HookedSession(sess, [hook]) + mon_sess.run(self.train_op) + mon_sess.run(self.train_op) + hook.end(sess) + self.assertEqual(10, + checkpoint_utils.load_variable(self.model_dir, + self.global_step.name)) + + class ResourceCheckpointSaverHookTest(test.TestCase): def setUp(self): @@ -1274,6 +1367,19 @@ class ProfilerHookTest(test.TestCase): sess.run(self.train_op) # Saved. 
self.assertEqual(3, self._count_timeline_files()) + def test_run_metadata_saves_in_first_step(self): + writer_cache.FileWriterCache.clear() + fake_summary_writer.FakeSummaryWriter.install() + fake_writer = writer_cache.FileWriterCache.get(self.output_dir) + with self.graph.as_default(): + hook = basic_session_run_hooks.ProfilerHook( + save_secs=2, output_dir=self.output_dir) + with monitored_session.SingularMonitoredSession(hooks=[hook]) as sess: + sess.run(self.train_op) # Saved. + self.assertEqual( + list(fake_writer._added_run_metadata.keys()), ['step_1']) + fake_summary_writer.FakeSummaryWriter.uninstall() + if __name__ == '__main__': test.main() diff --git a/tensorflow/python/training/checkpointable.py b/tensorflow/python/training/checkpointable.py index bbbe1e8ac5b985b11f2481ddcadedc06ed70a4fb..0b8473742c1b46c49d66a0bdbbd329aff58201ab 100644 --- a/tensorflow/python/training/checkpointable.py +++ b/tensorflow/python/training/checkpointable.py @@ -26,6 +26,11 @@ from tensorflow.python.ops import array_ops from tensorflow.python.ops import gen_io_ops as io_ops from tensorflow.python.util import nest + +# Key where the object graph proto is saved in a TensorBundle +OBJECT_GRAPH_PROTO_KEY = "_CHECKPOINTABLE_OBJECT_GRAPH" + + # A key indicating a variable's value in an object's checkpointed Tensors # (Checkpointable._gather_saveables_for_checkpoint). If this is the only key and # the object has no dependencies, then its value may be restored on object @@ -94,12 +99,13 @@ class _CheckpointPosition(object): def restore(self, checkpointable): """Restore this value into `checkpointable`.""" - if self.bind_object(checkpointable): - # This object's correspondence with a checkpointed object is new, so - # process deferred restorations for it and its dependencies. 
- restore_ops = checkpointable._restore_from_checkpoint_position(self) # pylint: disable=protected-access - if restore_ops: - self._checkpoint.restore_ops.extend(restore_ops) + with ops.init_scope(): + if self.bind_object(checkpointable): + # This object's correspondence with a checkpointed object is new, so + # process deferred restorations for it and its dependencies. + restore_ops = checkpointable._restore_from_checkpoint_position(self) # pylint: disable=protected-access + if restore_ops: + self._checkpoint.restore_ops.extend(restore_ops) def bind_object(self, checkpointable): """Set a checkpoint<->object correspondence and process slot variables. @@ -409,28 +415,29 @@ class CheckpointableBase(object): "Checkpointable._add_variable called to create another with " "that name. Variable names must be unique within a Checkpointable " "object.") % (name,)) - if context.executing_eagerly(): - # If this is a variable with a single Tensor stored in the checkpoint, we - # can set that value as an initializer rather than initializing and then - # assigning (when executing eagerly). This call returns None if there is - # nothing to restore. - checkpoint_initializer = self._preload_simple_restoration( - name=name, shape=shape) - else: - checkpoint_initializer = None - if (checkpoint_initializer is not None - and not ( - isinstance(initializer, CheckpointInitialValue) - and initializer.restore_uid > checkpoint_initializer.restore_uid)): - # If multiple Checkpointable objects are "creating" the same variable via - # the magic of custom getters, the one with the highest restore UID (the - # one called last) has to make the final initializer. If another custom - # getter interrupts this process by overwriting the initializer, then - # we'll catch that when we call _track_checkpointable. So this is "best - # effort" to set the initializer with the highest restore UID. 
- initializer = checkpoint_initializer - shape = None - + with ops.init_scope(): + if context.executing_eagerly(): + # If this is a variable with a single Tensor stored in the checkpoint, + # we can set that value as an initializer rather than initializing and + # then assigning (when executing eagerly). This call returns None if + # there is nothing to restore. + checkpoint_initializer = self._preload_simple_restoration( + name=name, shape=shape) + else: + checkpoint_initializer = None + if (checkpoint_initializer is not None + and not ( + isinstance(initializer, CheckpointInitialValue) + and (initializer.restore_uid + > checkpoint_initializer.restore_uid))): + # If multiple Checkpointable objects are "creating" the same variable + # via the magic of custom getters, the one with the highest restore UID + # (the one called last) has to make the final initializer. If another + # custom getter interrupts this process by overwriting the initializer, + # then we'll catch that when we call _track_checkpointable. So this is + # "best effort" to set the initializer with the highest restore UID. 
+ initializer = checkpoint_initializer + shape = None new_variable = getter( name=name, shape=shape, dtype=dtype, initializer=initializer, **kwargs_for_getter) diff --git a/tensorflow/python/training/checkpointable_utils.py b/tensorflow/python/training/checkpointable_utils.py index 32123f87ef2d12497077ab0e2f7d4d4cad1ec5dd..4769e15120c32707c2048c4fdcdce0ba3672b9d0 100644 --- a/tensorflow/python/training/checkpointable_utils.py +++ b/tensorflow/python/training/checkpointable_utils.py @@ -17,14 +17,47 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import abc +import collections import weakref +from tensorflow.core.protobuf import checkpointable_object_graph_pb2 +from tensorflow.python import pywrap_tensorflow +from tensorflow.python.client import session as session_lib +from tensorflow.python.eager import context +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors_impl from tensorflow.python.framework import ops -from tensorflow.python.training import checkpointable +from tensorflow.python.framework import tensor_shape +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import init_ops +from tensorflow.python.ops import resource_variable_ops +from tensorflow.python.ops import variable_scope +from tensorflow.python.training import checkpointable as checkpointable_lib +from tensorflow.python.training import optimizer as optimizer_lib from tensorflow.python.training import saver as saver_lib +from tensorflow.python.util import deprecation +from tensorflow.python.util.tf_export import tf_export -class _Checkpoint(object): +_ESCAPE_CHAR = "." # For avoiding conflicts with user-specified names. + +# Keyword for identifying that the next bit of a checkpoint variable name is a +# slot name. 
Checkpoint names for slot variables look like: +# +# <path to variable>/<_OPTIMIZER_SLOTS_NAME>/<path to optimizer>/<slot name> +# +# Where <path to variable> is a full path from the checkpoint root to the +# variable being slotted for. +_OPTIMIZER_SLOTS_NAME = _ESCAPE_CHAR + "OPTIMIZER_SLOT" +# Keyword for separating the path to an object from the name of an +# attribute in checkpoint names. Used like: +# <path to object>/<_OBJECT_ATTRIBUTES_NAME>/<name of attribute> +_OBJECT_ATTRIBUTES_NAME = _ESCAPE_CHAR + "ATTRIBUTES" + + +class _CheckpointRestoreCoordinator(object): """Holds the status of an object-based checkpoint load.""" def __init__(self, object_graph_proto, save_path, dtype_map=None): @@ -72,7 +105,973 @@ class _Checkpoint(object): # `node` refers to an `Optimizer`, since only these have slot variables. self.slot_restorations.setdefault( slot_reference.original_variable_node_id, []).append( - checkpointable._SlotVariableRestoration( # pylint: disable=protected-access + checkpointable_lib._SlotVariableRestoration( # pylint: disable=protected-access optimizer_id=node_index, slot_variable_id=slot_reference.slot_variable_node_id, slot_name=slot_reference.slot_name)) + + +# TODO(allenl): If this ends up in a public API, consider adding LINT.IfChange +# or consolidating the implementation with get_variable.
+def _default_getter(name, shape, dtype, initializer=None, + partition_info=None, **kwargs): + """A pared-down version of get_variable which does not reuse variables.""" + dtype = dtypes.as_dtype(dtype) + shape_object = tensor_shape.as_shape(shape) + with ops.init_scope(): + if initializer is None: + initializer, initializing_from_value = ( + variable_scope._get_default_variable_store()._get_default_initializer( # pylint: disable=protected-access + name=name, shape=shape_object, dtype=dtype)) + else: + initializing_from_value = not callable(initializer) + # Same logic as get_variable + variable_dtype = dtype.base_dtype + if initializing_from_value: + if shape is not None: + raise ValueError("If initializer is a constant, do not specify shape.") + initial_value = initializer + else: + # Instantiate initializer if provided initializer is a type object. + if isinstance(initializer, type(init_ops.Initializer)): + initializer = initializer(dtype=dtype) + def initial_value(): + return initializer( + shape_object.as_list(), dtype=dtype, partition_info=partition_info) + return resource_variable_ops.ResourceVariable( + initial_value=initial_value, + name=name, + dtype=variable_dtype, + **kwargs + ) + + +def add_variable(checkpointable, name, shape=None, dtype=dtypes.float32, + initializer=None): + """Add a variable to a Checkpointable with no scope influence.""" + return checkpointable._add_variable_with_custom_getter( # pylint: disable=protected-access + name=name, shape=shape, dtype=dtype, + initializer=initializer, getter=_default_getter) + + +def _breadth_first_checkpointable_traversal(root_checkpointable): + """Find shortest paths to all variables owned by dependencies of root.""" + bfs_sorted = [] + to_visit = collections.deque([root_checkpointable]) + path_to_root = {root_checkpointable: ()} + while to_visit: + current_checkpointable = to_visit.popleft() + current_checkpointable._maybe_initialize_checkpointable() # pylint: disable=protected-access + 
bfs_sorted.append(current_checkpointable) + for child_checkpointable in ( + current_checkpointable._checkpoint_dependencies): # pylint: disable=protected-access + if child_checkpointable.ref not in path_to_root: + path_to_root[child_checkpointable.ref] = ( + path_to_root[current_checkpointable] + (child_checkpointable,)) + to_visit.append(child_checkpointable.ref) + return bfs_sorted, path_to_root + + +def _escape_local_name(name): + # We need to support slashes in local names for compatibility, since this + # naming scheme is being patched in to things like Layer.add_variable where + # slashes were previously accepted. We also want to use slashes to indicate + # edges traversed to reach the variable, so we escape forward slashes in + # names. + return (name.replace(_ESCAPE_CHAR, _ESCAPE_CHAR + _ESCAPE_CHAR) + .replace(r"/", _ESCAPE_CHAR + "S")) + + +def _object_prefix_from_path(path_to_root): + return "/".join( + (_escape_local_name(checkpointable.name) + for checkpointable in path_to_root)) + + +def _slot_variable_naming_for_optimizer(optimizer_path): + """Make a function for naming slot variables in an optimizer.""" + # Name slot variables: + # + # /<_OPTIMIZER_SLOTS_NAME>// + # + # where is exactly the checkpoint name used for the original + # variable, including the path from the checkpoint root and the local name in + # the object which owns it. Note that we only save slot variables if the + # variable it's slotting for is also being saved. 
+ + optimizer_identifier = "/%s/%s/" % (_OPTIMIZER_SLOTS_NAME, optimizer_path) + + def _name_slot_variable(variable_path, slot_name): + """With an optimizer specified, name a slot variable.""" + return (variable_path + + optimizer_identifier + + _escape_local_name(slot_name)) + + return _name_slot_variable + + +def _serialize_slot_variables(checkpointable_objects, node_ids, object_names): + """Gather and name slot variables.""" + non_slot_objects = list(checkpointable_objects) + slot_variables = {} + for checkpointable in non_slot_objects: + if isinstance(checkpointable, optimizer_lib.Optimizer): + naming_scheme = _slot_variable_naming_for_optimizer( + optimizer_path=object_names[checkpointable]) + slot_names = checkpointable.get_slot_names() + for slot_name in slot_names: + for original_variable_node_id, original_variable in enumerate( + non_slot_objects): + try: + slot_variable = checkpointable.get_slot( + original_variable, slot_name) + except AttributeError: + slot_variable = None + if slot_variable is None: + continue + slot_variable._maybe_initialize_checkpointable() # pylint: disable=protected-access + if slot_variable._checkpoint_dependencies: # pylint: disable=protected-access + # TODO(allenl): Gather dependencies of slot variables. + raise NotImplementedError( + "Currently only variables with no dependencies can be saved as " + "slot variables. File a feature request if this limitation " + "bothers you.") + if slot_variable in node_ids: + raise NotImplementedError( + "A slot variable was re-used as a dependency of a " + "Checkpointable object. This is not currently allowed. 
File a " + "feature request if this limitation bothers you.") + checkpoint_name = naming_scheme( + variable_path=object_names[original_variable], + slot_name=slot_name) + object_names[slot_variable] = checkpoint_name + slot_variable_node_id = len(checkpointable_objects) + node_ids[slot_variable] = slot_variable_node_id + checkpointable_objects.append(slot_variable) + slot_variable_proto = ( + checkpointable_object_graph_pb2.CheckpointableObjectGraph + .CheckpointableObject.SlotVariableReference( + slot_name=slot_name, + original_variable_node_id=original_variable_node_id, + slot_variable_node_id=slot_variable_node_id)) + slot_variables.setdefault(checkpointable, []).append( + slot_variable_proto) + return slot_variables + + +def _serialize_checkpointables( + checkpointable_objects, node_ids, object_names, slot_variables): + """Name non-slot `Checkpointable`s and add them to `object_graph_proto`.""" + object_graph_proto = ( + checkpointable_object_graph_pb2.CheckpointableObjectGraph()) + named_saveables = {} + + for checkpoint_id, checkpointable in enumerate(checkpointable_objects): + assert node_ids[checkpointable] == checkpoint_id + object_proto = object_graph_proto.nodes.add() + object_proto.slot_variables.extend(slot_variables.get(checkpointable, ())) + object_name = object_names[checkpointable] + for name, saveable_factory in ( + checkpointable._gather_saveables_for_checkpoint().items()): # pylint: disable=protected-access + attribute = object_proto.attributes.add() + attribute.name = name + attribute.checkpoint_key = "%s/%s/%s" % ( + object_name, _OBJECT_ATTRIBUTES_NAME, _escape_local_name(name)) + if callable(saveable_factory): + saveable = saveable_factory(name=attribute.checkpoint_key) + else: + saveable = saveable_factory + # Figure out the name-based Saver's name for this variable. 
+ saver_dict = saver_lib.BaseSaverBuilder.OpListToDict( + [saveable], convert_variable_to_tensor=False) + attribute.full_name, = saver_dict.keys() + named_saveables[attribute.checkpoint_key] = saveable + + for child in checkpointable._checkpoint_dependencies: # pylint: disable=protected-access + child_proto = object_proto.children.add() + child_proto.node_id = node_ids[child.ref] + child_proto.local_name = child.name + + return named_saveables, object_graph_proto + + +def _serialize_object_graph(root_checkpointable): + """Determine checkpoint keys for variables and build a serialized graph. + + Non-slot variables are keyed based on a shortest path from the root saveable + to the object which owns the variable (i.e. the one which called + `Checkpointable._add_variable` to create it). + + Slot variables are keyed based on a shortest path to the variable being + slotted for, a shortest path to their optimizer, and the slot name. + + Args: + root_checkpointable: A `Checkpointable` object whose variables (including + the variables of dependencies, recursively) should be saved. + + Returns: + A tuple of (named_variables, object_graph_proto): + named_variables: A dictionary mapping names to variable objects. + object_graph_proto: A CheckpointableObjectGraph protocol buffer containing + the serialized object graph and variable references. + + Raises: + ValueError: If there are invalid characters in an optimizer's slot names. 
+ """ + checkpointable_objects, path_to_root = ( + _breadth_first_checkpointable_traversal(root_checkpointable)) + object_names = { + obj: _object_prefix_from_path(path) + for obj, path in path_to_root.items()} + node_ids = {node: node_id for node_id, node + in enumerate(checkpointable_objects)} + slot_variables = _serialize_slot_variables( + checkpointable_objects=checkpointable_objects, + node_ids=node_ids, + object_names=object_names) + return _serialize_checkpointables( + checkpointable_objects=checkpointable_objects, + node_ids=node_ids, + object_names=object_names, + slot_variables=slot_variables) + + +def gather_initializers(root_checkpointable): + """Traverse the object graph and find initialization ops. + + Looks for `Checkpointable` objects which are dependencies of + `root_checkpointable` and which have an `initializer` property. Includes + initializers for slot variables only if the variable they are slotting for and + the optimizer are dependencies of `root_checkpointable` (i.e. if they would be + saved with a checkpoint). + + Args: + root_checkpointable: A `Checkpointable` object to gather initializers for. + Returns: + A list of initialization ops. + """ + # TODO(allenl): Extract out gathering logic so the naming logic doesn't have + # to run. 
+ checkpointable_objects, path_to_root = ( + _breadth_first_checkpointable_traversal(root_checkpointable)) + object_names = { + obj: _object_prefix_from_path(path) + for obj, path in path_to_root.items()} + node_ids = {node: node_id for node_id, node + in enumerate(checkpointable_objects)} + _serialize_slot_variables( + checkpointable_objects=checkpointable_objects, + node_ids=node_ids, + object_names=object_names) + return [c.initializer for c in checkpointable_objects + if hasattr(c, "initializer") and c.initializer is not None] + + +class _NoRestoreSaveable(saver_lib.BaseSaverBuilder.SaveableObject): + + def __init__(self, tensor, name): + spec = saver_lib.BaseSaverBuilder.SaveSpec(tensor, "", name) + super(_NoRestoreSaveable, self).__init__(tensor, [spec], name) + + def restore(self, restored_tensors, restored_shapes): + return control_flow_ops.no_op() + + +class _LoadStatus(object): + """Abstract base for load status callbacks.""" + + @abc.abstractmethod + def assert_consumed(self): + """Raises an exception unless a non-trivial restoration has completed.""" + pass + + @abc.abstractmethod + def run_restore_ops(self, session=None): + """Runs restore ops from the checkpoint. Requires a valid checkpoint.""" + pass + + @abc.abstractmethod + def initialize_or_restore(self, session=None): + """Runs restore ops from the checkpoint, or initializes variables.""" + pass + + +class CheckpointLoadStatus(_LoadStatus): + """Checks the status of checkpoint loading and manages restore ops. + + Returned from `Saver.restore`. Since `restore` may defer the loading of values + in the checkpoint which don't yet have corresponding Python objects, + `CheckpointLoadStatus` provides a callback to verify that checkpoint loading + is complete (`assert_consumed`). + + When graph building, `restore` does not run restore ops itself since their + creation may be deferred. 
The `run_restore_ops` method must be called once all + Python objects with values to restore have been created and added to the + dependency graph (this does not necessarily have to be the whole checkpoint; + calling `run_restore_ops` while `assert_consumed` fails is supported and will + partially restore the checkpoint). + + See `Saver.restore` for usage examples. + """ + + def __init__(self, checkpoint, feed_dict): + self._checkpoint = checkpoint + self._feed_dict = feed_dict + + def assert_consumed(self): + """Asserts that all objects in the checkpoint have been created/matched. + + Returns: + `self` for chaining. + Raises: + AssertionError: If there are any Python objects in the dependency graph + which have not been restored from this checkpoint or a later `restore`, + or if there are any checkpointed values which have not been matched to + Python objects. + """ + for node_id, node in enumerate(self._checkpoint.object_graph_proto.nodes): + checkpointable = self._checkpoint.object_by_proto_id.get(node_id, None) + if checkpointable is None: + raise AssertionError("Unresolved object in checkpoint: %s" % (node,)) + if checkpointable._update_uid < self._checkpoint.restore_uid: # pylint: disable=protected-access + raise AssertionError( + "Object not assigned a value from checkpoint: %s" % (node,)) + if self._checkpoint.slot_restorations: + # Sanity check; this collection should be clear if everything has been + # restored. 
+ raise AssertionError("Unresolved slot restorations: %s" % ( + self._checkpoint.slot_restorations,)) + if self._checkpoint.unused_attributes: + raise AssertionError( + ("Unused attributes in these objects (the attributes exist in the " + "checkpoint but not in the objects): %s") % ( + self._checkpoint.unused_attributes.items(),)) + return self + + def run_restore_ops(self, session=None): + """Run operations to restore objects in the dependency graph.""" + if context.executing_eagerly(): + return # Run eagerly + if session is None: + session = ops.get_default_session() + session.run(self._checkpoint.restore_ops, feed_dict=self._feed_dict) + + def initialize_or_restore(self, session=None): + """Alias for `run_restore_ops`. + + This method has a sibling in `InitializationOnlyStatus` which instead + initializes variables. That type is returned if no checkpoint is specified + in `Saver.restore`. + + Args: + session: The session to run restore ops in. If `None`, uses the default + session. + """ + self.run_restore_ops(session=session) + + +class InitializationOnlyStatus(_LoadStatus): + """Returned from `Saver.restore` when no checkpoint has been specified. + + Objects of this type have the same `assert_consumed` method as + `CheckpointLoadStatus`, but it always fails. However, + `initialize_or_restore` works on objects of both types, and will + initialize variables in `InitializationOnlyStatus` objects or restore them + otherwise. + """ + + def __init__(self, root_checkpointable): + self._root_checkpointable = root_checkpointable + + def assert_consumed(self): + """Assertion for consistency with `CheckpointLoadStatus`. Always fails.""" + raise AssertionError( + "No checkpoint specified (save_path=None); nothing is being restored.") + + def run_restore_ops(self, session=None): + """For consistency with `CheckpointLoadStatus`. + + Use `initialize_or_restore` for initializing if no checkpoint was passed + to `Saver.restore` and restoring otherwise. 
+ + Args: + session: Not used. + """ + raise AssertionError( + "No checkpoint specified, so no restore ops are available " + "(save_path=None to Saver.restore).") + + def initialize_or_restore(self, session=None): + """Runs initialization ops for variables. + + Only objects which would be saved by `Saver.save` will be initialized. See + `gather_initializers` for details. + + This method does nothing when executing eagerly (initializers get run + eagerly). + + Args: + session: The session to run initialization ops in. If `None`, uses the + default session. + """ + if context.executing_eagerly(): + return # run eagerly + if session is None: + session = ops.get_default_session() + session.run(gather_initializers(self._root_checkpointable)) + + +_DEPRECATED_RESTORE_INSTRUCTIONS = ( + "Restoring a name-based tf.train.Saver checkpoint using the object-based " + "restore API. This mode uses global names to match variables, and so is " + "somewhat fragile. It also adds new restore ops to the graph each time it " + "is called. Prefer re-encoding training checkpoints in the object-based " + "format: run save() on the object-based saver (the same one this message " + "is coming from) and use that checkpoint in the future.") + + +class NameBasedSaverStatus(_LoadStatus): + """Status for loading a name-based training checkpoint.""" + + def __init__(self, object_saver, save_path): + self._object_saver = object_saver + self._save_path = save_path + + def assert_consumed(self): + """Assertion for consistency with `CheckpointLoadStatus`. Always fails.""" + raise AssertionError( + "Restoring a name-based checkpoint. 
No load status is available.") + + @deprecation.deprecated( + date=None, instructions=_DEPRECATED_RESTORE_INSTRUCTIONS) + def run_restore_ops(self, session=None): + """Load the name-based training checkpoint using a new `tf.train.Saver`.""" + if session is None and not context.executing_eagerly(): + session = ops.get_default_session() + with ops.device("/cpu:0"): + saver_lib.Saver(self._object_saver._global_variable_names()).restore( # pylint: disable=protected-access + sess=session, save_path=self._save_path) + + def initialize_or_restore(self, session=None): + """Alias for `run_restore_ops`.""" + self.run_restore_ops(session=session) + + +class _SessionWithFeedDictAdditions(session_lib.SessionInterface): + """Pretends to be a session, inserts extra feeds on run().""" + + def __init__(self, session, feed_additions): + self._wrapped_session = session + self._feed_additions = feed_additions + + def run(self, fetches, feed_dict=None, **kwargs): + if feed_dict is None: + feed_dict = {} + else: + feed_dict = feed_dict.copy() + feed_dict.update(self._feed_additions) + return self._wrapped_session.run( + fetches=fetches, feed_dict=feed_dict, **kwargs) + + +def _copy_saver_with_new_var_list(old_saver, new_var_list): + """Copy a `tf.train.Saver`'s state to a new Saver with different variables.""" + new_saver = saver_lib.Saver(var_list=new_var_list) + # TODO(allenl): Move to copying functionality to Saver? + # pylint: disable=protected-access + new_saver._last_checkpoints = old_saver._last_checkpoints + new_saver._checkpoints_to_be_deleted = old_saver._checkpoints_to_be_deleted + new_saver._next_checkpoint_time = old_saver._next_checkpoint_time + # pylint: enable=protected-access + return new_saver + + +class CheckpointableSaver(object): + """Saves and restores a `Checkpointable` object and its dependencies. + + See `Checkpointable` for details of dependency management. 
`Saver` wraps + `tf.train.Saver` for saving, including extra information about the graph of + dependencies between Python objects. When restoring, it uses this information + about the save-time dependency graph to more robustly match objects with their + checkpointed values. When executing eagerly, it supports restoring variables + on object creation (see `Saver.restore`). + + Values in a checkpoint are mapped to `Checkpointable` Python objects + (`Variable`s, `Optimizer`s, `Layer`s) based on the names provided when the + checkpoint was written. To avoid breaking existing checkpoints when modifying + a class, dependency names (the names of attributes to which `Checkpointable` + objects are assigned) may not change. These names are local to objects, in + contrast to the `Variable.name`-based save/restore from `tf.train.Saver`, and + so allow additional program transformations. + """ + + def __init__(self, root_checkpointable): + """Configure saving. + + Args: + root_checkpointable: The root of the object graph to save/restore. This + object and all of its dependencies are saved in the checkpoint. When + restoring, objects are matched and restored starting from this root. + """ + # Allow passing in a weak reference to avoid reference cycles when + # `Checkpointable` objects save themselves. 
+ self._root_checkpointable_ref = root_checkpointable + if not context.executing_eagerly(): + with ops.device("/cpu:0"): + self._file_prefix_placeholder = constant_op.constant("model") + else: + self._file_prefix_placeholder = None + + # Op caching for save + self._object_graph_feed_tensor = None + self._last_save_object_graph = None + self._last_save_saver = None + + # Op caching for restore + self._last_restore_object_graph = None + self._last_restore_checkpoint = None + + @property + def _root_checkpointable(self): + if isinstance(self._root_checkpointable_ref, weakref.ref): + derefed = self._root_checkpointable_ref() + assert derefed is not None + return derefed + else: + return self._root_checkpointable_ref + + def save(self, file_prefix, checkpoint_number=None, session=None): + """Save a training checkpoint. + + The saved checkpoint includes variables created by this object and any + Checkpointable objects it depends on at the time `Saver.save()` is called. + + Args: + file_prefix: A prefix to use for the checkpoint filenames + (/path/to/directory/and_a_prefix). Names are generated based on this + prefix and `checkpoint_number`, if provided. + checkpoint_number: An integer variable or Tensor, used to number + checkpoints. Typically this value is saved along with other variables in + training checkpoints, which will happen automatically if it was created + by `root_checkpointable` or one of its dependencies (via + `Checkpointable._add_variable`). + session: The session to evaluate variables in. Ignored when executing + eagerly. If not provided when graph building, the default session is + used. + + Returns: + The full path to the checkpoint. 
+ """ + named_variables, graph_proto = _serialize_object_graph( + self._root_checkpointable) + if not context.executing_eagerly(): + if session is None: + session = ops.get_default_session() + if self._object_graph_feed_tensor is None: + with ops.device("/cpu:0"): + self._object_graph_feed_tensor = constant_op.constant( + "", dtype=dtypes.string) + object_graph_tensor = self._object_graph_feed_tensor + feed_additions = {object_graph_tensor: graph_proto.SerializeToString()} + else: + session = None + with ops.device("/cpu:0"): + object_graph_tensor = constant_op.constant( + graph_proto.SerializeToString(), dtype=dtypes.string) + feed_additions = None + assert checkpointable_lib.OBJECT_GRAPH_PROTO_KEY not in named_variables + named_variables[checkpointable_lib.OBJECT_GRAPH_PROTO_KEY] = ( + _NoRestoreSaveable( + tensor=object_graph_tensor, + name=checkpointable_lib.OBJECT_GRAPH_PROTO_KEY)) + if (self._last_save_object_graph != graph_proto + # When executing eagerly, we need to re-create SaveableObjects each time + # save() is called so they pick up new Tensors passed to their + # constructors. That means the Saver needs to be copied with a new + # var_list. 
+ or context.executing_eagerly()): + if self._last_save_object_graph is not None: + self._last_save_saver = _copy_saver_with_new_var_list( + old_saver=self._last_save_saver, new_var_list=named_variables) + else: + self._last_save_saver = saver_lib.Saver(var_list=named_variables) + self._last_save_object_graph = graph_proto + with ops.device("/cpu:0"): + save_path = self._last_save_saver.save( + sess=_SessionWithFeedDictAdditions( + session=session, feed_additions=feed_additions), + save_path=file_prefix, + write_meta_graph=False, + global_step=checkpoint_number) + return save_path + + def _global_variable_names(self): + """Generate a `tf.train.Saver`-style `var_list` using `variable.name`s.""" + named_saveables, graph_proto = _serialize_object_graph( + self._root_checkpointable) + saver_names = {} + for object_proto in graph_proto.nodes: + for attribute_proto in object_proto.attributes: + saver_names[attribute_proto.full_name] = named_saveables[ + attribute_proto.checkpoint_key] + return saver_names + + def restore(self, save_path): + """Restore a training checkpoint. + + Restores `root_checkpointable` and any objects that it tracks + (transitive). Either assigns values immediately if variables to restore have + been created already, or defers restoration until the variables are + created. Dependencies added to the `root_checkpointable` passed to the + constructor after this call will be matched if they have a corresponding + object in the checkpoint. + + When building a graph, restorations are added to the graph but not run. + + To disallow deferred loading, assert immediately that all checkpointed + variables have been matched to variable objects: + + ```python + saver = Saver(root) + saver.restore(path).assert_consumed() + ``` + + An exception will be raised unless every object was matched and its + variables already exist. 
+ + When graph building, `assert_consumed()` indicates that all of the restore + ops which will be created for this checkpoint have been created. They can be + run via the `run_restore_ops()` function of the status object: + + ```python + saver.restore(path).assert_consumed().run_restore_ops() + ``` + + If the checkpoint has not been consumed completely, then the list of restore + ops will grow as more objects are added to the dependency graph. + + Name-based `tf.train.Saver` checkpoints can be loaded using this + method. There is no deferred loading, and names are used to match + variables. No restore ops are created/run until `run_restore_ops()` or + `initialize_or_restore()` are called on the returned status object, even + when executing eagerly. Re-encode name-based checkpoints using this + object-based `Saver.save` as soon as possible. + + Args: + save_path: The path to the checkpoint, as returned by `save` or + `tf.train.latest_checkpoint`. If None (as when there is no latest + checkpoint for `tf.train.latest_checkpoint` to return), returns an + object which may run initializers for objects in the dependency + graph. If the checkpoint was written by the name-based `tf.train.Saver`, + names are used to match variables. + + Returns: + A load status object, which can be used to make assertions about the + status of checkpoint restoration and run initialization/restore ops + (of type `CheckpointLoadStatus`, or `InitializationOnlyStatus` if + `save_path` is `None`). + + If `save_path` points to a name-based checkpoint, a `NameBasedSaverStatus` + object is returned which runs restore ops from a name-based saver. 
+ """ + if save_path is None: + return InitializationOnlyStatus(self._root_checkpointable) + in_graph_mode = not context.executing_eagerly() + if in_graph_mode: + file_prefix_tensor = self._file_prefix_placeholder + file_prefix_feed_dict = {self._file_prefix_placeholder: save_path} + else: + with ops.device("/cpu:0"): + file_prefix_tensor = constant_op.constant(save_path) + file_prefix_feed_dict = None + reader = pywrap_tensorflow.NewCheckpointReader(save_path) + try: + object_graph_string = reader.get_tensor( + checkpointable_lib.OBJECT_GRAPH_PROTO_KEY) + except errors_impl.NotFoundError: + # The object graph proto does not exist in this checkpoint. Try again with + # name-based saving. + return NameBasedSaverStatus(self, save_path) + + object_graph_proto = ( + checkpointable_object_graph_pb2.CheckpointableObjectGraph()) + object_graph_proto.ParseFromString(object_graph_string) + if in_graph_mode and object_graph_proto == self._last_restore_object_graph: + checkpoint = self._last_restore_checkpoint + else: + if in_graph_mode: + dtype_map = None + else: + dtype_map = reader.get_variable_to_dtype_map() + checkpoint = _CheckpointRestoreCoordinator( + object_graph_proto=object_graph_proto, + save_path=file_prefix_tensor, + dtype_map=dtype_map) + if in_graph_mode: + if self._last_restore_object_graph is not None: + raise NotImplementedError( + "Using a single Saver to restore different object graphs is not " + "currently supported when graph building. 
Use a different Saver " + "for each object graph (restore ops will be duplicated), or " + "file a feature request if this limitation bothers you.") + self._last_restore_checkpoint = checkpoint + self._last_restore_object_graph = object_graph_proto + checkpointable_lib._CheckpointPosition( # pylint: disable=protected-access + checkpoint=checkpoint, proto_id=0).restore(self._root_checkpointable) + load_status = CheckpointLoadStatus( + checkpoint, feed_dict=file_prefix_feed_dict) + return load_status + + +@tf_export("train.Checkpoint") +class Checkpoint(checkpointable_lib.Checkpointable): + """Groups checkpointable objects, saving and restoring them. + + `Checkpoint`'s constructor accepts keyword arguments whose values are types + that contain checkpointable state, such as `tf.train.Optimizer` + implementations, `tf.Variable`, `tf.keras.Layer` implementations, or + `tf.keras.Model` implementations. It saves these values with a checkpoint, and + maintains a `save_counter` for numbering checkpoints. + + Example usage when graph building: + + ```python + import tensorflow as tf + import os + + checkpoint_directory = "/tmp/training_checkpoints" + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + + checkpoint = tf.train.Checkpoint(optimizer=optimizer, model=model) + status = checkpoint.restore(tf.train.latest_checkpoint(checkpoint_directory)) + train_op = optimizer.minimize( ... ) + status.assert_consumed() # Optional sanity checks. + with tf.Session() as session: + # Use the Session to restore variables, or initialize them if + # tf.train.latest_checkpoint returned None. 
+ status.initialize_or_restore(session) + for _ in range(num_training_steps): + session.run(train_op) + checkpoint.save(file_prefix=checkpoint_prefix) + ``` + + Example usage with eager execution enabled: + + ```python + import tensorflow as tf + import os + + tf.enable_eager_execution() + + checkpoint_directory = "/tmp/training_checkpoints" + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + + checkpoint = tf.train.Checkpoint(optimizer=optimizer, model=model) + status = checkpoint.restore(tf.train.latest_checkpoint(checkpoint_directory)) + for _ in range(num_training_steps): + optimizer.minimize( ... ) # Variables will be restored on creation. + status.assert_consumed() # Optional sanity checks. + checkpoint.save(file_prefix=checkpoint_prefix) + ``` + + `Checkpoint.save` and `Checkpoint.restore` write and read object-based + checkpoints, in contrast to `tf.train.Saver` which writes and reads + `variable.name` based checkpoints. Object-based checkpointing saves a graph of + dependencies between Python objects (`Layer`s, `Optimizer`s, `Variable`s, + etc.) with named edges, and this graph is used to match variables when + restoring a checkpoint. It can be more robust to changes in the Python + program, and helps to support restore-on-create for variables when executing + eagerly. Prefer `tf.train.Checkpoint` over `tf.train.Saver` for new code. + + `Checkpoint` objects have dependencies on the objects passed as keyword + arguments to their constructors, and each dependency is given a name that is + identical to the name of the keyword argument for which it was created. + TensorFlow classes like `Layer`s and `Optimizer`s will automatically add + dependencies on their variables (e.g. "kernel" and "bias" for + `tf.keras.layers.Dense`). Inheriting from `tf.keras.Model` makes managing + dependencies easy in user-defined classes, since `Model` hooks into attribute + assignment. 
For example: + + ```python + class Regress(tf.keras.Model): + + def __init__(self): + super(Regress, self).__init__() + self.input_transform = tf.keras.layers.Dense(10) + # ... + + def call(self, inputs): + x = self.input_transform(inputs) + # ... + ``` + + This `Model` has a dependency named "input_transform" on its `Dense` layer, + which in turn depends on its variables. As a result, saving an instance of + `Regress` using `tf.train.Checkpoint` will also save all the variables created + by the `Dense` layer. + + Attributes: + save_counter: Incremented when `save()` is called. Used to number + checkpoints. + """ + + def __init__(self, **kwargs): + """Group objects into a training checkpoint. + + Args: + **kwargs: Keyword arguments are set as attributes of this object, and are + saved with the checkpoint. Values must be checkpointable objects. + Raises: + ValueError: If objects in `kwargs` are not checkpointable. + """ + super(Checkpoint, self).__init__() + for k, v in sorted(kwargs.items(), key=lambda item: item[0]): + if not isinstance(v, checkpointable_lib.CheckpointableBase): + raise ValueError( + ("`Checkpoint` was expecting a checkpointable object (an object " + "derived from `CheckpointableBase`), got %s. If you believe this " + "object should be checkpointable (i.e. it is part of the " + "TensorFlow Python API and manages state), please open an issue.") + % (v,)) + setattr(self, k, v) + self._save_counter = None # Created lazily for restore-on-create. + self._saver = CheckpointableSaver(weakref.ref(self)) + + def _maybe_create_save_counter(self): + """Create a save counter if it does not yet exist.""" + if self._save_counter is None: + # Initialized to 0 and incremented before saving. + with ops.device("/cpu:0"): + self._save_counter = add_variable( + self, name="save_counter", initializer=0, dtype=dtypes.int64) + + @property + def save_counter(self): + """An integer variable which starts at zero and is incremented on save. + + Used to number checkpoints. 
+ + Returns: + The save counter variable. + """ + self._maybe_create_save_counter() + return self._save_counter + + def save(self, file_prefix, session=None): + """Save a training checkpoint. + + The saved checkpoint includes variables created by this object and any + checkpointable objects it depends on at the time `Checkpoint.save()` is + called. + + Args: + file_prefix: A prefix to use for the checkpoint filenames + (/path/to/directory/and_a_prefix). Names are generated based on this + prefix and `Checkpoint.save_counter`. + session: The session to evaluate variables in. Ignored when executing + eagerly. If not provided when graph building, the default session is + used. + + Returns: + The full path to the checkpoint. + """ + in_graph_mode = not context.executing_eagerly() + if in_graph_mode: + if session is None: + session = ops.get_default_session() + if self._save_counter is None: + # When graph building, if this is a new save counter variable then it + # needs to be initialized before assign_add. This is only an issue if + # restore() has not been called first. + session.run(self.save_counter.initializer) + with ops.colocate_with(self.save_counter): + assign_op = self.save_counter.assign_add(1) + if in_graph_mode: + session.run(assign_op) + return self._saver.save( + file_prefix=file_prefix, + checkpoint_number=self.save_counter, + session=session) + + def restore(self, save_path): + """Restore a training checkpoint. + + Restores this `Checkpoint` and any objects it depends on. + + When executing eagerly, either assigns values immediately if variables to + restore have been created already, or defers restoration until the variables + are created. Dependencies added after this call will be matched if they have + a corresponding object in the checkpoint (the restore request will queue in + any checkpointable object waiting for the expected dependency to be added). + + When graph building, restoration ops are added to the graph but not run + immediately. 
+ + To ensure that loading is complete and no more assignments will take place, + use the `assert_consumed()` method of the status object returned by + `restore`: + + ```python + checkpoint = tf.train.Checkpoint( ... ) + checkpoint.restore(path).assert_consumed() + ``` + + An exception will be raised if any Python objects in the dependency graph + were not found in the checkpoint, or if any checkpointed values do not have + a matching Python object. + + When graph building, `assert_consumed()` indicates that all of the restore + ops that will be created for this checkpoint have been created. They can be + run via the `run_restore_ops()` method of the status object: + + ```python + checkpoint.restore(path).assert_consumed().run_restore_ops() + ``` + + If the checkpoint has not been consumed completely, then the list of restore + ops will grow as more objects are added to the dependency graph. + + Name-based `tf.train.Saver` checkpoints can be loaded using this + method. There is no deferred loading, and names are used to match + variables. No restore ops are created/run until `run_restore_ops()` or + `initialize_or_restore()` are called on the returned status object, even + when executing eagerly. Re-encode name-based checkpoints using + `tf.train.Checkpoint.save` as soon as possible. + + Args: + save_path: The path to the checkpoint, as returned by `save` or + `tf.train.latest_checkpoint`. If None (as when there is no latest + checkpoint for `tf.train.latest_checkpoint` to return), returns an + object which may run initializers for objects in the dependency + graph. If the checkpoint was written by the name-based `tf.train.Saver`, + names are used to match variables. + + Returns: + A load status object, which can be used to make assertions about the + status of a checkpoint restoration and run initialization/restore ops. 
+ + The returned status object has the following methods: + - `assert_consumed()`: + Raises an exception if any variables/objects are unmatched: either + checkpointed values which don't have a matching Python object or + Python objects in the dependency graph with no values in the + checkpoint. This method returns the status object, and so may be + chained with `initialize_or_restore` or `run_restore_ops`. + - `initialize_or_restore(session=None)`: + When graph building, runs variable initializers if `save_path` is + `None`, but otherwise runs restore operations. If no `session` is + explicitly specified, the default session is used. No effect for + object-based checkpoints when executing eagerly (variables are + initialized or restored eagerly). + - `run_restore_ops(session=None)`: + When graph building, runs restore operations. If no `session` is + explicitly specified, the default session is used. No effect for + object-based checkpoints when executing eagerly (restore operations + are run eagerly). May only be called when `save_path` is not `None`. + """ + status = self._saver.restore(save_path=save_path) + # Create the save counter now so it gets initialized with other variables + # when graph building. Creating it earlier would lead to double + # initialization when executing eagerly. 
+ self._maybe_create_save_counter() + return status diff --git a/tensorflow/contrib/eager/python/checkpointable_utils_test.py b/tensorflow/python/training/checkpointable_utils_test.py similarity index 94% rename from tensorflow/contrib/eager/python/checkpointable_utils_test.py rename to tensorflow/python/training/checkpointable_utils_test.py index 5e1b64728a02405a3032aabcc5e12dd1f5229d2b..29fcdb70b418b08f543d75ff643b82d2a456754d 100644 --- a/tensorflow/contrib/eager/python/checkpointable_utils_test.py +++ b/tensorflow/python/training/checkpointable_utils_test.py @@ -21,10 +21,10 @@ import os import six -from tensorflow.contrib.eager.python import checkpointable_utils from tensorflow.python.client import session as session_lib from tensorflow.python.eager import backprop from tensorflow.python.eager import context +from tensorflow.python.eager import function from tensorflow.python.eager import test from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes @@ -32,7 +32,7 @@ from tensorflow.python.framework import ops from tensorflow.python.framework import test_util from tensorflow.python.keras._impl.keras.engine import sequential from tensorflow.python.keras._impl.keras.engine import training -from tensorflow.python.layers import core +from tensorflow.python.keras._impl.keras.layers import core from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import init_ops from tensorflow.python.ops import resource_variable_ops @@ -41,7 +41,8 @@ from tensorflow.python.ops import template from tensorflow.python.ops import variable_scope from tensorflow.python.training import adam from tensorflow.python.training import checkpointable -from tensorflow.python.training import saver as core_saver +from tensorflow.python.training import checkpointable_utils +from tensorflow.python.training import saver as saver_lib from tensorflow.python.training import training_util @@ -155,13 +156,13 @@ class 
InterfaceTests(test.TestCase): self.assertAllEqual([1., 1., 1.], self.evaluate(v2)) -class _MirroringSaveable(core_saver.BaseSaverBuilder.SaveableObject): +class _MirroringSaveable(saver_lib.BaseSaverBuilder.SaveableObject): def __init__(self, primary_variable, mirrored_variable, name): self._primary_variable = primary_variable self._mirrored_variable = mirrored_variable tensor = self._primary_variable.read_value() - spec = core_saver.BaseSaverBuilder.SaveSpec( + spec = saver_lib.BaseSaverBuilder.SaveSpec( tensor=tensor, slice_spec="", name=name) @@ -308,7 +309,7 @@ class CheckpointingTests(test.TestCase): optimizer_node.slot_variables[0] .slot_variable_node_id].attributes[0].checkpoint_key) - @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True) + @test_util.run_in_graph_and_eager_modes() def testMoreComplexSaveableReturned(self): v = _OwnsMirroredVariables() checkpoint = checkpointable_utils.Checkpoint(v=v) @@ -332,7 +333,7 @@ class CheckpointingTests(test.TestCase): def testMoreComplexSaveableReturnedWithGlobalName(self): # The same object can also be saved using the name-based saver. v = _OwnsMirroredVariables() - saver = core_saver.Saver(var_list=[v]) + saver = saver_lib.Saver(var_list=[v]) test_dir = self.get_temp_dir() prefix = os.path.join(test_dir, "ckpt") self.evaluate(v.non_dep_variable.assign(42.)) @@ -419,7 +420,7 @@ class CheckpointingTests(test.TestCase): root = checkpointable_utils.Checkpoint( optimizer=optimizer, model=model, optimizer_step=training_util.get_or_create_global_step()) - root.restore(core_saver.latest_checkpoint(checkpoint_directory)) + root.restore(saver_lib.latest_checkpoint(checkpoint_directory)) for _ in range(num_training_steps): # TODO(allenl): Use a Dataset and serialize/checkpoint it. 
input_value = constant_op.constant([[3.]]) @@ -447,7 +448,7 @@ class CheckpointingTests(test.TestCase): train_op = optimizer.minimize( model(input_value), global_step=root.global_step) - checkpoint_path = core_saver.latest_checkpoint(checkpoint_directory) + checkpoint_path = saver_lib.latest_checkpoint(checkpoint_directory) with self.test_session(graph=ops.get_default_graph()) as session: status = root.restore(save_path=checkpoint_path) status.initialize_or_restore(session=session) @@ -480,7 +481,7 @@ class CheckpointingTests(test.TestCase): root = checkpointable_utils.Checkpoint( optimizer=optimizer, model=model, global_step=training_util.get_or_create_global_step()) - checkpoint_path = core_saver.latest_checkpoint(checkpoint_directory) + checkpoint_path = saver_lib.latest_checkpoint(checkpoint_directory) status = root.restore(save_path=checkpoint_path) input_value = constant_op.constant([[3.]]) train_fn = functools.partial( @@ -498,6 +499,50 @@ class CheckpointingTests(test.TestCase): self.assertEqual(training_continuation + 1, self.evaluate(root.save_counter)) + # pylint: disable=cell-var-from-loop + @test_util.run_in_graph_and_eager_modes() + def testWithDefun(self): + num_training_steps = 2 + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + for training_continuation in range(3): + with ops.Graph().as_default(), self.test_session( + graph=ops.get_default_graph()), test_util.device(use_gpu=True): + model = MyModel() + # Don't actually train so we can test variable values + optimizer = adam.AdamOptimizer(0.) 
+ root = checkpointable_utils.Checkpoint( + optimizer=optimizer, model=model, + global_step=training_util.get_or_create_global_step()) + checkpoint_path = saver_lib.latest_checkpoint(checkpoint_directory) + status = root.restore(save_path=checkpoint_path) + def train_fn(): + @function.defun + def _call_model(x): + return model(x) + with backprop.GradientTape() as tape: + loss = _call_model(constant_op.constant([[3.]])) + gradients = tape.gradient(loss, model.variables) + return optimizer.apply_gradients(zip(gradients, model.variables), + global_step=root.global_step) + if not context.executing_eagerly(): + train_fn = functools.partial( + self.evaluate, train_fn()) + status.initialize_or_restore() + for _ in range(num_training_steps): + train_fn() + if training_continuation > 0: + status.assert_consumed() + self.assertAllClose([[42.]], self.evaluate(model.variables[0])) + else: + self.evaluate(model.variables[0].assign([[42.]])) + root.save(file_prefix=checkpoint_prefix) + self.assertEqual((training_continuation + 1) * num_training_steps, + self.evaluate(root.global_step)) + self.assertEqual(training_continuation + 1, + self.evaluate(root.save_counter)) + # pylint: enable=cell-var-from-loop + def _get_checkpoint_name(self, name): root = checkpointable.Checkpointable() checkpointable_utils.add_variable( @@ -632,7 +677,7 @@ class CheckpointingTests(test.TestCase): status.run_restore_ops() self.assertEqual(-14., self.evaluate(loaded_dep_after_var.dep.var)) - @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True) + @test_util.run_in_graph_and_eager_modes() def testDeferredSlotRestoration(self): checkpoint_directory = self.get_temp_dir() @@ -697,7 +742,7 @@ class CheckpointingTests(test.TestCase): self.evaluate(train_op) slot_status.assert_consumed() - @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True) + @test_util.run_in_graph_and_eager_modes() def testOverlappingRestores(self): checkpoint_directory = self.get_temp_dir() save_root = 
checkpointable.Checkpointable() @@ -748,7 +793,7 @@ class CheckpointingTests(test.TestCase): second_status.run_restore_ops() self.assertEqual(12., self.evaluate(load_dep.var)) - @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True) + @test_util.run_in_graph_and_eager_modes() def testAmbiguousLoad(self): # Not OK to split one checkpoint object into two checkpoint_directory = self.get_temp_dir() @@ -771,7 +816,7 @@ class CheckpointingTests(test.TestCase): "resolved to different objects"): load_root.dep_two.dep_three = checkpointable.Checkpointable() - @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True) + @test_util.run_in_graph_and_eager_modes() def testObjectsCombined(self): # Currently fine to load two checkpoint objects into one Python object checkpoint_directory = self.get_temp_dir() @@ -894,7 +939,7 @@ class CheckpointingTests(test.TestCase): saver.save(checkpoint_prefix) self.assertEqual(before_ops, graph.get_operations()) - @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True) + @test_util.run_in_graph_and_eager_modes() def testCheckpointCleanup(self): checkpoint_directory = self.get_temp_dir() checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") @@ -914,7 +959,7 @@ class CheckpointingTests(test.TestCase): expected_filenames, os.listdir(checkpoint_directory)) - @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True) + @test_util.run_in_graph_and_eager_modes() def testCheckpointCleanupChangingVarList(self): checkpoint_directory = self.get_temp_dir() checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") @@ -1072,7 +1117,7 @@ class CheckpointingTests(test.TestCase): class TemplateTests(test.TestCase): - @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True) + @test_util.run_in_graph_and_eager_modes() def test_checkpointable_save_restore(self): def _templated(): @@ -1103,7 +1148,7 @@ class TemplateTests(test.TestCase): self.assertAllEqual([13.], 
self.evaluate(var_plus_one)) self.assertAllEqual([14.], self.evaluate(var2)) - @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True) + @test_util.run_in_graph_and_eager_modes() def test_checkpointable_save_restore_nested(self): def _inner_template(): @@ -1199,7 +1244,7 @@ class CheckpointCompatibilityTests(test.TestCase): with save_graph.as_default(), self.test_session( graph=save_graph) as session: root = self._initialized_model() - name_saver = core_saver.Saver() + name_saver = saver_lib.Saver() return name_saver.save( sess=session, save_path=checkpoint_prefix, global_step=root.optimizer_step) @@ -1223,9 +1268,6 @@ class CheckpointCompatibilityTests(test.TestCase): status.initialize_or_restore() self._check_sentinels(root) - # TODO(allenl): Test for the core name-based saver loading object-based - # checkpoints once object-based checkpointing is in core. - def testSaveGraphLoadEager(self): checkpoint_directory = self.get_temp_dir() checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") diff --git a/tensorflow/python/training/distribute.py b/tensorflow/python/training/distribute.py index c82b898bd04c1a259a4f41fc8e5ad8c0905a1242..21ec5292adb5bb6463b5d46d934750fac7f8fcef 100644 --- a/tensorflow/python/training/distribute.py +++ b/tensorflow/python/training/distribute.py @@ -20,6 +20,7 @@ from __future__ import print_function import threading +from tensorflow.python.data.ops import dataset_ops from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops @@ -376,7 +377,9 @@ class DistributionStrategy(object): update. Allreduce is an algorithm for performing a reduction on values from multiple devices and making the result available on all of those devices. - * TODO(josh11b): Future: partitioned variables + * In the future we will have support for TensorFlow's partitioned + variables, where a single variable is split across multiple + devices.
We have then a few approaches we want to support: * Code written (as if) with no knowledge of class `DistributionStrategy`. @@ -389,8 +392,8 @@ class DistributionStrategy(object): ``` with my_distribution.scope(): - iterator = my_distribution.distribute_dataset(dataset) - # TODO(josh11b): iterator = dataset.make_one_shot_iterator() + iterator = my_distribution.distribute_dataset( + dataset).make_one_shot_iterator() tower_train_ops = my_distribution.call_for_each_tower( tower_fn, iterator.get_next()) train_op = tf.group(my_distribution.unwrap(tower_train_ops)) @@ -402,6 +405,16 @@ class DistributionStrategy(object): using `my_distribution`'s policy, and library functions called by `tower_fn` can use the `get_tower_context()` API to get enhanced behavior in this case. + + You can also create an initializable iterator instead of one shot iterator. + In that case, you will need to ensure that you initialize the iterator + before calling get_next. + ``` + iterator = my_distribution.distribute_dataset( + dataset).make_initializable_iterator()) + session.run(iterator.initializer) + ``` + * If you want to write a distributed algorithm, you may use any of the `DistributionStrategy` APIs inside a `with my_distribution.scope():` block of code. @@ -481,8 +494,8 @@ class DistributionStrategy(object): a variable (which by definition will have locality V(`v`), though will match another locality if inside a `colocate_vars_with` scope). - * `d.distribute_dataset(dataset)`: in cross-tower context, produces an - iterator with locality T + * `d.distribute_dataset(dataset).make_one_shot_iterator()`: in cross-tower + context, produces an iterator with locality T * `d.broadcast(t)`: in cross-tower context, produces a value with locality M * `d.broadcast(t, v)`: in cross-tower context, produces a value with locality V(`v`) @@ -505,7 +518,7 @@ class DistributionStrategy(object): The standard pattern for updating variables is to: - 1. Wrap your input dataset in `d.distribute_dataset()`. 
+ 1. Wrap your input dataset in `d.distribute_dataset()` and create an iterator. 2. Define each tower `d.call_for_each_tower()` up to the point of getting a list of gradient, variable pairs. 3. Call `d.reduce("sum", t, v)` or `d.batch_reduce()` to sum the @@ -514,7 +527,7 @@ class DistributionStrategy(object): Steps 3 and 4 are done automatically by class `Optimizer` if you call its `apply_gradients` method in a tower context. Otherwise you can - manually call its `distributed_apply` method in a cross-tower context. + manually call its `_distributed_apply` method in a cross-tower context. Another thing you might want to do in the middle of your tower function is an all-reduce of some intermediate value, using `d.reduce()` or @@ -660,25 +673,38 @@ class DistributionStrategy(object): _require_distribution_strategy_scope(self) return variable_scope.variable_creator_scope(create_colocated_variable) - # TODO(josh11b): Currently this returns an iterator, but should return - # something implementing (a subset of) the Dataset API. - def distribute_dataset(self, dataset): - """Return an iterator into `dataset` split across all towers. + def _call_dataset_fn(self, dataset_fn): + result = dataset_fn() + if not isinstance(result, dataset_ops.Dataset): + raise ValueError( + "dataset_fn() must return a tf.data.Dataset when using a " + "DistributionStrategy.") + return result - Suitable for providing input to for `call_for_each_tower()`, as in: + # TODO(josh11b): `PerDeviceDataset` currently only implements a few methods of + # Dataset API such as make_one_shot_iterator and make_initializable_iterator. + # Extend to implement more functionality of datasets. + def distribute_dataset(self, dataset_fn): + """Return a `dataset` split across all towers. 
+ + Suitable for providing input to for `call_for_each_tower()` by creating an + iterator: ``` + def dataset_fn(): + return tf.data.Dataset.from_tensors([[1.]]).repeat() with distribution_strategy.scope(): - iterator = distribution_strategy.distribute_dataset(dataset) + distributed_dataset = distribution_strategy.distribute_dataset(dataset_fn) + iterator = distributed_dataset.make_one_shot_iterator() tower_results = distribution_strategy.call_for_each_tower( tower_fn, iterator.get_next()) ``` Args: - dataset: A `tf.data.Dataset`. + dataset_fn: A function that returns a `tf.data.Dataset`. Returns: - A Dataset iterator that will produce separate splits for each tower. + A `PerDeviceDataset` that will produce data for each tower. """ raise NotImplementedError("must be implemented in descendants") @@ -1120,10 +1146,8 @@ class _DefaultDistributionStrategy(DistributionStrategy): _require_distribution_strategy_scope(self) return ops.colocate_with(colocate_with_variable) - def distribute_dataset(self, dataset): - # TODO(josh11b): Support for this when executing eagerly is currently only - # in contrib. - return dataset.make_one_shot_iterator() + def distribute_dataset(self, dataset_fn): + return self._call_dataset_fn(dataset_fn) def _broadcast(self, tensor, destinations): if destinations is None: @@ -1221,13 +1245,16 @@ _default_tower_mode = _DefaultTowerThreadMode() # So here we catch any attempts to deserialize variables # when using distribution strategies. 
# pylint: disable=protected-access +_original_from_proto = resource_variable_ops._from_proto_fn + + def _from_proto_fn(v, import_scope=None): if has_distribution_strategy(): raise NotImplementedError( "Deserialization of variables is not yet supported when using" "distributed strategies.") else: - resource_variable_ops._from_proto_fn(v, import_scope=import_scope) + return _original_from_proto(v, import_scope=import_scope) resource_variable_ops._from_proto_fn = _from_proto_fn # pylint: enable=protected-access diff --git a/tensorflow/python/training/input.py b/tensorflow/python/training/input.py index 44f00a96deff64012705c4c81b185a9c4fac2295..caa26581e8a0041dd1b157ab6b1f8236344582e8 100644 --- a/tensorflow/python/training/input.py +++ b/tensorflow/python/training/input.py @@ -515,8 +515,7 @@ def _store_sparse_tensors(tensor_list, enqueue_many, keep_input, def _sparse_values_to_keep(t, keep_input): """Convert a per-row `keep_input` vector to a per-value one.""" # Get the rows of every value in the sparse Tensor. - row_values = array_ops.reshape( - t.indices, [array_ops.shape(t.indices)[0], -1])[:, 0] + row_values = t.indices[:, 0] # The value should be kept iff the row should be kept. 
return array_ops.gather(keep_input, row_values) if keep_input.shape.ndims == 1: diff --git a/tensorflow/python/training/momentum_test.py b/tensorflow/python/training/momentum_test.py index 297a8bbde5447cff9465be36c0bb71f2490c60fc..7bd57ad3d854534e196fa7b72bebbd7195e6bca8 100644 --- a/tensorflow/python/training/momentum_test.py +++ b/tensorflow/python/training/momentum_test.py @@ -237,7 +237,17 @@ class MomentumOptimizerTest(test.TestCase): @test_util.run_in_graph_and_eager_modes(reset_test=True) def testMinimizeSparseResourceVariable(self): for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: - var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]], dtype=dtype) + # This test invokes the ResourceSparseApplyMomentum operation, which + # did not have a registered GPU kernel as of April 2018. With graph + # execution, the placement algorithm notices this and automatically + # places the variable in CPU (host) memory. With eager execution, + # the variable would be placed in GPU memory if available, which + # would then conflict with the future invocation of the + # ResourceSparseApplyMomentum operation. + # To work around this discrepancy, for now we force the variable + # to be placed on CPU. + with ops.device("/cpu:0"): + var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]], dtype=dtype) # pylint: disable=cell-var-from-loop def loss(): @@ -256,7 +266,17 @@ class MomentumOptimizerTest(test.TestCase): @test_util.run_in_graph_and_eager_modes(reset_test=True) def testMinimizeWith2DIndiciesForEmbeddingLookup(self): - var0 = resource_variable_ops.ResourceVariable(array_ops.ones([2, 2])) + # This test invokes the ResourceSparseApplyMomentum operation, which + # did not have a registered GPU kernel as of April 2018. With graph + # execution, the placement algorithm notices this and automatically + # places the variable in CPU (host) memory. 
With eager execution, + # the variable would be placed in GPU memory if available, which + # would then conflict with the future invocation of the + # ResourceSparseApplyMomentum operation. + # To work around this discrepancy, for now we force the variable + # to be placed on CPU. + with ops.device("/cpu:0"): + var0 = resource_variable_ops.ResourceVariable(array_ops.ones([2, 2])) def loss(): return math_ops.reduce_sum(embedding_ops.embedding_lookup(var0, [[1]])) diff --git a/tensorflow/python/training/monitored_session.py b/tensorflow/python/training/monitored_session.py index 4ce6f6d00267410626f7d7a9e2251d3f40b6bb6e..f584a009d946a193f1ab76b3030db4f8a4954d27 100644 --- a/tensorflow/python/training/monitored_session.py +++ b/tensorflow/python/training/monitored_session.py @@ -202,7 +202,7 @@ class Scaffold(object): if self._local_init_op is None: self._local_init_op = Scaffold.get_or_default( 'local_init_op', ops.GraphKeys.LOCAL_INIT_OP, - Scaffold._default_local_init_op) + Scaffold.default_local_init_op) if self._summary_op is None: self._summary_op = Scaffold.get_or_default('summary_op', ops.GraphKeys.SUMMARY_OP, @@ -267,7 +267,17 @@ class Scaffold(object): return op @staticmethod - def _default_local_init_op(): + def default_local_init_op(): + """Returns an op that groups the default local init ops. + + This op is used during session initialization when a Scaffold is + initialized without specifying the local_init_op arg. It includes + `tf.local_variables_initializer`, `tf.tables_initializer`, and also + initializes local session resources. + + Returns: + The default Scaffold local init op. 
+ """ return control_flow_ops.group( variables.local_variables_initializer(), lookup_ops.tables_initializer(), diff --git a/tensorflow/python/training/optimizer.py b/tensorflow/python/training/optimizer.py index 75665fc2840797dd53dd863721d2744cb1b08af5..f126d3847b6b0b43495c63b31ca915c107ede969 100644 --- a/tensorflow/python/training/optimizer.py +++ b/tensorflow/python/training/optimizer.py @@ -689,9 +689,7 @@ class Optimizer( # device_policy is set because non-mirrored tensors will be read in # `update_op`. `_resource_apply_dense`, `lr_t`, `beta1_t` and `beta2_t` # is an example. - with ops.name_scope( - "update_" + scope_name), context.context().device_policy( - context.DEVICE_PLACEMENT_SILENT): + with ops.name_scope("update_" + scope_name): return p.update_op(self, g) with ops.name_scope(name, self._name) as name: @@ -707,11 +705,8 @@ class Optimizer( return self._finish(update_ops, "update") non_slot_devices = distribution.non_slot_devices(var_list) - # Device policy is needed because hyperparameter tensors (such as - # AdamOptimizer's beta1_t) need to be copied across devices in Eager. - with context.context().device_policy(context.DEVICE_PLACEMENT_SILENT): - finish_updates = distribution.update_non_slot( - non_slot_devices, finish, self, update_ops) + finish_updates = distribution.update_non_slot( + non_slot_devices, finish, self, update_ops) if global_step is None: apply_updates = distribution.group(finish_updates, name=name) else: @@ -823,13 +818,13 @@ class Optimizer( if restored_initial_value is not None: initial_value = restored_initial_value v = variable_scope.variable(initial_value, name=name, trainable=False) - # Restore this variable by name if necessary, but don't add a - # Checkpointable dependency. 
Optimizers return the current graph's - # non-slot variables from _checkpoint_dependencies explicitly rather - # than unconditionally adding dependencies (since there may be multiple - # non-slot variables with the same name in different graphs, trying to - # save all of them would result in errors). - self._handle_deferred_dependencies(name=name, checkpointable=v) + # Restore this variable by name if necessary, but don't add a + # Checkpointable dependency. Optimizers return the current graph's + # non-slot variables from _checkpoint_dependencies explicitly rather + # than unconditionally adding dependencies (since there may be multiple + # non-slot variables with the same name in different graphs, trying to + # save all of them would result in errors). + self._handle_deferred_dependencies(name=name, checkpointable=v) self._non_slot_dict[key] = v return v diff --git a/tensorflow/python/training/saver.py b/tensorflow/python/training/saver.py index e40b8d22ed2ab0f4c9ff65e953f0f1cf681c8068..a74d629a8f81bacd019417b7a359d51f1080b16c 100644 --- a/tensorflow/python/training/saver.py +++ b/tensorflow/python/training/saver.py @@ -22,6 +22,7 @@ from __future__ import print_function import collections import os.path import re +import sys import time import uuid @@ -30,8 +31,10 @@ import six from google.protobuf import text_format +from tensorflow.core.protobuf import checkpointable_object_graph_pb2 from tensorflow.core.protobuf import meta_graph_pb2 from tensorflow.core.protobuf import saver_pb2 +from tensorflow.python import pywrap_tensorflow from tensorflow.python.client import session from tensorflow.python.eager import context from tensorflow.python.framework import constant_op @@ -1340,6 +1343,9 @@ class Saver(object): self._check_saver_def() self._write_version = self.saver_def.version self._save_relative_paths = save_relative_paths + # For compatibility with object-based checkpoints, we may build a second + # Saver to read the renamed keys. 
+ self._object_restore_saver = None def build(self): if context.executing_eagerly(): @@ -1795,11 +1801,63 @@ class Saver(object): if save_path is None: raise ValueError("Can't load save_path when it is None.") logging.info("Restoring parameters from %s", save_path) - if context.executing_eagerly(): - self._build_eager(save_path, build_save=False, build_restore=True) - else: - sess.run(self.saver_def.restore_op_name, - {self.saver_def.filename_tensor_name: save_path}) + try: + if context.executing_eagerly(): + self._build_eager(save_path, build_save=False, build_restore=True) + else: + sess.run(self.saver_def.restore_op_name, + {self.saver_def.filename_tensor_name: save_path}) + except errors.NotFoundError: + exception_type, exception_value, exception_traceback = sys.exc_info() + # The checkpoint would not be loaded successfully as is. Try to parse it + # as an object-based checkpoint. + try: + reader = pywrap_tensorflow.NewCheckpointReader(save_path) + object_graph_string = reader.get_tensor( + checkpointable.OBJECT_GRAPH_PROTO_KEY) + except errors.NotFoundError: + # This is not an object-based checkpoint, or the checkpoint doesn't + # exist. Re-raise the original exception. + six.reraise(exception_type, exception_value, exception_traceback) + del exception_traceback # avoid reference cycles + + # This is an object-based checkpoint. We'll print a warning and then do + # the restore. + logging.warning( + "Restoring an object-based checkpoint using a name-based saver. This " + "may be somewhat fragile, and will re-build the Saver. 
Instead, " + "consider loading object-based checkpoints using " + "tf.train.Checkpoint().") + self._restore_from_object_based_checkpoint( + sess=sess, save_path=save_path, + object_graph_string=object_graph_string) + + def _restore_from_object_based_checkpoint(self, sess, save_path, + object_graph_string): + """A compatibility mode for reading object-based checkpoints.""" + object_graph_proto = ( + checkpointable_object_graph_pb2.CheckpointableObjectGraph()) + object_graph_proto.ParseFromString(object_graph_string) + names_to_keys = {} + for node in object_graph_proto.nodes: + for attribute in node.attributes: + names_to_keys[attribute.full_name] = attribute.checkpoint_key + saveables = self._builder._ValidateAndSliceInputs(self._var_list) # pylint: disable=protected-access + for saveable in saveables: + for spec in saveable.specs: + if spec.name not in names_to_keys: + raise errors.NotFoundError( + None, None, + message=("Attempting to load an object-based checkpoint using " + "variable names, but could not find %s in the " + "checkpoint.") % spec.name) + spec.name = names_to_keys[spec.name] + if self._object_restore_saver is None: + # Cache the Saver so multiple restore() calls don't pollute the graph when + # graph building. This assumes keys are consistent (i.e. this is the same + # type of object-based checkpoint we saw previously). 
+ self._object_restore_saver = Saver(saveables) + self._object_restore_saver.restore(sess=sess, save_path=save_path) @staticmethod def _add_collection_def(meta_graph_def, key, export_scope=None): diff --git a/tensorflow/python/training/saver_test.py b/tensorflow/python/training/saver_test.py index 14dda7997948ead7b12dee953a0b2ee3b2ee8fc9..70495291bc57629252423d86e35987c5c0d2b1ee 100644 --- a/tensorflow/python/training/saver_test.py +++ b/tensorflow/python/training/saver_test.py @@ -19,6 +19,7 @@ from __future__ import division from __future__ import print_function import contextlib +import functools import math import os import random @@ -50,6 +51,8 @@ from tensorflow.python.framework import graph_io from tensorflow.python.framework import meta_graph from tensorflow.python.framework import ops as ops_lib from tensorflow.python.framework import test_util +from tensorflow.python.keras._impl.keras.engine import training +from tensorflow.python.keras._impl.keras.layers import core from tensorflow.python.lib.io import file_io from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops @@ -69,10 +72,12 @@ from tensorflow.python.platform import test from tensorflow.python.summary import summary from tensorflow.python.training import adam from tensorflow.python.training import checkpointable +from tensorflow.python.training import checkpointable_utils from tensorflow.python.training import gradient_descent from tensorflow.python.training import queue_runner_impl from tensorflow.python.training import saver as saver_module from tensorflow.python.training import saver_test_utils +from tensorflow.python.training import training_util from tensorflow.python.training.checkpoint_state_pb2 import CheckpointState from tensorflow.python.util import compat @@ -2726,7 +2731,7 @@ class ScopedGraphTest(test.TestCase): # The rest of the variables. 
rest_variables = list( set(variables.global_variables()) - set(var_list.keys())) - init_rest_op = variables.initialize_variables(rest_variables) + init_rest_op = variables.variables_initializer(rest_variables) with self.test_session(graph=graph) as sess: saver = saver_module.Saver(var_list=var_list, max_to_keep=1) @@ -2948,6 +2953,29 @@ class _OwnsMirroredVariables(checkpointable.CheckpointableBase): return self.non_dep_variable.name +class NonLayerCheckpointable(checkpointable.Checkpointable): + + def __init__(self): + super(NonLayerCheckpointable, self).__init__() + self.a_variable = checkpointable_utils.add_variable( + self, name="a_variable", shape=[]) + + +class MyModel(training.Model): + """A concrete Model for testing.""" + + def __init__(self): + super(MyModel, self).__init__() + self._named_dense = core.Dense(1, use_bias=True) + self._second = core.Dense(1, use_bias=False) + # We can still track Checkpointables which aren't Layers. + self._non_layer = NonLayerCheckpointable() + + def call(self, values): + ret = self._second(self._named_dense(values)) + return ret + + @test_util.with_c_api class CheckpointableCompatibilityTests(test.TestCase): @@ -3011,6 +3039,128 @@ class CheckpointableCompatibilityTests(test.TestCase): saver.restore(sess, save_path) self.assertEqual(1, v.eval_count) + def _initialized_model(self): + input_value = constant_op.constant([[3.]]) + model = MyModel() + optimizer = adam.AdamOptimizer(0.001) + optimizer_step = training_util.get_or_create_global_step() + root_checkpointable = checkpointable_utils.Checkpoint( + optimizer=optimizer, model=model, optimizer_step=optimizer_step) + train_op = optimizer.minimize( + functools.partial(model, input_value), + global_step=optimizer_step) + self.evaluate(checkpointable_utils.gather_initializers( + root_checkpointable)) + self.evaluate(train_op) + # A regular variable, a slot variable, and a non-slot Optimizer variable + # with known values to check when loading. 
+ self.evaluate(model._named_dense.bias.assign([1.])) + self.evaluate(optimizer.get_slot( + var=model._named_dense.bias, name="m").assign([2.])) + beta1_power, _ = optimizer._get_beta_accumulators() + self.evaluate(beta1_power.assign(3.)) + return root_checkpointable + + def _set_sentinels(self, root_checkpointable): + self.evaluate(root_checkpointable.model._named_dense.bias.assign([101.])) + self.evaluate( + root_checkpointable.optimizer.get_slot( + var=root_checkpointable.model._named_dense.bias, name="m") + .assign([102.])) + beta1_power, _ = root_checkpointable.optimizer._get_beta_accumulators() + self.evaluate(beta1_power.assign(103.)) + + def _check_sentinels(self, root_checkpointable): + self.assertAllEqual( + [1.], self.evaluate(root_checkpointable.model._named_dense.bias)) + self.assertAllEqual([2.], self.evaluate( + root_checkpointable.optimizer.get_slot( + var=root_checkpointable.model._named_dense.bias, name="m"))) + beta1_power, _ = root_checkpointable.optimizer._get_beta_accumulators() + self.assertAllEqual(3., self.evaluate(beta1_power)) + + def testVariableNotFoundErrorRaised(self): + # Restore does some tricky exception handling to figure out if it should + # load an object-based checkpoint. Tests that the exception handling isn't + # too broad. 
+ checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + + a = resource_variable_ops.ResourceVariable(1., name="a") + b = resource_variable_ops.ResourceVariable(1., name="b") + a_saver = saver_module.Saver([a]) + b_saver = saver_module.Saver([b]) + with self.test_session() as sess: + sess.run(a.initializer) + save_path = a_saver.save(sess=sess, save_path=checkpoint_prefix) + with self.assertRaisesRegexp( + errors.NotFoundError, "Key b not found in checkpoint"): + b_saver.restore(sess=sess, save_path=save_path) + + def testCheckpointNotFoundErrorRaised(self): + # Restore does some tricky exception handling to figure out if it should + # load an object-based checkpoint. Tests that the exception handling isn't + # too broad. + a = resource_variable_ops.ResourceVariable(1., name="a") + saver = saver_module.Saver([a]) + with self.test_session() as sess: + with self.assertRaisesRegexp( + errors.NotFoundError, + "Failed to find any matching files for path_which_does_not_exist"): + saver.restore(sess=sess, save_path="path_which_does_not_exist") + + def testLoadFromObjectBasedGraph(self): + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + + save_graph = ops_lib.Graph() + with save_graph.as_default(), self.test_session(graph=save_graph) as sess: + root = self._initialized_model() + object_saver = checkpointable_utils.CheckpointableSaver(root) + save_path = object_saver.save(file_prefix=checkpoint_prefix) + + # An incompatible object-based checkpoint to check error messages + var = resource_variable_ops.ResourceVariable(1., name="a") + self.evaluate(var.initializer) + second_saver = checkpointable_utils.CheckpointableSaver(var) + second_path = second_saver.save(file_prefix=os.path.join( + checkpoint_directory, "second")) + + restore_graph = ops_lib.Graph() + with restore_graph.as_default(), self.test_session( + graph=restore_graph) as sess: + root = 
self._initialized_model() + self._set_sentinels(root) + saver = saver_module.Saver() + saver.restore(sess=sess, save_path=save_path) + self._check_sentinels(root) + before_second_restore_ops = restore_graph.get_operations() + # Test that multiple restores do not pollute the graph + saver.restore(sess=sess, save_path=save_path) + self.assertEqual(before_second_restore_ops, + restore_graph.get_operations()) + with self.assertRaisesRegexp(errors.NotFoundError, + "could not find a_variable"): + saver.restore(sess=sess, save_path=second_path) + + def testLoadFromObjectBasedEager(self): + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + + save_graph = ops_lib.Graph() + with save_graph.as_default(), self.test_session(graph=save_graph): + root = self._initialized_model() + object_saver = checkpointable_utils.CheckpointableSaver(root) + save_path = object_saver.save(file_prefix=checkpoint_prefix) + + with context.eager_mode(): + root = self._initialized_model() + self._set_sentinels(root) + saver = saver_module.Saver( + root.model.variables + root.optimizer.variables()) + saver.restore(sess=None, save_path=save_path) + self._check_sentinels(root) + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/training/session_manager.py b/tensorflow/python/training/session_manager.py index a00ceb90211e371c3b2f2b32f2042d1556158595..3cb3877cc2fb846477640058b5bcdfea97ee6828 100644 --- a/tensorflow/python/training/session_manager.py +++ b/tensorflow/python/training/session_manager.py @@ -263,8 +263,6 @@ class SessionManager(object): Raises: RuntimeError: If the model cannot be initialized or recovered. - - Raises: ValueError: If both checkpoint_dir and checkpoint_filename_with_path are set. 
""" diff --git a/tensorflow/python/training/training.py b/tensorflow/python/training/training.py index b759b156d78cf8d869b49375058cc7ed42e82b34..d7e5078be7be6704597599efa2c9ccdad17833e1 100644 --- a/tensorflow/python/training/training.py +++ b/tensorflow/python/training/training.py @@ -156,6 +156,7 @@ from tensorflow.python.training.basic_session_run_hooks import FinalOpsHook from tensorflow.python.training.basic_session_run_hooks import FeedFnHook from tensorflow.python.training.basic_session_run_hooks import ProfilerHook from tensorflow.python.training.basic_loops import basic_train_loop +from tensorflow.python.training.checkpointable_utils import Checkpoint from tensorflow.python.training.checkpoint_utils import init_from_checkpoint from tensorflow.python.training.checkpoint_utils import list_variables from tensorflow.python.training.checkpoint_utils import load_checkpoint diff --git a/tensorflow/python/training/warm_starting_util_test.py b/tensorflow/python/training/warm_starting_util_test.py index 6e445d8bd14cc13010541c1ab0f737f96a4b1e03..7e8cbd6baeea160075b61d1191c8f1da5fe2163c 100644 --- a/tensorflow/python/training/warm_starting_util_test.py +++ b/tensorflow/python/training/warm_starting_util_test.py @@ -946,18 +946,20 @@ class WarmStartingUtilTest(test.TestCase): # emb_vocab should be correctly warm-started after vocab remapping. # Missing values are filled in with the EmbeddingColumn's initializer. self._assert_cols_to_vars( - cols_to_vars, { + cols_to_vars, + { emb_vocab: [ - # embedding_weights part 0. - np.array([[3., 3.3], [2., 2.2], [1., 1.1]]), - # embedding_weights part 1. - np.array([[0.5, 0.4], [0.42, 0.42], [0.42, 0.42]]), # linear weights part 0. np.array([[0.69]]), # linear weights part 1. - np.array([[0.71]]) + np.array([[0.71]]), + # embedding_weights part 0. + np.array([[3., 3.3], [2., 2.2], [1., 1.1]]), + # embedding_weights part 1. 
+ np.array([[0.5, 0.4], [0.42, 0.42], [0.42, 0.42]]) ] - }, sess) + }, + sess) def testErrorConditions(self): x = variable_scope.get_variable( diff --git a/tensorflow/python/util/compat.py b/tensorflow/python/util/compat.py index 4163fcac79e3d237c4c4c4303e1db2c39e5fe7c6..3358ffe5264b16b53ae38b2108ad0fe016ea8972 100644 --- a/tensorflow/python/util/compat.py +++ b/tensorflow/python/util/compat.py @@ -42,10 +42,8 @@ import six as _six from tensorflow.python.util.all_util import remove_undocumented from tensorflow.python.util.tf_export import tf_export -from tensorflow.python.util.tf_export import tf_export -@tf_export('compat.as_bytes', 'compat.as_str') def as_bytes(bytes_or_text, encoding='utf-8'): """Converts either bytes or unicode to `bytes`, using utf-8 encoding for text. @@ -68,7 +66,6 @@ def as_bytes(bytes_or_text, encoding='utf-8'): (bytes_or_text,)) -@tf_export('compat.as_text') def as_text(bytes_or_text, encoding='utf-8'): """Returns the given argument as a unicode string. @@ -93,8 +90,12 @@ def as_text(bytes_or_text, encoding='utf-8'): # Convert an object to a `str` in both Python 2 and 3. 
if _six.PY2: as_str = as_bytes + tf_export('compat.as_bytes', 'compat.as_str')(as_bytes) + tf_export('compat.as_text')(as_text) else: as_str = as_text + tf_export('compat.as_bytes')(as_bytes) + tf_export('compat.as_text', 'compat.as_str')(as_text) @tf_export('compat.as_str_any') diff --git a/tensorflow/python/util/tf_inspect.py b/tensorflow/python/util/tf_inspect.py index 4ab8a72a83b466c38c50b1c76004e7a6fe942a04..663036de8a01c3937ae93f8d2bfa27f7add48e39 100644 --- a/tensorflow/python/util/tf_inspect.py +++ b/tensorflow/python/util/tf_inspect.py @@ -17,6 +17,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from collections import namedtuple import inspect as _inspect from tensorflow.python.util import tf_decorator @@ -24,6 +25,15 @@ from tensorflow.python.util import tf_decorator ArgSpec = _inspect.ArgSpec +if hasattr(_inspect, 'FullArgSpec'): + FullArgSpec = _inspect.FullArgSpec # pylint: disable=invalid-name +else: + FullArgSpec = namedtuple('FullArgSpec', [ + 'args', 'varargs', 'varkw', 'defaults', 'kwonlyargs', 'kwonlydefaults', + 'annotations' + ]) + + def currentframe(): """TFDecorator-aware replacement for inspect.currentframe.""" return _inspect.stack()[1][0] @@ -55,13 +65,36 @@ def getfullargspec(obj): # pylint: disable=redefined-builtin obj: A callable, possibly decorated. Returns: - The `FullArgSpec` (`ArgSpec` in Python 2) that describes the signature of + The `FullArgSpec` that describes the signature of the outermost decorator that changes the callable's signature. If the - callable is not decorated, `inspect.getfullargspec()` - (`inspect.getargspec()` in Python 2) will be called directly on the - callable. + callable is not decorated, `inspect.getfullargspec()` will be called + directly on the callable. 
""" - spec_fn = getattr(_inspect, 'getfullargspec', getattr(_inspect, 'getargspec')) + if hasattr(_inspect, 'getfullargspec'): + spec_fn = _inspect.getfullargspec + else: + def spec_fn(target): + """Spec function that adding default value from FullArgSpec. + + It is used when getfullargspec is not available (eg in PY2). + + Args: + target: the target object to inspect. + Returns: + The full argument specs with empty kwonlyargs, kwonlydefaults and + annotations. + """ + argspecs = _inspect.getargspec(target) + fullargspecs = FullArgSpec( + args=argspecs.args, + varargs=argspecs.varargs, + varkw=argspecs.keywords, + defaults=argspecs.defaults, + kwonlyargs=[], + kwonlydefaults=None, + annotations={}) + return fullargspecs + decorators, target = tf_decorator.unwrap(obj) return next((d.decorator_argspec for d in decorators if d.decorator_argspec is not None), spec_fn(target)) diff --git a/tensorflow/stream_executor/BUILD b/tensorflow/stream_executor/BUILD index 27cdb860feae62a5455e1f99995bf56b250cdb45..80fc9ff2926c53b557a7ba9e242d597a89acf79f 100644 --- a/tensorflow/stream_executor/BUILD +++ b/tensorflow/stream_executor/BUILD @@ -33,6 +33,7 @@ cc_library( }), visibility = ["//visibility:public"], deps = [ + "//tensorflow/compiler/xla:statusor", "//tensorflow/core:lib", "@local_config_cuda//cuda:cuda_headers", ], @@ -45,6 +46,7 @@ cc_library( visibility = ["//visibility:public"], deps = [ "//tensorflow/core:lib", + "//tensorflow/compiler/xla:statusor", "@local_config_cuda//cuda:cuda_headers", ] + if_static([":stream_executor_impl"]), ) @@ -75,7 +77,6 @@ cc_library( ":stream_executor", "//tensorflow/core:lib", "//tensorflow/core/kernels:ops_util", - "@com_google_absl//absl/strings", "@local_config_cuda//cuda:cuda_headers", ] + if_cuda_is_configured([ "//tensorflow/core:cuda", diff --git a/tensorflow/stream_executor/blas.cc b/tensorflow/stream_executor/blas.cc index 31724cf6c9b97e45975b9e053459f7b8f5918dfa..906d6fb7020ce35adb1438d394b34983c332f182 100644 --- 
a/tensorflow/stream_executor/blas.cc +++ b/tensorflow/stream_executor/blas.cc @@ -17,8 +17,7 @@ limitations under the License. #include "tensorflow/stream_executor/lib/strcat.h" -namespace perftools { -namespace gputools { +namespace stream_executor { namespace blas { string TransposeString(Transpose t) { @@ -95,5 +94,4 @@ std::ostream& operator<<(std::ostream& os, ComputationType ty) { } } // namespace blas -} // namespace gputools -} // namespace perftools +} // namespace stream_executor diff --git a/tensorflow/stream_executor/blas.h b/tensorflow/stream_executor/blas.h index c5f778a5c74519c0f35cea5d59aac3d0d4564c56..6e62b85728acd8db551a7e27459a6804a9f3d4b2 100644 --- a/tensorflow/stream_executor/blas.h +++ b/tensorflow/stream_executor/blas.h @@ -49,8 +49,7 @@ namespace Eigen { struct half; } // namespace Eigen -namespace perftools { -namespace gputools { +namespace stream_executor { class Stream; class ScratchAllocator; @@ -2100,7 +2099,6 @@ class BlasSupport { DeviceMemory> *b, int ldb) override; } // namespace blas -} // namespace gputools -} // namespace perftools +} // namespace stream_executor #endif // TENSORFLOW_STREAM_EXECUTOR_BLAS_H_ diff --git a/tensorflow/stream_executor/cuda/cuda_activation.cc b/tensorflow/stream_executor/cuda/cuda_activation.cc index 5f4cf9dbd781e865345143a964fc16ca4fdb18d4..cf6b9e2c6e4b327c06ecce318f8e8809308f6f02 100644 --- a/tensorflow/stream_executor/cuda/cuda_activation.cc +++ b/tensorflow/stream_executor/cuda/cuda_activation.cc @@ -19,8 +19,7 @@ limitations under the License. 
#include "tensorflow/stream_executor/stream_executor.h" #include "tensorflow/stream_executor/stream_executor_internal.h" -namespace perftools { -namespace gputools { +namespace stream_executor { namespace cuda { CudaContext* ExtractCudaContext(CUDAExecutor *cuda_exec); @@ -40,5 +39,4 @@ ScopedActivateExecutorContext::~ScopedActivateExecutorContext() { } } // namespace cuda -} // namespace gputools -} // namespace perftools +} // namespace stream_executor diff --git a/tensorflow/stream_executor/cuda/cuda_activation.h b/tensorflow/stream_executor/cuda/cuda_activation.h index c9d43a9766e049ae002df3e4ce4d8625e71f99db..04ffaef3646bb3df03407f8e74e620455bb9e5cb 100644 --- a/tensorflow/stream_executor/cuda/cuda_activation.h +++ b/tensorflow/stream_executor/cuda/cuda_activation.h @@ -25,8 +25,7 @@ limitations under the License. #include "tensorflow/stream_executor/platform/port.h" -namespace perftools { -namespace gputools { +namespace stream_executor { class StreamExecutor; @@ -56,7 +55,6 @@ class ScopedActivateExecutorContext { }; } // namespace cuda -} // namespace gputools -} // namespace perftools +} // namespace stream_executor #endif // TENSORFLOW_STREAM_EXECUTOR_CUDA_CUDA_ACTIVATION_H_ diff --git a/tensorflow/stream_executor/cuda/cuda_blas.cc b/tensorflow/stream_executor/cuda/cuda_blas.cc index c563f8f931b0a5689268329386d1252f2a45bdd1..007c0f1c86c77b035a31b4fee433ec71c26bdf9b 100644 --- a/tensorflow/stream_executor/cuda/cuda_blas.cc +++ b/tensorflow/stream_executor/cuda/cuda_blas.cc @@ -75,15 +75,14 @@ limitations under the License. 
#include "tensorflow/stream_executor/scratch_allocator.h" #include "tensorflow/stream_executor/stream_executor.h" -namespace perftools { -namespace gputools { +namespace stream_executor { namespace cuda { PLUGIN_REGISTRY_DEFINE_PLUGIN_ID(kCuBlasPlugin); namespace wrap { -#define PERFTOOLS_GPUTOOLS_CUBLAS_WRAP(__name) \ +#define STREAM_EXECUTOR_CUBLAS_WRAP(__name) \ struct WrapperShim__##__name { \ static const char *kName; \ template \ @@ -94,8 +93,8 @@ namespace wrap { } __name; \ const char *WrapperShim__##__name::kName = #__name; -#define PERFTOOLS_GPUTOOLS_CUBLAS_V2_WRAP(__name) \ - PERFTOOLS_GPUTOOLS_CUBLAS_WRAP(__name) +#define STREAM_EXECUTOR_CUBLAS_V2_WRAP(__name) \ + STREAM_EXECUTOR_CUBLAS_WRAP(__name) #define CUBLAS_BLAS_ROUTINE_EACH(__macro) \ __macro(cublasSnrm2) \ @@ -269,28 +268,28 @@ namespace wrap { __macro(cublasCdgmm) \ __macro(cublasZdgmm) -PERFTOOLS_GPUTOOLS_CUBLAS_V2_WRAP(cublasCreate) -PERFTOOLS_GPUTOOLS_CUBLAS_V2_WRAP(cublasDestroy) -PERFTOOLS_GPUTOOLS_CUBLAS_V2_WRAP(cublasSetStream) -PERFTOOLS_GPUTOOLS_CUBLAS_V2_WRAP(cublasSetPointerMode) -PERFTOOLS_GPUTOOLS_CUBLAS_V2_WRAP(cublasGetPointerMode) -PERFTOOLS_GPUTOOLS_CUBLAS_WRAP(cublasSgemmBatched) -PERFTOOLS_GPUTOOLS_CUBLAS_WRAP(cublasDgemmBatched) -PERFTOOLS_GPUTOOLS_CUBLAS_WRAP(cublasCgemmBatched) -PERFTOOLS_GPUTOOLS_CUBLAS_WRAP(cublasZgemmBatched) -CUBLAS_BLAS_ROUTINE_EACH(PERFTOOLS_GPUTOOLS_CUBLAS_V2_WRAP) +STREAM_EXECUTOR_CUBLAS_V2_WRAP(cublasCreate) +STREAM_EXECUTOR_CUBLAS_V2_WRAP(cublasDestroy) +STREAM_EXECUTOR_CUBLAS_V2_WRAP(cublasSetStream) +STREAM_EXECUTOR_CUBLAS_V2_WRAP(cublasSetPointerMode) +STREAM_EXECUTOR_CUBLAS_V2_WRAP(cublasGetPointerMode) +STREAM_EXECUTOR_CUBLAS_WRAP(cublasSgemmBatched) +STREAM_EXECUTOR_CUBLAS_WRAP(cublasDgemmBatched) +STREAM_EXECUTOR_CUBLAS_WRAP(cublasCgemmBatched) +STREAM_EXECUTOR_CUBLAS_WRAP(cublasZgemmBatched) +CUBLAS_BLAS_ROUTINE_EACH(STREAM_EXECUTOR_CUBLAS_V2_WRAP) #if CUDA_VERSION >= 7050 -PERFTOOLS_GPUTOOLS_CUBLAS_WRAP(cublasSgemmEx) 
+STREAM_EXECUTOR_CUBLAS_WRAP(cublasSgemmEx) #endif #if CUDA_VERSION >= 8000 -PERFTOOLS_GPUTOOLS_CUBLAS_WRAP(cublasGemmEx) +STREAM_EXECUTOR_CUBLAS_WRAP(cublasGemmEx) #endif #if CUDA_VERSION >= 9000 -PERFTOOLS_GPUTOOLS_CUBLAS_WRAP(cublasGetMathMode) -PERFTOOLS_GPUTOOLS_CUBLAS_WRAP(cublasSetMathMode) +STREAM_EXECUTOR_CUBLAS_WRAP(cublasGetMathMode) +STREAM_EXECUTOR_CUBLAS_WRAP(cublasSetMathMode) #endif } // namespace wrap @@ -2076,12 +2075,6 @@ bool CUDABlas::DoBlasGemvWithProfilingImpl( const DeviceMemory &a, int lda, const DeviceMemory &x, int incx, const T &beta, DeviceMemory *y, int incy, blas::ProfileResult *output_profile_result) { - struct TimerDeleter { - void operator()(CUDATimer *t) { - t->Destroy(); - delete t; - } - }; std::unique_ptr timer; if (output_profile_result != nullptr) { timer.reset(new CUDATimer(parent_)); @@ -2114,12 +2107,6 @@ bool CUDABlas::DoBlasGemmWithProfilingImpl( uint64 n, uint64 k, const ParamType &alpha, const DeviceMemory &a, int lda, const DeviceMemory &b, int ldb, const ParamType &beta, DeviceMemory *c, int ldc, blas::ProfileResult *output_profile_result) { - struct TimerDeleter { - void operator()(CUDATimer *t) { - t->Destroy(); - delete t; - } - }; std::unique_ptr timer; if (output_profile_result != nullptr) { timer.reset(new CUDATimer(parent_)); @@ -2188,12 +2175,6 @@ bool CUDABlas::DoBlasGemmWithAlgorithmImpl( return false; } - struct TimerDeleter { - void operator()(CUDATimer *t) { - t->Destroy(); - delete t; - } - }; std::unique_ptr timer; if (output_profile_result != nullptr) { timer.reset(new CUDATimer(parent_)); @@ -2821,46 +2802,39 @@ bool CUDABlas::DoBlasTrsm(Stream *stream, blas::Side side, } // namespace cuda -namespace gpu = ::perftools::gputools; - void initialize_cublas() { - gpu::port::Status status = - gpu::PluginRegistry::Instance() - ->RegisterFactory( - gpu::cuda::kCudaPlatformId, gpu::cuda::kCuBlasPlugin, "cuBLAS", - [](gpu::internal::StreamExecutorInterface - *parent) -> gpu::blas::BlasSupport * { - 
gpu::cuda::CUDAExecutor *cuda_executor = - dynamic_cast(parent); - if (cuda_executor == nullptr) { - LOG(ERROR) - << "Attempting to initialize an instance of the cuBLAS " - << "support library with a non-CUDA StreamExecutor"; - return nullptr; - } - - gpu::cuda::CUDABlas *blas = - new gpu::cuda::CUDABlas(cuda_executor); - if (!blas->Init()) { - // Note: Init() will log a more specific error. - delete blas; - return nullptr; - } - return blas; - }); + port::Status status = + PluginRegistry::Instance()->RegisterFactory( + cuda::kCudaPlatformId, cuda::kCuBlasPlugin, "cuBLAS", + [](internal::StreamExecutorInterface *parent) -> blas::BlasSupport * { + cuda::CUDAExecutor *cuda_executor = + dynamic_cast(parent); + if (cuda_executor == nullptr) { + LOG(ERROR) + << "Attempting to initialize an instance of the cuBLAS " + << "support library with a non-CUDA StreamExecutor"; + return nullptr; + } + + cuda::CUDABlas *blas = new cuda::CUDABlas(cuda_executor); + if (!blas->Init()) { + // Note: Init() will log a more specific error. 
+ delete blas; + return nullptr; + } + return blas; + }); if (!status.ok()) { LOG(ERROR) << "Unable to register cuBLAS factory: " << status.error_message(); } - gpu::PluginRegistry::Instance()->SetDefaultFactory(gpu::cuda::kCudaPlatformId, - gpu::PluginKind::kBlas, - gpu::cuda::kCuBlasPlugin); + PluginRegistry::Instance()->SetDefaultFactory( + cuda::kCudaPlatformId, PluginKind::kBlas, cuda::kCuBlasPlugin); } -} // namespace gputools -} // namespace perftools +} // namespace stream_executor REGISTER_MODULE_INITIALIZER(register_cublas, - { perftools::gputools::initialize_cublas(); }); + { stream_executor::initialize_cublas(); }); diff --git a/tensorflow/stream_executor/cuda/cuda_blas.h b/tensorflow/stream_executor/cuda/cuda_blas.h index deb211c04bcaa9e98ee04c5e9066a2a13092cb06..55c414a1f921c40efa5fd7a3d1bf0564fc5fbb9c 100644 --- a/tensorflow/stream_executor/cuda/cuda_blas.h +++ b/tensorflow/stream_executor/cuda/cuda_blas.h @@ -29,8 +29,7 @@ limitations under the License. typedef struct cublasContext *cublasHandle_t; -namespace perftools { -namespace gputools { +namespace stream_executor { class Stream; @@ -162,7 +161,6 @@ class CUDABlas : public blas::BlasSupport { }; } // namespace cuda -} // namespace gputools -} // namespace perftools +} // namespace stream_executor #endif // TENSORFLOW_STREAM_EXECUTOR_CUDA_CUDA_BLAS_H_ diff --git a/tensorflow/stream_executor/cuda/cuda_diagnostics.cc b/tensorflow/stream_executor/cuda/cuda_diagnostics.cc index 933c103f524ef37f840c9e13b9e4024289e274c1..feb529297e8fff99030cb14fd4f8a9298bbc5935 100644 --- a/tensorflow/stream_executor/cuda/cuda_diagnostics.cc +++ b/tensorflow/stream_executor/cuda/cuda_diagnostics.cc @@ -51,8 +51,7 @@ limitations under the License. 
#include "tensorflow/stream_executor/lib/str_util.h" #include "tensorflow/stream_executor/lib/inlined_vector.h" -namespace perftools { -namespace gputools { +namespace stream_executor { namespace cuda { #ifdef __APPLE__ @@ -384,5 +383,4 @@ port::StatusOr Diagnostician::FindKernelDriverVersion() { } // namespace cuda -} // namespace gputools -} // namespace perftools +} // namespace stream_executor diff --git a/tensorflow/stream_executor/cuda/cuda_diagnostics.h b/tensorflow/stream_executor/cuda/cuda_diagnostics.h index aa68321acc858c902d1a43600a14ac5d88edb0be..f2db2eb20a18c671e055b910809dfde940a5e3f8 100644 --- a/tensorflow/stream_executor/cuda/cuda_diagnostics.h +++ b/tensorflow/stream_executor/cuda/cuda_diagnostics.h @@ -22,8 +22,7 @@ limitations under the License. #include "tensorflow/stream_executor/lib/statusor.h" #include "tensorflow/stream_executor/platform/port.h" -namespace perftools { -namespace gputools { +namespace stream_executor { namespace cuda { // e.g. DriverVersion{346, 3, 4} @@ -93,7 +92,6 @@ class Diagnostician { }; } // namespace cuda -} // namespace gputools -} // namespace perftools +} // namespace stream_executor #endif // TENSORFLOW_STREAM_EXECUTOR_CUDA_CUDA_DIAGNOSTICS_H_ diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc index 1aea0485fd221d8a4631ac96191d6577268ec8c7..102419a264984aa2edb4fee756e9a53ef615d5bc 100644 --- a/tensorflow/stream_executor/cuda/cuda_dnn.cc +++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc @@ -18,7 +18,6 @@ limitations under the License. 
#include #include -#include "absl/strings/str_cat.h" #include "third_party/eigen3/Eigen/Core" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/util/env_var.h" @@ -60,8 +59,7 @@ NarrowT CheckedNarrowing(const WideT& wide) { } // namespace -namespace perftools { -namespace gputools { +namespace stream_executor { using dnn::BatchDescriptor; using dnn::FilterDescriptor; @@ -113,7 +111,7 @@ string ToString(libraryPropertyType type) { case PATCH_LEVEL: return "PATCH_LEVEL"; default: - return absl::StrCat( + return port::StrCat( "(type), ">"); } } @@ -160,7 +158,7 @@ static port::ThreadPool* GetCudaThreadpool() { return cudnn_threadpool; } -#define PERFTOOLS_GPUTOOLS_CUDNN_WRAP(__name) \ +#define STREAM_EXECUTOR_CUDNN_WRAP(__name) \ struct WrapperShim__##__name { \ template \ cudnnStatus_t operator()(CUDAExecutor* parent, Args... args) { \ @@ -170,11 +168,34 @@ static port::ThreadPool* GetCudaThreadpool() { } \ } __name; +#define STREAM_EXECUTOR_CUDNN_WRAP_WITH_CHECKED_STREAM(__name) \ + struct WrapperShim__##__name { \ + template \ + cudnnStatus_t operator()(CudnnSupport* dnn, Stream* s, Args... args) \ + SHARED_LOCKS_REQUIRED(dnn->dnn_handle_mutex_) { \ + CHECK_NOTNULL(s); \ + CHECK_EQ(s, dnn->GetCurrentDnnStream()) \ + << "Stream is not set correctly!"; \ + cuda::ScopedActivateExecutorContext sac{dnn->GetParentExecutor()}; \ + cudnnStatus_t retval = ::__name(args...); \ + return retval; \ + } \ + } __name; + +// Handles cudnnSetStream differently in order to add debug information. 
+struct WrapperShim__cudnnSetStream { + cudnnStatus_t operator()(CudnnSupport* dnn, Stream* stream, + cudnnHandle_t handle) + EXCLUSIVE_LOCKS_REQUIRED(dnn->dnn_handle_mutex_) { + dnn->SetCurrentDnnStream(stream); + cuda::ScopedActivateExecutorContext sac{dnn->GetParentExecutor()}; + cudnnStatus_t retval = ::cudnnSetStream(handle, AsCUDAStreamValue(stream)); + return retval; + } +} cudnnSetStream; + // clang-format off #define CUDNN_DNN_ROUTINE_EACH(__macro) \ - __macro(cudnnBatchNormalizationBackward) \ - __macro(cudnnBatchNormalizationForwardInference) \ - __macro(cudnnBatchNormalizationForwardTraining) \ __macro(cudnnGetConvolutionNdForwardOutputDim) \ __macro(cudnnGetConvolutionForwardAlgorithm) \ __macro(cudnnCreateTensorDescriptor) \ @@ -191,16 +212,25 @@ static port::ThreadPool* GetCudaThreadpool() { __macro(cudnnDestroyConvolutionDescriptor) \ __macro(cudnnCreate) \ __macro(cudnnDestroy) \ - __macro(cudnnSetStream) \ - __macro(cudnnActivationForward) \ - __macro(cudnnConvolutionForward) \ - __macro(cudnnConvolutionBackwardBias) \ __macro(cudnnGetConvolutionForwardWorkspaceSize) \ - __macro(cudnnTransformTensor) \ __macro(cudnnSetConvolutionNdDescriptor) \ __macro(cudnnSetTensor4dDescriptor) \ __macro(cudnnSetTensorNdDescriptor) \ - __macro(cudnnSetFilterNdDescriptor) \ + __macro(cudnnSetFilterNdDescriptor) + +// clang-format on +CUDNN_DNN_ROUTINE_EACH(STREAM_EXECUTOR_CUDNN_WRAP) +#undef CUDNN_DNN_ROUTINE_EACH + +// clang-format off +#define CUDNN_DNN_ROUTINE_EACH_WITH_STREAM(__macro) \ + __macro(cudnnBatchNormalizationBackward) \ + __macro(cudnnBatchNormalizationForwardInference) \ + __macro(cudnnBatchNormalizationForwardTraining) \ + __macro(cudnnActivationForward) \ + __macro(cudnnConvolutionForward) \ + __macro(cudnnConvolutionBackwardBias) \ + __macro(cudnnTransformTensor) \ __macro(cudnnPoolingForward) \ __macro(cudnnPoolingBackward) \ __macro(cudnnLRNCrossChannelForward) \ @@ -208,9 +238,11 @@ static port::ThreadPool* GetCudaThreadpool() { 
__macro(cudnnAddTensor) \ __macro(cudnnConvolutionBackwardData) \ __macro(cudnnConvolutionBackwardFilter) -// clang-format on -CUDNN_DNN_ROUTINE_EACH(PERFTOOLS_GPUTOOLS_CUDNN_WRAP) +// clang-format on +CUDNN_DNN_ROUTINE_EACH_WITH_STREAM( + STREAM_EXECUTOR_CUDNN_WRAP_WITH_CHECKED_STREAM) +#undef CUDNN_DNN_ROUTINE_EACH_WITH_STREAM // APIs available after R3: #if CUDNN_VERSION >= 3000 @@ -219,21 +251,22 @@ CUDNN_DNN_ROUTINE_EACH(PERFTOOLS_GPUTOOLS_CUDNN_WRAP) __macro(cudnnGetConvolutionBackwardDataAlgorithm) \ __macro(cudnnGetConvolutionBackwardFilterAlgorithm) \ __macro(cudnnGetConvolutionBackwardDataWorkspaceSize) -CUDNN_DNN_ROUTINE_EACH_AFTER_R3(PERFTOOLS_GPUTOOLS_CUDNN_WRAP) +CUDNN_DNN_ROUTINE_EACH_AFTER_R3(STREAM_EXECUTOR_CUDNN_WRAP) #undef CUDNN_DNN_ROUTINE_EACH_AFTER_R3 #endif // APIs in R3 but not in R5 // clang-format off #if CUDNN_VERSION >= 3000 && CUDNN_VERSION < 5000 -#define CUDNN_DNN_ROUTINE_EACH_R3(__macro) \ +#define CUDNN_DNN_ROUTINE_EACH_R3_WITH_STREAM(__macro) \ __macro(cudnnAddTensor_v3) \ __macro(cudnnConvolutionBackwardData_v3) \ __macro(cudnnConvolutionBackwardFilter_v3) // clang-format on -CUDNN_DNN_ROUTINE_EACH_R3(PERFTOOLS_GPUTOOLS_CUDNN_WRAP) -#undef CUDNN_DNN_ROUTINE_EACH_R3 +CUDNN_DNN_ROUTINE_EACH_R3_WITH_STREAM( + STREAM_EXECUTOR_CUDNN_WRAP_WITH_CHECKED_STREAM) +#undef CUDNN_DNN_ROUTINE_EACH_R3_WITH_STREAM #endif // APIs in R5 @@ -255,29 +288,44 @@ CUDNN_DNN_ROUTINE_EACH_R3(PERFTOOLS_GPUTOOLS_CUDNN_WRAP) __macro(cudnnGetRNNTrainingReserveSize) \ __macro(cudnnGetRNNLinLayerMatrixParams) \ __macro(cudnnGetRNNLinLayerBiasParams) \ - __macro(cudnnRNNForwardInference) \ - __macro(cudnnRNNForwardTraining) \ - __macro(cudnnRNNBackwardData) \ - __macro(cudnnRNNBackwardWeights) \ __macro(cudnnSetRNNDescriptor) \ __macro(cudnnGetFilterNdDescriptor) // clang-format on - -CUDNN_DNN_ROUTINE_EACH_R5(PERFTOOLS_GPUTOOLS_CUDNN_WRAP) +CUDNN_DNN_ROUTINE_EACH_R5(STREAM_EXECUTOR_CUDNN_WRAP) #undef CUDNN_DNN_ROUTINE_EACH_R5 + +// clang-format off +#define 
CUDNN_DNN_ROUTINE_EACH_R5_WITH_STREAM(__macro) \ + __macro(cudnnRNNForwardInference) \ + __macro(cudnnRNNForwardTraining) \ + __macro(cudnnRNNBackwardData) \ + __macro(cudnnRNNBackwardWeights) + +// clang-format on +CUDNN_DNN_ROUTINE_EACH_R5_WITH_STREAM( + STREAM_EXECUTOR_CUDNN_WRAP_WITH_CHECKED_STREAM) +#undef CUDNN_DNN_ROUTINE_EACH_R5_WITH_STREAM #endif // APIs in R6 // clang-format off #if CUDNN_VERSION >= 6000 #define CUDNN_DNN_ROUTINE_EACH_R6(__macro) \ - __macro(cudnnConvolutionBiasActivationForward) \ __macro(cudnnSetRNNDescriptor_v6) // clang-format on -CUDNN_DNN_ROUTINE_EACH_R6(PERFTOOLS_GPUTOOLS_CUDNN_WRAP) +CUDNN_DNN_ROUTINE_EACH_R6(STREAM_EXECUTOR_CUDNN_WRAP) #undef CUDNN_DNN_ROUTINE_EACH_R6 + +// clang-format off +#define CUDNN_DNN_ROUTINE_EACH_R6_WITH_STREAM(__macro) \ + __macro(cudnnConvolutionBiasActivationForward) + +// clang-format on +CUDNN_DNN_ROUTINE_EACH_R6_WITH_STREAM( + STREAM_EXECUTOR_CUDNN_WRAP_WITH_CHECKED_STREAM) +#undef CUDNN_DNN_ROUTINE_EACH_R6_WITH_STREAM #endif // APIs in R7 @@ -288,16 +336,16 @@ CUDNN_DNN_ROUTINE_EACH_R6(PERFTOOLS_GPUTOOLS_CUDNN_WRAP) __macro(cudnnSetRNNMatrixMathType) // clang-format on -CUDNN_DNN_ROUTINE_EACH_R7(PERFTOOLS_GPUTOOLS_CUDNN_WRAP) +CUDNN_DNN_ROUTINE_EACH_R7(STREAM_EXECUTOR_CUDNN_WRAP) #undef CUDNN_DNN_ROUTINE_EACH_R7 #endif -#undef CUDNN_DNN_ROUTINE_EACH - } // namespace wrap namespace { +cudnnDataType_t GetRnnComputeType(dnn::DataType data_type); + cudnnHandle_t ToHandle(void* opaque_handle) { return static_cast(opaque_handle); } @@ -375,13 +423,30 @@ port::Status GetCudnnProperty(libraryPropertyType type, int* value) { cudnnStatus_t status = cudnnGetProperty(type, value); if (status != CUDNN_STATUS_SUCCESS) { const string error = - absl::StrCat("cudnnGetProperty failed for type: ", ToString(type), + port::StrCat("cudnnGetProperty failed for type: ", ToString(type), " with status: ", ToString(status)); LOG(ERROR) << error; return port::Status{port::error::INTERNAL, error}; } return port::Status::OK(); 
} + +cudnnRNNAlgo_t ToCudnnRNNAlgo(const dnn::AlgorithmDesc& algorithm) { + if (algorithm.is_default()) { + return CUDNN_RNN_ALGO_STANDARD; + } else { + cudnnRNNAlgo_t algo = static_cast(algorithm.algo_id()); + switch (algo) { + case CUDNN_RNN_ALGO_STANDARD: + case CUDNN_RNN_ALGO_PERSIST_STATIC: + case CUDNN_RNN_ALGO_PERSIST_DYNAMIC: + return algo; + default: + LOG(FATAL) << "Unsupported Cudnn RNN algorithm: " + << algorithm.algo_id(); + } + } +} #endif port::Status GetLoadedCudnnVersion(CudnnVersion* version) { @@ -401,7 +466,7 @@ port::Status GetLoadedCudnnVersion(CudnnVersion* version) { } // namespace CudnnSupport::CudnnSupport(CUDAExecutor* parent) - : parent_(parent), dnn_handle_(nullptr) {} + : parent_(parent), dnn_handle_(nullptr), current_dnn_stream_(nullptr) {} CudnnSupport::~CudnnSupport() { auto status = wrap::cudnnDestroy(parent_, ToHandle(dnn_handle_)); @@ -419,7 +484,7 @@ port::Status CudnnSupport::Init() { CudnnVersion loaded_version; TF_RETURN_IF_ERROR(GetLoadedCudnnVersion(&loaded_version)); if (!IsSourceCompatibleWithCudnnLibrary(source_version, loaded_version)) { - const tensorflow::string error = absl::StrCat( + const tensorflow::string error = port::StrCat( "Loaded runtime CuDNN library: ", loaded_version.ToString(), " but source was compiled with: ", source_version.ToString(), ". CuDNN library major and minor version needs to match or have " @@ -459,6 +524,14 @@ port::Status CudnnSupport::Init() { ToString(status))}; } +port::StatusOr +CudnnSupport::GetVersion() { + CudnnVersion version; + TF_RETURN_IF_ERROR(GetLoadedCudnnVersion(&version)); + return perftools::gputools::dnn::VersionInfo( + version.major_version, version.minor_version, version.patch_level); +} + // Turns a BatchDescriptor structure into a cudnn tensor handle within a scope. 
class ScopedTensorDescriptor { public: @@ -1125,6 +1198,8 @@ class CudnnRnnDescriptor : public CudnnDescriptorCommon { cudnnRNNInputMode_t input_mode, cudnnDirectionMode_t direction_mode, cudnnRNNMode_t rnn_mode, cudnnDataType_t data_type, + cudnnDataType_t compute_type, + const dnn::AlgorithmConfig& algorithm_config, float dropout, uint64 seed, ScratchAllocator* state_allocator) : parent_(parent), @@ -1135,7 +1210,9 @@ class CudnnRnnDescriptor : public CudnnDescriptorCommon { input_mode_(input_mode), direction_mode_(direction_mode), rnn_mode_(rnn_mode), - data_type_(data_type) { + data_type_(data_type), + compute_type_(compute_type), + algorithm_config_(algorithm_config) { // Create the dropout handle. cudnn_dropout_desc_.reset(new CudnnDropoutDescriptor( parent, cudnn_handle, dropout, seed, state_allocator)); @@ -1149,18 +1226,20 @@ class CudnnRnnDescriptor : public CudnnDescriptorCommon { CUDNN_RETURN_IF_FAIL(status, "Unable to create RNN descriptor"); #if CUDNN_VERSION >= 6000 // TODO: allow the user to choose an algorithm. 
- cudnnRNNAlgo_t rnn_algo = CUDNN_RNN_ALGO_STANDARD; + cudnnRNNAlgo_t rnn_algo = ToCudnnRNNAlgo(algorithm_config_.algorithm()); status = wrap::cudnnSetRNNDescriptor_v6( parent, cudnn_handle, rnn_desc_ /*rnnDesc*/, hidden_size /*hiddenSize*/, num_layers /*numLayers*/, dropout_handle() /*dropoutDesc*/, input_mode /*inputMode*/, direction_mode /*direction*/, - rnn_mode /*mode*/, rnn_algo /*algo*/, data_type /*dataType*/); + rnn_mode /*mode*/, rnn_algo /*algo*/, compute_type /*dataType*/); #else + CHECK(algorithm_config_.is_default()) + << "Non-default algorithm not supported for CUDA version < 6.0"; status = wrap::cudnnSetRNNDescriptor( parent, rnn_desc_ /*rnnDesc*/, hidden_size /*hiddenSize*/, num_layers /*numLayers*/, dropout_handle() /*dropoutDesc*/, input_mode /*inputMode*/, direction_mode /*direction*/, - rnn_mode /*mode*/, data_type /*dataType*/); + rnn_mode /*mode*/, compute_type /*dataType*/); #endif CUDNN_RETURN_IF_FAIL(status, "Unable to update RNN descriptor"); @@ -1171,9 +1250,7 @@ class CudnnRnnDescriptor : public CudnnDescriptorCommon { SetFailure(cudnn_params_desc_->Status()); return; } - if (data_type == CUDNN_DATA_HALF) { - set_use_tensor_op_math(true); - } + set_use_tensor_op_math(algorithm_config_.algorithm().tensor_ops_enabled()); } ~CudnnRnnDescriptor() override { if (rnn_desc_) { @@ -1207,6 +1284,10 @@ class CudnnRnnDescriptor : public CudnnDescriptorCommon { cudnnDirectionMode_t direction_mode() const { return direction_mode_; } cudnnRNNMode_t rnn_mode() const { return rnn_mode_; } cudnnDataType_t data_type() const { return data_type_; } + cudnnDataType_t compute_type() const { return compute_type_; } + const dnn::AlgorithmConfig& algorithm_config() const { + return algorithm_config_; + } int64 ParamsSizeInBytes() const override { return cudnn_params_desc_->params_size_in_bytes(); } @@ -1237,6 +1318,8 @@ class CudnnRnnDescriptor : public CudnnDescriptorCommon { cudnnDirectionMode_t direction_mode_; cudnnRNNMode_t rnn_mode_; cudnnDataType_t 
data_type_; + cudnnDataType_t compute_type_; + dnn::AlgorithmConfig algorithm_config_; std::unique_ptr cudnn_dropout_desc_; std::unique_ptr cudnn_params_desc_; SE_DISALLOW_COPY_AND_ASSIGN(CudnnRnnDescriptor); @@ -1609,7 +1692,8 @@ bool CudnnSupport::DoRnnForwardImpl( const CudnnRnnStateTensorDescriptor& output_c_desc, DeviceMemory* output_c_data, bool is_training, ScratchAllocator* reserve_space_allocator, - ScratchAllocator* workspace_allocator) { + ScratchAllocator* workspace_allocator, + dnn::ProfileResult* output_profile_result) { // extract model parameters RnnModelDims model_dims; bool res = ExtractAndCheckRnnForward( @@ -1624,6 +1708,12 @@ bool CudnnSupport::DoRnnForwardImpl( // check params size mutex_lock lock{dnn_handle_mutex_}; + auto set_stream_status = + wrap::cudnnSetStream(this, stream, ToHandle(dnn_handle_)); + if (set_stream_status != CUDNN_STATUS_SUCCESS) { + LOG(FATAL) << "failed to set stream for cudnn handle: " + << ToString(set_stream_status); + } if (!CheckRNNParameterSize(parent_, ToHandle(dnn_handle_), rnn_desc, input_desc)) { @@ -1666,10 +1756,25 @@ bool CudnnSupport::DoRnnForwardImpl( } } + std::unique_ptr timer; + const bool is_profiling = output_profile_result != nullptr; + if (is_profiling) { + timer.reset(new CUDATimer(parent_)); + if (!timer->Init()) { + return false; + } + // The start and stop of the timer should be as close to the Cudnn call as + // possible. It is still possible for other threads to issue workload on + // to this stream. So it could take multiple profiling measurements. 
+ if (!timer->Start(AsCUDAStream(stream))) { + return false; + } + } // make the forward call + cudnnStatus_t status; if (!is_training) { - cudnnStatus_t status = wrap::cudnnRNNForwardInference( - parent_, ToHandle(dnn_handle_) /*handle*/, + status = wrap::cudnnRNNForwardInference( + this, stream, ToHandle(dnn_handle_) /*handle*/, rnn_desc.handle() /*rnnDesc*/, model_dims.seq_length /*seqLength*/, input_desc.handles() /*xDesc*/, input_data.opaque() /*x*/, input_h_desc.handle() /*hxDesc*/, input_h_data.opaque() /*hx*/, @@ -1680,14 +1785,9 @@ bool CudnnSupport::DoRnnForwardImpl( output_c_desc.handle() /*cyDesc*/, output_c_data->opaque() /*cy*/, workspace.opaque() /*workspace*/, workspace.size() /*workSpaceSizeInBytes*/); - if (status != CUDNN_STATUS_SUCCESS) { - LOG(ERROR) << "Failed to call cudnnRNNForwardInference: " - << ToString(status); - return false; - } } else { - cudnnStatus_t status = wrap::cudnnRNNForwardTraining( - parent_, ToHandle(dnn_handle_) /*handle*/, + status = wrap::cudnnRNNForwardTraining( + this, stream, ToHandle(dnn_handle_) /*handle*/, rnn_desc.handle() /*rnnDesc*/, model_dims.seq_length /*seqLength*/, input_desc.handles() /*xDesc*/, input_data.opaque() /*x*/, input_h_desc.handle() /*hxDesc*/, input_h_data.opaque() /*hx*/, @@ -1700,8 +1800,24 @@ bool CudnnSupport::DoRnnForwardImpl( workspace.size() /*workSpaceSizeInBytes*/, reserve_space.opaque() /*reserveSpace*/, reserve_space.size() /*reserveSpaceSizeInBytes*/); - if (status != CUDNN_STATUS_SUCCESS) { - LOG(ERROR) << "Failed to call cudnnRNNForwardTraining" + } + if (is_profiling) { + if (!timer->Stop(AsCUDAStream(stream))) { + return false; + } + if (status == CUDNN_STATUS_SUCCESS) { + auto algo_desc = rnn_desc.algorithm_config().algorithm(); + output_profile_result->set_algorithm(algo_desc); + output_profile_result->set_elapsed_time_in_ms( + timer->GetElapsedMilliseconds()); + } + } + if (status != CUDNN_STATUS_SUCCESS) { + // Silently return when we are profiling. 
+ if (!is_profiling) { + LOG(ERROR) << "Failed to call " + << (is_training ? "cudnnRNNForwardTraining " + : "cudnnRNNForwardInference ") << ToString(status); return false; } @@ -1733,7 +1849,8 @@ bool CudnnSupport::DoRnnBackwardImpl( DeviceMemory* input_c_backprop_data, DeviceMemory* params_backprop_data, DeviceMemory* reserve_space_data, - ScratchAllocator* workspace_allocator) { + ScratchAllocator* workspace_allocator, + dnn::ProfileResult* output_profile_result) { // extract model parameters RnnModelDims model_dims; bool res = ExtractAndCheckRnnForward( @@ -1747,6 +1864,12 @@ bool CudnnSupport::DoRnnBackwardImpl( // check params size mutex_lock lock{dnn_handle_mutex_}; + auto set_stream_status = + wrap::cudnnSetStream(this, stream, ToHandle(dnn_handle_)); + if (set_stream_status != CUDNN_STATUS_SUCCESS) { + LOG(FATAL) << "failed to set stream for cudnn handle: " + << ToString(set_stream_status); + } if (!CheckRNNParameterSize(parent_, ToHandle(dnn_handle_), rnn_desc, input_desc)) { @@ -1762,12 +1885,27 @@ bool CudnnSupport::DoRnnBackwardImpl( return false; } + std::unique_ptr timer; + const bool is_profiling = output_profile_result != nullptr; + if (is_profiling) { + timer.reset(new CUDATimer(parent_)); + if (!timer->Init()) { + return false; + } + // The start and stop of the timer should be as close to the Cudnn call as + // possible. It is still possible for other threads to issue workload on + // to this stream. So it could take multiple profiling measurements. 
+ if (!timer->Start(AsCUDAStream(stream))) { + return false; + } + } // make the backward data call cudnnStatus_t status = wrap::cudnnRNNBackwardData( - parent_, ToHandle(dnn_handle_) /*handle*/, rnn_desc.handle() /*rnnDesc*/, - model_dims.seq_length /*seqLength*/, output_desc.handles() /*yDesc*/, - output_data.opaque() /*y*/, output_desc.handles() /*dyDesc*/, - output_backprop_data.opaque() /*dy*/, output_h_desc.handle() /*dhyDesc*/, + this, stream, ToHandle(dnn_handle_) /*handle*/, + rnn_desc.handle() /*rnnDesc*/, model_dims.seq_length /*seqLength*/, + output_desc.handles() /*yDesc*/, output_data.opaque() /*y*/, + output_desc.handles() /*dyDesc*/, output_backprop_data.opaque() /*dy*/, + output_h_desc.handle() /*dhyDesc*/, output_h_backprop_data.opaque() /*dhy*/, output_c_desc.handle() /*dcyDesc*/, output_c_backprop_data.opaque() /*dcy*/, @@ -1782,7 +1920,11 @@ bool CudnnSupport::DoRnnBackwardImpl( workspace.size() /*workSpaceSizeInBytes*/, reserve_space_data->opaque() /*reserveSpace*/, reserve_space_data->size() /*reserveSpaceSizeInBytes*/); + if (status != CUDNN_STATUS_SUCCESS) { + if (is_profiling) { + timer->Stop(AsCUDAStream(stream)); + } LOG(ERROR) << "Failed to call cudnnRNNBackwardData: " << ToString(status); return false; } @@ -1792,7 +1934,7 @@ bool CudnnSupport::DoRnnBackwardImpl( stream->ThenMemZero(params_backprop_data, params_backprop_data->size()); // make the backward weight call status = wrap::cudnnRNNBackwardWeights( - parent_, ToHandle(dnn_handle_) /*handle*/, + this, stream, ToHandle(dnn_handle_) /*handle*/, rnn_desc.handle() /*rnnDesc*/, model_dims.seq_length /*seqLength*/, input_desc.handles() /*xDesc*/, input_data.opaque() /*x*/, input_h_desc.handle() /*hxDesc*/, input_h_data.opaque() /*hx*/, @@ -1804,11 +1946,23 @@ bool CudnnSupport::DoRnnBackwardImpl( reserve_space_data->opaque() /*reserveSpace*/, reserve_space_data->size() /*reserveSpaceSizeInBytes*/); if (status != CUDNN_STATUS_SUCCESS) { + if (is_profiling) { + 
timer->Stop(AsCUDAStream(stream)); + } LOG(ERROR) << "Failed to call cudnnRNNBackwardWeights: " << ToString(status); return false; } } + if (is_profiling) { + if (!timer->Stop(AsCUDAStream(stream))) { + return false; + } + auto algo_desc = rnn_desc.algorithm_config().algorithm(); + output_profile_result->set_algorithm(algo_desc); + output_profile_result->set_elapsed_time_in_ms( + timer->GetElapsedMilliseconds()); + } return true; } @@ -1820,15 +1974,17 @@ CudnnSupport::createRnnDescriptor(int num_layers, int hidden_size, int input_size, dnn::RnnInputMode input_mode, dnn::RnnDirectionMode direction_mode, dnn::RnnMode rnn_mode, - dnn::DataType data_type, float dropout, - uint64 seed, + dnn::DataType data_type, + const dnn::AlgorithmConfig& algorithm_config, + float dropout, uint64 seed, ScratchAllocator* state_allocator) { #if CUDNN_VERSION >= 5000 mutex_lock lock{dnn_handle_mutex_}; std::unique_ptr rnn_desc(new CudnnRnnDescriptor( parent_, ToHandle(dnn_handle_), num_layers, hidden_size, input_size, ToCudnnRnnInputMode(input_mode), ToCudnnRnnDirectionMode(direction_mode), - ToCudnnRnnMode(rnn_mode), ToCudnnDataType(data_type), dropout, seed, + ToCudnnRnnMode(rnn_mode), ToCudnnDataType(data_type), + GetRnnComputeType(data_type), algorithm_config, dropout, seed, state_allocator)); if (!rnn_desc->ok()) { return rnn_desc->Status(); @@ -1905,7 +2061,8 @@ bool CudnnSupport::DoRnnForward( const dnn::RnnStateTensorDescriptor& output_c_desc, DeviceMemory* output_c_data, bool is_training, ScratchAllocator* reserve_space_allocator, - ScratchAllocator* workspace_allocator) { + ScratchAllocator* workspace_allocator, + dnn::ProfileResult* output_profile_result) { #if CUDNN_VERSION >= 5000 const CudnnRnnDescriptor& cudnn_rnn_desc = static_cast(rnn_desc); @@ -1926,7 +2083,8 @@ bool CudnnSupport::DoRnnForward( stream, cudnn_rnn_desc, cudnn_input_desc, input_data, cudnn_input_h_desc, input_h_data, cudnn_input_c_desc, input_c_data, params, cudnn_output_desc, output_data, 
cudnn_output_h_desc, output_h_data, cudnn_output_c_desc, - output_c_data, is_training, reserve_space_allocator, workspace_allocator); + output_c_data, is_training, reserve_space_allocator, workspace_allocator, + output_profile_result); #else return false; #endif // CUDNN_VERSION @@ -1947,7 +2105,8 @@ bool CudnnSupport::DoRnnForward( const dnn::RnnStateTensorDescriptor& output_c_desc, DeviceMemory* output_c_data, bool is_training, ScratchAllocator* reserve_space_allocator, - ScratchAllocator* workspace_allocator) { + ScratchAllocator* workspace_allocator, + dnn::ProfileResult* output_profile_result) { #if CUDNN_VERSION >= 5000 const CudnnRnnDescriptor& cudnn_rnn_desc = static_cast(rnn_desc); @@ -1968,7 +2127,8 @@ bool CudnnSupport::DoRnnForward( stream, cudnn_rnn_desc, cudnn_input_desc, input_data, cudnn_input_h_desc, input_h_data, cudnn_input_c_desc, input_c_data, params, cudnn_output_desc, output_data, cudnn_output_h_desc, output_h_data, cudnn_output_c_desc, - output_c_data, is_training, reserve_space_allocator, workspace_allocator); + output_c_data, is_training, reserve_space_allocator, workspace_allocator, + output_profile_result); #else return false; #endif // CUDNN_VERSION @@ -1990,7 +2150,8 @@ bool CudnnSupport::DoRnnForward( const dnn::RnnStateTensorDescriptor& output_c_desc, DeviceMemory* output_c_data, bool is_training, ScratchAllocator* reserve_space_allocator, - ScratchAllocator* workspace_allocator) { + ScratchAllocator* workspace_allocator, + dnn::ProfileResult* output_profile_result) { #if CUDNN_VERSION >= 5000 const CudnnRnnDescriptor& cudnn_rnn_desc = static_cast(rnn_desc); @@ -2011,7 +2172,8 @@ bool CudnnSupport::DoRnnForward( stream, cudnn_rnn_desc, cudnn_input_desc, input_data, cudnn_input_h_desc, input_h_data, cudnn_input_c_desc, input_c_data, params, cudnn_output_desc, output_data, cudnn_output_h_desc, output_h_data, cudnn_output_c_desc, - output_c_data, is_training, reserve_space_allocator, workspace_allocator); + output_c_data, is_training, 
reserve_space_allocator, workspace_allocator, + output_profile_result); #else return false; #endif // CUDNN_VERSION @@ -2040,7 +2202,8 @@ bool CudnnSupport::DoRnnBackward( DeviceMemory* input_c_backprop_data, DeviceMemory* params_backprop_data, DeviceMemory* reserve_space_data, - ScratchAllocator* workspace_allocator) { + ScratchAllocator* workspace_allocator, + dnn::ProfileResult* output_profile_result) { #if CUDNN_VERSION >= 5000 const CudnnRnnDescriptor& cudnn_rnn_desc = static_cast(rnn_desc); @@ -2064,7 +2227,7 @@ bool CudnnSupport::DoRnnBackward( output_c_data, output_backprop_data, output_h_backprop_data, output_c_backprop_data, input_backprop_data, input_h_backprop_data, input_c_backprop_data, params_backprop_data, reserve_space_data, - workspace_allocator); + workspace_allocator, output_profile_result); #else return false; #endif // CUDNN_VERSION @@ -2092,7 +2255,8 @@ bool CudnnSupport::DoRnnBackward( DeviceMemory* input_c_backprop_data, DeviceMemory* params_backprop_data, DeviceMemory* reserve_space_data, - ScratchAllocator* workspace_allocator) { + ScratchAllocator* workspace_allocator, + dnn::ProfileResult* output_profile_result) { #if CUDNN_VERSION >= 5000 const CudnnRnnDescriptor& cudnn_rnn_desc = static_cast(rnn_desc); @@ -2116,7 +2280,7 @@ bool CudnnSupport::DoRnnBackward( output_c_data, output_backprop_data, output_h_backprop_data, output_c_backprop_data, input_backprop_data, input_h_backprop_data, input_c_backprop_data, params_backprop_data, reserve_space_data, - workspace_allocator); + workspace_allocator, output_profile_result); #else return false; #endif // CUDNN_VERSION @@ -2145,7 +2309,8 @@ bool CudnnSupport::DoRnnBackward( DeviceMemory* input_c_backprop_data, DeviceMemory* params_backprop_data, DeviceMemory* reserve_space_data, - ScratchAllocator* workspace_allocator) { + ScratchAllocator* workspace_allocator, + dnn::ProfileResult* output_profile_result) { #if CUDNN_VERSION >= 5000 const CudnnRnnDescriptor& cudnn_rnn_desc = 
static_cast(rnn_desc); @@ -2169,7 +2334,7 @@ bool CudnnSupport::DoRnnBackward( output_c_data, output_backprop_data, output_h_backprop_data, output_c_backprop_data, input_backprop_data, input_h_backprop_data, input_c_backprop_data, params_backprop_data, reserve_space_data, - workspace_allocator); + workspace_allocator, output_profile_result); #else return false; #endif // CUDNN_VERSION @@ -2364,6 +2529,41 @@ cudnnDataType_t GetConvComputeType() { return CUDNN_DATA_DOUBLE; } +// A helper struct to decide whether to use FP32 as the internal compute type +// for rnn when the input data type is FP16. At present it is turned off, +// users can explicitly control them through an env-var +// TF_FP16_RNN_USE_FP32_COMPUTE. +// After the TODO below is fixed, users should almost always use fp32 compute +// type for training. Using fp16 might suffer suboptimal accuracy due to loss +// in precision. +struct RnnDoFP32ComputationFP16Input { + static constexpr const char* kName = "TF_FP16_RNN_USE_FP32_COMPUTE"; + // TODO(jamesqin): b/78182362 flip to true when cudnn 7.1.4 fixes the bug. + // Before cudnn 7.1.4 RNN are always done in fp32, no matter what math + // precision is set. + // Set it temporary to false s.t. no error is raised when using fp16 inputs, + // fp32 math precision. + static constexpr bool kDefaultFlag = false; +}; + +// A helper function to return the internal compute type for +// RNNs in cudnn. 
+cudnnDataType_t GetRnnComputeType(dnn::DataType data_type) { + switch (data_type) { + case dnn::DataType::kFloat: + return CUDNN_DATA_FLOAT; + case dnn::DataType::kDouble: + return CUDNN_DATA_DOUBLE; + case dnn::DataType::kHalf: + if (CudnnEnvVar::IsEnabled()) { + return CUDNN_DATA_FLOAT; + } else { + return CUDNN_DATA_HALF; + } + default: + LOG(FATAL) << "Invalid RNN data type: " << static_cast(data_type); + } +} } // namespace template @@ -2386,8 +2586,7 @@ bool CudnnSupport::DoConvolveImpl( GetConvComputeType()}; mutex_lock lock{dnn_handle_mutex_}; - auto status = wrap::cudnnSetStream(parent_, ToHandle(dnn_handle_), - AsCUDAStreamValue(stream)); + auto status = wrap::cudnnSetStream(this, stream, ToHandle(dnn_handle_)); if (status != CUDNN_STATUS_SUCCESS) { LOG(FATAL) << "failed to set stream for cudnn handle: " << ToString(status); } @@ -2537,7 +2736,7 @@ bool CudnnSupport::DoConvolveImpl( } } status = wrap::cudnnConvolutionForward( - parent_, ToHandle(dnn_handle_), + this, stream, ToHandle(dnn_handle_), /*alpha=*/alpha, /*srcDesc=*/input_nd.handle(), /*srcData=*/input_data.opaque(), /*filterDesc=*/filter.handle(), /*filterData=*/filter_data.opaque(), /*convDesc=*/conv.handle(), @@ -2606,8 +2805,7 @@ bool CudnnSupport::DoFusedConvolveImpl( static_cast(cudnn_compute_type)}; mutex_lock lock{dnn_handle_mutex_}; - auto status = wrap::cudnnSetStream(parent_, ToHandle(dnn_handle_), - AsCUDAStreamValue(stream)); + auto status = wrap::cudnnSetStream(this, stream, ToHandle(dnn_handle_)); CHECK(status == CUDNN_STATUS_SUCCESS) << "failed to set stream for cudnn handle: " << ToString(status); @@ -2673,7 +2871,7 @@ bool CudnnSupport::DoFusedConvolveImpl( << "\noutput_data->opaque() = " << output_data->opaque(); status = wrap::cudnnConvolutionBiasActivationForward( - parent_, ToHandle(dnn_handle_), /*alpha1=*/&conv_input_scale, + this, stream, ToHandle(dnn_handle_), /*alpha1=*/&conv_input_scale, /*srcDesc=*/conv_input_nd.handle(), /*srcData=*/conv_input_data.opaque(), 
/*filterDesc=*/filter.handle(), /*filterData=*/filter_data.opaque(), /*convDesc=*/conv.handle(), algo, /*workSpace=*/scratch.opaque(), @@ -2743,6 +2941,30 @@ bool CudnnSupport::GetConvolveAlgorithms( return true; } +bool CudnnSupport::GetRnnAlgorithms( + std::vector* out_algorithms) { + std::vector algo_types = { + // clang-format off +#if CUDNN_VERSION >= 6000 + CUDNN_RNN_ALGO_STANDARD, + CUDNN_RNN_ALGO_PERSIST_STATIC, + CUDNN_RNN_ALGO_PERSIST_DYNAMIC, +#endif + // clang-format on + }; + + out_algorithms->clear(); + for (auto i : algo_types) { + out_algorithms->push_back({i, /*use_tensor_ops=*/false}); +#if CUDNN_VERSION >= 7100 + if (RnnTensorOpMathEnabled()) { + out_algorithms->push_back({i, /*use_tensor_ops=*/true}); + } +#endif + } + return true; +} + bool CudnnSupport::GetConvolveBackwardDataAlgorithms( bool with_winograd_nonfused, int cc_major, int cc_minor, std::vector* out_algorithms) { @@ -2854,8 +3076,7 @@ bool CudnnSupport::DoBatchNormalizationForwardImpl( bool is_training, std::function&()> var_to_inv_var, std::function inv_var_to_var) { mutex_lock lock{dnn_handle_mutex_}; - auto status = wrap::cudnnSetStream(parent_, ToHandle(dnn_handle_), - AsCUDAStreamValue(stream)); + auto status = wrap::cudnnSetStream(this, stream, ToHandle(dnn_handle_)); if (status != CUDNN_STATUS_SUCCESS) { LOG(ERROR) << "failed to set stream for cudnn handle: " << ToString(status); return false; @@ -2891,7 +3112,7 @@ bool CudnnSupport::DoBatchNormalizationForwardImpl( } status = wrap::cudnnBatchNormalizationForwardTraining( - parent_, ToHandle(dnn_handle_), mode, &one, &zero, + this, stream, ToHandle(dnn_handle_), mode, &one, &zero, x_descriptor.handle(), x.opaque(), x_descriptor.handle(), y->opaque(), scale_offset_descriptor.handle(), scale.opaque(), offset.opaque(), 1.0, batch_mean_opaque, batch_var_opaque, epsilon, saved_mean->opaque(), @@ -2908,7 +3129,7 @@ bool CudnnSupport::DoBatchNormalizationForwardImpl( const void* maybe_inv_var = estimated_variance.opaque(); #endif 
status = wrap::cudnnBatchNormalizationForwardInference( - parent_, ToHandle(dnn_handle_), mode, &one, &zero, + this, stream, ToHandle(dnn_handle_), mode, &one, &zero, x_descriptor.handle(), x.opaque(), x_descriptor.handle(), y->opaque(), scale_offset_descriptor.handle(), scale.opaque(), offset.opaque(), estimated_mean.opaque(), maybe_inv_var, epsilon); @@ -2959,8 +3180,7 @@ bool CudnnSupport::DoBatchNormalizationBackwardImpl( DeviceMemory* x_backprop, DeviceMemory* scale_backprop, DeviceMemory* offset_backprop) { mutex_lock lock{dnn_handle_mutex_}; - auto status = wrap::cudnnSetStream(parent_, ToHandle(dnn_handle_), - AsCUDAStreamValue(stream)); + auto status = wrap::cudnnSetStream(this, stream, ToHandle(dnn_handle_)); if (status != CUDNN_STATUS_SUCCESS) { LOG(ERROR) << "failed to set stream for cudnn handle: " << ToString(status); return false; @@ -2981,7 +3201,7 @@ bool CudnnSupport::DoBatchNormalizationBackwardImpl( float zero = 0.0; status = wrap::cudnnBatchNormalizationBackward( - parent_, ToHandle(dnn_handle_), mode, &one, &zero, &one, &zero, + this, stream, ToHandle(dnn_handle_), mode, &one, &zero, &one, &zero, x_descriptor.handle(), x.opaque(), x_descriptor.handle(), y_backprop.opaque(), x_descriptor.handle(), x_backprop->opaque(), scale_offset_descriptor.handle(), scale.opaque(), @@ -3171,7 +3391,7 @@ DeviceMemory CudnnSupport::MaybeTransformLayout( float alpha = 1.0f; float beta = 0.0f; auto status = wrap::cudnnTransformTensor( - parent_, ToHandle(dnn_handle_), &alpha, orig_out_back_nd.handle(), + this, stream, ToHandle(dnn_handle_), &alpha, orig_out_back_nd.handle(), backward_output_data.opaque(), &beta, transformed_out_back_nd.handle(), (*transform_scratch)->mutable_device_memory()->opaque()); @@ -3190,8 +3410,7 @@ bool CudnnSupport::DoTransformTensor(Stream* stream, dnn::DataType output_type, float scale, DeviceMemoryBase* output_data) { mutex_lock lock{dnn_handle_mutex_}; - cudnnStatus_t status = wrap::cudnnSetStream(parent_, ToHandle(dnn_handle_), - 
AsCUDAStreamValue(stream)); + auto status = wrap::cudnnSetStream(this, stream, ToHandle(dnn_handle_)); if (status != CUDNN_STATUS_SUCCESS) { LOG(FATAL) << "failed to set stream for cudnn handle: " << ToString(status); } @@ -3202,7 +3421,7 @@ bool CudnnSupport::DoTransformTensor(Stream* stream, ScopedTensorDescriptor output_tensor_desc( parent_, output_desc, ToCudnnDataType(output_type, output_desc.layout())); status = wrap::cudnnTransformTensor( - parent_, ToHandle(dnn_handle_), &scale, input_tensor_desc.handle(), + this, stream, ToHandle(dnn_handle_), &scale, input_tensor_desc.handle(), input_data.opaque(), &beta, output_tensor_desc.handle(), output_data->opaque()); if (status != CUDNN_STATUS_SUCCESS) { @@ -3229,8 +3448,7 @@ bool CudnnSupport::DoConvolveBackwardDataImpl( const dnn::AlgorithmConfig& algorithm_config, dnn::ProfileResult* output_profile_result) { mutex_lock lock{dnn_handle_mutex_}; - auto status = wrap::cudnnSetStream(parent_, ToHandle(dnn_handle_), - AsCUDAStreamValue(stream)); + auto status = wrap::cudnnSetStream(this, stream, ToHandle(dnn_handle_)); if (status != CUDNN_STATUS_SUCCESS) { LOG(FATAL) << "failed to set stream for cudnn handle: " << ToString(status); } @@ -3399,7 +3617,7 @@ bool CudnnSupport::DoConvolveBackwardDataImpl( #else status = wrap::cudnnConvolutionBackwardData_v3( #endif - parent_, ToHandle(dnn_handle_), + this, stream, ToHandle(dnn_handle_), /*alpha=*/alpha, /*filterDesc=*/filter.handle(), /*filterData=*/filter_data.opaque(), @@ -3500,8 +3718,7 @@ bool CudnnSupport::DoConvolveBackwardFilterImpl( const dnn::AlgorithmConfig& algorithm_config, dnn::ProfileResult* output_profile_result) { mutex_lock lock{dnn_handle_mutex_}; - auto status = wrap::cudnnSetStream(parent_, ToHandle(dnn_handle_), - AsCUDAStreamValue(stream)); + auto status = wrap::cudnnSetStream(this, stream, ToHandle(dnn_handle_)); if (status != CUDNN_STATUS_SUCCESS) { LOG(FATAL) << "failed to set stream for cudnn handle: " << ToString(status); } @@ -3671,7 +3888,7 
@@ bool CudnnSupport::DoConvolveBackwardFilterImpl( #else status = wrap::cudnnConvolutionBackwardFilter_v3( #endif - parent_, ToHandle(dnn_handle_), /*alpha=*/alpha, + this, stream, ToHandle(dnn_handle_), /*alpha=*/alpha, /*srcDesc=*/input_nd.handle(), /*srcData=*/input_data.opaque(), /*diffDesc=*/out_back_nd.handle(), @@ -3767,8 +3984,7 @@ bool CudnnSupport::DoConvolveBackwardBiasImpl( const dnn::BatchDescriptor& bias_descriptor, DeviceMemory* backward_bias_data) { mutex_lock lock{dnn_handle_mutex_}; - auto status = wrap::cudnnSetStream(parent_, ToHandle(dnn_handle_), - AsCUDAStreamValue(stream)); + auto status = wrap::cudnnSetStream(this, stream, ToHandle(dnn_handle_)); if (status != CUDNN_STATUS_SUCCESS) { LOG(FATAL) << "failed to set stream for cudnn handle: " << ToString(status); } @@ -3783,7 +3999,7 @@ bool CudnnSupport::DoConvolveBackwardBiasImpl( float beta = 0.0; status = wrap::cudnnConvolutionBackwardBias( - parent_, ToHandle(dnn_handle_), &alpha, input_nd.handle(), + this, stream, ToHandle(dnn_handle_), &alpha, input_nd.handle(), input_data.opaque(), &beta, bias_nd.handle(), backward_bias_data->opaque()); if (status != CUDNN_STATUS_SUCCESS) { @@ -3988,8 +4204,7 @@ bool CudnnSupport::DoBiasAdd(Stream* stream, } mutex_lock lock{dnn_handle_mutex_}; - auto status = wrap::cudnnSetStream(parent_, ToHandle(dnn_handle_), - AsCUDAStreamValue(stream)); + auto status = wrap::cudnnSetStream(this, stream, ToHandle(dnn_handle_)); if (status != CUDNN_STATUS_SUCCESS) { LOG(ERROR) << "failed to set stream for cudnn handle: " << ToString(status); return false; @@ -4003,7 +4218,7 @@ bool CudnnSupport::DoBiasAdd(Stream* stream, #else status = wrap::cudnnAddTensor_v3( #endif - parent_, ToHandle(dnn_handle_), &alpha, bias_descriptor.handle(), + this, stream, ToHandle(dnn_handle_), &alpha, bias_descriptor.handle(), biases.opaque(), &beta, input_descriptor.handle(), output_data->opaque()); if (status != CUDNN_STATUS_SUCCESS) { @@ -4021,8 +4236,7 @@ bool 
CudnnSupport::DoActivate(Stream* stream, DeviceMemory* output_data, uint64 options) { mutex_lock lock{dnn_handle_mutex_}; - auto status = wrap::cudnnSetStream(parent_, ToHandle(dnn_handle_), - AsCUDAStreamValue(stream)); + auto status = wrap::cudnnSetStream(this, stream, ToHandle(dnn_handle_)); if (status != CUDNN_STATUS_SUCCESS) { LOG(ERROR) << "failed to set stream for cudnn handle: " << ToString(status); return false; @@ -4066,7 +4280,7 @@ bool CudnnSupport::DoActivate(Stream* stream, // Beta is the output scaling factor. float beta = 0.0; status = wrap::cudnnActivationForward( - parent_, ToHandle(dnn_handle_), + this, stream, ToHandle(dnn_handle_), #if CUDNN_VERSION >= 5000 activation_desc.handle(), #else @@ -4090,8 +4304,7 @@ bool CudnnSupport::DoPoolForward( const dnn::BatchDescriptor& output_dimensions, DeviceMemory* output_data) { mutex_lock lock{dnn_handle_mutex_}; - auto status = wrap::cudnnSetStream(parent_, ToHandle(dnn_handle_), - AsCUDAStreamValue(stream)); + auto status = wrap::cudnnSetStream(this, stream, ToHandle(dnn_handle_)); if (status != CUDNN_STATUS_SUCCESS) { LOG(ERROR) << "failed to set stream for cudnn handle: " << ToString(status); return false; @@ -4107,7 +4320,7 @@ bool CudnnSupport::DoPoolForward( CUDNN_DATA_DOUBLE}; ScopedPoolingDescriptor pooling_desc{parent_, pooling_dimensions}; status = wrap::cudnnPoolingForward( - parent_, ToHandle(dnn_handle_), pooling_desc.handle(), &alpha, + this, stream, ToHandle(dnn_handle_), pooling_desc.handle(), &alpha, src_desc.handle(), input_data.opaque(), &beta, dest_desc.handle(), output_data->opaque()); if (status != CUDNN_STATUS_SUCCESS) { @@ -4125,8 +4338,7 @@ bool CudnnSupport::DoPoolForward( const dnn::BatchDescriptor& output_dimensions, DeviceMemory* output_data) { mutex_lock lock{dnn_handle_mutex_}; - auto status = wrap::cudnnSetStream(parent_, ToHandle(dnn_handle_), - AsCUDAStreamValue(stream)); + auto status = wrap::cudnnSetStream(this, stream, ToHandle(dnn_handle_)); if (status != 
CUDNN_STATUS_SUCCESS) { LOG(ERROR) << "failed to set stream for cudnn handle: " << ToString(status); return false; @@ -4142,7 +4354,7 @@ bool CudnnSupport::DoPoolForward( CUDNN_DATA_FLOAT}; ScopedPoolingDescriptor pooling_desc{parent_, pooling_dimensions}; status = wrap::cudnnPoolingForward( - parent_, ToHandle(dnn_handle_), pooling_desc.handle(), &alpha, + this, stream, ToHandle(dnn_handle_), pooling_desc.handle(), &alpha, src_desc.handle(), input_data.opaque(), &beta, dest_desc.handle(), output_data->opaque()); if (status != CUDNN_STATUS_SUCCESS) { @@ -4160,8 +4372,7 @@ bool CudnnSupport::DoPoolForward( const dnn::BatchDescriptor& output_dimensions, DeviceMemory* output_data) { mutex_lock lock{dnn_handle_mutex_}; - auto status = wrap::cudnnSetStream(parent_, ToHandle(dnn_handle_), - AsCUDAStreamValue(stream)); + auto status = wrap::cudnnSetStream(this, stream, ToHandle(dnn_handle_)); if (status != CUDNN_STATUS_SUCCESS) { LOG(ERROR) << "failed to set stream for cudnn handle: " << ToString(status); return false; @@ -4176,7 +4387,7 @@ bool CudnnSupport::DoPoolForward( ScopedTensorDescriptor dest_desc{parent_, output_dimensions, CUDNN_DATA_HALF}; ScopedPoolingDescriptor pooling_desc{parent_, pooling_dimensions}; status = wrap::cudnnPoolingForward( - parent_, ToHandle(dnn_handle_), pooling_desc.handle(), &alpha, + this, stream, ToHandle(dnn_handle_), pooling_desc.handle(), &alpha, src_desc.handle(), input_data.opaque(), &beta, dest_desc.handle(), output_data->opaque()); if (status != CUDNN_STATUS_SUCCESS) { @@ -4196,8 +4407,7 @@ bool CudnnSupport::DoPoolBackward( const DeviceMemory& input_diff_data, DeviceMemory* output_diff_data) { mutex_lock lock{dnn_handle_mutex_}; - auto status = wrap::cudnnSetStream(parent_, ToHandle(dnn_handle_), - AsCUDAStreamValue(stream)); + auto status = wrap::cudnnSetStream(this, stream, ToHandle(dnn_handle_)); if (status != CUDNN_STATUS_SUCCESS) { LOG(ERROR) << "failed to set stream for cudnn handle: " << ToString(status); return false; @@ 
-4213,7 +4423,7 @@ bool CudnnSupport::DoPoolBackward( CUDNN_DATA_DOUBLE}; ScopedPoolingDescriptor pooling_desc{parent_, pooling_dimensions}; status = wrap::cudnnPoolingBackward( - parent_, ToHandle(dnn_handle_), pooling_desc.handle(), &alpha, + this, stream, ToHandle(dnn_handle_), pooling_desc.handle(), &alpha, dest_desc.handle(), output_data.opaque(), dest_desc.handle(), input_diff_data.opaque(), src_desc.handle(), input_data.opaque(), &beta, src_desc.handle(), output_diff_data->opaque()); @@ -4234,8 +4444,7 @@ bool CudnnSupport::DoPoolBackward( const DeviceMemory& input_diff_data, DeviceMemory* output_diff_data) { mutex_lock lock{dnn_handle_mutex_}; - auto status = wrap::cudnnSetStream(parent_, ToHandle(dnn_handle_), - AsCUDAStreamValue(stream)); + auto status = wrap::cudnnSetStream(this, stream, ToHandle(dnn_handle_)); if (status != CUDNN_STATUS_SUCCESS) { LOG(ERROR) << "failed to set stream for cudnn handle: " << ToString(status); return false; @@ -4251,7 +4460,7 @@ bool CudnnSupport::DoPoolBackward( CUDNN_DATA_FLOAT}; ScopedPoolingDescriptor pooling_desc{parent_, pooling_dimensions}; status = wrap::cudnnPoolingBackward( - parent_, ToHandle(dnn_handle_), pooling_desc.handle(), &alpha, + this, stream, ToHandle(dnn_handle_), pooling_desc.handle(), &alpha, dest_desc.handle(), output_data.opaque(), dest_desc.handle(), input_diff_data.opaque(), src_desc.handle(), input_data.opaque(), &beta, src_desc.handle(), output_diff_data->opaque()); @@ -4272,8 +4481,7 @@ bool CudnnSupport::DoPoolBackward( const DeviceMemory& input_diff_data, DeviceMemory* output_diff_data) { mutex_lock lock{dnn_handle_mutex_}; - auto status = wrap::cudnnSetStream(parent_, ToHandle(dnn_handle_), - AsCUDAStreamValue(stream)); + auto status = wrap::cudnnSetStream(this, stream, ToHandle(dnn_handle_)); if (status != CUDNN_STATUS_SUCCESS) { LOG(ERROR) << "failed to set stream for cudnn handle: " << ToString(status); return false; @@ -4288,7 +4496,7 @@ bool CudnnSupport::DoPoolBackward( 
ScopedTensorDescriptor dest_desc{parent_, output_dimensions, CUDNN_DATA_HALF}; ScopedPoolingDescriptor pooling_desc{parent_, pooling_dimensions}; status = wrap::cudnnPoolingBackward( - parent_, ToHandle(dnn_handle_), pooling_desc.handle(), &alpha, + this, stream, ToHandle(dnn_handle_), pooling_desc.handle(), &alpha, dest_desc.handle(), output_data.opaque(), dest_desc.handle(), input_diff_data.opaque(), src_desc.handle(), input_data.opaque(), &beta, src_desc.handle(), output_diff_data->opaque()); @@ -4323,8 +4531,7 @@ bool CudnnSupport::DoNormalizeWithDimensions( // Launch the normalization. mutex_lock lock{dnn_handle_mutex_}; - auto status = wrap::cudnnSetStream(parent_, ToHandle(dnn_handle_), - AsCUDAStreamValue(stream)); + auto status = wrap::cudnnSetStream(this, stream, ToHandle(dnn_handle_)); if (status != CUDNN_STATUS_SUCCESS) { LOG(ERROR) << "failed to set stream for cudnn handle: " << ToString(status); return false; @@ -4339,7 +4546,7 @@ bool CudnnSupport::DoNormalizeWithDimensions( float beta = 0.0f; status = wrap::cudnnLRNCrossChannelForward( - parent_, ToHandle(dnn_handle_), normalize.handle(), + this, stream, ToHandle(dnn_handle_), normalize.handle(), CUDNN_LRN_CROSS_CHANNEL_DIM1, &alpha, dims.handle(), input_data.opaque(), &beta, dims.handle(), output_data->opaque()); if (status != CUDNN_STATUS_SUCCESS) { @@ -4366,8 +4573,7 @@ bool CudnnSupport::DoNormalizeBackwardWithDimensions( } mutex_lock lock{dnn_handle_mutex_}; - auto status = wrap::cudnnSetStream(parent_, ToHandle(dnn_handle_), - AsCUDAStreamValue(stream)); + auto status = wrap::cudnnSetStream(this, stream, ToHandle(dnn_handle_)); if (status != CUDNN_STATUS_SUCCESS) { LOG(ERROR) << "failed to set stream for cudnn handle: " << ToString(status); return false; @@ -4380,7 +4586,7 @@ bool CudnnSupport::DoNormalizeBackwardWithDimensions( float beta = 0.0f; status = wrap::cudnnLRNCrossChannelBackward( - parent_, ToHandle(dnn_handle_), normalize.handle(), + this, stream, ToHandle(dnn_handle_), 
normalize.handle(), CUDNN_LRN_CROSS_CHANNEL_DIM1, &alpha, dims.handle(), normalized_data.opaque(), dims.handle(), normalized_variable_gradient.opaque(), dims.handle(), raw_data.opaque(), @@ -4529,46 +4735,39 @@ bool CudnnSupport::DeriveOutputBatchDescriptor( } // namespace cuda -namespace gpu = ::perftools::gputools; - void initialize_cudnn() { - gpu::port::Status status = - gpu::PluginRegistry::Instance() - ->RegisterFactory( - gpu::cuda::kCudaPlatformId, gpu::cuda::kCuDnnPlugin, "cuDNN", - [](gpu::internal::StreamExecutorInterface* - parent) -> gpu::dnn::DnnSupport* { - gpu::cuda::CUDAExecutor* cuda_executor = - dynamic_cast(parent); - if (cuda_executor == nullptr) { - LOG(ERROR) - << "Attempting to initialize an instance of the cuBLAS " - << "support library with a non-CUDA StreamExecutor"; - return nullptr; - } - - gpu::cuda::CudnnSupport* dnn = - new gpu::cuda::CudnnSupport(cuda_executor); - if (!dnn->Init().ok()) { - // Note: Init() will log a more specific error. - delete dnn; - return nullptr; - } - return dnn; - }); + port::Status status = + PluginRegistry::Instance()->RegisterFactory( + cuda::kCudaPlatformId, cuda::kCuDnnPlugin, "cuDNN", + [](internal::StreamExecutorInterface* parent) -> dnn::DnnSupport* { + cuda::CUDAExecutor* cuda_executor = + dynamic_cast(parent); + if (cuda_executor == nullptr) { + LOG(ERROR) + << "Attempting to initialize an instance of the cuBLAS " + << "support library with a non-CUDA StreamExecutor"; + return nullptr; + } + + cuda::CudnnSupport* dnn = new cuda::CudnnSupport(cuda_executor); + if (!dnn->Init().ok()) { + // Note: Init() will log a more specific error. 
+ delete dnn; + return nullptr; + } + return dnn; + }); if (!status.ok()) { LOG(ERROR) << "Unable to register cuDNN factory: " << status.error_message(); } - gpu::PluginRegistry::Instance()->SetDefaultFactory(gpu::cuda::kCudaPlatformId, - gpu::PluginKind::kDnn, - gpu::cuda::kCuDnnPlugin); + PluginRegistry::Instance()->SetDefaultFactory( + cuda::kCudaPlatformId, PluginKind::kDnn, cuda::kCuDnnPlugin); } -} // namespace gputools -} // namespace perftools +} // namespace stream_executor REGISTER_MODULE_INITIALIZER(register_cudnn, - { perftools::gputools::initialize_cudnn(); }); + { stream_executor::initialize_cudnn(); }); diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.h b/tensorflow/stream_executor/cuda/cuda_dnn.h index 48d56f71e3195a897b6216ab9f5709326d1b86d3..5ded7cf1543ee2cf3b874bcbd27b0fc25df2d844 100644 --- a/tensorflow/stream_executor/cuda/cuda_dnn.h +++ b/tensorflow/stream_executor/cuda/cuda_dnn.h @@ -26,8 +26,7 @@ limitations under the License. #include "tensorflow/stream_executor/plugin_registry.h" #include "tensorflow/stream_executor/temporary_device_memory.h" -namespace perftools { -namespace gputools { +namespace stream_executor { namespace cuda { class CUDAExecutor; @@ -46,12 +45,14 @@ class CudnnSupport : public dnn::DnnSupport { ~CudnnSupport() override; port::Status Init() override; + port::StatusOr GetVersion() override; port::StatusOr> createRnnDescriptor( int num_layers, int hidden_size, int input_size, dnn::RnnInputMode input_mode, dnn::RnnDirectionMode direction_mode, - dnn::RnnMode rnn_mode, dnn::DataType data_type, float dropout, - uint64 seed, ScratchAllocator* state_allocator) override; + dnn::RnnMode rnn_mode, dnn::DataType data_type, + const dnn::AlgorithmConfig& algorithm_config, float dropout, uint64 seed, + ScratchAllocator* state_allocator) override; port::StatusOr> createRnnSequenceTensorDescriptor(int seq_length, int batch_size, @@ -77,7 +78,8 @@ class CudnnSupport : public dnn::DnnSupport { const dnn::RnnStateTensorDescriptor& 
output_c_desc, DeviceMemory* output_c_data, bool is_training, ScratchAllocator* reserve_space_allocator, - ScratchAllocator* workspace_allocator) override; + ScratchAllocator* workspace_allocator, + dnn::ProfileResult* output_profile_result) override; bool DoRnnForward(Stream* stream, const dnn::RnnDescriptor& rnn_desc, const dnn::RnnSequenceTensorDescriptor& input_desc, @@ -94,7 +96,8 @@ class CudnnSupport : public dnn::DnnSupport { const dnn::RnnStateTensorDescriptor& output_c_desc, DeviceMemory* output_c_data, bool is_training, ScratchAllocator* reserve_space_allocator, - ScratchAllocator* workspace_allocator) override; + ScratchAllocator* workspace_allocator, + dnn::ProfileResult* output_profile_result) override; bool DoRnnForward(Stream* stream, const dnn::RnnDescriptor& rnn_desc, const dnn::RnnSequenceTensorDescriptor& input_desc, @@ -111,7 +114,8 @@ class CudnnSupport : public dnn::DnnSupport { const dnn::RnnStateTensorDescriptor& output_c_desc, DeviceMemory* output_c_data, bool is_training, ScratchAllocator* reserve_space_allocator, - ScratchAllocator* workspace_allocator) override; + ScratchAllocator* workspace_allocator, + dnn::ProfileResult* output_profile_result) override; bool DoRnnBackward(Stream* stream, const dnn::RnnDescriptor& rnn_desc, const dnn::RnnSequenceTensorDescriptor& input_desc, @@ -135,7 +139,8 @@ class CudnnSupport : public dnn::DnnSupport { DeviceMemory* input_c_backprop_data, DeviceMemory* params_backprop_data, DeviceMemory* reserve_space_data, - ScratchAllocator* workspace_allocator) override; + ScratchAllocator* workspace_allocator, + dnn::ProfileResult* output_profile_result) override; bool DoRnnBackward(Stream* stream, const dnn::RnnDescriptor& rnn_desc, const dnn::RnnSequenceTensorDescriptor& input_desc, @@ -159,7 +164,8 @@ class CudnnSupport : public dnn::DnnSupport { DeviceMemory* input_c_backprop_data, DeviceMemory* params_backprop_data, DeviceMemory* reserve_space_data, - ScratchAllocator* workspace_allocator) override; + 
ScratchAllocator* workspace_allocator, + dnn::ProfileResult* output_profile_result) override; bool DoRnnBackward(Stream* stream, const dnn::RnnDescriptor& rnn_desc, const dnn::RnnSequenceTensorDescriptor& input_desc, @@ -183,12 +189,16 @@ class CudnnSupport : public dnn::DnnSupport { DeviceMemory* input_c_backprop_data, DeviceMemory* params_backprop_data, DeviceMemory* reserve_space_data, - ScratchAllocator* workspace_allocator) override; + ScratchAllocator* workspace_allocator, + dnn::ProfileResult* output_profile_result) override; bool GetConvolveAlgorithms( bool with_winograd_nonfused, int cc_major, int cc_minor, std::vector* out_algorithms) override; + bool GetRnnAlgorithms( + std::vector* out_algorithms) override; + bool GetConvolveBackwardDataAlgorithms( bool with_winograd_nonfused, int cc_major, int cc_minor, std::vector* out_algorithms) override; @@ -614,10 +624,27 @@ class CudnnSupport : public dnn::DnnSupport { dnn::DataType output_type, float scale, DeviceMemoryBase* output_data) override; - private: - // Guards the enqueueing of DNN operations via the dnn_handle_ below. + const Stream* GetCurrentDnnStream() const + SHARED_LOCKS_REQUIRED(dnn_handle_mutex_) { + return current_dnn_stream_; + } + + void SetCurrentDnnStream(Stream* stream) + EXCLUSIVE_LOCKS_REQUIRED(dnn_handle_mutex_) { + current_dnn_stream_ = stream; + } + + CUDAExecutor* GetParentExecutor() { return parent_; } + + // Guards the enqueueing of DNN operations via the dnn_handle_ below, and + // access to current_dnn_stream_. + // + // This is a public member because we need to add thread safty annotations in + // the cudnn wrapper functions in the cc file, which need to access this + // mutex (the annotations require C++ permission checks). mutex dnn_handle_mutex_; + private: CUDAExecutor* parent_; // Parent executor object. Not owned. // cudnn library handle. 
cudnnHandle_t type is not present in this header to @@ -625,6 +652,9 @@ class CudnnSupport : public dnn::DnnSupport { // single cuda_dnn translation unit. void* dnn_handle_ GUARDED_BY(dnn_handle_mutex_); + // The current cudnn stream that is set by cudnnSetStream(). + Stream* current_dnn_stream_ GUARDED_BY(dnn_handle_mutex_); + // NOTE(keveman): Temporary data layout transformation until cuDNN supports // kBatchYXDepth for backward pass. This function allocates temporary memory, // lays out the source data into the temporary but in the kBatchDepthXY @@ -746,7 +776,8 @@ class CudnnSupport : public dnn::DnnSupport { const CudnnRnnStateTensorDescriptor& output_c_desc, DeviceMemory* output_c_data, bool is_training, ScratchAllocator* reserve_space_allocator, - ScratchAllocator* workspace_allocator); + ScratchAllocator* workspace_allocator, + dnn::ProfileResult* output_profile_result); template bool DoRnnBackwardImpl(Stream* stream, const CudnnRnnDescriptor& rnn_desc, @@ -771,13 +802,13 @@ class CudnnSupport : public dnn::DnnSupport { DeviceMemory* input_c_backprop_data, DeviceMemory* params_backprop_data, DeviceMemory* reserve_space_data, - ScratchAllocator* workspace_allocator); + ScratchAllocator* workspace_allocator, + dnn::ProfileResult* output_profile_result); SE_DISALLOW_COPY_AND_ASSIGN(CudnnSupport); }; } // namespace cuda -} // namespace gputools -} // namespace perftools +} // namespace stream_executor #endif // TENSORFLOW_STREAM_EXECUTOR_CUDA_CUDA_DNN_H_ diff --git a/tensorflow/stream_executor/cuda/cuda_driver.cc b/tensorflow/stream_executor/cuda/cuda_driver.cc index 58e1e58c593a3d938d97baff2356bce2c215a7a1..71cab145b9bb5a8320df8fb78241dd0d37d54239 100644 --- a/tensorflow/stream_executor/cuda/cuda_driver.cc +++ b/tensorflow/stream_executor/cuda/cuda_driver.cc @@ -37,14 +37,6 @@ limitations under the License. 
#include "tensorflow/stream_executor/platform/port.h" #include "tensorflow/stream_executor/lib/inlined_vector.h" -#if defined(PLATFORM_WINDOWS) -// TODO: in windows ARRAYSIZE is defined in winnt.h but including it -// here creates a conflict with cuda.h - for now define it here. -#define ARRAYSIZE(a) \ - ((sizeof(a) / sizeof(*(a))) / \ - static_cast(!(sizeof(a) % sizeof(*(a))))) -#endif - bool FLAGS_gpuexec_cuda_driver_inject_init_error = false; bool FLAGS_gpuexec_cuda_sync_around_driver_calls = false; bool FLAGS_gpuexec_cuda_device_0_only = false; @@ -53,8 +45,7 @@ bool FLAGS_gpuexec_cuda_device_0_only = false; // matches the expected one. constexpr bool kVerifyCudaContext = false; -namespace perftools { -namespace gputools { +namespace stream_executor { namespace cuda { namespace { @@ -720,15 +711,15 @@ CUDADriver::ContextGetSharedMemConfig(CudaContext* context) { port::bit_cast(uintptr_t(info_log_buffer_bytes)), port::bit_cast(info_log_buffer.data()), port::bit_cast(uintptr_t(log_verbose))}; - CHECK(ARRAYSIZE(options) == ARRAYSIZE(option_values)); + CHECK(TF_ARRAYSIZE(options) == TF_ARRAYSIZE(option_values)); CUresult res; { // TODO(leary) Need to see if NVIDIA can expunge the leakiness in their // module loading: see http://b/13248943 - res = cuModuleLoadDataEx(module, ptx_data, ARRAYSIZE(options), options, - option_values); + res = cuModuleLoadDataEx(module, ptx_data, TF_ARRAYSIZE(options), + options, option_values); } // The PTX JIT mutates the values in the option values array to reflect the @@ -1649,5 +1640,4 @@ static port::StatusOr GetSimpleAttribute(CUdevice device, } } // namespace cuda -} // namespace gputools -} // namespace perftools +} // namespace stream_executor diff --git a/tensorflow/stream_executor/cuda/cuda_driver.h b/tensorflow/stream_executor/cuda/cuda_driver.h index fa9172b3f008d3083309126bbfa4a1ab961030e1..a9969e247e181599f2b3707f6c65c6527dd4683d 100644 --- a/tensorflow/stream_executor/cuda/cuda_driver.h +++ 
b/tensorflow/stream_executor/cuda/cuda_driver.h @@ -27,8 +27,7 @@ limitations under the License. #include "tensorflow/stream_executor/platform/port.h" #include "cuda/include/cuda.h" -namespace perftools { -namespace gputools { +namespace stream_executor { namespace cuda { // Identifies the memory space where an allocation resides. See @@ -506,7 +505,6 @@ class CudaContext { }; } // namespace cuda -} // namespace gputools -} // namespace perftools +} // namespace stream_executor #endif // TENSORFLOW_STREAM_EXECUTOR_CUDA_CUDA_DRIVER_H_ diff --git a/tensorflow/stream_executor/cuda/cuda_event.cc b/tensorflow/stream_executor/cuda/cuda_event.cc index 1b41502300d446b55708c2afb6f5538bdf6cf220..96dcf173566087db475e3b237591d19f06128d92 100644 --- a/tensorflow/stream_executor/cuda/cuda_event.cc +++ b/tensorflow/stream_executor/cuda/cuda_event.cc @@ -19,8 +19,7 @@ limitations under the License. #include "tensorflow/stream_executor/cuda/cuda_stream.h" #include "tensorflow/stream_executor/lib/statusor.h" -namespace perftools { -namespace gputools { +namespace stream_executor { namespace cuda { CUDAEvent::CUDAEvent(CUDAExecutor* parent) @@ -68,5 +67,4 @@ const CUevent& CUDAEvent::cuda_event() { } } // namespace cuda -} // namespace gputools -} // namespace perftools +} // namespace stream_executor diff --git a/tensorflow/stream_executor/cuda/cuda_event.h b/tensorflow/stream_executor/cuda/cuda_event.h index 56667e65d38199fd4c340147c4e40a17c5bb2b2d..f62344672ed624f1ed60b5452d33b6f8273f2b47 100644 --- a/tensorflow/stream_executor/cuda/cuda_event.h +++ b/tensorflow/stream_executor/cuda/cuda_event.h @@ -21,8 +21,7 @@ limitations under the License. 
#include "tensorflow/stream_executor/event.h" #include "tensorflow/stream_executor/lib/status.h" -namespace perftools { -namespace gputools { +namespace stream_executor { namespace cuda { // CUDAEvent wraps a CUevent in the platform-independent EventInterface @@ -58,7 +57,6 @@ class CUDAEvent : public internal::EventInterface { }; } // namespace cuda -} // namespace gputools -} // namespace perftools +} // namespace stream_executor #endif // TENSORFLOW_STREAM_EXECUTOR_CUDA_CUDA_EVENT_H_ diff --git a/tensorflow/stream_executor/cuda/cuda_fft.cc b/tensorflow/stream_executor/cuda/cuda_fft.cc index a922f14fb4af695877b449d2f960fae1a356a82f..5b34740f9f1f9067f949ad41bcae0b97d3d3c7f4 100644 --- a/tensorflow/stream_executor/cuda/cuda_fft.cc +++ b/tensorflow/stream_executor/cuda/cuda_fft.cc @@ -31,8 +31,7 @@ limitations under the License. #include "tensorflow/stream_executor/plugin_registry.h" #include "tensorflow/stream_executor/stream_executor_internal.h" -namespace perftools { -namespace gputools { +namespace stream_executor { namespace cuda { PLUGIN_REGISTRY_DEFINE_PLUGIN_ID(kCuFftPlugin); @@ -44,7 +43,7 @@ namespace wrap { // manner on first use. This dynamic loading technique is used to avoid DSO // dependencies on vendor libraries which may or may not be available in the // deployed binary environment. -#define PERFTOOLS_GPUTOOLS_CUFFT_WRAP(__name) \ +#define STREAM_EXECUTOR_CUFFT_WRAP(__name) \ struct WrapperShim__##__name { \ template \ cufftResult operator()(CUDAExecutor *parent, Args... 
args) { \ @@ -68,7 +67,7 @@ namespace wrap { __macro(cufftGetSizeMany) \ __macro(cufftMakePlanMany) -CUFFT_ROUTINE_EACH(PERFTOOLS_GPUTOOLS_CUFFT_WRAP) +CUFFT_ROUTINE_EACH(STREAM_EXECUTOR_CUFFT_WRAP) } // namespace wrap @@ -514,62 +513,59 @@ bool CUDAFft::DoFftWithDirectionInternal(Stream *stream, fft::Plan *plan, return true; } -#define PERFTOOLS_GPUTOOLS_CUDA_DEFINE_FFT(__type, __fft_type1, __fft_type2, \ - __fft_type3) \ - bool CUDAFft::DoFft(Stream *stream, fft::Plan *plan, \ - const DeviceMemory> &input, \ - DeviceMemory> *output) { \ - return DoFftWithDirectionInternal( \ - stream, plan, wrap::cufftExec##__fft_type1, input, output); \ - } \ - bool CUDAFft::DoFft(Stream *stream, fft::Plan *plan, \ - const DeviceMemory<__type> &input, \ - DeviceMemory> *output) { \ - return DoFftInternal(stream, plan, wrap::cufftExec##__fft_type2, input, \ - output); \ - } \ - bool CUDAFft::DoFft(Stream *stream, fft::Plan *plan, \ - const DeviceMemory> &input, \ - DeviceMemory<__type> *output) { \ - return DoFftInternal(stream, plan, wrap::cufftExec##__fft_type3, input, \ - output); \ +#define STREAM_EXECUTOR_CUDA_DEFINE_FFT(__type, __fft_type1, __fft_type2, \ + __fft_type3) \ + bool CUDAFft::DoFft(Stream *stream, fft::Plan *plan, \ + const DeviceMemory> &input, \ + DeviceMemory> *output) { \ + return DoFftWithDirectionInternal( \ + stream, plan, wrap::cufftExec##__fft_type1, input, output); \ + } \ + bool CUDAFft::DoFft(Stream *stream, fft::Plan *plan, \ + const DeviceMemory<__type> &input, \ + DeviceMemory> *output) { \ + return DoFftInternal(stream, plan, wrap::cufftExec##__fft_type2, input, \ + output); \ + } \ + bool CUDAFft::DoFft(Stream *stream, fft::Plan *plan, \ + const DeviceMemory> &input, \ + DeviceMemory<__type> *output) { \ + return DoFftInternal(stream, plan, wrap::cufftExec##__fft_type3, input, \ + output); \ } -PERFTOOLS_GPUTOOLS_CUDA_DEFINE_FFT(float, C2C, R2C, C2R) -PERFTOOLS_GPUTOOLS_CUDA_DEFINE_FFT(double, Z2Z, D2Z, Z2D) 
+STREAM_EXECUTOR_CUDA_DEFINE_FFT(float, C2C, R2C, C2R) +STREAM_EXECUTOR_CUDA_DEFINE_FFT(double, Z2Z, D2Z, Z2D) -#undef PERFTOOLS_GPUTOOLS_CUDA_DEFINE_FFT +#undef STREAM_EXECUTOR_CUDA_DEFINE_FFT } // namespace cuda -} // namespace gputools -} // namespace perftools - -namespace gpu = ::perftools::gputools; - -REGISTER_MODULE_INITIALIZER(register_cufft, { - gpu::port::Status status = - gpu::PluginRegistry::Instance() - ->RegisterFactory( - gpu::cuda::kCudaPlatformId, gpu::cuda::kCuFftPlugin, "cuFFT", - [](gpu::internal::StreamExecutorInterface - *parent) -> gpu::fft::FftSupport * { - gpu::cuda::CUDAExecutor *cuda_executor = - dynamic_cast(parent); - if (cuda_executor == nullptr) { - LOG(ERROR) - << "Attempting to initialize an instance of the cuFFT " - << "support library with a non-CUDA StreamExecutor"; - return nullptr; - } - - return new gpu::cuda::CUDAFft(cuda_executor); - }); + +void initialize_cufft() { + port::Status status = + PluginRegistry::Instance()->RegisterFactory( + cuda::kCudaPlatformId, cuda::kCuFftPlugin, "cuFFT", + [](internal::StreamExecutorInterface *parent) -> fft::FftSupport * { + cuda::CUDAExecutor *cuda_executor = + dynamic_cast(parent); + if (cuda_executor == nullptr) { + LOG(ERROR) << "Attempting to initialize an instance of the cuFFT " + << "support library with a non-CUDA StreamExecutor"; + return nullptr; + } + + return new cuda::CUDAFft(cuda_executor); + }); if (!status.ok()) { LOG(ERROR) << "Unable to register cuFFT factory: " << status.error_message(); } - gpu::PluginRegistry::Instance()->SetDefaultFactory(gpu::cuda::kCudaPlatformId, - gpu::PluginKind::kFft, - gpu::cuda::kCuFftPlugin); -}); + PluginRegistry::Instance()->SetDefaultFactory( + cuda::kCudaPlatformId, PluginKind::kFft, cuda::kCuFftPlugin); +} + +} // namespace stream_executor + +REGISTER_MODULE_INITIALIZER(register_cufft, + { stream_executor::initialize_cufft(); }); diff --git a/tensorflow/stream_executor/cuda/cuda_fft.h b/tensorflow/stream_executor/cuda/cuda_fft.h index 
04c7dfe501c451e4848bef68bed9685c079dd523..8171e61418a3185455e50ee76315eb2493c36c01 100644 --- a/tensorflow/stream_executor/cuda/cuda_fft.h +++ b/tensorflow/stream_executor/cuda/cuda_fft.h @@ -26,8 +26,7 @@ limitations under the License. #include "tensorflow/stream_executor/plugin_registry.h" #include "tensorflow/stream_executor/scratch_allocator.h" -namespace perftools { -namespace gputools { +namespace stream_executor { class Stream; @@ -133,7 +132,6 @@ class CUDAFft : public fft::FftSupport { }; } // namespace cuda -} // namespace gputools -} // namespace perftools +} // namespace stream_executor #endif // TENSORFLOW_STREAM_EXECUTOR_CUDA_CUDA_FFT_H_ diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc index 5ecaf46b8cae3c1e1f312816e7e5aec8ff8ce306..7c87d33d21b58af477a541953b9030e581cfa46d 100644 --- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc +++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc @@ -66,8 +66,7 @@ limitations under the License. extern bool FLAGS_check_gpu_leaks; bool FLAGS_prefer_cubin_to_ptx = true; -namespace perftools { -namespace gputools { +namespace stream_executor { namespace cuda { // Hook that can be used to CUBIN-ate PTX before it is loaded into the driver. 
@@ -1127,7 +1126,7 @@ DeviceDescription *CUDAExecutor::PopulateDeviceDescription() const { builder.set_name(device_name); } - for (size_t i = 0; i < ARRAYSIZE(kAllUnqueryableDeviceParams); i++) { + for (size_t i = 0; i < TF_ARRAYSIZE(kAllUnqueryableDeviceParams); i++) { const auto ¶ms = kAllUnqueryableDeviceParams[i]; if (params.cc_major == cc_major_ && params.cc_minor == cc_minor_) { builder.set_blocks_per_core_limit(params.blocks_per_core_limit); @@ -1168,17 +1167,14 @@ DeviceDescription *CUDAExecutor::PopulateDeviceDescription() const { } // namespace cuda -namespace gpu = ::perftools::gputools; - void initialize_cuda_gpu_executor() { - *gpu::internal::MakeCUDAExecutorImplementation() = []( - const gpu::PluginConfig &config) { - return new gpu::cuda::CUDAExecutor{config}; + *internal::MakeCUDAExecutorImplementation() = [](const PluginConfig &config) { + return new cuda::CUDAExecutor{config}; }; } -} // namespace gputools -} // namespace perftools +} // namespace stream_executor -REGISTER_MODULE_INITIALIZER( - cuda_gpu_executor, {perftools::gputools::initialize_cuda_gpu_executor();}); +REGISTER_MODULE_INITIALIZER(cuda_gpu_executor, { + stream_executor::initialize_cuda_gpu_executor(); +}); diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.h b/tensorflow/stream_executor/cuda/cuda_gpu_executor.h index dbbbcd476f096ff912d391604ba349f6cb979478..f686685474b35acfb54c327401500c42109006d0 100644 --- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.h +++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.h @@ -35,8 +35,7 @@ limitations under the License. 
#include "tensorflow/stream_executor/platform/thread_annotations.h" #include "tensorflow/stream_executor/stream_executor_internal.h" -namespace perftools { -namespace gputools { +namespace stream_executor { namespace cuda { // CUDA-platform implementation of the platform-agnostic @@ -273,7 +272,6 @@ class CUDAExecutor : public internal::StreamExecutorInterface { }; } // namespace cuda -} // namespace gputools -} // namespace perftools +} // namespace stream_executor #endif // TENSORFLOW_STREAM_EXECUTOR_CUDA_CUDA_GPU_EXECUTOR_H_ diff --git a/tensorflow/stream_executor/cuda/cuda_helpers.h b/tensorflow/stream_executor/cuda/cuda_helpers.h index 6a6134bf881646991065cba536e955ef7c29e88c..d55706c66a9b47abfe125eaaa09e4b0cc543622a 100644 --- a/tensorflow/stream_executor/cuda/cuda_helpers.h +++ b/tensorflow/stream_executor/cuda/cuda_helpers.h @@ -27,8 +27,7 @@ limitations under the License. #include "cuda/include/cuComplex.h" #include "cuda/include/cuda.h" -namespace perftools { -namespace gputools { +namespace stream_executor { template class DeviceMemory; @@ -101,7 +100,6 @@ inline cuDoubleComplex CUDAComplexValue(std::complex val) { } } // namespace cuda -} // namespace gputools -} // namespace perftools +} // namespace stream_executor #endif // TENSORFLOW_STREAM_EXECUTOR_CUDA_CUDA_HELPERS_H_ diff --git a/tensorflow/stream_executor/cuda/cuda_kernel.h b/tensorflow/stream_executor/cuda/cuda_kernel.h index 88d29fddd06ad7c07bf8e90f490db2f4458e3684..beaebe8f1233533053c97bbac7eb283deaf96a2c 100644 --- a/tensorflow/stream_executor/cuda/cuda_kernel.h +++ b/tensorflow/stream_executor/cuda/cuda_kernel.h @@ -40,8 +40,7 @@ limitations under the License. "CUDA runtime being included into CUDA GPU executor; should be driver only." #endif -namespace perftools { -namespace gputools { +namespace stream_executor { namespace cuda { // Wraps a CUfunction to implement the platform-independent KernelInterface. 
@@ -124,7 +123,6 @@ inline CUDAKernel *AsCUDAKernel(KernelBase *kernel) { } } // namespace cuda -} // namespace gputools -} // namespace perftools +} // namespace stream_executor #endif // TENSORFLOW_STREAM_EXECUTOR_CUDA_CUDA_KERNEL_H_ diff --git a/tensorflow/stream_executor/cuda/cuda_platform.cc b/tensorflow/stream_executor/cuda/cuda_platform.cc index 3a738461489212a026197bc58777883349ba4b54..7a6ef5a248f7f96535fb64a3b2ca8ca15bf997cc 100644 --- a/tensorflow/stream_executor/cuda/cuda_platform.cc +++ b/tensorflow/stream_executor/cuda/cuda_platform.cc @@ -24,8 +24,7 @@ limitations under the License. #include "tensorflow/stream_executor/lib/status.h" #include "tensorflow/stream_executor/lib/stringprintf.h" -namespace perftools { -namespace gputools { +namespace stream_executor { namespace cuda { namespace { @@ -41,16 +40,16 @@ const DeviceOptions GetDeviceOptionsFromEnv() { std::getenv("TF_CUDA_PLATFORM_GPU_DEVICE_SCHEDULE"); if (gpu_schedule_string == nullptr) { - return perftools::gputools::DeviceOptions::Default(); + return DeviceOptions::Default(); } unsigned device_flags = 0; if (strcmp(kScheduleSpinString, gpu_schedule_string) == 0) { - device_flags = perftools::gputools::DeviceOptions::kScheduleSpin; + device_flags = DeviceOptions::kScheduleSpin; } else if (strcmp(kScheduleYieldString, gpu_schedule_string) == 0) { - device_flags = perftools::gputools::DeviceOptions::kScheduleYield; + device_flags = DeviceOptions::kScheduleYield; } else if (strcmp(kScheduleBlockingSyncString, gpu_schedule_string) == 0) { - device_flags = perftools::gputools::DeviceOptions::kScheduleBlockingSync; + device_flags = DeviceOptions::kScheduleBlockingSync; } else { LOG(QFATAL) << "Unknown option for environment variable " "TF_CUDA_PLATFORM_GPU_DEVICE_SCHEDULE " @@ -59,7 +58,7 @@ const DeviceOptions GetDeviceOptionsFromEnv() { << ", " << kScheduleYieldString << "}"; } - return perftools::gputools::DeviceOptions(device_flags); + return DeviceOptions(device_flags); } } // namespace @@ 
-202,11 +201,10 @@ static void InitializeCudaPlatform() { SE_CHECK_OK(MultiPlatformManager::RegisterPlatform(std::move(platform))); } -} // namespace gputools -} // namespace perftools +} // namespace stream_executor REGISTER_MODULE_INITIALIZER(cuda_platform, - perftools::gputools::InitializeCudaPlatform()); + stream_executor::InitializeCudaPlatform()); DECLARE_MODULE_INITIALIZER(multi_platform_manager); // Note that module initialization sequencing is not supported in the diff --git a/tensorflow/stream_executor/cuda/cuda_platform.h b/tensorflow/stream_executor/cuda/cuda_platform.h index dab25602d08f394a2f8504ef9affe92552e3311f..fc0e15d5a6a9142f064085d34fcfaedfb25f433a 100644 --- a/tensorflow/stream_executor/cuda/cuda_platform.h +++ b/tensorflow/stream_executor/cuda/cuda_platform.h @@ -31,8 +31,7 @@ limitations under the License. #include "tensorflow/stream_executor/stream_executor_pimpl.h" #include "tensorflow/stream_executor/trace_listener.h" -namespace perftools { -namespace gputools { +namespace stream_executor { namespace cuda { // Opaque and unique identifier for the CUDA platform plugin. @@ -104,7 +103,6 @@ class CudaPlatform : public Platform { }; } // namespace cuda -} // namespace gputools -} // namespace perftools +} // namespace stream_executor #endif // TENSORFLOW_STREAM_EXECUTOR_CUDA_CUDA_PLATFORM_H_ diff --git a/tensorflow/stream_executor/cuda/cuda_platform_id.cc b/tensorflow/stream_executor/cuda/cuda_platform_id.cc index dfd11a9abe87852924d5a0f52454ebd432f566e4..a7bb304cc8cde52dc8aa4512e911de3f40188100 100644 --- a/tensorflow/stream_executor/cuda/cuda_platform_id.cc +++ b/tensorflow/stream_executor/cuda/cuda_platform_id.cc @@ -15,12 +15,10 @@ limitations under the License. 
#include "tensorflow/stream_executor/cuda/cuda_platform_id.h" -namespace perftools { -namespace gputools { +namespace stream_executor { namespace cuda { PLATFORM_DEFINE_ID(kCudaPlatformId); } // namespace cuda -} // namespace gputools -} // namespace perftools +} // namespace stream_executor diff --git a/tensorflow/stream_executor/cuda/cuda_platform_id.h b/tensorflow/stream_executor/cuda/cuda_platform_id.h index c677724517c1394284fb8b723933fa4b71d48ceb..92bcfd8372262dc127b71d399b6249efa9eff5dc 100644 --- a/tensorflow/stream_executor/cuda/cuda_platform_id.h +++ b/tensorflow/stream_executor/cuda/cuda_platform_id.h @@ -18,8 +18,7 @@ limitations under the License. #include "tensorflow/stream_executor/platform.h" -namespace perftools { -namespace gputools { +namespace stream_executor { namespace cuda { // Opaque and unique identifier for the cuda platform. @@ -30,7 +29,6 @@ namespace cuda { extern const Platform::Id kCudaPlatformId; } // namespace cuda -} // namespace gputools -} // namespace perftools +} // namespace stream_executor #endif // TENSORFLOW_STREAM_EXECUTOR_CUDA_CUDA_PLATFORM_ID_H_ diff --git a/tensorflow/stream_executor/cuda/cuda_rng.cc b/tensorflow/stream_executor/cuda/cuda_rng.cc index 8641b6022776048baf93723d914d68b4a73f881c..e289e7ced57b164412b0fd78b538c3b08c7db5aa 100644 --- a/tensorflow/stream_executor/cuda/cuda_rng.cc +++ b/tensorflow/stream_executor/cuda/cuda_rng.cc @@ -54,15 +54,14 @@ std::ostream &operator<<(std::ostream &in, const curandStatus_t &status) { } } -namespace perftools { -namespace gputools { +namespace stream_executor { namespace cuda { PLUGIN_REGISTRY_DEFINE_PLUGIN_ID(kCuRandPlugin); namespace wrap { -#define PERFTOOLS_GPUTOOLS_CURAND_WRAP(__name) \ +#define STREAM_EXECUTOR_CURAND_WRAP(__name) \ struct WrapperShim__##__name { \ template \ curandStatus_t operator()(CUDAExecutor *parent, Args... 
args) { \ @@ -71,15 +70,15 @@ namespace wrap { } \ } __name; -PERFTOOLS_GPUTOOLS_CURAND_WRAP(curandCreateGenerator); -PERFTOOLS_GPUTOOLS_CURAND_WRAP(curandDestroyGenerator); -PERFTOOLS_GPUTOOLS_CURAND_WRAP(curandSetStream); -PERFTOOLS_GPUTOOLS_CURAND_WRAP(curandGenerateUniform); -PERFTOOLS_GPUTOOLS_CURAND_WRAP(curandGenerateUniformDouble); -PERFTOOLS_GPUTOOLS_CURAND_WRAP(curandSetPseudoRandomGeneratorSeed); -PERFTOOLS_GPUTOOLS_CURAND_WRAP(curandSetGeneratorOffset); -PERFTOOLS_GPUTOOLS_CURAND_WRAP(curandGenerateNormal); -PERFTOOLS_GPUTOOLS_CURAND_WRAP(curandGenerateNormalDouble); +STREAM_EXECUTOR_CURAND_WRAP(curandCreateGenerator); +STREAM_EXECUTOR_CURAND_WRAP(curandDestroyGenerator); +STREAM_EXECUTOR_CURAND_WRAP(curandSetStream); +STREAM_EXECUTOR_CURAND_WRAP(curandGenerateUniform); +STREAM_EXECUTOR_CURAND_WRAP(curandGenerateUniformDouble); +STREAM_EXECUTOR_CURAND_WRAP(curandSetPseudoRandomGeneratorSeed); +STREAM_EXECUTOR_CURAND_WRAP(curandSetGeneratorOffset); +STREAM_EXECUTOR_CURAND_WRAP(curandGenerateNormal); +STREAM_EXECUTOR_CURAND_WRAP(curandGenerateNormalDouble); } // namespace wrap @@ -271,42 +270,40 @@ bool CUDARng::SetSeed(Stream *stream, const uint8 *seed, uint64 seed_bytes) { } } // namespace cuda -} // namespace gputools -} // namespace perftools - -namespace gpu = ::perftools::gputools; - -REGISTER_MODULE_INITIALIZER(register_curand, { - gpu::port::Status status = - gpu::PluginRegistry::Instance() - ->RegisterFactory( - gpu::cuda::kCudaPlatformId, gpu::cuda::kCuRandPlugin, "cuRAND", - [](gpu::internal::StreamExecutorInterface - *parent) -> gpu::rng::RngSupport * { - gpu::cuda::CUDAExecutor *cuda_executor = - dynamic_cast(parent); - if (cuda_executor == nullptr) { - LOG(ERROR) - << "Attempting to initialize an instance of the cuRAND " - << "support library with a non-CUDA StreamExecutor"; - return nullptr; - } - - gpu::cuda::CUDARng *rng = new gpu::cuda::CUDARng(cuda_executor); - if (!rng->Init()) { - // Note: Init() will log a more specific error. 
- delete rng; - return nullptr; - } - return rng; - }); + +void initialize_curand() { + port::Status status = + PluginRegistry::Instance()->RegisterFactory( + cuda::kCudaPlatformId, cuda::kCuRandPlugin, "cuRAND", + [](internal::StreamExecutorInterface *parent) -> rng::RngSupport * { + cuda::CUDAExecutor *cuda_executor = + dynamic_cast(parent); + if (cuda_executor == nullptr) { + LOG(ERROR) + << "Attempting to initialize an instance of the cuRAND " + << "support library with a non-CUDA StreamExecutor"; + return nullptr; + } + + cuda::CUDARng *rng = new cuda::CUDARng(cuda_executor); + if (!rng->Init()) { + // Note: Init() will log a more specific error. + delete rng; + return nullptr; + } + return rng; + }); if (!status.ok()) { LOG(ERROR) << "Unable to register cuRAND factory: " << status.error_message(); } - gpu::PluginRegistry::Instance()->SetDefaultFactory(gpu::cuda::kCudaPlatformId, - gpu::PluginKind::kRng, - gpu::cuda::kCuRandPlugin); -}); + PluginRegistry::Instance()->SetDefaultFactory( + cuda::kCudaPlatformId, PluginKind::kRng, cuda::kCuRandPlugin); +} + +} // namespace stream_executor + +REGISTER_MODULE_INITIALIZER(register_curand, + { stream_executor::initialize_curand(); }); diff --git a/tensorflow/stream_executor/cuda/cuda_rng.h b/tensorflow/stream_executor/cuda/cuda_rng.h index 5bbfd0b37a0dafb525e2ed57ccec2daa0a0c52d8..57ef398aaa88da7de769c49820325c6c9feb4d70 100644 --- a/tensorflow/stream_executor/cuda/cuda_rng.h +++ b/tensorflow/stream_executor/cuda/cuda_rng.h @@ -24,8 +24,7 @@ limitations under the License. 
typedef struct curandGenerator_st *curandGenerator_t; -namespace perftools { -namespace gputools { +namespace stream_executor { class Stream; template @@ -98,7 +97,6 @@ class CUDARng : public rng::RngSupport { }; } // namespace cuda -} // namespace gputools -} // namespace perftools +} // namespace stream_executor #endif // TENSORFLOW_STREAM_EXECUTOR_CUDA_CUDA_RNG_H_ diff --git a/tensorflow/stream_executor/cuda/cuda_stream.cc b/tensorflow/stream_executor/cuda/cuda_stream.cc index 3eb37a7d84189cacb54d1e45fb0113030a9402f2..b5aa7694f7e1d8d47f3252d3ba679292155119b5 100644 --- a/tensorflow/stream_executor/cuda/cuda_stream.cc +++ b/tensorflow/stream_executor/cuda/cuda_stream.cc @@ -19,8 +19,7 @@ limitations under the License. #include "tensorflow/stream_executor/lib/status.h" #include "tensorflow/stream_executor/stream.h" -namespace perftools { -namespace gputools { +namespace stream_executor { namespace cuda { bool CUDAStream::Init() { @@ -59,5 +58,4 @@ CUstream AsCUDAStreamValue(Stream *stream) { } } // namespace cuda -} // namespace gputools -} // namespace perftools +} // namespace stream_executor diff --git a/tensorflow/stream_executor/cuda/cuda_stream.h b/tensorflow/stream_executor/cuda/cuda_stream.h index 7358243dc4087194aa67da9958546c0487e95b8a..02edff643117fc2e3c6e6f74d2932f3f4c00c66d 100644 --- a/tensorflow/stream_executor/cuda/cuda_stream.h +++ b/tensorflow/stream_executor/cuda/cuda_stream.h @@ -23,8 +23,7 @@ limitations under the License. 
#include "tensorflow/stream_executor/platform/thread_annotations.h" #include "tensorflow/stream_executor/stream_executor_internal.h" -namespace perftools { -namespace gputools { +namespace stream_executor { namespace cuda { class CUDAExecutor; @@ -89,7 +88,6 @@ CUDAStream *AsCUDAStream(Stream *stream); CUstream AsCUDAStreamValue(Stream *stream); } // namespace cuda -} // namespace gputools -} // namespace perftools +} // namespace stream_executor #endif // TENSORFLOW_STREAM_EXECUTOR_CUDA_CUDA_STREAM_H_ diff --git a/tensorflow/stream_executor/cuda/cuda_timer.cc b/tensorflow/stream_executor/cuda/cuda_timer.cc index 4bd5503348f4dc92a0ce3c18aaf0128174a94121..991a12a23d632bd9fb4c97a340e244f6ffb4c7d3 100644 --- a/tensorflow/stream_executor/cuda/cuda_timer.cc +++ b/tensorflow/stream_executor/cuda/cuda_timer.cc @@ -20,23 +20,24 @@ limitations under the License. #include "tensorflow/stream_executor/cuda/cuda_stream.h" #include "tensorflow/stream_executor/lib/status.h" -namespace perftools { -namespace gputools { +namespace stream_executor { namespace cuda { bool CUDATimer::Init() { CHECK(start_event_ == nullptr && stop_event_ == nullptr); CudaContext* context = parent_->cuda_context(); - if (!CUDADriver::CreateEvent(context, &start_event_, - CUDADriver::EventFlags::kDefault) - .ok()) { + port::Status status = CUDADriver::CreateEvent( + context, &start_event_, CUDADriver::EventFlags::kDefault); + if (!status.ok()) { + LOG(ERROR) << status; return false; } - if (!CUDADriver::CreateEvent(context, &stop_event_, - CUDADriver::EventFlags::kDefault) - .ok()) { - port::Status status = CUDADriver::DestroyEvent(context, &start_event_); + status = CUDADriver::CreateEvent(context, &stop_event_, + CUDADriver::EventFlags::kDefault); + if (!status.ok()) { + LOG(ERROR) << status; + status = CUDADriver::DestroyEvent(context, &start_event_); if (!status.ok()) { LOG(ERROR) << status; } @@ -71,18 +72,23 @@ float CUDATimer::GetElapsedMilliseconds() const { return elapsed_milliseconds; } -bool 
CUDATimer::Start(CUDAStream *stream) { - return CUDADriver::RecordEvent(parent_->cuda_context(), start_event_, - stream->cuda_stream()) - .ok(); +bool CUDATimer::Start(CUDAStream* stream) { + port::Status status = CUDADriver::RecordEvent( + parent_->cuda_context(), start_event_, stream->cuda_stream()); + if (!status.ok()) { + LOG(ERROR) << status; + } + return status.ok(); } -bool CUDATimer::Stop(CUDAStream *stream) { - return CUDADriver::RecordEvent(parent_->cuda_context(), stop_event_, - stream->cuda_stream()) - .ok(); +bool CUDATimer::Stop(CUDAStream* stream) { + port::Status status = CUDADriver::RecordEvent( + parent_->cuda_context(), stop_event_, stream->cuda_stream()); + if (!status.ok()) { + LOG(ERROR) << status; + } + return status.ok(); } } // namespace cuda -} // namespace gputools -} // namespace perftools +} // namespace stream_executor diff --git a/tensorflow/stream_executor/cuda/cuda_timer.h b/tensorflow/stream_executor/cuda/cuda_timer.h index 4a2714dc1fada4f6081f7edc421660bef300a5b7..70554ec93120fcb0251ba0995a1ce9d6e5997016 100644 --- a/tensorflow/stream_executor/cuda/cuda_timer.h +++ b/tensorflow/stream_executor/cuda/cuda_timer.h @@ -23,8 +23,7 @@ limitations under the License. #include "tensorflow/stream_executor/cuda/cuda_driver.h" #include "tensorflow/stream_executor/cuda/cuda_gpu_executor.h" -namespace perftools { -namespace gputools { +namespace stream_executor { namespace cuda { class CUDAExecutor; @@ -60,13 +59,13 @@ class CUDATimer : public internal::TimerInterface { // events. float GetElapsedMilliseconds() const; - // See perftools::gputools::Timer::Microseconds(). + // See Timer::Microseconds(). // TODO(leary) make this into an error code interface... uint64 Microseconds() const override { return GetElapsedMilliseconds() * 1e3; } - // See perftools::GPUTools::Timer::Nanoseconds(). + // See Timer::Nanoseconds(). 
uint64 Nanoseconds() const override { return GetElapsedMilliseconds() * 1e6; } private: @@ -77,8 +76,14 @@ class CUDATimer : public internal::TimerInterface { // executing in a stream. }; +struct TimerDeleter { + void operator()(CUDATimer *t) { + t->Destroy(); + delete t; + } +}; + } // namespace cuda -} // namespace gputools -} // namespace perftools +} // namespace stream_executor #endif // TENSORFLOW_STREAM_EXECUTOR_CUDA_CUDA_TIMER_H_ diff --git a/tensorflow/stream_executor/cuda/cudnn_version.cc b/tensorflow/stream_executor/cuda/cudnn_version.cc index 5591801aae2526d528289f9b2267d864cf766045..e8fcc0361850a561928d09f29f78fb57071c24b2 100644 --- a/tensorflow/stream_executor/cuda/cudnn_version.cc +++ b/tensorflow/stream_executor/cuda/cudnn_version.cc @@ -15,8 +15,7 @@ limitations under the License. #include "tensorflow/stream_executor/cuda/cudnn_version.h" -namespace perftools { -namespace gputools { +namespace stream_executor { namespace cuda { bool IsSourceCompatibleWithCudnnLibrary(CudnnVersion source_version, @@ -38,5 +37,4 @@ bool IsSourceCompatibleWithCudnnLibrary(CudnnVersion source_version, } } // namespace cuda -} // namespace gputools -} // namespace perftools +} // namespace stream_executor diff --git a/tensorflow/stream_executor/cuda/cudnn_version.h b/tensorflow/stream_executor/cuda/cudnn_version.h index 058cc87bfa12be523c3297711dfc573c2d465f0b..6464e7f8e8755b5b46b90a4b35d50509eb0cfde7 100644 --- a/tensorflow/stream_executor/cuda/cudnn_version.h +++ b/tensorflow/stream_executor/cuda/cudnn_version.h @@ -18,10 +18,9 @@ limitations under the License. 
#include -#include "absl/strings/str_join.h" +#include "tensorflow/core/lib/strings/strcat.h" -namespace perftools { -namespace gputools { +namespace stream_executor { namespace cuda { struct CudnnVersion { @@ -30,8 +29,9 @@ struct CudnnVersion { CudnnVersion(int major, int minor, int patch) : major_version(major), minor_version(minor), patch_level(patch) {} - std::string ToString() const { - return absl::StrJoin({major_version, minor_version, patch_level}, "."); + tensorflow::string ToString() const { + return tensorflow::strings::StrCat(major_version, ".", minor_version, ".", + patch_level); } int major_version; @@ -45,7 +45,6 @@ bool IsSourceCompatibleWithCudnnLibrary(CudnnVersion source_version, CudnnVersion loaded_version); } // namespace cuda -} // namespace gputools -} // namespace perftools +} // namespace stream_executor #endif // TENSORFLOW_STREAM_EXECUTOR_CUDA_CUDNN_VERSION_H_ diff --git a/tensorflow/stream_executor/cuda/cudnn_version_test.cc b/tensorflow/stream_executor/cuda/cudnn_version_test.cc index 230adafeb112f682b5ece4778921e18a4ad25f87..7d4c6399d040e9bcddff5d98d202ab00fdeffa58 100644 --- a/tensorflow/stream_executor/cuda/cudnn_version_test.cc +++ b/tensorflow/stream_executor/cuda/cudnn_version_test.cc @@ -15,11 +15,9 @@ limitations under the License. 
#include "tensorflow/stream_executor/cuda/cudnn_version.h" -#include "testing/base/public/gunit.h" #include "tensorflow/core/platform/test.h" -namespace perftools { -namespace gputools { +namespace stream_executor { namespace cuda { namespace { @@ -71,5 +69,4 @@ TEST(IsSourceCompatibleWithCudnnLibraryTest, Basic) { } // namespace } // namespace cuda -} // namespace gputools -} // namespace perftools +} // namespace stream_executor diff --git a/tensorflow/stream_executor/device_description.cc b/tensorflow/stream_executor/device_description.cc index 52f5319a3b16c771ce89843a963841b25df5467e..8ca0677f8a3b3173fd023bd120526a490c2e7878 100644 --- a/tensorflow/stream_executor/device_description.cc +++ b/tensorflow/stream_executor/device_description.cc @@ -21,8 +21,7 @@ limitations under the License. #include "tensorflow/stream_executor/lib/mathutil.h" #include "tensorflow/stream_executor/lib/strcat.h" -namespace perftools { -namespace gputools { +namespace stream_executor { static const uint64 kUninitializedUint64 = -1ULL; /* static */ const char *DeviceDescription::kUndefinedString = ""; @@ -234,6 +233,4 @@ uint64 CalculateRegisterLimitForTargetOccupancy( return 0; } - -} // namespace gputools -} // namespace perftools +} // namespace stream_executor diff --git a/tensorflow/stream_executor/device_description.h b/tensorflow/stream_executor/device_description.h index fcf0928096ed1f1bdf0499efb92af2bc9cb0eaa2..7f99d81ef3bc5e21d5ea225ecfbc5a97bbd01ef5 100644 --- a/tensorflow/stream_executor/device_description.h +++ b/tensorflow/stream_executor/device_description.h @@ -27,8 +27,7 @@ limitations under the License. 
#include "tensorflow/stream_executor/launch_dim.h" #include "tensorflow/stream_executor/platform/port.h" -namespace perftools { -namespace gputools { +namespace stream_executor { namespace internal { class DeviceDescriptionBuilder; } // namespace internal @@ -388,7 +387,6 @@ uint64 CalculateRegisterLimitForTargetOccupancy( const DeviceDescription &device_description, uint64 shared_memory_per_block, const ThreadDim &thread_dims, uint64 target_blocks_per_core); -} // namespace gputools -} // namespace perftools +} // namespace stream_executor #endif // TENSORFLOW_STREAM_EXECUTOR_DEVICE_DESCRIPTION_H_ diff --git a/tensorflow/stream_executor/device_memory.h b/tensorflow/stream_executor/device_memory.h index 4c92b7dc78523e894aaf954fc44b72e89891d153..5a5334e0f5f6e8744b92188de14d7fea0f2ff9a0 100644 --- a/tensorflow/stream_executor/device_memory.h +++ b/tensorflow/stream_executor/device_memory.h @@ -32,6 +32,16 @@ limitations under the License. namespace perftools { namespace gputools { +// Temporarily pull stream_executor into perftools::gputools while we migrate +// code to the new namespace. TODO(b/77980417): Remove this once we've +// completed the migration. +using namespace stream_executor; // NOLINT[build/namespaces] + +} // namespace gputools +} // namespace perftools + +namespace stream_executor { + class StreamExecutor; // void*-analogous device memory allocation. 
For the typed variation, see @@ -280,7 +290,6 @@ static_assert(sizeof(Float2) == 2 * sizeof(float), "Float2 must be packed"); static_assert(sizeof(Float4) == 4 * sizeof(float), "Float4 must be packed"); static_assert(sizeof(Double2) == 2 * sizeof(double), "Double2 must be packed"); -} // namespace gputools -} // namespace perftools +} // namespace stream_executor #endif // TENSORFLOW_STREAM_EXECUTOR_DEVICE_MEMORY_H_ diff --git a/tensorflow/stream_executor/device_options.h b/tensorflow/stream_executor/device_options.h index 169325e7d127edb83c896e1b18e5bf88641b46b1..2646950f42e986d0c732f212af0c24c403b142e5 100644 --- a/tensorflow/stream_executor/device_options.h +++ b/tensorflow/stream_executor/device_options.h @@ -25,8 +25,7 @@ limitations under the License. #include "tensorflow/stream_executor/platform/port.h" #include "tensorflow/stream_executor/platform/logging.h" -namespace perftools { -namespace gputools { +namespace stream_executor { // Indicates a set of options for a device's usage, which generally must be // provided at StreamExecutor device-initialization time. @@ -84,7 +83,6 @@ struct DeviceOptions { unsigned flags_; }; -} // namespace gputools -} // namespace perftools +} // namespace stream_executor #endif // TENSORFLOW_STREAM_EXECUTOR_DEVICE_OPTIONS_H_ diff --git a/tensorflow/stream_executor/dnn.cc b/tensorflow/stream_executor/dnn.cc index 44144a06139bf8661432cb930e53ba5218aac823..6edb572820104592c3e9e506bbc66ec008672ec0 100644 --- a/tensorflow/stream_executor/dnn.cc +++ b/tensorflow/stream_executor/dnn.cc @@ -18,8 +18,7 @@ limitations under the License. 
#include "tensorflow/stream_executor/lib/strcat.h" #include "tensorflow/stream_executor/lib/stringprintf.h" -namespace perftools { -namespace gputools { +namespace stream_executor { namespace dnn { bool DnnSupport::GetConvolveAlgorithms( @@ -28,6 +27,10 @@ bool DnnSupport::GetConvolveAlgorithms( return false; } +bool DnnSupport::GetRnnAlgorithms(std::vector* out_algorithms) { + return false; +} + bool DnnSupport::GetConvolveBackwardDataAlgorithms( bool with_winograd_nonfused, int cc_major, int cc_minor, std::vector* out_algorithms) { @@ -550,5 +553,4 @@ string NormalizeDescriptor::ToShortString() const { } } // namespace dnn -} // namespace gputools -} // namespace perftools +} // namespace stream_executor diff --git a/tensorflow/stream_executor/dnn.h b/tensorflow/stream_executor/dnn.h index b41536e638873412a31a0cdbbd3ba3a818dd9cf2..39f21d8b105058caa28af1b32f600383821b46c1 100644 --- a/tensorflow/stream_executor/dnn.h +++ b/tensorflow/stream_executor/dnn.h @@ -25,6 +25,7 @@ limitations under the License. #include #include #include +#include #include "tensorflow/stream_executor/device_memory.h" #include "tensorflow/stream_executor/lib/array_slice.h" @@ -37,8 +38,7 @@ namespace Eigen { struct half; } // namespace Eigen -namespace perftools { -namespace gputools { +namespace stream_executor { class HostBuffer; class Stream; @@ -875,6 +875,22 @@ enum class ElementwiseOperation { kAdd, kMultiply }; string ElementwiseOperationString(ElementwiseOperation op); +// A simple class representing the version of the backing library, to +// workaround the "too perfect forwarding" issue in gcc6+ compilers. +// See PR#16309 and issue #18402 for links discussing the issue. 
+class VersionInfo { + public: + VersionInfo(int major = 0, int minor = 0, int patch = 0) + : major_(major), minor_(minor), patch_(patch) {} + int major_version() { return major_; } + int minor_version() { return minor_; } + int patch() { return patch_; } + private: + int major_; + int minor_; + int patch_; +}; + // Suite of operations typically used for implementing Deep/Convolutional Neural // Nets. Note: A false return value of an operation indicates the // implementation is not available. @@ -885,6 +901,12 @@ class DnnSupport { virtual port::Status Init() = 0; + // Gets the version of the backing library, as a VersionInfo object. + virtual port::StatusOr GetVersion() { + return port::UnimplementedError( + "DnnSupport::GetVersion not implemented on this platform."); + } + // Performs a single-precision forward batch normalization operation onto // the stream. // @@ -1195,6 +1217,9 @@ class DnnSupport { bool with_winograd_nonfused, int cc_major, int cc_minor, std::vector* out_algorithms); + // Returns a list of supported rnn algorithms. + virtual bool GetRnnAlgorithms(std::vector* out_algorithms); + // Version of DoConvolve that uses pre-quantized 8 bit coefficients. 
// coefficient_scales specifies the scaling of each column of coefficients: // original float coefficient[row * num_columns + column] = @@ -2001,6 +2026,7 @@ class DnnSupport { dnn::RnnInputMode input_mode, dnn::RnnDirectionMode direction_mode, dnn::RnnMode rnn_mode, dnn::DataType data_type, + const dnn::AlgorithmConfig& algorithm_config, float dropout, uint64 seed, ScratchAllocator* state_allocator) { return port::Status{port::error::UNIMPLEMENTED, @@ -2076,7 +2102,8 @@ class DnnSupport { DeviceMemory* output_c_data, bool is_training, ScratchAllocator* reserve_space_allocator, - ScratchAllocator* workspace_allocator) { + ScratchAllocator* workspace_allocator, + dnn::ProfileResult* output_profile_result) { return false; } @@ -2096,7 +2123,8 @@ class DnnSupport { DeviceMemory* output_c_data, bool is_training, ScratchAllocator* reserve_space_allocator, - ScratchAllocator* workspace_allocator) { + ScratchAllocator* workspace_allocator, + dnn::ProfileResult* output_profile_result) { return false; } @@ -2116,7 +2144,8 @@ class DnnSupport { DeviceMemory* output_c_data, bool is_training, ScratchAllocator* reserve_space_allocator, - ScratchAllocator* workspace_allocator) { + ScratchAllocator* workspace_allocator, + dnn::ProfileResult* output_profile_result) { return false; } // Enqueue a backward operation of the RNN model onto the stream. 
@@ -2183,7 +2212,8 @@ class DnnSupport { DeviceMemory* input_c_backprop_data, DeviceMemory* params_backprop_data, DeviceMemory* reserve_space_data, - ScratchAllocator* workspace_allocator) { + ScratchAllocator* workspace_allocator, + dnn::ProfileResult* output_profile_result) { return false; } @@ -2210,7 +2240,8 @@ class DnnSupport { DeviceMemory* input_c_backprop_data, DeviceMemory* params_backprop_data, DeviceMemory* reserve_space_data, - ScratchAllocator* workspace_allocator) { + ScratchAllocator* workspace_allocator, + dnn::ProfileResult* output_profile_result) { return false; } @@ -2237,7 +2268,8 @@ class DnnSupport { DeviceMemory* input_c_backprop_data, DeviceMemory* params_backprop_data, DeviceMemory* reserve_space_data, - ScratchAllocator* workspace_allocator) { + ScratchAllocator* workspace_allocator, + dnn::ProfileResult* output_profile_result) { return false; } @@ -2268,7 +2300,6 @@ class DnnSupport { }; } // namespace dnn -} // namespace gputools -} // namespace perftools +} // namespace stream_executor #endif // TENSORFLOW_STREAM_EXECUTOR_DNN_H_ diff --git a/tensorflow/stream_executor/dso_loader.cc b/tensorflow/stream_executor/dso_loader.cc index 95168836278add5d6592ff0c3d0f7245e6f6bc5b..114143b3abef00e757da3263449454fb1908fd53 100644 --- a/tensorflow/stream_executor/dso_loader.cc +++ b/tensorflow/stream_executor/dso_loader.cc @@ -37,8 +37,7 @@ limitations under the License. 
#include "cuda/cuda_config.h" #endif -namespace perftools { -namespace gputools { +namespace stream_executor { namespace internal { string GetCudaVersion() { return TF_CUDA_VERSION; } @@ -291,5 +290,4 @@ static std::vector* CreatePrimordialRpaths() { } } // namespace internal -} // namespace gputools -} // namespace perftools +} // namespace stream_executor diff --git a/tensorflow/stream_executor/dso_loader.h b/tensorflow/stream_executor/dso_loader.h index 354c7b50b8209755991827b3c36afac790cb952b..9ee081cb3d64e8878fa9d7f0c33da7f6827da620 100644 --- a/tensorflow/stream_executor/dso_loader.h +++ b/tensorflow/stream_executor/dso_loader.h @@ -28,8 +28,7 @@ limitations under the License. #include "tensorflow/stream_executor/platform.h" #include "tensorflow/stream_executor/platform/mutex.h" -namespace perftools { -namespace gputools { +namespace stream_executor { namespace internal { // Permits StreamExecutor code to dynamically load a pre-determined set of @@ -114,7 +113,6 @@ class CachedDsoLoader { }; } // namespace internal -} // namespace gputools -} // namespace perftools +} // namespace stream_executor #endif // TENSORFLOW_STREAM_EXECUTOR_DSO_LOADER_H_ diff --git a/tensorflow/stream_executor/event.cc b/tensorflow/stream_executor/event.cc index c423a453e9f1cb4f1d484427c11eacf959da7a8d..50a6edd80bd39004e32f09bcde36fbc8a8b59ad9 100644 --- a/tensorflow/stream_executor/event.cc +++ b/tensorflow/stream_executor/event.cc @@ -19,8 +19,7 @@ limitations under the License. 
#include "tensorflow/stream_executor/stream_executor_pimpl.h" #include "tensorflow/stream_executor/stream.h" -namespace perftools { -namespace gputools { +namespace stream_executor { Event::Event(StreamExecutor* stream_exec) : stream_exec_(stream_exec), @@ -48,5 +47,4 @@ Event::Status Event::PollForStatus() { return stream_exec_->PollForEventStatus(this); } -} // namespace gputools -} // namespace perftools +} // namespace stream_executor diff --git a/tensorflow/stream_executor/event.h b/tensorflow/stream_executor/event.h index a06c26ea5191d53c7d8ffc1e2b460cb594dd6f4a..1f37262c78d82f72f8818f35db273e87a47bdc1c 100644 --- a/tensorflow/stream_executor/event.h +++ b/tensorflow/stream_executor/event.h @@ -20,8 +20,7 @@ limitations under the License. #include "tensorflow/stream_executor/platform/port.h" -namespace perftools { -namespace gputools { +namespace stream_executor { namespace internal { class EventInterface; @@ -76,7 +75,6 @@ class Event { SE_DISALLOW_COPY_AND_ASSIGN(Event); }; -} // namespace gputools -} // namespace perftools +} // namespace stream_executor #endif // TENSORFLOW_STREAM_EXECUTOR_EVENT_H_ diff --git a/tensorflow/stream_executor/executor_cache.cc b/tensorflow/stream_executor/executor_cache.cc index d1a8aae167455a7dc728999fbbaf1a119cf6a101..0b3ad7ebbc905ab1bd7e3298bb560af0e3868656 100644 --- a/tensorflow/stream_executor/executor_cache.cc +++ b/tensorflow/stream_executor/executor_cache.cc @@ -17,8 +17,7 @@ limitations under the License. 
#include "tensorflow/stream_executor/lib/stringprintf.h" -namespace perftools { -namespace gputools { +namespace stream_executor { port::StatusOr ExecutorCache::GetOrCreate( const StreamExecutorConfig& config, @@ -104,5 +103,4 @@ ExecutorCache::Entry::~Entry() { configurations.clear(); } -} // namespace gputools -} // namespace perftools +} // namespace stream_executor diff --git a/tensorflow/stream_executor/executor_cache.h b/tensorflow/stream_executor/executor_cache.h index 12f2275f6d8ecde4819d56ab9411c1087b76ddc5..bbeeaed787c27a62e2b0b22a75779b5350187d83 100644 --- a/tensorflow/stream_executor/executor_cache.h +++ b/tensorflow/stream_executor/executor_cache.h @@ -24,8 +24,7 @@ limitations under the License. #include "tensorflow/stream_executor/platform/mutex.h" #include "tensorflow/stream_executor/stream_executor_pimpl.h" -namespace perftools { -namespace gputools { +namespace stream_executor { // Utility class to allow Platform objects to manage cached StreamExecutors. // Thread-safe. @@ -76,7 +75,6 @@ class ExecutorCache { SE_DISALLOW_COPY_AND_ASSIGN(ExecutorCache); }; -} // namespace gputools -} // namespace perftools +} // namespace stream_executor #endif // TENSORFLOW_STREAM_EXECUTOR_EXECUTOR_CACHE_H_ diff --git a/tensorflow/stream_executor/fft.h b/tensorflow/stream_executor/fft.h index 6b1728829abdeb5c4e20534675801a437341d732..814efb2e923cb25833515f86e1b3f6f0bec437da 100644 --- a/tensorflow/stream_executor/fft.h +++ b/tensorflow/stream_executor/fft.h @@ -48,8 +48,7 @@ limitations under the License. #include #include "tensorflow/stream_executor/platform/port.h" -namespace perftools { -namespace gputools { +namespace stream_executor { class Stream; template @@ -210,7 +209,7 @@ class FftSupport { // Macro used to quickly declare overrides for abstract virtuals in the // fft::FftSupport base class. Assumes that it's emitted somewhere inside the -// ::perftools::gputools namespace. +// ::stream_executor namespace. 
#define TENSORFLOW_STREAM_EXECUTOR_GPU_FFT_SUPPORT_OVERRIDES \ std::unique_ptr Create1dPlan(Stream *stream, uint64 num_x, \ fft::Type type, bool in_place_fft) \ @@ -265,7 +264,6 @@ class FftSupport { DeviceMemory *output) override; } // namespace fft -} // namespace gputools -} // namespace perftools +} // namespace stream_executor #endif // TENSORFLOW_STREAM_EXECUTOR_FFT_H_ diff --git a/tensorflow/stream_executor/host/host_gpu_executor.cc b/tensorflow/stream_executor/host/host_gpu_executor.cc index 542f521ef778c3a69ec9adba74405131e07bcf1a..2c4819651acaa2c6ee99c720b2c3d80e5c2ea1a9 100644 --- a/tensorflow/stream_executor/host/host_gpu_executor.cc +++ b/tensorflow/stream_executor/host/host_gpu_executor.cc @@ -28,8 +28,7 @@ limitations under the License. bool FLAGS_stream_executor_cpu_real_clock_rate = false; -namespace perftools { -namespace gputools { +namespace stream_executor { namespace host { HostStream *AsHostStream(Stream *stream) { @@ -266,5 +265,4 @@ rng::RngSupport *HostExecutor::CreateRng() { } } // namespace host -} // namespace gputools -} // namespace perftools +} // namespace stream_executor diff --git a/tensorflow/stream_executor/host/host_gpu_executor.h b/tensorflow/stream_executor/host/host_gpu_executor.h index e2c0e6d6b77130bd190b026f1eaff68d21dbf632..0c3991c151d5bbb84333240aa1ca4f9e1587330d 100644 --- a/tensorflow/stream_executor/host/host_gpu_executor.h +++ b/tensorflow/stream_executor/host/host_gpu_executor.h @@ -28,8 +28,7 @@ limitations under the License. 
#include "tensorflow/stream_executor/stream_executor.h" #include "tensorflow/stream_executor/stream_executor_internal.h" -namespace perftools { -namespace gputools { +namespace stream_executor { namespace host { // An implementation of StreamExecutor that does no communication or interaction @@ -210,7 +209,6 @@ class HostExecutor : public internal::StreamExecutorInterface { }; } // namespace host -} // namespace gputools -} // namespace perftools +} // namespace stream_executor #endif // TENSORFLOW_STREAM_EXECUTOR_HOST_HOST_GPU_EXECUTOR_H_ diff --git a/tensorflow/stream_executor/host/host_platform.cc b/tensorflow/stream_executor/host/host_platform.cc index 2cb7d3696758365804786800e6a46e8bca7d80e7..00a17a05edea24d6485a948d72856537021c99ab 100644 --- a/tensorflow/stream_executor/host/host_platform.cc +++ b/tensorflow/stream_executor/host/host_platform.cc @@ -26,10 +26,7 @@ limitations under the License. #include "tensorflow/stream_executor/lib/status_macros.h" #include "tensorflow/stream_executor/lib/stringprintf.h" -namespace gpu = ::perftools::gputools; - -namespace perftools { -namespace gputools { +namespace stream_executor { namespace host { HostPlatform::HostPlatform() : name_("Host") {} @@ -93,16 +90,15 @@ void HostPlatform::UnregisterTraceListener(TraceListener* listener) { } static void InitializeHostPlatform() { - std::unique_ptr platform(new gpu::host::HostPlatform); - SE_CHECK_OK(gpu::MultiPlatformManager::RegisterPlatform(std::move(platform))); + std::unique_ptr platform(new host::HostPlatform); + SE_CHECK_OK(MultiPlatformManager::RegisterPlatform(std::move(platform))); } } // namespace host -} // namespace gputools -} // namespace perftools +} // namespace stream_executor -REGISTER_MODULE_INITIALIZER( - host_platform, perftools::gputools::host::InitializeHostPlatform()); +REGISTER_MODULE_INITIALIZER(host_platform, + stream_executor::host::InitializeHostPlatform()); DECLARE_MODULE_INITIALIZER(multi_platform_manager); // Note that module initialization 
sequencing is not supported in the diff --git a/tensorflow/stream_executor/host/host_platform.h b/tensorflow/stream_executor/host/host_platform.h index 0faec6c8b789766804c262fc5cf29c950fd31d3c..c6f46a2cc4028644237f206f5cd076a49e964702 100644 --- a/tensorflow/stream_executor/host/host_platform.h +++ b/tensorflow/stream_executor/host/host_platform.h @@ -33,8 +33,7 @@ limitations under the License. #include "tensorflow/stream_executor/stream_executor_pimpl.h" #include "tensorflow/stream_executor/trace_listener.h" -namespace perftools { -namespace gputools { +namespace stream_executor { namespace host { // Host (CPU) platform plugin, registered as a singleton value via module @@ -79,7 +78,6 @@ class HostPlatform : public Platform { }; } // namespace host -} // namespace gputools -} // namespace perftools +} // namespace stream_executor #endif // TENSORFLOW_STREAM_EXECUTOR_HOST_HOST_PLATFORM_H_ diff --git a/tensorflow/stream_executor/host/host_platform_id.cc b/tensorflow/stream_executor/host/host_platform_id.cc index 69a203f2985b67ecd67346e6dc394d961377e035..2256bccec3f4ef73bc667128aeb1fbf90b1dcd90 100644 --- a/tensorflow/stream_executor/host/host_platform_id.cc +++ b/tensorflow/stream_executor/host/host_platform_id.cc @@ -15,12 +15,10 @@ limitations under the License. #include "tensorflow/stream_executor/host/host_platform_id.h" -namespace perftools { -namespace gputools { +namespace stream_executor { namespace host { PLATFORM_DEFINE_ID(kHostPlatformId); } // namespace host -} // namespace gputools -} // namespace perftools +} // namespace stream_executor diff --git a/tensorflow/stream_executor/host/host_platform_id.h b/tensorflow/stream_executor/host/host_platform_id.h index 61d84ea2e2facae3a18798db15d529109b6dfc93..18d1f282f1f921bc9bb2508225398c98ffeb89bc 100644 --- a/tensorflow/stream_executor/host/host_platform_id.h +++ b/tensorflow/stream_executor/host/host_platform_id.h @@ -18,8 +18,7 @@ limitations under the License. 
#include "tensorflow/stream_executor/platform.h" -namespace perftools { -namespace gputools { +namespace stream_executor { namespace host { // Opaque and unique identifier for the host platform. @@ -30,7 +29,6 @@ namespace host { extern const Platform::Id kHostPlatformId; } // namespace host -} // namespace gputools -} // namespace perftools +} // namespace stream_executor #endif // TENSORFLOW_STREAM_EXECUTOR_HOST_HOST_PLATFORM_ID_H_ diff --git a/tensorflow/stream_executor/host/host_stream.cc b/tensorflow/stream_executor/host/host_stream.cc index 5961c3151600b778b4cbca7443b5913329368431..5a7d3b3dd49275edd5242c30b38bb4f505042816 100644 --- a/tensorflow/stream_executor/host/host_stream.cc +++ b/tensorflow/stream_executor/host/host_stream.cc @@ -17,8 +17,7 @@ limitations under the License. // the HostExecutor implementation. #include "tensorflow/stream_executor/host/host_stream.h" -namespace perftools { -namespace gputools { +namespace stream_executor { namespace host { HostStream::HostStream() @@ -53,5 +52,4 @@ void HostStream::BlockUntilDone() { } // namespace host -} // namespace gputools -} // namespace perftools +} // namespace stream_executor diff --git a/tensorflow/stream_executor/host/host_stream.h b/tensorflow/stream_executor/host/host_stream.h index 9894d17febcae24089bfe2948eb96bad701978d7..5d7b8a378268c3226a61fa43e738f209e84b30e9 100644 --- a/tensorflow/stream_executor/host/host_stream.h +++ b/tensorflow/stream_executor/host/host_stream.h @@ -24,8 +24,7 @@ limitations under the License. 
#include "tensorflow/stream_executor/lib/threadpool.h" #include "tensorflow/stream_executor/stream_executor_internal.h" -namespace perftools { -namespace gputools { +namespace stream_executor { namespace host { class HostStream : public internal::StreamInterface { @@ -52,7 +51,6 @@ class HostStream : public internal::StreamInterface { }; } // namespace host -} // namespace gputools -} // namespace perftools +} // namespace stream_executor #endif // TENSORFLOW_STREAM_EXECUTOR_HOST_HOST_STREAM_H_ diff --git a/tensorflow/stream_executor/host/host_timer.cc b/tensorflow/stream_executor/host/host_timer.cc index d84d825c92a6e8e06b004305c9d4cb032883f5a3..e138daf0e11f179c8690306b5cfef3ed95ae4cb2 100644 --- a/tensorflow/stream_executor/host/host_timer.cc +++ b/tensorflow/stream_executor/host/host_timer.cc @@ -19,8 +19,7 @@ limitations under the License. #include "tensorflow/stream_executor/stream.h" #include "tensorflow/stream_executor/stream_executor_internal.h" -namespace perftools { -namespace gputools { +namespace stream_executor { namespace host { using std::chrono::duration_cast; @@ -46,5 +45,4 @@ void HostTimer::StartNow() { start_time_ = clock::now(); } void HostTimer::StopNow() { duration_ = clock::now() - start_time_; } } // namespace host -} // namespace gputools -} // namespace perftools +} // namespace stream_executor diff --git a/tensorflow/stream_executor/host/host_timer.h b/tensorflow/stream_executor/host/host_timer.h index 17af7c0521d2ee956d6f1ee92dd2229b647785bd..5954b8023beb4fe78d37cd41c0379804a8dfa8aa 100644 --- a/tensorflow/stream_executor/host/host_timer.h +++ b/tensorflow/stream_executor/host/host_timer.h @@ -20,8 +20,7 @@ limitations under the License. 
#include "tensorflow/stream_executor/stream_executor_internal.h" -namespace perftools { -namespace gputools { +namespace stream_executor { namespace host { class HostTimer : public internal::TimerInterface { @@ -57,7 +56,6 @@ class HostTimer : public internal::TimerInterface { }; } // namespace host -} // namespace gputools -} // namespace perftools +} // namespace stream_executor #endif // TENSORFLOW_STREAM_EXECUTOR_HOST_HOST_TIMER_H_ diff --git a/tensorflow/stream_executor/host_buffer.h b/tensorflow/stream_executor/host_buffer.h index 8fa542e9ff8851551d424da6be073d0d9959af67..20299da5172f20b9b73c31b6491806dc57b1d2f0 100644 --- a/tensorflow/stream_executor/host_buffer.h +++ b/tensorflow/stream_executor/host_buffer.h @@ -18,8 +18,7 @@ limitations under the License. #include "tensorflow/stream_executor/dnn.h" -namespace perftools { -namespace gputools { +namespace stream_executor { // A HostBuffer is a block of memory in host memory containing the data for a // dnn::BatchDescriptor using a device-dependent memory layout. @@ -42,7 +41,6 @@ class HostBuffer { const dnn::BatchDescriptor descriptor_; }; -} // namespace gputools -} // namespace perftools +} // namespace stream_executor #endif // TENSORFLOW_STREAM_EXECUTOR_HOST_BUFFER_H_ diff --git a/tensorflow/stream_executor/kernel.cc b/tensorflow/stream_executor/kernel.cc index 636199cfa2762b7c42dd350dfd294762e3159299..d1aa596b73da3d7f8cdf4ddd6911d956239ed46d 100644 --- a/tensorflow/stream_executor/kernel.cc +++ b/tensorflow/stream_executor/kernel.cc @@ -27,8 +27,7 @@ limitations under the License. 
#include "tensorflow/stream_executor/platform/logging.h" #include "tensorflow/stream_executor/stream_executor.h" -namespace perftools { -namespace gputools { +namespace stream_executor { bool KernelMetadata::registers_per_thread(int *registers_per_thread) const { if (has_registers_per_thread_) { @@ -103,5 +102,4 @@ void KernelBase::set_name(port::StringPiece name) { demangled_name_ = port::Demangle(stubless_name.data()); } -} // namespace gputools -} // namespace perftools +} // namespace stream_executor diff --git a/tensorflow/stream_executor/kernel.h b/tensorflow/stream_executor/kernel.h index 5358eac1ae070efb2bead75c73208e9d283b498c..2216884b873cda98f09782866f23c06088b73e09 100644 --- a/tensorflow/stream_executor/kernel.h +++ b/tensorflow/stream_executor/kernel.h @@ -64,7 +64,7 @@ limitations under the License. // // Users typically won't need to type out the TypedKernel signature in full, it // will be typedef'd by automatically generated code; for example, see -// perftools::gputools::executor_sample::VecReduceAddKernel. +// stream_executor::executor_sample::VecReduceAddKernel. #ifndef TENSORFLOW_STREAM_EXECUTOR_KERNEL_H_ #define TENSORFLOW_STREAM_EXECUTOR_KERNEL_H_ @@ -82,8 +82,7 @@ limitations under the License. 
#include "tensorflow/stream_executor/lib/stringpiece.h" #include "tensorflow/stream_executor/platform/port.h" -namespace perftools { -namespace gputools { +namespace stream_executor { class DeviceMemoryBase; template @@ -639,8 +638,8 @@ struct KernelInvocationChecker { // NOTE: if you encounter an error here, you can see the mismatch by looking // at the end of the last error message, which will be of the form: // - // ...::Compatible &, - // perftools::gputools::DeviceMemory, true, + // ...::Compatible &, + // stream_executor::DeviceMemory, true, // 0>' // requested here // @@ -711,7 +710,6 @@ struct KernelParamsOk, Args...> { std::tuple, std::tuple>::CheckAllNoStaticAssert(); }; -} // namespace gputools -} // namespace perftools +} // namespace stream_executor #endif // TENSORFLOW_STREAM_EXECUTOR_KERNEL_H_ diff --git a/tensorflow/stream_executor/kernel_cache_config.h b/tensorflow/stream_executor/kernel_cache_config.h index 9d7ab1b79f6c94ce2fe64f14f7ebe3a9db44daf4..e63d6c6a0c05e8cd681a4e12f73b2468e170dea5 100644 --- a/tensorflow/stream_executor/kernel_cache_config.h +++ b/tensorflow/stream_executor/kernel_cache_config.h @@ -18,8 +18,7 @@ limitations under the License. #ifndef TENSORFLOW_STREAM_EXECUTOR_KERNEL_CACHE_CONFIG_H_ #define TENSORFLOW_STREAM_EXECUTOR_KERNEL_CACHE_CONFIG_H_ -namespace perftools { -namespace gputools { +namespace stream_executor { // This enum represents potential configurations of L1/shared memory when // running a particular kernel. 
These values represent user preference, and @@ -38,7 +37,6 @@ enum class KernelCacheConfig { kPreferEqual, }; -} // namespace gputools -} // namespace perftools +} // namespace stream_executor #endif // TENSORFLOW_STREAM_EXECUTOR_KERNEL_CACHE_CONFIG_H_ diff --git a/tensorflow/stream_executor/kernel_spec.cc b/tensorflow/stream_executor/kernel_spec.cc index 0404c573f0164782bb056b124ef60092c5985a5a..6a1f0a591ff0879821c6636e346dbc20105bcf7e 100644 --- a/tensorflow/stream_executor/kernel_spec.cc +++ b/tensorflow/stream_executor/kernel_spec.cc @@ -15,9 +15,7 @@ limitations under the License. #include "tensorflow/stream_executor/kernel_spec.h" - -namespace perftools { -namespace gputools { +namespace stream_executor { KernelLoaderSpec::KernelLoaderSpec(port::StringPiece kernelname) : kernelname_(kernelname.ToString()) {} @@ -247,5 +245,4 @@ MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddCudaCompressedPtxInMemory( MultiKernelLoaderSpec::MultiKernelLoaderSpec(size_t arity) : arity_(arity) {} -} // namespace gputools -} // namespace perftools +} // namespace stream_executor diff --git a/tensorflow/stream_executor/kernel_spec.h b/tensorflow/stream_executor/kernel_spec.h index 3811bd833e70b567002327cbe348574b37cb1126..7cc23bb4e64b45268f6bb00d9ea9ee4a686a0e25 100644 --- a/tensorflow/stream_executor/kernel_spec.h +++ b/tensorflow/stream_executor/kernel_spec.h @@ -56,8 +56,7 @@ limitations under the License. #include "tensorflow/stream_executor/platform/mutex.h" #include "tensorflow/stream_executor/platform/port.h" -namespace perftools { -namespace gputools { +namespace stream_executor { // Describes how to load a kernel on a target platform. 
// @@ -374,7 +373,6 @@ class MultiKernelLoaderSpec { size_t arity_; }; -} // namespace gputools -} // namespace perftools +} // namespace stream_executor #endif // TENSORFLOW_STREAM_EXECUTOR_KERNEL_SPEC_H_ diff --git a/tensorflow/stream_executor/launch_dim.h b/tensorflow/stream_executor/launch_dim.h index b95462667eea2c86d92060f3973e660f31d2d2dc..68f2f748407a87ec9cf3bdd411bf96c1a64b5681 100644 --- a/tensorflow/stream_executor/launch_dim.h +++ b/tensorflow/stream_executor/launch_dim.h @@ -21,7 +21,7 @@ limitations under the License. // a single PC in a unit called a warp. There is a maximum number of threads // that can execute in a shared-context entity called a block. Presently, that // number is 1024 -- again, something that should not be relied on from this -// comment, but checked via perftools::gputools::DeviceDescription. +// comment, but checked via stream_executor::DeviceDescription. // // For additional information, see // http://docs.nvidia.com/cuda/kepler-tuning-guide/#device-utilization-and-occupancy @@ -40,8 +40,7 @@ limitations under the License. #include "tensorflow/stream_executor/lib/strcat.h" #include "tensorflow/stream_executor/platform/port.h" -namespace perftools { -namespace gputools { +namespace stream_executor { // Basic type that represents a 3-dimensional index space. struct Dim3D { @@ -74,7 +73,6 @@ struct BlockDim : public Dim3D { } }; -} // namespace gputools -} // namespace perftools +} // namespace stream_executor #endif // TENSORFLOW_STREAM_EXECUTOR_LAUNCH_DIM_H_ diff --git a/tensorflow/stream_executor/lib/array_slice.h b/tensorflow/stream_executor/lib/array_slice.h index bef61bb2fc5611e84bc36320fa98f4df919372b7..8e3c4ca047bd467628551d8b8d351fa3baa956f7 100644 --- a/tensorflow/stream_executor/lib/array_slice.h +++ b/tensorflow/stream_executor/lib/array_slice.h @@ -18,14 +18,23 @@ limitations under the License. 
#include "tensorflow/core/lib/gtl/array_slice.h" -namespace perftools { -namespace gputools { +namespace stream_executor { namespace port { using tensorflow::gtl::ArraySlice; using tensorflow::gtl::MutableArraySlice; } // namespace port +} // namespace stream_executor + +namespace perftools { +namespace gputools { + +// Temporarily pull stream_executor into perftools::gputools while we migrate +// code to the new namespace. TODO(b/77980417): Remove this once we've +// completed the migration. +using namespace stream_executor; // NOLINT[build/namespaces] + } // namespace gputools } // namespace perftools diff --git a/tensorflow/stream_executor/lib/casts.h b/tensorflow/stream_executor/lib/casts.h index 2261944e252f6a757a154e64be0b10484a693125..ec562e804fae51ac09e336b2e03b8ab0d7f1ca0e 100644 --- a/tensorflow/stream_executor/lib/casts.h +++ b/tensorflow/stream_executor/lib/casts.h @@ -13,15 +13,14 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -// IWYU pragma: private, include "perftools/gputools/executor/stream_executor.h" +// IWYU pragma: private, include "third_party/tensorflow/stream_executor/stream_executor.h" #ifndef TENSORFLOW_STREAM_EXECUTOR_LIB_CASTS_H_ #define TENSORFLOW_STREAM_EXECUTOR_LIB_CASTS_H_ #include -namespace perftools { -namespace gputools { +namespace stream_executor { namespace port { // port::bit_cast is a template function that implements the @@ -96,7 +95,6 @@ inline Dest bit_cast(const Source& source) { } } // namespace port -} // namespace gputools -} // namespace perftools +} // namespace stream_executor #endif // TENSORFLOW_STREAM_EXECUTOR_LIB_CASTS_H_ diff --git a/tensorflow/stream_executor/lib/demangle.cc b/tensorflow/stream_executor/lib/demangle.cc index fa2b4fa005ca1a715b0593c7c270c62bcf936d21..adb6b4f2d11e537433ae86ad694d6e1de9171942 100644 --- a/tensorflow/stream_executor/lib/demangle.cc +++ b/tensorflow/stream_executor/lib/demangle.cc @@ -27,8 +27,7 @@ limitations under the License. #include #endif -namespace perftools { -namespace gputools { +namespace stream_executor { namespace port { // The API reference of abi::__cxa_demangle() can be found in @@ -49,5 +48,4 @@ string Demangle(const char *mangled) { } } // namespace port -} // namespace gputools -} // namespace perftools +} // namespace stream_executor diff --git a/tensorflow/stream_executor/lib/demangle.h b/tensorflow/stream_executor/lib/demangle.h index 30be522557808e4048c7a1f712d0b4884c1043ba..af16fa7d8cbcce5c17a7b25021546176570558bf 100644 --- a/tensorflow/stream_executor/lib/demangle.h +++ b/tensorflow/stream_executor/lib/demangle.h @@ -18,14 +18,12 @@ limitations under the License. 
#include "tensorflow/stream_executor/platform/port.h" -namespace perftools { -namespace gputools { +namespace stream_executor { namespace port { string Demangle(const char* mangled); } // namespace port -} // namespace gputools -} // namespace perftools +} // namespace stream_executor #endif // TENSORFLOW_STREAM_EXECUTOR_LIB_DEMANGLE_H_ diff --git a/tensorflow/stream_executor/lib/env.h b/tensorflow/stream_executor/lib/env.h index c9a22ebd55894f0ddf6654887e55289030871501..776eba04080e9e993b61997bc3e7532202ac4c5b 100644 --- a/tensorflow/stream_executor/lib/env.h +++ b/tensorflow/stream_executor/lib/env.h @@ -21,8 +21,7 @@ limitations under the License. #include "tensorflow/stream_executor/lib/stringpiece.h" #include "tensorflow/stream_executor/platform/port.h" -namespace perftools { -namespace gputools { +namespace stream_executor { namespace port { using tensorflow::Env; @@ -37,7 +36,6 @@ inline Status FileExists(const port::StringPiece& filename) { } } // namespace port -} // namespace gputools -} // namespace perftools +} // namespace stream_executor #endif // TENSORFLOW_STREAM_EXECUTOR_LIB_ENV_H_ diff --git a/tensorflow/stream_executor/lib/error.h b/tensorflow/stream_executor/lib/error.h index 89df70cb5e582508448b6886926b0c8905a14c2e..c659f5fc140d29e17fc0bf11ce707b01b2cb2e74 100644 --- a/tensorflow/stream_executor/lib/error.h +++ b/tensorflow/stream_executor/lib/error.h @@ -13,21 +13,19 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -// IWYU pragma: private, include "perftools/gputools/executor/stream_executor.h" +// IWYU pragma: private, include "third_party/tensorflow/stream_executor/stream_executor.h" #ifndef TENSORFLOW_STREAM_EXECUTOR_LIB_ERROR_H_ #define TENSORFLOW_STREAM_EXECUTOR_LIB_ERROR_H_ #include "tensorflow/core/lib/core/error_codes.pb.h" // IWYU pragma: export -namespace perftools { -namespace gputools { +namespace stream_executor { namespace port { namespace error = tensorflow::error; } // namespace port -} // namespace gputools -} // namespace perftools +} // namespace stream_executor #endif // TENSORFLOW_STREAM_EXECUTOR_LIB_ERROR_H_ diff --git a/tensorflow/stream_executor/lib/human_readable.h b/tensorflow/stream_executor/lib/human_readable.h index f918c180d98c0fc74d554e3c7e95995e6720d356..893865f6dadd1076a4daef6d820f667b57ab9023 100644 --- a/tensorflow/stream_executor/lib/human_readable.h +++ b/tensorflow/stream_executor/lib/human_readable.h @@ -22,8 +22,7 @@ limitations under the License. #include "tensorflow/stream_executor/lib/stringprintf.h" #include "tensorflow/stream_executor/platform/port.h" -namespace perftools { -namespace gputools { +namespace stream_executor { namespace port { class HumanReadableNumBytes { @@ -67,7 +66,6 @@ class HumanReadableNumBytes { }; } // namespace port -} // namespace gputools -} // namespace perftools +} // namespace stream_executor #endif // TENSORFLOW_STREAM_EXECUTOR_LIB_HUMAN_READABLE_H_ diff --git a/tensorflow/stream_executor/lib/initialize.h b/tensorflow/stream_executor/lib/initialize.h index 9a09318a6cbd215e396968bc1968be07a3029129..688b0214694478e9be1b1d14e58fda94367f547b 100644 --- a/tensorflow/stream_executor/lib/initialize.h +++ b/tensorflow/stream_executor/lib/initialize.h @@ -26,8 +26,7 @@ limitations under the License. 
#undef DECLARE_MODULE_INITIALIZER #undef REGISTER_MODULE_INITIALIZER_SEQUENCE -namespace perftools { -namespace gputools { +namespace stream_executor { namespace port { class Initializer { @@ -49,20 +48,18 @@ class Initializer { }; } // namespace port -} // namespace gputools -} // namespace perftools +} // namespace stream_executor -#define REGISTER_INITIALIZER(type, name, body) \ - static void google_init_##type##_##name() { body; } \ - perftools::gputools::port::Initializer google_initializer_##type##_##name( \ +#define REGISTER_INITIALIZER(type, name, body) \ + static void google_init_##type##_##name() { body; } \ + ::stream_executor::port::Initializer google_initializer_##type##_##name( \ google_init_##type##_##name) #define REGISTER_MODULE_INITIALIZER(name, body) \ REGISTER_INITIALIZER(module, name, body) -#define DECLARE_INITIALIZER(type, name) \ - extern perftools::gputools::port::Initializer \ - google_initializer_##type##_##name +#define DECLARE_INITIALIZER(type, name) \ + extern ::stream_executor::port::Initializer google_initializer_##type##_##name #define DECLARE_MODULE_INITIALIZER(name) DECLARE_INITIALIZER(module, name) diff --git a/tensorflow/stream_executor/lib/inlined_vector.h b/tensorflow/stream_executor/lib/inlined_vector.h index 55a1e3ad102c2fe25c9b4338a07029330bbf4ef6..40bdddb180f1135ef2fb3bfc51b06d9e68a3aff9 100644 --- a/tensorflow/stream_executor/lib/inlined_vector.h +++ b/tensorflow/stream_executor/lib/inlined_vector.h @@ -18,14 +18,12 @@ limitations under the License. 
#include "tensorflow/core/lib/gtl/inlined_vector.h" -namespace perftools { -namespace gputools { +namespace stream_executor { namespace port { using tensorflow::gtl::InlinedVector; } // namespace port -} // namespace gputools -} // namespace perftools +} // namespace stream_executor #endif // TENSORFLOW_STREAM_EXECUTOR_LIB_INLINED_VECTOR_H_ diff --git a/tensorflow/stream_executor/lib/mathutil.h b/tensorflow/stream_executor/lib/mathutil.h index e8310d55ddae20f1a92c4184d408215d7ec8664a..c225dc5f3cc9725be4b01039c10103ed6e62372e 100644 --- a/tensorflow/stream_executor/lib/mathutil.h +++ b/tensorflow/stream_executor/lib/mathutil.h @@ -25,8 +25,7 @@ limitations under the License. #include "tensorflow/stream_executor/platform/logging.h" #include "tensorflow/stream_executor/platform/port.h" -namespace perftools { -namespace gputools { +namespace stream_executor { namespace port { class MathUtil { @@ -97,7 +96,6 @@ IntegralType MathUtil::CeilOrFloorOfRatio(IntegralType numerator, } } // namespace port -} // namespace gputools -} // namespace perftools +} // namespace stream_executor #endif // TENSORFLOW_STREAM_EXECUTOR_LIB_MATHUTIL_H_ diff --git a/tensorflow/stream_executor/lib/notification.h b/tensorflow/stream_executor/lib/notification.h index 9bb3e170dc7c13ecaa636887315b4bc5cfc82f2b..472d8c9845c2ae3efbc5ab06234a7c084d044a96 100644 --- a/tensorflow/stream_executor/lib/notification.h +++ b/tensorflow/stream_executor/lib/notification.h @@ -18,14 +18,12 @@ limitations under the License. 
#include "tensorflow/core/platform/notification.h" -namespace perftools { -namespace gputools { +namespace stream_executor { namespace port { using tensorflow::Notification; } // namespace port -} // namespace gputools -} // namespace perftools +} // namespace stream_executor #endif // TENSORFLOW_STREAM_EXECUTOR_LIB_NOTIFICATION_H_ diff --git a/tensorflow/stream_executor/lib/numbers.cc b/tensorflow/stream_executor/lib/numbers.cc index 11a65e198d60545e3bfb76a8cbf141c4dba1b58e..b670c42ec84a86d49a4938da3fc6387af529e04b 100644 --- a/tensorflow/stream_executor/lib/numbers.cc +++ b/tensorflow/stream_executor/lib/numbers.cc @@ -17,8 +17,7 @@ limitations under the License. #include -namespace perftools { -namespace gputools { +namespace stream_executor { namespace port { bool safe_strto32(const char* str, int32* value) { @@ -38,5 +37,4 @@ bool safe_strto32(const string& str, int32* value) { } } // namespace port -} // namespace gputools -} // namespace perftools +} // namespace stream_executor diff --git a/tensorflow/stream_executor/lib/numbers.h b/tensorflow/stream_executor/lib/numbers.h index 4a8692b7461d513bbac287281b134a4fbac54707..2f48281d2d67f0fa9b4fb523c4986c5dc8acfe33 100644 --- a/tensorflow/stream_executor/lib/numbers.h +++ b/tensorflow/stream_executor/lib/numbers.h @@ -18,8 +18,7 @@ limitations under the License. #include "tensorflow/stream_executor/platform/port.h" -namespace perftools { -namespace gputools { +namespace stream_executor { namespace port { // Convert strings to floating point values. 
@@ -28,7 +27,6 @@ namespace port { bool safe_strto32(const string& str, int32* value); } // namespace port -} // namespace gputools -} // namespace perftools +} // namespace stream_executor #endif // TENSORFLOW_STREAM_EXECUTOR_LIB_NUMBERS_H_ diff --git a/tensorflow/stream_executor/lib/path.cc b/tensorflow/stream_executor/lib/path.cc index f2591f47f7bf75004ee00019d70e118cd9d32f0e..56e08c316f95758cd2a05d18d59c06f3d7727959 100644 --- a/tensorflow/stream_executor/lib/path.cc +++ b/tensorflow/stream_executor/lib/path.cc @@ -16,8 +16,7 @@ limitations under the License. #include "tensorflow/stream_executor/lib/path.h" #include "tensorflow/stream_executor/lib/strcat.h" -namespace perftools { -namespace gputools { +namespace stream_executor { namespace port { namespace internal { @@ -58,5 +57,4 @@ string JoinPathImpl(std::initializer_list paths) { } // namespace internal } // namespace port -} // namespace gputools -} // namespace perftools +} // namespace stream_executor diff --git a/tensorflow/stream_executor/lib/path.h b/tensorflow/stream_executor/lib/path.h index 93053dbcb6862edbaf93a27a9414ad7e4bb10be2..325f04ff47552e052d81c96bec74e816378b8254 100644 --- a/tensorflow/stream_executor/lib/path.h +++ b/tensorflow/stream_executor/lib/path.h @@ -20,8 +20,7 @@ limitations under the License. #include "tensorflow/stream_executor/lib/stringpiece.h" #include "tensorflow/stream_executor/platform/port.h" -namespace perftools { -namespace gputools { +namespace stream_executor { namespace port { using tensorflow::io::Dirname; @@ -56,7 +55,6 @@ inline string JoinPath(const T&... 
args) { } } // namespace port -} // namespace gputools -} // namespace perftools +} // namespace stream_executor #endif // TENSORFLOW_STREAM_EXECUTOR_LIB_PATH_H_ diff --git a/tensorflow/stream_executor/lib/process_state.cc b/tensorflow/stream_executor/lib/process_state.cc index 3d856187f05b3601525b048d3997088e145bdb81..72d71e62116726d458f709b75e50fcc0947df00e 100644 --- a/tensorflow/stream_executor/lib/process_state.cc +++ b/tensorflow/stream_executor/lib/process_state.cc @@ -25,8 +25,7 @@ limitations under the License. #endif #include -namespace perftools { -namespace gputools { +namespace stream_executor { namespace port { string Hostname() { @@ -54,5 +53,4 @@ bool GetCurrentDirectory(string* dir) { } } // namespace port -} // namespace gputools -} // namespace perftools +} // namespace stream_executor diff --git a/tensorflow/stream_executor/lib/process_state.h b/tensorflow/stream_executor/lib/process_state.h index 205e726d95ce80216a8c260c270d32b990fd5b78..248218c759e65fc2d6ee09b2f1c9e309567b1048 100644 --- a/tensorflow/stream_executor/lib/process_state.h +++ b/tensorflow/stream_executor/lib/process_state.h @@ -18,15 +18,13 @@ limitations under the License. #include "tensorflow/stream_executor/platform/port.h" -namespace perftools { -namespace gputools { +namespace stream_executor { namespace port { string Hostname(); bool GetCurrentDirectory(string* dir); } // namespace port -} // namespace gputools -} // namespace perftools +} // namespace stream_executor #endif // TENSORFLOW_STREAM_EXECUTOR_LIB_PROCESS_STATE_H_ diff --git a/tensorflow/stream_executor/lib/ptr_util.h b/tensorflow/stream_executor/lib/ptr_util.h index 3d5e56faf745e484649079cee916bc26c8b2c449..3f89794688ca390a3d1e475fbe69828be1914167 100644 --- a/tensorflow/stream_executor/lib/ptr_util.h +++ b/tensorflow/stream_executor/lib/ptr_util.h @@ -18,8 +18,7 @@ limitations under the License. 
#include -namespace perftools { -namespace gputools { +namespace stream_executor { namespace port { // Trait to select overloads and return types for MakeUnique. @@ -59,8 +58,17 @@ typename MakeUniqueResult::invalid MakeUnique(Args&&... /* args */) = delete; // NOLINT } // namespace port +} // namespace stream_executor + +namespace perftools { +namespace gputools { + +// Temporarily pull stream_executor into perftools::gputools while we migrate +// code to the new namespace. TODO(jlebar): Remove this once we've completed +// the migration. +using namespace stream_executor; // NOLINT[build/namespaces] + } // namespace gputools } // namespace perftools - #endif // TENSORFLOW_STREAM_EXECUTOR_LIB_PTR_UTIL_H_ diff --git a/tensorflow/stream_executor/lib/stacktrace.h b/tensorflow/stream_executor/lib/stacktrace.h index ba7e5317f0f4a852c353102d29411752f69b11a9..a15b0f30261f1a102c0f7cf8b28b5fc9b5426e25 100644 --- a/tensorflow/stream_executor/lib/stacktrace.h +++ b/tensorflow/stream_executor/lib/stacktrace.h @@ -19,14 +19,12 @@ limitations under the License. #include "tensorflow/core/platform/stacktrace.h" #include "tensorflow/stream_executor/platform/port.h" -namespace perftools { -namespace gputools { +namespace stream_executor { namespace port { using tensorflow::CurrentStackTrace; } // namespace port -} // namespace gputools -} // namespace perftools +} // namespace stream_executor #endif // TENSORFLOW_STREAM_EXECUTOR_LIB_STACKTRACE_H_ diff --git a/tensorflow/stream_executor/lib/status.h b/tensorflow/stream_executor/lib/status.h index 8c289e1927fcf9b49851389e44d3fa09fdfea3ae..407b71b405bc8a73e5aebcd18b043420b074b708 100644 --- a/tensorflow/stream_executor/lib/status.h +++ b/tensorflow/stream_executor/lib/status.h @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -// IWYU pragma: private, include "perftools/gputools/executor/stream_executor.h" +// IWYU pragma: private, include "third_party/tensorflow/stream_executor/stream_executor.h" #ifndef TENSORFLOW_STREAM_EXECUTOR_LIB_STATUS_H_ #define TENSORFLOW_STREAM_EXECUTOR_LIB_STATUS_H_ @@ -23,15 +23,14 @@ limitations under the License. #include "tensorflow/stream_executor/lib/stringpiece.h" #include "tensorflow/stream_executor/platform/logging.h" -namespace perftools { -namespace gputools { +namespace stream_executor { namespace port { using Status = tensorflow::Status; #define SE_CHECK_OK(val) TF_CHECK_OK(val) #define SE_ASSERT_OK(val) \ - ASSERT_EQ(::perftools::gputools::port::Status::OK(), (val)) + ASSERT_EQ(::stream_executor::port::Status::OK(), (val)) // Define some canonical error helpers. inline Status UnimplementedError(StringPiece message) { @@ -45,6 +44,16 @@ inline Status FailedPreconditionError(StringPiece message) { } } // namespace port +} // namespace stream_executor + +namespace perftools { +namespace gputools { + +// Temporarily pull stream_executor into perftools::gputools while we migrate +// code to the new namespace. TODO(b/77980417): Remove this once we've +// completed the migration. +using namespace stream_executor; // NOLINT[build/namespaces] + } // namespace gputools } // namespace perftools diff --git a/tensorflow/stream_executor/lib/statusor.h b/tensorflow/stream_executor/lib/statusor.h index 138738ecab54986fd7d5cd76839d59da55623b1f..dab59096740102b94c0ff63c089b83ce052ea264 100644 --- a/tensorflow/stream_executor/lib/statusor.h +++ b/tensorflow/stream_executor/lib/statusor.h @@ -13,242 +13,21 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -// IWYU pragma: private, include "perftools/gputools/executor/stream_executor.h" -// -// StatusOr is the union of a Status object and a T -// object. StatusOr models the concept of an object that is either a -// usable value, or an error Status explaining why such a value is -// not present. To this end, StatusOr does not allow its Status -// value to be Status::OK. Further, StatusOr does not allow the -// contained pointer to be NULL. -// -// The primary use-case for StatusOr is as the return value of a -// function which may fail. -// -// Example client usage for a StatusOr, where T is not a pointer: -// -// StatusOr result = DoBigCalculationThatCouldFail(); -// if (result.ok()) { -// float answer = result.ValueOrDie(); -// printf("Big calculation yielded: %f", answer); -// } else { -// LOG(ERROR) << result.status(); -// } -// -// Example client usage for a StatusOr: -// -// StatusOr result = FooFactory::MakeNewFoo(arg); -// if (result.ok()) { -// std::unique_ptr foo(result.ValueOrDie()); -// foo->DoSomethingCool(); -// } else { -// LOG(ERROR) << result.status(); -// } -// -// Example client usage for a StatusOr>: -// -// StatusOr> result = FooFactory::MakeNewFoo(arg); -// if (result.ok()) { -// std::unique_ptr foo = std::move(result.ValueOrDie()); -// foo->DoSomethingCool(); -// } else { -// LOG(ERROR) << result.status(); -// } -// -// Example factory implementation returning StatusOr: -// -// StatusOr FooFactory::MakeNewFoo(int arg) { -// if (arg <= 0) { -// return Status(port::error::INVALID_ARGUMENT, -// "Arg must be positive"); -// } else { -// return new Foo(arg); -// } -// } -// +// IWYU pragma: private, include "third_party/tensorflow/stream_executor/stream_executor.h" #ifndef TENSORFLOW_STREAM_EXECUTOR_LIB_STATUSOR_H_ #define TENSORFLOW_STREAM_EXECUTOR_LIB_STATUSOR_H_ -#include -#include "tensorflow/stream_executor/platform/port.h" -#include -#include +#include 
"tensorflow/compiler/xla/statusor.h" -#include "tensorflow/stream_executor/lib/error.h" -#include "tensorflow/stream_executor/lib/status.h" -#include "tensorflow/stream_executor/platform/logging.h" -#include "tensorflow/stream_executor/platform/port.h" - -namespace perftools { -namespace gputools { +namespace stream_executor { namespace port { -template -class StatusOr { - template friend class StatusOr; - - public: - // Construct a new StatusOr with Status::UNKNOWN status - StatusOr() : status_(error::UNKNOWN, "") {} - - // Construct a new StatusOr with the given non-ok status. After calling - // this constructor, calls to ValueOrDie() is invalid. - // - // NOTE: Not explicit - we want to use StatusOr as a return - // value, so it is convenient and sensible to be able to do 'return - // Status()' when the return type is StatusOr. - // - // REQUIRES: status != Status::OK. - // In optimized builds, passing Status::OK here will have the effect - // of passing PosixErrorSpace::EINVAL as a fallback. - StatusOr(const Status& status); // NOLINT - - // Construct a new StatusOr with the given value. If T is a plain pointer, - // value must not be NULL. After calling this constructor, calls to - // ValueOrDie() will succeed, and calls to status() will return OK. - // - // NOTE: Not explicit - we want to use StatusOr as a return type - // so it is convenient and sensible to be able to do 'return T()' - // when the return type is StatusOr. - // - // REQUIRES: if T is a plain pointer, value != NULL. - // In optimized builds, passing a NULL pointer here will have - // the effect of passing PosixErrorSpace::EINVAL as a fallback. 
- StatusOr(const T& value); // NOLINT - - // Conversion copy constructor, T must be copy constructible from U - template - StatusOr(const StatusOr& other) // NOLINT - : status_(other.status_), - value_(other.value_) {} - - // Conversion assignment operator, T must be assignable from U - template - StatusOr& operator=(const StatusOr& other) { - status_ = other.status_; - value_ = other.value_; - return *this; - } - - // Rvalue-reference overloads of the other constructors and assignment - // operators, to support move-only types and avoid unnecessary copying. - StatusOr(T&& value); // NOLINT - - // Move conversion operator to avoid unnecessary copy. - // T must be assignable from U. - // Not marked with explicit so the implicit conversion can happen. - template - StatusOr(StatusOr&& other) // NOLINT - : status_(std::move(other.status_)), - value_(std::move(other.value_)) {} - - // Move assignment operator to avoid unnecessary copy. - // T must be assignable from U - template - StatusOr& operator=(StatusOr&& other) { - status_ = std::move(other.status_); - value_ = std::move(other.value_); - return *this; - } - - // Returns a reference to our status. If this contains a T, then - // returns Status::OK. - const Status& status() const { return status_; } - - // Returns this->status().ok() - bool ok() const { return status_.ok(); } - - // Returns a reference to our current value, requires that this->ok(). - // If you need to initialize a T object from the stored value, - // ConsumeValueOrDie() may be more efficient. - const T& ValueOrDie() const; - T& ValueOrDie(); - - // Returns our current value, requires this->ok(). Use this if - // you would otherwise want to say std::move(s.ValueOrDie()), for example - // if you need to initialize a T object from the stored value and you don't - // need subsequent access to the stored value. 
It uses T's move constructor, - // if it has one, so it will work with move-only types, and will often be - // more efficient than ValueOrDie, but may leave the stored value - // in an arbitrary valid state. - T ConsumeValueOrDie(); - - private: - Status status_; - T value_; - - void CheckValueNotNull(const T& value); - - template - struct IsNull { - // For non-pointer U, a reference can never be NULL. - static inline bool IsValueNull(const U& t) { return false; } - }; - - template - struct IsNull { - static inline bool IsValueNull(const U* t) { return t == NULL; } - }; -}; - -//////////////////////////////////////////////////////////////////////////////// -// Implementation details for StatusOr - -template -StatusOr::StatusOr(const T& value) - : status_(), value_(value) { - CheckValueNotNull(value); -} - -template -const T& StatusOr::ValueOrDie() const { - TF_CHECK_OK(status_); - return value_; -} - -template -T& StatusOr::ValueOrDie() { - TF_CHECK_OK(status_); - return value_; -} - -template -T StatusOr::ConsumeValueOrDie() { - TF_CHECK_OK(status_); - return std::move(value_); -} - -template -StatusOr::StatusOr(const Status& status) - : status_(status) { - assert(!status.ok()); - if (status.ok()) { - status_ = - Status(error::INTERNAL, - "Status::OK is not a valid constructor argument to StatusOr"); - } -} - -template -StatusOr::StatusOr(T&& value) - : status_() { - CheckValueNotNull(value); - value_ = std::move(value); -} - +// Use XLA's StatusOr so we don't duplicate code. 
template -void StatusOr::CheckValueNotNull(const T& value) { - assert(!IsNull::IsValueNull(value)); - if (IsNull::IsValueNull(value)) { - status_ = - Status(error::INTERNAL, - "NULL is not a valid constructor argument to StatusOr"); - } -} +using StatusOr = ::xla::StatusOr; } // namespace port -} // namespace gputools -} // namespace perftools +} // namespace stream_executor #endif // TENSORFLOW_STREAM_EXECUTOR_LIB_STATUSOR_H_ diff --git a/tensorflow/stream_executor/lib/str_util.h b/tensorflow/stream_executor/lib/str_util.h index 5dd3d06affa424f0919f107c76ba40feeb165122..a81c6668184c158b61eff4d4b77bb7ad07165c8e 100644 --- a/tensorflow/stream_executor/lib/str_util.h +++ b/tensorflow/stream_executor/lib/str_util.h @@ -19,8 +19,7 @@ limitations under the License. #include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/stream_executor/lib/stringpiece.h" -namespace perftools { -namespace gputools { +namespace stream_executor { namespace port { using tensorflow::str_util::Join; @@ -38,7 +37,6 @@ inline string StripSuffixString(port::StringPiece str, port::StringPiece suffix) using tensorflow::str_util::Lowercase; } // namespace port -} // namespace gputools -} // namespace perftools +} // namespace stream_executor #endif // TENSORFLOW_STREAM_EXECUTOR_LIB_STR_UTIL_H_ diff --git a/tensorflow/stream_executor/lib/strcat.h b/tensorflow/stream_executor/lib/strcat.h index 424cb75f0e805a840c5043a42e7cbf3f9c7d195c..c959e4df5b2d6e3954e2edf7bbafc9efb8259dc6 100644 --- a/tensorflow/stream_executor/lib/strcat.h +++ b/tensorflow/stream_executor/lib/strcat.h @@ -13,22 +13,20 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -// IWYU pragma: private, include "perftools/gputools/executor/stream_executor.h" +// IWYU pragma: private, include "third_party/tensorflow/stream_executor/stream_executor.h" #ifndef TENSORFLOW_STREAM_EXECUTOR_LIB_STRCAT_H_ #define TENSORFLOW_STREAM_EXECUTOR_LIB_STRCAT_H_ #include "tensorflow/core/lib/strings/strcat.h" -namespace perftools { -namespace gputools { +namespace stream_executor { namespace port { using tensorflow::strings::StrCat; using tensorflow::strings::StrAppend; } // namespace port -} // namespace gputools -} // namespace perftools +} // namespace stream_executor #endif // TENSORFLOW_STREAM_EXECUTOR_LIB_STRCAT_H_ diff --git a/tensorflow/stream_executor/lib/stringpiece.h b/tensorflow/stream_executor/lib/stringpiece.h index 97ee0c9206480202db887018c21c1e41482a04e9..b80de5df306117e315b282e53ec871d60bd6d517 100644 --- a/tensorflow/stream_executor/lib/stringpiece.h +++ b/tensorflow/stream_executor/lib/stringpiece.h @@ -19,14 +19,12 @@ limitations under the License. #include "tensorflow/core/lib/core/stringpiece.h" #include "tensorflow/stream_executor/platform/port.h" -namespace perftools { -namespace gputools { +namespace stream_executor { namespace port { using tensorflow::StringPiece; } // namespace port -} // namespace gputools -} // namespace perftools +} // namespace stream_executor #endif // TENSORFLOW_STREAM_EXECUTOR_LIB_STRINGPIECE_H_ diff --git a/tensorflow/stream_executor/lib/stringprintf.h b/tensorflow/stream_executor/lib/stringprintf.h index 504de25a681c4ec880c04a46aad89ddeeb622c71..2f65ed9c6a86516cdb6f8aeaaf9c48b3622cb613 100644 --- a/tensorflow/stream_executor/lib/stringprintf.h +++ b/tensorflow/stream_executor/lib/stringprintf.h @@ -18,15 +18,13 @@ limitations under the License. 
#include "tensorflow/core/lib/strings/stringprintf.h" -namespace perftools { -namespace gputools { +namespace stream_executor { namespace port { using tensorflow::strings::Printf; using tensorflow::strings::Appendf; } // namespace port -} // namespace gputools -} // namespace perftools +} // namespace stream_executor #endif // TENSORFLOW_STREAM_EXECUTOR_LIB_STRINGPRINTF_H_ diff --git a/tensorflow/stream_executor/lib/thread_options.h b/tensorflow/stream_executor/lib/thread_options.h index bd7f63714e25ec88b6d54bc683fe96a313589485..079cf757acd98d994b1351423205ef99a64478f2 100644 --- a/tensorflow/stream_executor/lib/thread_options.h +++ b/tensorflow/stream_executor/lib/thread_options.h @@ -18,14 +18,12 @@ limitations under the License. #include "tensorflow/core/platform/env.h" -namespace perftools { -namespace gputools { +namespace stream_executor { namespace port { using tensorflow::ThreadOptions; } // namespace port -} // namespace gputools -} // namespace perftools +} // namespace stream_executor #endif // TENSORFLOW_STREAM_EXECUTOR_LIB_THREAD_OPTIONS_H_ diff --git a/tensorflow/stream_executor/lib/threadpool.h b/tensorflow/stream_executor/lib/threadpool.h index 35630c5106a1b732d1100c416b2abc7fe6404c55..220068ade11b98e2442cdc1c68b2aaeb422ae6b3 100644 --- a/tensorflow/stream_executor/lib/threadpool.h +++ b/tensorflow/stream_executor/lib/threadpool.h @@ -21,14 +21,12 @@ limitations under the License. 
#include "tensorflow/stream_executor/lib/notification.h" #include "tensorflow/stream_executor/lib/thread_options.h" -namespace perftools { -namespace gputools { +namespace stream_executor { namespace port { using tensorflow::thread::ThreadPool; } // namespace port -} // namespace gputools -} // namespace perftools +} // namespace stream_executor #endif // TENSORFLOW_STREAM_EXECUTOR_LIB_THREADPOOL_H_ diff --git a/tensorflow/stream_executor/multi_platform_manager.cc b/tensorflow/stream_executor/multi_platform_manager.cc index f9f3737a06dad3f146ef9fc8e2ec50160b3a01b5..5b51398d8cab5df7c7514bc3bedf87f5c33c6e5a 100644 --- a/tensorflow/stream_executor/multi_platform_manager.cc +++ b/tensorflow/stream_executor/multi_platform_manager.cc @@ -20,8 +20,7 @@ limitations under the License. #include "tensorflow/stream_executor/lib/str_util.h" #include "tensorflow/stream_executor/lib/stringprintf.h" -namespace perftools { -namespace gputools { +namespace stream_executor { /* static */ mutex MultiPlatformManager::platforms_mutex_{LINKER_INITIALIZED}; @@ -132,8 +131,7 @@ MultiPlatformManager::InitializePlatformWithId( GetPlatformByIdMap()->clear(); } -} // namespace gputools -} // namespace perftools +} // namespace stream_executor REGISTER_MODULE_INITIALIZER( multi_platform_manager, diff --git a/tensorflow/stream_executor/multi_platform_manager.h b/tensorflow/stream_executor/multi_platform_manager.h index 438653ee20bdb1fd83cd9e75c4bcd35af277cc28..672855d5fb6152b8bfb00d8f0faa64873711646d 100644 --- a/tensorflow/stream_executor/multi_platform_manager.h +++ b/tensorflow/stream_executor/multi_platform_manager.h @@ -22,8 +22,8 @@ limitations under the License. 
// In your BUILD rule, add a dependency on a platform plugin that you'd like // to use, such as: // -// //perftools/gputools/executor/cuda:cuda_platform -// //perftools/gputools/executor/opencl:opencl_platform +// //third_party/tensorflow/stream_executor/cuda:cuda_platform +// //third_party/tensorflow/stream_executor/opencl:opencl_platform // // This will register platform plugins that can be discovered via this // interface. Sample API usage: @@ -56,10 +56,10 @@ limitations under the License. // And similarly, for standard interfaces (BLAS, RNG, etc.) you can add // dependencies on support libraries, e.g.: // -// //perftools/gputools/executor/cuda:pluton_blas_plugin -// //perftools/gputools/executor/cuda:cudnn_plugin -// //perftools/gputools/executor/cuda:cublas_plugin -// //perftools/gputools/executor/cuda:curand_plugin +// //third_party/tensorflow/stream_executor/cuda:pluton_blas_plugin +// //third_party/tensorflow/stream_executor/cuda:cudnn_plugin +// //third_party/tensorflow/stream_executor/cuda:cublas_plugin +// //third_party/tensorflow/stream_executor/cuda:curand_plugin #ifndef TENSORFLOW_STREAM_EXECUTOR_MULTI_PLATFORM_MANAGER_H_ #define TENSORFLOW_STREAM_EXECUTOR_MULTI_PLATFORM_MANAGER_H_ @@ -75,8 +75,7 @@ limitations under the License. #include "tensorflow/stream_executor/platform/port.h" #include "tensorflow/stream_executor/platform/thread_annotations.h" -namespace perftools { -namespace gputools { +namespace stream_executor { // Manages multiple platforms that may be present on the current machine. 
class MultiPlatformManager { @@ -181,7 +180,6 @@ class MultiPlatformManager { SE_DISALLOW_COPY_AND_ASSIGN(MultiPlatformManager); }; -} // namespace gputools -} // namespace perftools +} // namespace stream_executor #endif // TENSORFLOW_STREAM_EXECUTOR_MULTI_PLATFORM_MANAGER_H_ diff --git a/tensorflow/stream_executor/platform.cc b/tensorflow/stream_executor/platform.cc index 4cdc22bd16a3ea66037696f6a9d70bcb86ef5ebb..777abced8634410440fd5c2772a3b981a1489dd6 100644 --- a/tensorflow/stream_executor/platform.cc +++ b/tensorflow/stream_executor/platform.cc @@ -22,8 +22,7 @@ limitations under the License. #include "tensorflow/stream_executor/platform/logging.h" #include "tensorflow/stream_executor/stream_executor_pimpl.h" -namespace perftools { -namespace gputools { +namespace stream_executor { string PlatformKindString(PlatformKind kind) { switch (kind) { @@ -135,5 +134,4 @@ port::Status Platform::EnablePeerAccess() { return port::Status::OK(); } -} // namespace gputools -} // namespace perftools +} // namespace stream_executor diff --git a/tensorflow/stream_executor/platform.h b/tensorflow/stream_executor/platform.h index 54f8aa86c269ff0d32648e1d4629179cafd5be76..5cb7047b6f39483f237b5bb249906d9ce8a06b9e 100644 --- a/tensorflow/stream_executor/platform.h +++ b/tensorflow/stream_executor/platform.h @@ -29,8 +29,7 @@ limitations under the License. #include "tensorflow/stream_executor/plugin.h" #include "tensorflow/stream_executor/trace_listener.h" -namespace perftools { -namespace gputools { +namespace stream_executor { class StreamExecutor; @@ -106,7 +105,7 @@ class Platform { namespace { \ int plugin_id_value; \ } \ - const perftools::gputools::Platform::Id ID_VAR_NAME = &plugin_id_value; + const ::stream_executor::Platform::Id ID_VAR_NAME = &plugin_id_value; // Returns a key uniquely identifying this platform. 
virtual Id id() const = 0; @@ -205,7 +204,6 @@ class Platform { SE_DISALLOW_COPY_AND_ASSIGN(Platform); }; -} // namespace gputools -} // namespace perftools +} // namespace stream_executor #endif // TENSORFLOW_STREAM_EXECUTOR_PLATFORM_H_ diff --git a/tensorflow/stream_executor/platform/default/mutex.h b/tensorflow/stream_executor/platform/default/mutex.h index 62de0cbce0b534020166c47a64c10c595fb3d5ae..c9f5a7c609e5bbe59ea456e30d575b991aa37b65 100644 --- a/tensorflow/stream_executor/platform/default/mutex.h +++ b/tensorflow/stream_executor/platform/default/mutex.h @@ -18,8 +18,7 @@ limitations under the License. #include "tensorflow/stream_executor/platform/mutex.h" -namespace perftools { -namespace gputools { +namespace stream_executor { #undef mutex_lock #undef tf_shared_lock @@ -35,7 +34,6 @@ using tensorflow::tf_shared_lock; #define tf_shared_lock(x) \ static_assert(0, "tf_shared_lock_decl_missing_var_name"); -} // namespace gputools -} // namespace perftools +} // namespace stream_executor #endif // TENSORFLOW_STREAM_EXECUTOR_PLATFORM_DEFAULT_MUTEX_H_ diff --git a/tensorflow/stream_executor/platform/port.h b/tensorflow/stream_executor/platform/port.h index 6603df487878e62271a144b14d78518044c66c81..57ad965ef1101d7d86a4f9cf0a52016e2b3ae713 100644 --- a/tensorflow/stream_executor/platform/port.h +++ b/tensorflow/stream_executor/platform/port.h @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -// IWYU pragma: private, include "perftools/gputools/executor/stream_executor.h" +// IWYU pragma: private, include "third_party/tensorflow/stream_executor/stream_executor.h" #ifndef TENSORFLOW_STREAM_EXECUTOR_PLATFORM_PORT_H_ #define TENSORFLOW_STREAM_EXECUTOR_PLATFORM_PORT_H_ @@ -22,8 +22,7 @@ limitations under the License. 
#include "tensorflow/core/platform/mutex.h" #include "tensorflow/core/platform/types.h" -namespace perftools { -namespace gputools { +namespace stream_executor { using tensorflow::int8; using tensorflow::int16; @@ -39,19 +38,12 @@ using tensorflow::uint64; using std::string; #endif -#if !defined(COMPILER_MSVC) -#define ARRAYSIZE(a) \ - ((sizeof(a) / sizeof(*(a))) / \ - static_cast(!(sizeof(a) % sizeof(*(a))))) -#endif - using tensorflow::LinkerInitialized; using tensorflow::LINKER_INITIALIZED; #define SE_FALLTHROUGH_INTENDED TF_FALLTHROUGH_INTENDED -} // namespace gputools -} // namespace perftools +} // namespace stream_executor #define SE_DISALLOW_COPY_AND_ASSIGN TF_DISALLOW_COPY_AND_ASSIGN #define SE_MUST_USE_RESULT TF_MUST_USE_RESULT diff --git a/tensorflow/stream_executor/plugin.cc b/tensorflow/stream_executor/plugin.cc index 6424658e22f930a7adfa4dd7fe330308e62e00cf..cfbc52ff17b3a699e97d073d0d0c77bba811db72 100644 --- a/tensorflow/stream_executor/plugin.cc +++ b/tensorflow/stream_executor/plugin.cc @@ -15,8 +15,7 @@ limitations under the License. #include "tensorflow/stream_executor/plugin.h" -namespace perftools { -namespace gputools { +namespace stream_executor { // Mostly-arbitrary ID only used as a sentinel "not otherwise initialized" // value. This value should never [need to] be specified aside by initialization @@ -51,5 +50,4 @@ PluginConfig& PluginConfig::SetRng(PluginId rng) { return *this; } -} // namespace gputools -} // namespace perftools +} // namespace stream_executor diff --git a/tensorflow/stream_executor/plugin.h b/tensorflow/stream_executor/plugin.h index 0b88b86e2b1cf8cbd3dddfa5ca3ae9cdc779a952..0505412e7ac412dda8cda1c5c4589148241c5b38 100644 --- a/tensorflow/stream_executor/plugin.h +++ b/tensorflow/stream_executor/plugin.h @@ -16,8 +16,7 @@ limitations under the License. 
#ifndef TENSORFLOW_STREAM_EXECUTOR_PLUGIN_H_ #define TENSORFLOW_STREAM_EXECUTOR_PLUGIN_H_ -namespace perftools { -namespace gputools { +namespace stream_executor { // A plugin ID is a unique identifier for each registered plugin type. typedef void* PluginId; @@ -83,7 +82,6 @@ class PluginConfig { PluginId blas_, dnn_, fft_, rng_; }; -} // namespace gputools -} // namespace perftools +} // namespace stream_executor #endif // TENSORFLOW_STREAM_EXECUTOR_PLUGIN_H_ diff --git a/tensorflow/stream_executor/plugin_registry.cc b/tensorflow/stream_executor/plugin_registry.cc index 54761139eaf3ac5e2edb5ccc45dabe56fa1384d4..7812703efd8b4f143c195225c2c12502378bf5f2 100644 --- a/tensorflow/stream_executor/plugin_registry.cc +++ b/tensorflow/stream_executor/plugin_registry.cc @@ -19,8 +19,7 @@ limitations under the License. #include "tensorflow/stream_executor/lib/stringprintf.h" #include "tensorflow/stream_executor/multi_platform_manager.h" -namespace perftools { -namespace gputools { +namespace stream_executor { const PluginId kNullPlugin = nullptr; @@ -244,5 +243,4 @@ EMIT_PLUGIN_SPECIALIZATIONS(DnnFactory, dnn, "DNN"); EMIT_PLUGIN_SPECIALIZATIONS(FftFactory, fft, "FFT"); EMIT_PLUGIN_SPECIALIZATIONS(RngFactory, rng, "RNG"); -} // namespace gputools -} // namespace perftools +} // namespace stream_executor diff --git a/tensorflow/stream_executor/plugin_registry.h b/tensorflow/stream_executor/plugin_registry.h index 8636a49ce6861d24eb5567bc8d8b268b840a310f..49628ecd246e1a785ef3583f5a38567f553352f5 100644 --- a/tensorflow/stream_executor/plugin_registry.h +++ b/tensorflow/stream_executor/plugin_registry.h @@ -28,8 +28,7 @@ limitations under the License. 
#include "tensorflow/stream_executor/plugin.h" #include "tensorflow/stream_executor/rng.h" -namespace perftools { -namespace gputools { +namespace stream_executor { namespace internal { class StreamExecutorInterface; @@ -160,7 +159,6 @@ class PluginRegistry { SE_DISALLOW_COPY_AND_ASSIGN(PluginRegistry); }; -} // namespace gputools -} // namespace perftools +} // namespace stream_executor #endif // TENSORFLOW_STREAM_EXECUTOR_PLUGIN_REGISTRY_H_ diff --git a/tensorflow/stream_executor/rng.cc b/tensorflow/stream_executor/rng.cc index 1c05005067ce7341994aa7b8f1cb9c4df734ce2c..b0efad91084b7fbfed70ecca9bf879810de2ec1d 100644 --- a/tensorflow/stream_executor/rng.cc +++ b/tensorflow/stream_executor/rng.cc @@ -17,8 +17,7 @@ limitations under the License. #include "tensorflow/stream_executor/platform/logging.h" -namespace perftools { -namespace gputools { +namespace stream_executor { namespace rng { bool RngSupport::CheckSeed(const uint8 *seed, uint64 seed_bytes) { @@ -47,5 +46,4 @@ const int RngSupport::kMaxSeedBytes; #endif } // namespace rng -} // namespace gputools -} // namespace perftools +} // namespace stream_executor diff --git a/tensorflow/stream_executor/rng.h b/tensorflow/stream_executor/rng.h index 36d0fdd454f9a98b749fb2a8f9ceb14eec9d0fe1..acbf8fce4caa8cbbc9b2288725ea7bf07ee84af4 100644 --- a/tensorflow/stream_executor/rng.h +++ b/tensorflow/stream_executor/rng.h @@ -22,8 +22,7 @@ limitations under the License. 
#include "tensorflow/stream_executor/platform/logging.h" #include "tensorflow/stream_executor/platform/port.h" -namespace perftools { -namespace gputools { +namespace stream_executor { class Stream; template @@ -89,7 +88,6 @@ class RngSupport { }; } // namespace rng -} // namespace gputools -} // namespace perftools +} // namespace stream_executor #endif // TENSORFLOW_STREAM_EXECUTOR_RNG_H_ diff --git a/tensorflow/stream_executor/scratch_allocator.cc b/tensorflow/stream_executor/scratch_allocator.cc index 0c1db414f2e9617070c8c12e2a334ed7977ae9f1..8fc4c4c509cfd35c8a733b01a8d85965fe524def 100644 --- a/tensorflow/stream_executor/scratch_allocator.cc +++ b/tensorflow/stream_executor/scratch_allocator.cc @@ -18,8 +18,7 @@ limitations under the License. #include "tensorflow/stream_executor/lib/status_macros.h" #include "tensorflow/stream_executor/stream.h" -namespace perftools { -namespace gputools { +namespace stream_executor { ScratchAllocator::~ScratchAllocator() {} @@ -38,5 +37,4 @@ port::StatusOr> OneTimeScratchAllocator::AllocateBytes( return temporary_->device_memory(); } -} // namespace gputools -} // namespace perftools +} // namespace stream_executor diff --git a/tensorflow/stream_executor/scratch_allocator.h b/tensorflow/stream_executor/scratch_allocator.h index 94d5ede161323ff39d7c75589958026b66f22cc5..2aed2c4437381bd29750aae1120a8783b61bfb9f 100644 --- a/tensorflow/stream_executor/scratch_allocator.h +++ b/tensorflow/stream_executor/scratch_allocator.h @@ -23,8 +23,7 @@ limitations under the License. 
#include "tensorflow/stream_executor/platform/port.h" #include "tensorflow/stream_executor/temporary_device_memory.h" -namespace perftools { -namespace gputools { +namespace stream_executor { class Stream; @@ -77,7 +76,6 @@ class OneTimeScratchAllocator : public ScratchAllocator { SE_DISALLOW_COPY_AND_ASSIGN(OneTimeScratchAllocator); }; -} // namespace gputools -} // namespace perftools +} // namespace stream_executor #endif // TENSORFLOW_STREAM_EXECUTOR_SCRATCH_ALLOCATOR_H_ diff --git a/tensorflow/stream_executor/shared_memory_config.h b/tensorflow/stream_executor/shared_memory_config.h index de556cb73404d5d919b779e5d712f465ff2bc821..7cbeb3bcd91816ece725afecc2e85e7f200c6045 100644 --- a/tensorflow/stream_executor/shared_memory_config.h +++ b/tensorflow/stream_executor/shared_memory_config.h @@ -19,8 +19,7 @@ limitations under the License. #ifndef TENSORFLOW_STREAM_EXECUTOR_SHARED_MEMORY_CONFIG_H_ #define TENSORFLOW_STREAM_EXECUTOR_SHARED_MEMORY_CONFIG_H_ -namespace perftools { -namespace gputools { +namespace stream_executor { // SharedMemoryConfig enum describes potential widths of shared memory banks for // a device or kernel. @@ -30,7 +29,6 @@ enum class SharedMemoryConfig { kEightByte, // Sets shared memory banks to be eight bytes wide. }; -} // namespace gputools -} // namespace perftools +} // namespace stream_executor #endif // TENSORFLOW_STREAM_EXECUTOR_SHARED_MEMORY_CONFIG_H_ diff --git a/tensorflow/stream_executor/stream.cc b/tensorflow/stream_executor/stream.cc index 1e3afde2687657e417e9e2cb3f5e2aaf0600da7a..f59d9a13acffa405f8627416d87d812fe4e62c8e 100644 --- a/tensorflow/stream_executor/stream.cc +++ b/tensorflow/stream_executor/stream.cc @@ -28,8 +28,7 @@ limitations under the License. 
#include "tensorflow/stream_executor/stream_executor_internal.h" #include "tensorflow/stream_executor/stream_executor_pimpl.h" -namespace perftools { -namespace gputools { +namespace stream_executor { namespace { // Code to turn parameters to functions on stream into strings that @@ -4795,7 +4794,8 @@ Stream &Stream::ThenRnnForward( const dnn::RnnStateTensorDescriptor &output_c_desc, DeviceMemory *output_c_data, bool is_training, ScratchAllocator *reserve_space_allocator, - ScratchAllocator *workspace_allocator) { + ScratchAllocator *workspace_allocator, + dnn::ProfileResult *output_profile_result) { // TODO(zhengxq): add VLOG PARAM calls. if (ok()) { if (dnn::DnnSupport *dnn = parent_->AsDnn()) { @@ -4803,7 +4803,8 @@ Stream &Stream::ThenRnnForward( this, rnn_desc, input_desc, input_data, input_h_desc, input_h_data, input_c_desc, input_c_data, params, output_desc, output_data, output_h_desc, output_h_data, output_c_desc, output_c_data, - is_training, reserve_space_allocator, workspace_allocator)); + is_training, reserve_space_allocator, workspace_allocator, + output_profile_result)); } else { SetError(); LOG(WARNING) << "Attempting to call ThenRnnForward without DNN support"; @@ -4827,7 +4828,8 @@ Stream &Stream::ThenRnnForward( const dnn::RnnStateTensorDescriptor &output_c_desc, DeviceMemory *output_c_data, bool is_training, ScratchAllocator *reserve_space_allocator, - ScratchAllocator *workspace_allocator) { + ScratchAllocator *workspace_allocator, + dnn::ProfileResult *output_profile_result) { // TODO(zhengxq): add VLOG PARAM calls. 
if (ok()) { if (dnn::DnnSupport *dnn = parent_->AsDnn()) { @@ -4835,7 +4837,8 @@ Stream &Stream::ThenRnnForward( this, rnn_desc, input_desc, input_data, input_h_desc, input_h_data, input_c_desc, input_c_data, params, output_desc, output_data, output_h_desc, output_h_data, output_c_desc, output_c_data, - is_training, reserve_space_allocator, workspace_allocator)); + is_training, reserve_space_allocator, workspace_allocator, + output_profile_result)); } else { SetError(); LOG(WARNING) << "Attempting to call ThenRnnForward without DNN support"; @@ -4860,7 +4863,8 @@ Stream &Stream::ThenRnnForward( const dnn::RnnStateTensorDescriptor &output_c_desc, DeviceMemory *output_c_data, bool is_training, ScratchAllocator *reserve_space_allocator, - ScratchAllocator *workspace_allocator) { + ScratchAllocator *workspace_allocator, + dnn::ProfileResult *output_profile_result) { // TODO(zhengxq): add VLOG PARAM calls. if (ok()) { if (dnn::DnnSupport *dnn = parent_->AsDnn()) { @@ -4868,7 +4872,8 @@ Stream &Stream::ThenRnnForward( this, rnn_desc, input_desc, input_data, input_h_desc, input_h_data, input_c_desc, input_c_data, params, output_desc, output_data, output_h_desc, output_h_data, output_c_desc, output_c_data, - is_training, reserve_space_allocator, workspace_allocator)); + is_training, reserve_space_allocator, workspace_allocator, + output_profile_result)); } else { SetError(); LOG(WARNING) << "Attempting to call ThenRnnForward without DNN support"; @@ -4900,7 +4905,8 @@ Stream &Stream::ThenRnnBackward( DeviceMemory *input_c_backprop_data, DeviceMemory *params_backprop_data, DeviceMemory *reserve_space_data, - ScratchAllocator *workspace_allocator) { + ScratchAllocator *workspace_allocator, + dnn::ProfileResult *output_profile_result) { // TODO(zhengxq): add VLOG PARAM calls. 
if (ok()) { if (dnn::DnnSupport *dnn = parent_->AsDnn()) { @@ -4910,7 +4916,8 @@ Stream &Stream::ThenRnnBackward( output_h_desc, output_h_data, output_c_desc, output_c_data, output_backprop_data, output_h_backprop_data, output_c_backprop_data, input_backprop_data, input_h_backprop_data, input_c_backprop_data, - params_backprop_data, reserve_space_data, workspace_allocator)); + params_backprop_data, reserve_space_data, workspace_allocator, + output_profile_result)); } else { SetError(); LOG(WARNING) << "Attempting to call ThenRnnBackward without DNN support"; @@ -4941,7 +4948,8 @@ Stream &Stream::ThenRnnBackward( DeviceMemory *input_c_backprop_data, DeviceMemory *params_backprop_data, DeviceMemory *reserve_space_data, - ScratchAllocator *workspace_allocator) { + ScratchAllocator *workspace_allocator, + dnn::ProfileResult *output_profile_result) { // TODO(zhengxq): add VLOG PARAM calls. if (ok()) { if (dnn::DnnSupport *dnn = parent_->AsDnn()) { @@ -4951,7 +4959,8 @@ Stream &Stream::ThenRnnBackward( output_h_desc, output_h_data, output_c_desc, output_c_data, output_backprop_data, output_h_backprop_data, output_c_backprop_data, input_backprop_data, input_h_backprop_data, input_c_backprop_data, - params_backprop_data, reserve_space_data, workspace_allocator)); + params_backprop_data, reserve_space_data, workspace_allocator, + output_profile_result)); } else { SetError(); LOG(WARNING) << "Attempting to call ThenRnnBackward without DNN support"; @@ -4983,7 +4992,8 @@ Stream &Stream::ThenRnnBackward( DeviceMemory *input_c_backprop_data, DeviceMemory *params_backprop_data, DeviceMemory *reserve_space_data, - ScratchAllocator *workspace_allocator) { + ScratchAllocator *workspace_allocator, + dnn::ProfileResult *output_profile_result) { // TODO(zhengxq): add VLOG PARAM calls. 
if (ok()) { if (dnn::DnnSupport *dnn = parent_->AsDnn()) { @@ -4993,7 +5003,8 @@ Stream &Stream::ThenRnnBackward( output_h_desc, output_h_data, output_c_desc, output_c_data, output_backprop_data, output_h_backprop_data, output_c_backprop_data, input_backprop_data, input_h_backprop_data, input_c_backprop_data, - params_backprop_data, reserve_space_data, workspace_allocator)); + params_backprop_data, reserve_space_data, workspace_allocator, + output_profile_result)); } else { SetError(); LOG(WARNING) << "Attempting to call ThenRnnBackward without DNN support"; @@ -5180,5 +5191,4 @@ port::Status Stream::BlockHostUntilDone() { return first_error; } -} // namespace gputools -} // namespace perftools +} // namespace stream_executor diff --git a/tensorflow/stream_executor/stream.h b/tensorflow/stream_executor/stream.h index d7d11315699b85cae4d479b79bc8fc2717b2d8fb..d4a81440e9651e739545a3e9ccaceb1877949c75 100644 --- a/tensorflow/stream_executor/stream.h +++ b/tensorflow/stream_executor/stream.h @@ -38,8 +38,7 @@ limitations under the License. 
#include "tensorflow/stream_executor/platform/thread_annotations.h" #include "tensorflow/stream_executor/temporary_memory_manager.h" -namespace perftools { -namespace gputools { +namespace stream_executor { namespace host { class HostBlas; @@ -1802,7 +1801,8 @@ class Stream { DeviceMemory *output_c_data, bool is_training, ScratchAllocator *reserve_space_allocator, - ScratchAllocator *workspace_allocator); + ScratchAllocator *workspace_allocator, + dnn::ProfileResult *output_profile_result); Stream &ThenRnnForward(const dnn::RnnDescriptor &rnn_desc, const dnn::RnnSequenceTensorDescriptor &input_desc, @@ -1819,7 +1819,8 @@ class Stream { const dnn::RnnStateTensorDescriptor &output_c_desc, DeviceMemory *output_c_data, bool is_training, ScratchAllocator *reserve_space_allocator, - ScratchAllocator *workspace_allocator); + ScratchAllocator *workspace_allocator, + dnn::ProfileResult *output_profile_result); Stream &ThenRnnForward(const dnn::RnnDescriptor &rnn_desc, const dnn::RnnSequenceTensorDescriptor &input_desc, @@ -1836,7 +1837,8 @@ class Stream { const dnn::RnnStateTensorDescriptor &output_c_desc, DeviceMemory *output_c_data, bool is_training, ScratchAllocator *reserve_space_allocator, - ScratchAllocator *workspace_allocator); + ScratchAllocator *workspace_allocator, + dnn::ProfileResult *output_profile_result); // Enqueue a backward operation of the RNN model onto the stream. // See DnnSupport::DoRnnBackward for more details. 
@@ -1863,7 +1865,8 @@ class Stream { DeviceMemory *input_c_backprop_data, DeviceMemory *params_backprop_data, DeviceMemory *reserve_space_data, - ScratchAllocator *workspace_allocator); + ScratchAllocator *workspace_allocator, + dnn::ProfileResult *output_profile_result); Stream &ThenRnnBackward(const dnn::RnnDescriptor &rnn_desc, const dnn::RnnSequenceTensorDescriptor &input_desc, @@ -1887,7 +1890,8 @@ class Stream { DeviceMemory *input_c_backprop_data, DeviceMemory *params_backprop_data, DeviceMemory *reserve_space_data, - ScratchAllocator *workspace_allocator); + ScratchAllocator *workspace_allocator, + dnn::ProfileResult *output_profile_result); Stream &ThenRnnBackward(const dnn::RnnDescriptor &rnn_desc, const dnn::RnnSequenceTensorDescriptor &input_desc, @@ -1911,7 +1915,8 @@ class Stream { DeviceMemory *input_c_backprop_data, DeviceMemory *params_backprop_data, DeviceMemory *reserve_space_data, - ScratchAllocator *workspace_allocator); + ScratchAllocator *workspace_allocator, + dnn::ProfileResult *output_profile_result); // Enqueue onto the stream a operation that transforms a tensor. // See DnnSupport::DoTransformTensor for more details. @@ -2092,7 +2097,6 @@ struct Quantization { dnn::QuantizedActivationMode::k32Bit; }; -} // namespace gputools -} // namespace perftools +} // namespace stream_executor #endif // TENSORFLOW_STREAM_EXECUTOR_STREAM_H_ diff --git a/tensorflow/stream_executor/stream_executor.h b/tensorflow/stream_executor/stream_executor.h index 2995dccf469a2977b9593f2045062d568e569dc5..d63d485df5668a5cbb5c61525401947bebeab616 100644 --- a/tensorflow/stream_executor/stream_executor.h +++ b/tensorflow/stream_executor/stream_executor.h @@ -35,4 +35,15 @@ limitations under the License. 
#include "tensorflow/stream_executor/stream_executor_pimpl.h" // IWYU pragma: export #include "tensorflow/stream_executor/timer.h" // IWYU pragma: export +namespace perftools { +namespace gputools { + +// Temporarily pull stream_executor into perftools::gputools while we migrate +// code to the new namespace. TODO(b/77980417): Remove this once we've +// completed the migration. +using namespace stream_executor; // NOLINT[build/namespaces] + +} // namespace gputools +} // namespace perftools + #endif // TENSORFLOW_STREAM_EXECUTOR_STREAM_EXECUTOR_H_ diff --git a/tensorflow/stream_executor/stream_executor_internal.cc b/tensorflow/stream_executor/stream_executor_internal.cc index 273d970b6fa4a581381689191b183a30f4f2bcd3..8297228e6fecddffa8fc68a1a028456dc8e75a65 100644 --- a/tensorflow/stream_executor/stream_executor_internal.cc +++ b/tensorflow/stream_executor/stream_executor_internal.cc @@ -15,8 +15,7 @@ limitations under the License. #include "tensorflow/stream_executor/stream_executor_internal.h" -namespace perftools { -namespace gputools { +namespace stream_executor { namespace internal { // -- CUDA @@ -38,5 +37,4 @@ StreamExecutorFactory* MakeOpenCLExecutorImplementation() { StreamExecutorFactory MakeHostExecutorImplementation; } // namespace internal -} // namespace gputools -} // namespace perftools +} // namespace stream_executor diff --git a/tensorflow/stream_executor/stream_executor_internal.h b/tensorflow/stream_executor/stream_executor_internal.h index 37ef182e1445a85dd0a97eac02ba064a26dc0f1d..2584c92f0c5a1129e2f10aa7148161a8d2d40c50 100644 --- a/tensorflow/stream_executor/stream_executor_internal.h +++ b/tensorflow/stream_executor/stream_executor_internal.h @@ -45,8 +45,7 @@ limitations under the License. 
#include "tensorflow/stream_executor/trace_listener.h" #include "tensorflow/stream_executor/lib/inlined_vector.h" -namespace perftools { -namespace gputools { +namespace stream_executor { class Stream; class Timer; @@ -343,7 +342,6 @@ extern StreamExecutorFactory MakeHostExecutorImplementation; } // namespace internal -} // namespace gputools -} // namespace perftools +} // namespace stream_executor #endif // TENSORFLOW_STREAM_EXECUTOR_STREAM_EXECUTOR_INTERNAL_H_ diff --git a/tensorflow/stream_executor/stream_executor_pimpl.cc b/tensorflow/stream_executor/stream_executor_pimpl.cc index afca1c2e597b55b1b8d0b76d4e79995d6f6af822..2e1adeb31e46d923eedfc1fab3c21271e5b76c2c 100644 --- a/tensorflow/stream_executor/stream_executor_pimpl.cc +++ b/tensorflow/stream_executor/stream_executor_pimpl.cc @@ -39,8 +39,7 @@ namespace { bool FLAGS_check_device_leaks = false; } // namespace -namespace perftools { -namespace gputools { +namespace stream_executor { namespace { string StackTraceIfVLOG10() { @@ -305,6 +304,15 @@ bool StreamExecutor::GetConvolveAlgorithms( cc_minor, out_algorithms); } +bool StreamExecutor::GetRnnAlgorithms( + std::vector *out_algorithms) { + dnn::DnnSupport *dnn_support = AsDnn(); + if (!dnn_support) { + return false; + } + return dnn_support->GetRnnAlgorithms(out_algorithms); +} + bool StreamExecutor::GetConvolveBackwardDataAlgorithms( bool with_winograd_nonfused, std::vector *out_algorithms) { @@ -344,7 +352,8 @@ port::StatusOr> StreamExecutor::createRnnDescriptor( int num_layers, int hidden_size, int input_size, dnn::RnnInputMode input_mode, dnn::RnnDirectionMode direction_mode, - dnn::RnnMode rnn_mode, dnn::DataType data_type, float dropout, uint64 seed, + dnn::RnnMode rnn_mode, dnn::DataType data_type, + const dnn::AlgorithmConfig &algorithm_config, float dropout, uint64 seed, ScratchAllocator *state_allocator) { dnn::DnnSupport *dnn_support = AsDnn(); if (!dnn_support) { @@ -353,7 +362,7 @@ StreamExecutor::createRnnDescriptor( } return 
dnn_support->createRnnDescriptor( num_layers, hidden_size, input_size, input_mode, direction_mode, rnn_mode, - data_type, dropout, seed, state_allocator); + data_type, algorithm_config, dropout, seed, state_allocator); } port::StatusOr> @@ -778,5 +787,4 @@ internal::StreamExecutorInterface *StreamExecutor::implementation() { return implementation_->GetUnderlyingExecutor(); } -} // namespace gputools -} // namespace perftools +} // namespace stream_executor diff --git a/tensorflow/stream_executor/stream_executor_pimpl.h b/tensorflow/stream_executor/stream_executor_pimpl.h index a2a77218cbbafeeb9d4d8ca04b2e0a8a5024ebf9..39af7115d8f697428fe7134ad0704a4ebcf033fe 100644 --- a/tensorflow/stream_executor/stream_executor_pimpl.h +++ b/tensorflow/stream_executor/stream_executor_pimpl.h @@ -37,8 +37,7 @@ limitations under the License. #include "tensorflow/stream_executor/stream_executor_internal.h" #include "tensorflow/stream_executor/trace_listener.h" -namespace perftools { -namespace gputools { +namespace stream_executor { // Structure used for device memory leak checking. struct AllocRecord { @@ -95,7 +94,7 @@ class StreamExecutor { // Parameters: // spec: The MultiKernelLoaderSpec is usually generated as a compile-time // constant into an appropriate namespace. For example, see - // perftools::gputools::executor_sample::kKernelLoaderSpecs, from which a + // stream_executor::executor_sample::kKernelLoaderSpecs, from which a // MultiKernelLoaderSpec is selected. // kernel: Outparam that the kernel is loaded into. A given Kernel // instantiation should not be loaded into more than once. @@ -349,10 +348,14 @@ class StreamExecutor { // platform that underlies this interface. bool SupportsDnn() const; - // Get the list of supported algorithms for the forward convolution opeartion. + // Returns the list of supported algorithms for the forward convolution + // operation. 
bool GetConvolveAlgorithms(bool with_winograd_nonfused, std::vector *out_algorithms); + // Returns the list of supported algorithms for rnn operation. + bool GetRnnAlgorithms(std::vector *out_algorithms); + // Get the list of supported algorithms for the backward convolution on data. bool GetConvolveBackwardDataAlgorithms( bool with_winograd_nonfused, @@ -372,8 +375,9 @@ class StreamExecutor { port::StatusOr> createRnnDescriptor( int num_layers, int hidden_size, int input_size, dnn::RnnInputMode input_mode, dnn::RnnDirectionMode direction_mode, - dnn::RnnMode rnn_mode, dnn::DataType data_type, float dropout, - uint64 seed, ScratchAllocator *state_allocator); + dnn::RnnMode rnn_mode, dnn::DataType data_type, + const dnn::AlgorithmConfig &algorithm_config, float dropout, uint64 seed, + ScratchAllocator *state_allocator); // Create a RNN sequence descriptor that specifies either the input or output // sequence. The caller retains the ownership of the returned descriptor. @@ -798,7 +802,6 @@ inline Stream &Stream::ThenLaunch(ThreadDim thread_dims, BlockDim block_dims, return *this; } -} // namespace gputools -} // namespace perftools +} // namespace stream_executor #endif // TENSORFLOW_STREAM_EXECUTOR_STREAM_EXECUTOR_PIMPL_H_ diff --git a/tensorflow/stream_executor/temporary_device_memory.cc b/tensorflow/stream_executor/temporary_device_memory.cc index c33166b2246b51ac045349d9e2c0794a3f3868a1..f113ce9be57b9bd954df0273fa6f9af1dc7d9546 100644 --- a/tensorflow/stream_executor/temporary_device_memory.cc +++ b/tensorflow/stream_executor/temporary_device_memory.cc @@ -17,8 +17,7 @@ limitations under the License. 
#include "tensorflow/stream_executor/stream.h" -namespace perftools { -namespace gputools { +namespace stream_executor { TemporaryDeviceMemoryBase::~TemporaryDeviceMemoryBase() { parent_->temporary_memory_manager()->MarkFinalized(device_memory_, @@ -64,5 +63,4 @@ TemporaryDeviceMemoryBase::TemporaryDeviceMemoryBase( DCHECK(IsAllocated()); } -} // namespace gputools -} // namespace perftools +} // namespace stream_executor diff --git a/tensorflow/stream_executor/temporary_device_memory.h b/tensorflow/stream_executor/temporary_device_memory.h index 2255e7ffd716d8f6d6aad2c447e05c6dfca766df..77be8599a2de59f9a8287337210ad0c6bf76ece7 100644 --- a/tensorflow/stream_executor/temporary_device_memory.h +++ b/tensorflow/stream_executor/temporary_device_memory.h @@ -43,8 +43,7 @@ limitations under the License. #include "tensorflow/stream_executor/device_memory.h" -namespace perftools { -namespace gputools { +namespace stream_executor { class Stream; namespace internal { @@ -132,7 +131,6 @@ class TemporaryDeviceMemory : public TemporaryDeviceMemoryBase { } }; -} // namespace gputools -} // namespace perftools +} // namespace stream_executor #endif // TENSORFLOW_STREAM_EXECUTOR_TEMPORARY_DEVICE_MEMORY_H_ diff --git a/tensorflow/stream_executor/temporary_memory_manager.cc b/tensorflow/stream_executor/temporary_memory_manager.cc index 449ab7d3f0bff5dff0c55b716753ca0befd4062f..420dbb0933db3adf69a425f6223250e66f960261 100644 --- a/tensorflow/stream_executor/temporary_memory_manager.cc +++ b/tensorflow/stream_executor/temporary_memory_manager.cc @@ -21,8 +21,7 @@ limitations under the License. 
#include "tensorflow/stream_executor/stream.h" #include "tensorflow/stream_executor/stream_executor_pimpl.h" -namespace perftools { -namespace gputools { +namespace stream_executor { namespace internal { void TemporaryMemoryManager::ForceDeallocateAll() { @@ -124,5 +123,4 @@ TemporaryMemoryManager::AllocateArrayBase(uint64 element_count, } } // namespace internal -} // namespace gputools -} // namespace perftools +} // namespace stream_executor diff --git a/tensorflow/stream_executor/temporary_memory_manager.h b/tensorflow/stream_executor/temporary_memory_manager.h index 2e6fbd9d62a041a5b4f2d2c8111d30ba0da69ce5..faf13380dc20afa9675a1b8d51aad3906d9c7bbe 100644 --- a/tensorflow/stream_executor/temporary_memory_manager.h +++ b/tensorflow/stream_executor/temporary_memory_manager.h @@ -31,8 +31,7 @@ limitations under the License. #include "tensorflow/stream_executor/platform/thread_annotations.h" #include "tensorflow/stream_executor/temporary_device_memory.h" -namespace perftools { -namespace gputools { +namespace stream_executor { namespace internal { // Record used inside the TemporaryMemoryManager as metadata for a given device @@ -147,7 +146,6 @@ TemporaryMemoryManager::AllocateArray(uint64 element_count) { } } // namespace internal -} // namespace gputools -} // namespace perftools +} // namespace stream_executor #endif // TENSORFLOW_STREAM_EXECUTOR_TEMPORARY_MEMORY_MANAGER_H_ diff --git a/tensorflow/stream_executor/timer.cc b/tensorflow/stream_executor/timer.cc index 41d7e4359d4145b34d094a366824501d5a606940..a29791a1049d57cbd85604e4bfce154b57155009 100644 --- a/tensorflow/stream_executor/timer.cc +++ b/tensorflow/stream_executor/timer.cc @@ -21,8 +21,7 @@ limitations under the License. 
#include "tensorflow/stream_executor/platform/logging.h" #include "tensorflow/stream_executor/stream_executor.h" -namespace perftools { -namespace gputools { +namespace stream_executor { Timer::Timer(StreamExecutor *parent) : parent_(parent), @@ -34,5 +33,4 @@ uint64 Timer::Microseconds() const { return implementation_->Microseconds(); } uint64 Timer::Nanoseconds() const { return implementation_->Nanoseconds(); } -} // namespace gputools -} // namespace perftools +} // namespace stream_executor diff --git a/tensorflow/stream_executor/timer.h b/tensorflow/stream_executor/timer.h index 0a37caa0f2fa0b5f489d1e495368c807df70f51f..fba7dd8f589ad49685bdf5fd8faa1ee18fb55113 100644 --- a/tensorflow/stream_executor/timer.h +++ b/tensorflow/stream_executor/timer.h @@ -20,8 +20,7 @@ limitations under the License. #include "tensorflow/stream_executor/platform/port.h" -namespace perftools { -namespace gputools { +namespace stream_executor { namespace internal { class TimerInterface; @@ -69,7 +68,6 @@ class Timer { SE_DISALLOW_COPY_AND_ASSIGN(Timer); }; -} // namespace gputools -} // namespace perftools +} // namespace stream_executor #endif // TENSORFLOW_STREAM_EXECUTOR_TIMER_H_ diff --git a/tensorflow/stream_executor/trace_listener.h b/tensorflow/stream_executor/trace_listener.h index d1e87c348b1f867009fdb6b741d984b2f58cef21..0e874a1d47b4daa12b501f87ad1c84afe84b0b6b 100644 --- a/tensorflow/stream_executor/trace_listener.h +++ b/tensorflow/stream_executor/trace_listener.h @@ -23,8 +23,7 @@ limitations under the License. 
#include "tensorflow/stream_executor/launch_dim.h" #include "tensorflow/stream_executor/lib/status.h" -namespace perftools { -namespace gputools { +namespace stream_executor { class Stream; @@ -69,7 +68,6 @@ class TraceListener { const port::Status* result) {} }; -} // namespace gputools -} // namespace perftools +} // namespace stream_executor #endif // TENSORFLOW_STREAM_EXECUTOR_TRACE_LISTENER_H_ diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index fcc57d506e38205d8da605653ed67fb645102c35..51e856bed0e995fc6e1e367401c93b5f3648c361 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -163,7 +163,6 @@ def if_override_eigen_strong_inline(a): def get_win_copts(is_external=False): WINDOWS_COPTS = [ - "/D__VERSION__=\\\"MSVC\\\"", "/DPLATFORM_WINDOWS", "/DEIGEN_HAS_C99_MATH", "/DTENSORFLOW_USE_EIGEN_THREADPOOL", @@ -304,6 +303,7 @@ def tf_cc_shared_object( clean_dep("//tensorflow:darwin"): [ "-Wl,-install_name,@rpath/" + name.split("/")[-1], ], + clean_dep("//tensorflow:windows"): [], "//conditions:default": [ "-Wl,-soname," + name.split("/")[-1], ], @@ -342,6 +342,22 @@ register_extension_info( label_regex_for_dep = "{extension_name}.*", ) +# A simple wrap around native.cc_binary rule. +# When using this rule, you should realize it doesn't link to any tensorflow +# dependencies by default. 
+def tf_native_cc_binary(name, + copts=tf_copts(), + **kwargs): + native.cc_binary( + name=name, + copts=copts, + **kwargs) + +register_extension_info( + extension_name = "tf_native_cc_binary", + label_regex_for_dep = "{extension_name}.*", +) + def tf_gen_op_wrapper_cc(name, out_ops_file, pkg="", @@ -622,9 +638,12 @@ def tf_cc_test(name, linkopts=select({ clean_dep("//tensorflow:android"): [ "-pie", - ], + ], clean_dep("//tensorflow:windows"): [], clean_dep("//tensorflow:windows_msvc"): [], + clean_dep("//tensorflow:darwin"): [ + "-lm", + ], "//conditions:default": [ "-lpthread", "-lm" @@ -790,7 +809,33 @@ def tf_cc_test_mkl(srcs, tags=[], size="medium", args=None): - if_mkl(tf_cc_tests(srcs, deps, name, linkstatic=linkstatic, tags=tags, size=size, args=args, nocopts="-fno-exceptions")) + for src in srcs: + native.cc_test( + name=src_to_test_name(src), + srcs=if_mkl([src]) + tf_binary_additional_srcs(), + copts=tf_copts(), + linkopts=select({ + clean_dep("//tensorflow:android"): [ + "-pie", + ], + clean_dep("//tensorflow:windows"): [], + clean_dep("//tensorflow:windows_msvc"): [], + "//conditions:default": [ + "-lpthread", + "-lm" + ], + }) + _rpath_linkopts(src_to_test_name(src)), + deps=deps + if_mkl( + [ + "//third_party/mkl:intel_binary_blob", + ], + ), + linkstatic=linkstatic, + tags=tags, + size=size, + args=args, + nocopts="-fno-exceptions") + def tf_cc_tests_gpu(srcs, deps, @@ -910,6 +955,7 @@ def tf_cuda_library(deps=None, cuda_deps=None, copts=tf_copts(), **kwargs): if 'linkstatic' not in kwargs or kwargs['linkstatic'] != 1: enable_text_relocation_linkopt = select({ clean_dep("//tensorflow:darwin"): [], + clean_dep("//tensorflow:windows"): [], "//conditions:default": ['-Wl,-z,notext'],}) if 'linkopts' in kwargs: kwargs['linkopts'] += enable_text_relocation_linkopt @@ -1008,16 +1054,12 @@ register_extension_info( def tf_mkl_kernel_library(name, prefix=None, srcs=None, - gpu_srcs=None, hdrs=None, deps=None, alwayslink=1, copts=tf_copts(), - 
nocopts="-fno-exceptions", - **kwargs): + nocopts="-fno-exceptions"): """A rule to build MKL-based TensorFlow kernel libraries.""" - gpu_srcs = gpu_srcs # unused argument - kwargs = kwargs # unused argument if not bool(srcs): srcs = [] @@ -1030,16 +1072,15 @@ def tf_mkl_kernel_library(name, hdrs = hdrs + native.glob( [prefix + "*.h"]) - if_mkl( - native.cc_library( - name=name, - srcs=srcs, - hdrs=hdrs, - deps=deps, - alwayslink=alwayslink, - copts=copts, - nocopts=nocopts - )) + native.cc_library( + name=name, + srcs=if_mkl(srcs), + hdrs=hdrs, + deps=deps, + alwayslink=alwayslink, + copts=copts, + nocopts=nocopts + ) register_extension_info( extension_name = "tf_mkl_kernel_library", @@ -1178,6 +1219,20 @@ def tf_custom_op_library_additional_deps(): "@protobuf_archive//:protobuf_headers", clean_dep("//third_party/eigen3"), clean_dep("//tensorflow/core:framework_headers_lib"), + ] + if_windows(["//tensorflow/python:pywrap_tensorflow_import_lib"]) + +# A list of targets that contains the implemenation of +# tf_custom_op_library_additional_deps. It's used to generate a DEF file for +# exporting symbols from _pywrap_tensorflow.dll on Windows. 
+def tf_custom_op_library_additional_deps_impl(): + return [ + "@protobuf_archive//:protobuf", + "@nsync//:nsync_cpp", + # for //third_party/eigen3 + clean_dep("//third_party/eigen3"), + # for //tensorflow/core:framework_headers_lib + clean_dep("//tensorflow/core:framework"), + clean_dep("//tensorflow/core:reader_base"), ] # Traverse the dependency graph along the "deps" attribute of the @@ -1264,6 +1319,7 @@ def tf_custom_op_library(name, srcs=[], gpu_srcs=[], deps=[], linkopts=[]): deps=deps + if_cuda(cuda_deps), data=[name + "_check_deps"], copts=tf_copts(is_external=True), + features = ["windows_export_all_symbols"], linkopts=linkopts + select({ "//conditions:default": [ "-lm", @@ -1410,7 +1466,8 @@ def tf_py_wrap_cc(name, ]) + tf_extension_copts()), linkopts=tf_extension_linkopts() + extra_linkopts, linkstatic=1, - deps=deps + extra_deps) + deps=deps + extra_deps, + **kwargs) native.genrule( name="gen_" + cc_library_pyd_name, srcs=[":" + cc_library_name], @@ -1646,7 +1703,7 @@ def tf_version_info_genrule(): ], outs=["util/version_info.cc"], cmd= - "$(location //tensorflow/tools/git:gen_git_source.py) --generate $(SRCS) \"$@\"", + "$(location //tensorflow/tools/git:gen_git_source.py) --generate $(SRCS) \"$@\" --git_tag_override=$${GIT_TAG_OVERRIDE:-}", local=1, tools=[clean_dep("//tensorflow/tools/git:gen_git_source.py")],) diff --git a/tensorflow/tools/api/generator/BUILD b/tensorflow/tools/api/generator/BUILD index 6722536358158d2adb0d7c0c05f876ac683ad726..a1c569951e99162c8048b7b760c25df7b2f29420 100644 --- a/tensorflow/tools/api/generator/BUILD +++ b/tensorflow/tools/api/generator/BUILD @@ -32,6 +32,7 @@ genrule( # api/module1/module2/__init__.py and api/module3/__init__.py. 
# keep sorted outs = [ + # BEGIN GENERATED FILES "api/__init__.py", "api/app/__init__.py", "api/bitwise/__init__.py", @@ -93,6 +94,7 @@ genrule( "api/logging/__init__.py", "api/losses/__init__.py", "api/manip/__init__.py", + "api/math/__init__.py", "api/metrics/__init__.py", "api/nn/__init__.py", "api/nn/rnn_cell/__init__.py", @@ -116,6 +118,7 @@ genrule( "api/train/__init__.py", "api/train/queue_runner/__init__.py", "api/user_ops/__init__.py", + # END GENERATED FILES ], cmd = "$(location create_python_api) $(OUTS)", tools = ["create_python_api"], diff --git a/tensorflow/tools/api/generator/create_python_api.py b/tensorflow/tools/api/generator/create_python_api.py index 183c4731b8176ece16a70bac421291fd76d748cb..c06a39bfbdf06cb478007a884f38728755845f19 100644 --- a/tensorflow/tools/api/generator/create_python_api.py +++ b/tensorflow/tools/api/generator/create_python_api.py @@ -67,18 +67,23 @@ def format_import(source_module_name, source_name, dest_name): return 'import %s as %s' % (source_name, dest_name) -class _ModuleImportsBuilder(object): +class _ModuleInitCodeBuilder(object): """Builds a map from module name to imports included in that module.""" def __init__(self): - self.module_imports = collections.defaultdict(list) - self._seen_api_names = set() + self.module_imports = collections.defaultdict( + lambda: collections.defaultdict(set)) + self._dest_import_to_id = collections.defaultdict(int) + # Names that start with underscore in the root module. + self._underscore_names_in_root = [] def add_import( - self, dest_module_name, source_module_name, source_name, dest_name): + self, symbol_id, dest_module_name, source_module_name, source_name, + dest_name): """Adds this import to module_imports. Args: + symbol_id: (number) Unique identifier of the symbol to import. dest_module_name: (string) Module name to add import to. source_module_name: (string) Module to import from. source_name: (string) Name of the symbol to import. 
@@ -89,40 +94,74 @@ class _ModuleImportsBuilder(object): dest_name has already been added to dest_module_name. """ import_str = format_import(source_module_name, source_name, dest_name) - if import_str in self.module_imports[dest_module_name]: - return # Check if we are trying to expose two different symbols with same name. full_api_name = dest_name if dest_module_name: full_api_name = dest_module_name + '.' + full_api_name - if full_api_name in self._seen_api_names: + if (full_api_name in self._dest_import_to_id and + symbol_id != self._dest_import_to_id[full_api_name] and + symbol_id != -1): raise SymbolExposedTwiceError( 'Trying to export multiple symbols with same name: %s.' % full_api_name) - self._seen_api_names.add(full_api_name) + self._dest_import_to_id[full_api_name] = symbol_id - self.module_imports[dest_module_name].append(import_str) + if not dest_module_name and dest_name.startswith('_'): + self._underscore_names_in_root.append(dest_name) + # The same symbol can be available in multiple modules. + # We store all possible ways of importing this symbol and later pick just + # one. + self.module_imports[dest_module_name][full_api_name].add(import_str) -def get_api_imports(): - """Get a map from destination module to formatted imports. + def build(self): + """Get a map from destination module to __init__.py code for that module. + + Returns: + A dictionary where + key: (string) destination module (for e.g. tf or tf.consts). + value: (string) text that should be in __init__.py files for + corresponding modules. + """ + module_text_map = {} + for dest_module, dest_name_to_imports in self.module_imports.items(): + # Sort all possible imports for a symbol and pick the first one. + imports_list = [ + sorted(imports)[0] + for _, imports in dest_name_to_imports.items()] + module_text_map[dest_module] = '\n'.join(sorted(imports_list)) + + # Expose exported symbols with underscores in root module + # since we import from it using * import. 
+ underscore_names_str = ', '.join( + '\'%s\'' % name for name in self._underscore_names_in_root) + module_text_map[''] += ''' +_names_with_underscore = [%s] +__all__ = [s for s in dir() if not s.startswith('_')] +__all__.extend([s for s in _names_with_underscore]) +''' % underscore_names_str + + return module_text_map + + +def get_api_init_text(): + """Get a map from destination module to __init__.py code for that module. Returns: A dictionary where key: (string) destination module (for e.g. tf or tf.consts). - value: List of strings representing module imports - (for e.g. 'from foo import bar') and constant - assignments (for e.g. 'FOO = 123'). + value: (string) text that should be in __init__.py files for + corresponding modules. """ - module_imports_builder = _ModuleImportsBuilder() - visited_symbols = set() + module_code_builder = _ModuleInitCodeBuilder() # Traverse over everything imported above. Specifically, # we want to traverse over TensorFlow Python modules. for module in sys.modules.values(): # Only look at tensorflow modules. - if not module or 'tensorflow.' not in module.__name__: + if (not module or not hasattr(module, '__name__') or + 'tensorflow.' not in module.__name__): continue # Do not generate __init__.py files for contrib modules for now. if '.contrib.' in module.__name__ or module.__name__.endswith('.contrib'): @@ -130,8 +169,6 @@ def get_api_imports(): for module_contents_name in dir(module): attr = getattr(module, module_contents_name) - if id(attr) in visited_symbols: - continue # If attr is _tf_api_constants attribute, then add the constants. 
if module_contents_name == _API_CONSTANTS_ATTR: @@ -139,30 +176,25 @@ def get_api_imports(): for export in exports: names = export.split('.') dest_module = '.'.join(names[:-1]) - module_imports_builder.add_import( - dest_module, module.__name__, value, names[-1]) + module_code_builder.add_import( + -1, dest_module, module.__name__, value, names[-1]) continue _, attr = tf_decorator.unwrap(attr) # If attr is a symbol with _tf_api_names attribute, then # add import for it. if hasattr(attr, '__dict__') and _API_NAMES_ATTR in attr.__dict__: - # If the same symbol is available using multiple names, only create - # imports for it once. - if id(attr) in visited_symbols: - continue - visited_symbols.add(id(attr)) - for export in attr._tf_api_names: # pylint: disable=protected-access names = export.split('.') dest_module = '.'.join(names[:-1]) - module_imports_builder.add_import( - dest_module, module.__name__, module_contents_name, names[-1]) + module_code_builder.add_import( + id(attr), dest_module, module.__name__, module_contents_name, + names[-1]) # Import all required modules in their parent modules. # For e.g. if we import 'foo.bar.Value'. Then, we also # import 'bar' in 'foo'. - imported_modules = set(module_imports_builder.module_imports.keys()) + imported_modules = set(module_code_builder.module_imports.keys()) for module in imported_modules: if not module: continue @@ -175,11 +207,11 @@ def get_api_imports(): parent_module += ('.' + module_split[submodule_index-1] if parent_module else module_split[submodule_index-1]) import_from += '.' 
+ parent_module - module_imports_builder.add_import( - parent_module, import_from, module_split[submodule_index], - module_split[submodule_index]) + module_code_builder.add_import( + -1, parent_module, import_from, + module_split[submodule_index], module_split[submodule_index]) - return module_imports_builder.module_imports + return module_code_builder.build() def create_api_files(output_files): @@ -195,16 +227,19 @@ def create_api_files(output_files): """ module_name_to_file_path = {} for output_file in output_files: + # Convert path separators to '/' for easier parsing below. + normalized_output_file = output_file.replace(os.sep, '/') if _API_DIR not in output_file: raise ValueError( 'Output files must be in api/ directory, found %s.' % output_file) # Get the module name that corresponds to output_file. # First get module directory under _API_DIR. module_dir = os.path.dirname( - output_file[output_file.rfind(_API_DIR)+len(_API_DIR):]) + normalized_output_file[ + normalized_output_file.rfind(_API_DIR)+len(_API_DIR):]) # Convert / to . module_name = module_dir.replace('/', '.').strip('.') - module_name_to_file_path[module_name] = output_file + module_name_to_file_path[module_name] = os.path.normpath(output_file) # Create file for each expected output in genrule. for module, file_path in module_name_to_file_path.items(): @@ -212,11 +247,11 @@ def create_api_files(output_files): os.makedirs(os.path.dirname(file_path)) open(file_path, 'a').close() - module_imports = get_api_imports() + module_text_map = get_api_init_text() # Add imports to output files. missing_output_files = [] - for module, exports in module_imports.items(): + for module, text in module_text_map.items(): # Make sure genrule output file list is in sync with API exports. 
if module not in module_name_to_file_path: module_file_path = '"api/%s/__init__.py"' % ( @@ -224,7 +259,7 @@ def create_api_files(output_files): missing_output_files.append(module_file_path) continue with open(module_name_to_file_path[module], 'w') as fp: - fp.write(_GENERATED_FILE_HEADER + '\n'.join(exports)) + fp.write(_GENERATED_FILE_HEADER + text) if missing_output_files: raise ValueError( @@ -241,6 +276,16 @@ if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument( 'outputs', metavar='O', type=str, nargs='+', - help='Python files that we expect this script to output.') + help='If a single file is passed in, then we we assume it contains a ' + 'semicolon-separated list of Python files that we expect this script to ' + 'output. If multiple files are passed in, then we assume output files ' + 'are listed directly as arguments.') args = parser.parse_args() - main(args.outputs) + if len(args.outputs) == 1: + # If we only get a single argument, then it must be a file containing + # list of outputs. 
+ with open(args.outputs[0]) as output_list_file: + outputs = [line.strip() for line in output_list_file.read().split(';')] + else: + outputs = args.outputs + main(outputs) diff --git a/tensorflow/tools/api/generator/create_python_api_test.py b/tensorflow/tools/api/generator/create_python_api_test.py index 2760779e6e0a909cb077f534db40710ab6a11b32..218c8120453c8dca6e81146eb06e8243a3cd424d 100644 --- a/tensorflow/tools/api/generator/create_python_api_test.py +++ b/tensorflow/tools/api/generator/create_python_api_test.py @@ -56,7 +56,7 @@ class CreatePythonApiTest(test.TestCase): del sys.modules[_MODULE_NAME] def testFunctionImportIsAdded(self): - imports = create_python_api.get_api_imports() + imports = create_python_api.get_api_init_text() expected_import = ( 'from test.tensorflow.test_module import test_op as test_op1') self.assertTrue( @@ -69,14 +69,14 @@ class CreatePythonApiTest(test.TestCase): msg='%s not in %s' % (expected_import, str(imports))) def testClassImportIsAdded(self): - imports = create_python_api.get_api_imports() + imports = create_python_api.get_api_init_text() expected_import = 'from test.tensorflow.test_module import TestClass' self.assertTrue( 'TestClass' in str(imports), msg='%s not in %s' % (expected_import, str(imports))) def testConstantIsAdded(self): - imports = create_python_api.get_api_imports() + imports = create_python_api.get_api_init_text() expected = 'from test.tensorflow.test_module import _TEST_CONSTANT' self.assertTrue(expected in str(imports), msg='%s not in %s' % (expected, str(imports))) diff --git a/tensorflow/tools/api/golden/tensorflow.data.-dataset.pbtxt b/tensorflow/tools/api/golden/tensorflow.data.-dataset.pbtxt index 0900adaf762df1415c8db63c3879ca2fabc28d9f..cbbd077c97ba7f304e13f7883254f5a12668255e 100644 --- a/tensorflow/tools/api/golden/tensorflow.data.-dataset.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.data.-dataset.pbtxt @@ -64,7 +64,7 @@ tf_class { } member_method { name: "list_files" - argspec: 
"args=[\'file_pattern\', \'shuffle\'], varargs=None, keywords=None, defaults=[\'None\'], " + argspec: "args=[\'file_pattern\', \'shuffle\', \'seed\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " } member_method { name: "make_initializable_iterator" diff --git a/tensorflow/tools/api/golden/tensorflow.data.-fixed-length-record-dataset.pbtxt b/tensorflow/tools/api/golden/tensorflow.data.-fixed-length-record-dataset.pbtxt index 7b16ac90c925beb25e065d26e73ee2a54b06d9dc..9a56ae8675c4fdae7424859fb5105574d83da494 100644 --- a/tensorflow/tools/api/golden/tensorflow.data.-fixed-length-record-dataset.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.data.-fixed-length-record-dataset.pbtxt @@ -65,7 +65,7 @@ tf_class { } member_method { name: "list_files" - argspec: "args=[\'file_pattern\', \'shuffle\'], varargs=None, keywords=None, defaults=[\'None\'], " + argspec: "args=[\'file_pattern\', \'shuffle\', \'seed\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " } member_method { name: "make_initializable_iterator" diff --git a/tensorflow/tools/api/golden/tensorflow.data.-t-f-record-dataset.pbtxt b/tensorflow/tools/api/golden/tensorflow.data.-t-f-record-dataset.pbtxt index 9cf5f2ae2057ab4a16131527cf2ef2fa6ada28e5..e5ec824bb8959f77f99ce3527f168bacf8de200a 100644 --- a/tensorflow/tools/api/golden/tensorflow.data.-t-f-record-dataset.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.data.-t-f-record-dataset.pbtxt @@ -65,7 +65,7 @@ tf_class { } member_method { name: "list_files" - argspec: "args=[\'file_pattern\', \'shuffle\'], varargs=None, keywords=None, defaults=[\'None\'], " + argspec: "args=[\'file_pattern\', \'shuffle\', \'seed\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " } member_method { name: "make_initializable_iterator" diff --git a/tensorflow/tools/api/golden/tensorflow.data.-text-line-dataset.pbtxt b/tensorflow/tools/api/golden/tensorflow.data.-text-line-dataset.pbtxt index 
8c3d6691439e619c906996a3ddaea4317c4a9597..008239789c788692a36445dc2c51e26113b6a0c5 100644 --- a/tensorflow/tools/api/golden/tensorflow.data.-text-line-dataset.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.data.-text-line-dataset.pbtxt @@ -65,7 +65,7 @@ tf_class { } member_method { name: "list_files" - argspec: "args=[\'file_pattern\', \'shuffle\'], varargs=None, keywords=None, defaults=[\'None\'], " + argspec: "args=[\'file_pattern\', \'shuffle\', \'seed\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " } member_method { name: "make_initializable_iterator" diff --git a/tensorflow/tools/api/golden/tensorflow.distributions.bijectors.-bijector.pbtxt b/tensorflow/tools/api/golden/tensorflow.distributions.bijectors.-bijector.pbtxt deleted file mode 100644 index 11565bd3e4178202fa82e2e079d1035190dbd6ec..0000000000000000000000000000000000000000 --- a/tensorflow/tools/api/golden/tensorflow.distributions.bijectors.-bijector.pbtxt +++ /dev/null @@ -1,65 +0,0 @@ -path: "tensorflow.distributions.bijectors.Bijector" -tf_class { - is_instance: "" - is_instance: "" - member { - name: "dtype" - mtype: "" - } - member { - name: "event_ndims" - mtype: "" - } - member { - name: "graph_parents" - mtype: "" - } - member { - name: "is_constant_jacobian" - mtype: "" - } - member { - name: "name" - mtype: "" - } - member { - name: "validate_args" - mtype: "" - } - member_method { - name: "__init__" - argspec: "args=[\'self\', \'event_ndims\', \'graph_parents\', \'is_constant_jacobian\', \'validate_args\', \'dtype\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'False\', \'False\', \'None\', \'None\'], " - } - member_method { - name: "forward" - argspec: "args=[\'self\', \'x\', \'name\'], varargs=None, keywords=None, defaults=[\'forward\'], " - } - member_method { - name: "forward_event_shape" - argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "forward_event_shape_tensor" - 
argspec: "args=[\'self\', \'input_shape\', \'name\'], varargs=None, keywords=None, defaults=[\'forward_event_shape_tensor\'], " - } - member_method { - name: "forward_log_det_jacobian" - argspec: "args=[\'self\', \'x\', \'name\'], varargs=None, keywords=None, defaults=[\'forward_log_det_jacobian\'], " - } - member_method { - name: "inverse" - argspec: "args=[\'self\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'inverse\'], " - } - member_method { - name: "inverse_event_shape" - argspec: "args=[\'self\', \'output_shape\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "inverse_event_shape_tensor" - argspec: "args=[\'self\', \'output_shape\', \'name\'], varargs=None, keywords=None, defaults=[\'inverse_event_shape_tensor\'], " - } - member_method { - name: "inverse_log_det_jacobian" - argspec: "args=[\'self\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'inverse_log_det_jacobian\'], " - } -} diff --git a/tensorflow/tools/api/golden/tensorflow.distributions.bijectors.-identity.pbtxt b/tensorflow/tools/api/golden/tensorflow.distributions.bijectors.-identity.pbtxt deleted file mode 100644 index 1e5fe624eb838e188594d03b656c12890db344a1..0000000000000000000000000000000000000000 --- a/tensorflow/tools/api/golden/tensorflow.distributions.bijectors.-identity.pbtxt +++ /dev/null @@ -1,66 +0,0 @@ -path: "tensorflow.distributions.bijectors.Identity" -tf_class { - is_instance: "" - is_instance: "" - is_instance: "" - member { - name: "dtype" - mtype: "" - } - member { - name: "event_ndims" - mtype: "" - } - member { - name: "graph_parents" - mtype: "" - } - member { - name: "is_constant_jacobian" - mtype: "" - } - member { - name: "name" - mtype: "" - } - member { - name: "validate_args" - mtype: "" - } - member_method { - name: "__init__" - argspec: "args=[\'self\', \'validate_args\', \'event_ndims\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'0\', \'identity\'], " - } - member_method { - name: 
"forward" - argspec: "args=[\'self\', \'x\', \'name\'], varargs=None, keywords=None, defaults=[\'forward\'], " - } - member_method { - name: "forward_event_shape" - argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "forward_event_shape_tensor" - argspec: "args=[\'self\', \'input_shape\', \'name\'], varargs=None, keywords=None, defaults=[\'forward_event_shape_tensor\'], " - } - member_method { - name: "forward_log_det_jacobian" - argspec: "args=[\'self\', \'x\', \'name\'], varargs=None, keywords=None, defaults=[\'forward_log_det_jacobian\'], " - } - member_method { - name: "inverse" - argspec: "args=[\'self\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'inverse\'], " - } - member_method { - name: "inverse_event_shape" - argspec: "args=[\'self\', \'output_shape\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "inverse_event_shape_tensor" - argspec: "args=[\'self\', \'output_shape\', \'name\'], varargs=None, keywords=None, defaults=[\'inverse_event_shape_tensor\'], " - } - member_method { - name: "inverse_log_det_jacobian" - argspec: "args=[\'self\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'inverse_log_det_jacobian\'], " - } -} diff --git a/tensorflow/tools/api/golden/tensorflow.distributions.bijectors.pbtxt b/tensorflow/tools/api/golden/tensorflow.distributions.bijectors.pbtxt deleted file mode 100644 index 1d0144f36ec332740889dc8caa5add8f41960d92..0000000000000000000000000000000000000000 --- a/tensorflow/tools/api/golden/tensorflow.distributions.bijectors.pbtxt +++ /dev/null @@ -1,11 +0,0 @@ -path: "tensorflow.distributions.bijectors" -tf_module { - member { - name: "Bijector" - mtype: "" - } - member { - name: "Identity" - mtype: "" - } -} diff --git a/tensorflow/tools/api/golden/tensorflow.distributions.pbtxt b/tensorflow/tools/api/golden/tensorflow.distributions.pbtxt index 
2fba7c506ed9d2490e7c19c1746d3f4e9645424f..90b60ef074dd2eaf911291e6c725b98e2891e728 100644 --- a/tensorflow/tools/api/golden/tensorflow.distributions.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.distributions.pbtxt @@ -68,10 +68,6 @@ tf_module { name: "Uniform" mtype: "" } - member { - name: "bijectors" - mtype: "" - } member_method { name: "kl_divergence" argspec: "args=[\'distribution_a\', \'distribution_b\', \'allow_nan_stats\', \'name\'], varargs=None, keywords=None, defaults=[\'True\', \'None\'], " diff --git a/tensorflow/tools/api/golden/tensorflow.estimator.-boosted-trees-classifier.pbtxt b/tensorflow/tools/api/golden/tensorflow.estimator.-boosted-trees-classifier.pbtxt index fd9be8c75914ed37f5f36c4df5a14bd00caee20e..53a903c239b5fccb5e70d0a93d5af08245f4dc46 100644 --- a/tensorflow/tools/api/golden/tensorflow.estimator.-boosted-trees-classifier.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.estimator.-boosted-trees-classifier.pbtxt @@ -21,7 +21,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'feature_columns\', \'n_batches_per_layer\', \'model_dir\', \'n_classes\', \'weight_column\', \'label_vocabulary\', \'n_trees\', \'max_depth\', \'learning_rate\', \'l1_regularization\', \'l2_regularization\', \'tree_complexity\', \'config\'], varargs=None, keywords=None, defaults=[\'None\', \'\', \'None\', \'None\', \'100\', \'6\', \'0.1\', \'0.0\', \'0.0\', \'0.0\', \'None\'], " + argspec: "args=[\'self\', \'feature_columns\', \'n_batches_per_layer\', \'model_dir\', \'n_classes\', \'weight_column\', \'label_vocabulary\', \'n_trees\', \'max_depth\', \'learning_rate\', \'l1_regularization\', \'l2_regularization\', \'tree_complexity\', \'min_node_weight\', \'config\'], varargs=None, keywords=None, defaults=[\'None\', \'\', \'None\', \'None\', \'100\', \'6\', \'0.1\', \'0.0\', \'0.0\', \'0.0\', \'0.0\', \'None\'], " } member_method { name: "evaluate" diff --git 
a/tensorflow/tools/api/golden/tensorflow.estimator.-boosted-trees-regressor.pbtxt b/tensorflow/tools/api/golden/tensorflow.estimator.-boosted-trees-regressor.pbtxt index 6b305be43f845ec15f9c160d5ea4823c6ae68897..ba17c90de28899683ee5bd992cb5516b01856280 100644 --- a/tensorflow/tools/api/golden/tensorflow.estimator.-boosted-trees-regressor.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.estimator.-boosted-trees-regressor.pbtxt @@ -21,7 +21,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'feature_columns\', \'n_batches_per_layer\', \'model_dir\', \'label_dimension\', \'weight_column\', \'n_trees\', \'max_depth\', \'learning_rate\', \'l1_regularization\', \'l2_regularization\', \'tree_complexity\', \'config\'], varargs=None, keywords=None, defaults=[\'None\', \'\', \'None\', \'100\', \'6\', \'0.1\', \'0.0\', \'0.0\', \'0.0\', \'None\'], " + argspec: "args=[\'self\', \'feature_columns\', \'n_batches_per_layer\', \'model_dir\', \'label_dimension\', \'weight_column\', \'n_trees\', \'max_depth\', \'learning_rate\', \'l1_regularization\', \'l2_regularization\', \'tree_complexity\', \'min_node_weight\', \'config\'], varargs=None, keywords=None, defaults=[\'None\', \'\', \'None\', \'100\', \'6\', \'0.1\', \'0.0\', \'0.0\', \'0.0\', \'0.0\', \'None\'], " } member_method { name: "evaluate" diff --git a/tensorflow/tools/api/golden/tensorflow.estimator.-run-config.pbtxt b/tensorflow/tools/api/golden/tensorflow.estimator.-run-config.pbtxt index 759ff752b0ea6b710a2d20fd9ad665b3e6e6ea82..c8da55d8021b7659446d0771a089b7b605d86c4f 100644 --- a/tensorflow/tools/api/golden/tensorflow.estimator.-run-config.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.estimator.-run-config.pbtxt @@ -7,7 +7,7 @@ tf_class { mtype: "" } member { - name: "distribute" + name: "device_fn" mtype: "" } member { @@ -82,9 +82,13 @@ tf_class { name: "tf_random_seed" mtype: "" } + member { + name: "train_distribute" + mtype: "" + } member_method { name: "__init__" - argspec: 
"args=[\'self\', \'model_dir\', \'tf_random_seed\', \'save_summary_steps\', \'save_checkpoints_steps\', \'save_checkpoints_secs\', \'session_config\', \'keep_checkpoint_max\', \'keep_checkpoint_every_n_hours\', \'log_step_count_steps\', \'distribute\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'100\', \'\', \'\', \'None\', \'5\', \'10000\', \'100\', \'None\'], " + argspec: "args=[\'self\', \'model_dir\', \'tf_random_seed\', \'save_summary_steps\', \'save_checkpoints_steps\', \'save_checkpoints_secs\', \'session_config\', \'keep_checkpoint_max\', \'keep_checkpoint_every_n_hours\', \'log_step_count_steps\', \'train_distribute\', \'device_fn\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'100\', \'\', \'\', \'None\', \'5\', \'10000\', \'100\', \'None\', \'None\'], " } member_method { name: "replace" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.-model.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.-model.pbtxt index 7be2f4f61f6b9637f372591e49efc0c93c7a8c0a..cdf2da712f38170c4b82a682d4630036bbf93925 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.-model.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.-model.pbtxt @@ -3,7 +3,6 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -14,10 +13,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -74,10 +69,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "state_updates" mtype: "" @@ -128,7 +119,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', 
\'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" @@ -260,7 +251,7 @@ tf_class { } member_method { name: "test_on_batch" - argspec: "args=[\'self\', \'x\', \'y\', \'sample_weight\'], varargs=None, keywords=None, defaults=[\'None\'], " + argspec: "args=[\'self\', \'x\', \'y\', \'sample_weight\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " } member_method { name: "to_json" @@ -272,6 +263,6 @@ tf_class { } member_method { name: "train_on_batch" - argspec: "args=[\'self\', \'x\', \'y\', \'sample_weight\', \'class_weight\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + argspec: "args=[\'self\', \'x\', \'y\', \'sample_weight\', \'class_weight\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " } } diff --git a/tensorflow/tools/api/golden/tensorflow.keras.-sequential.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.-sequential.pbtxt index bf361cf8054571c0b056e1373acb838aaea87173..5c2c29e60fe11aa29cf2a27cac055bb7aea49dde 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.-sequential.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.-sequential.pbtxt @@ -4,7 +4,6 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -15,10 +14,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -75,10 +70,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "state_updates" mtype: "" @@ -133,7 +124,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, 
defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" @@ -277,7 +268,7 @@ tf_class { } member_method { name: "test_on_batch" - argspec: "args=[\'self\', \'x\', \'y\', \'sample_weight\'], varargs=None, keywords=None, defaults=[\'None\'], " + argspec: "args=[\'self\', \'x\', \'y\', \'sample_weight\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " } member_method { name: "to_json" @@ -289,6 +280,6 @@ tf_class { } member_method { name: "train_on_batch" - argspec: "args=[\'self\', \'x\', \'y\', \'sample_weight\', \'class_weight\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + argspec: "args=[\'self\', \'x\', \'y\', \'sample_weight\', \'class_weight\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " } } diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-activation.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-activation.pbtxt index db8f626b98b70fd99f38e696aa16c72e74e86e25..96272d1b7d61430188bbbf2680bd2beb9f1e9675 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-activation.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-activation.pbtxt @@ -2,7 +2,6 @@ path: "tensorflow.keras.layers.Activation" tf_class { is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -13,10 +12,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -65,10 +60,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -103,11 +94,11 @@ tf_class { } 
member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-activity-regularization.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-activity-regularization.pbtxt index 809b3a5430449176a0d7423ec7f4499ceb620890..8fd55c8686de77ec764e9d564c78d0df4f545915 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-activity-regularization.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-activity-regularization.pbtxt @@ -2,7 +2,6 @@ path: "tensorflow.keras.layers.ActivityRegularization" tf_class { is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -13,10 +12,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -65,10 +60,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -103,11 +94,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', 
\'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-add.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-add.pbtxt index 68d41bb6cc258ca87d4664ac0fb9d5649f89ebaf..47d1532c3c8cf248f6e9a9a35e10b9559286263f 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-add.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-add.pbtxt @@ -3,7 +3,6 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -14,10 +13,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -66,10 +61,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -104,11 +95,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], 
varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-alpha-dropout.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-alpha-dropout.pbtxt index 970b777e514194db4ac49fe58bea737b35436217..797d422a90a5ad21d0014a0003b11d281c25e579 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-alpha-dropout.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-alpha-dropout.pbtxt @@ -2,7 +2,6 @@ path: "tensorflow.keras.layers.AlphaDropout" tf_class { is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -13,10 +12,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -65,10 +60,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -103,11 +94,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', 
\'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-average-pooling1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-average-pooling1-d.pbtxt index 529c64ab293d596012aefd42e0695bd1eb7e44d1..269be1455b6bf3bbe325f3928584960578e3793d 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-average-pooling1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-average-pooling1-d.pbtxt @@ -1,10 +1,8 @@ path: "tensorflow.keras.layers.AveragePooling1D" tf_class { is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -15,10 +13,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -67,10 +61,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -93,7 +83,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'pool_size\', \'strides\', \'padding\'], varargs=None, keywords=kwargs, defaults=[\'2\', \'None\', \'valid\'], " + argspec: "args=[\'self\', \'pool_size\', \'strides\', \'padding\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'2\', \'None\', \'valid\', \'None\'], " } member_method { name: "add_loss" @@ -105,11 +95,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', 
\'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-average-pooling2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-average-pooling2-d.pbtxt index 7e7c330d74fe3b71ecd0eb87e34719e47ae70784..344813621534dbb5de3719088c06313e55519dd7 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-average-pooling2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-average-pooling2-d.pbtxt @@ -1,10 +1,8 @@ path: "tensorflow.keras.layers.AveragePooling2D" tf_class { is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -15,10 +13,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -67,10 +61,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -105,11 +95,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], 
varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-average-pooling3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-average-pooling3-d.pbtxt index ada8466d7473072b1878861ab36ec40b07fa1914..979008d0edb7d0f9d9c1246e1fcc7d2e2871d28c 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-average-pooling3-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-average-pooling3-d.pbtxt @@ -1,10 +1,8 @@ path: "tensorflow.keras.layers.AveragePooling3D" tf_class { is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -15,10 +13,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -67,10 +61,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -105,11 +95,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', 
\'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-average.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-average.pbtxt index 2a5c1cd530a7a532f6cdd3c184f4ee7eb88d23d3..0ffdffd4cdee14fbfeb68f2575300632ab21d7a6 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-average.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-average.pbtxt @@ -3,7 +3,6 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -14,10 +13,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -66,10 +61,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -104,11 +95,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', 
\'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-avg-pool1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-avg-pool1-d.pbtxt index 9a2cb29815d59f3761ea25e9ea36ff6489c85b88..6b00f110eea2aaa551bffe8ec225042c5469210e 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-avg-pool1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-avg-pool1-d.pbtxt @@ -1,10 +1,8 @@ path: "tensorflow.keras.layers.AvgPool1D" tf_class { is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -15,10 +13,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -67,10 +61,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -93,7 +83,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'pool_size\', \'strides\', \'padding\'], varargs=None, keywords=kwargs, defaults=[\'2\', \'None\', \'valid\'], " + argspec: "args=[\'self\', \'pool_size\', \'strides\', \'padding\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'2\', \'None\', \'valid\', \'None\'], " } member_method { name: "add_loss" @@ -105,11 +95,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', 
\'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-avg-pool2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-avg-pool2-d.pbtxt index f5e991ea42e5ee2723b64574d4598dc8463f1c8c..caff5a2f1db61c6958980446d3bd54009776e1a4 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-avg-pool2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-avg-pool2-d.pbtxt @@ -1,10 +1,8 @@ path: "tensorflow.keras.layers.AvgPool2D" tf_class { is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -15,10 +13,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -67,10 +61,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -105,11 +95,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', 
\'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-avg-pool3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-avg-pool3-d.pbtxt index 31732214a62524017e39776cdfb9ab629746e8ae..4a7239492177aae2ee098fc3033904d3d1a31ba4 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-avg-pool3-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-avg-pool3-d.pbtxt @@ -1,10 +1,8 @@ path: "tensorflow.keras.layers.AvgPool3D" tf_class { is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -15,10 +13,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -67,10 +61,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -105,11 +95,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, 
keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-batch-normalization.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-batch-normalization.pbtxt index 422eddf10db6763e10405dba5537ca161d1b8994..9804394fa53d6b3c1ff136a73212863143bbdb39 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-batch-normalization.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-batch-normalization.pbtxt @@ -1,9 +1,7 @@ path: "tensorflow.keras.layers.BatchNormalization" tf_class { is_instance: "" - is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -14,10 +12,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -66,10 +60,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -92,7 +82,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'axis\', \'momentum\', \'epsilon\', \'center\', \'scale\', \'beta_initializer\', \'gamma_initializer\', \'moving_mean_initializer\', \'moving_variance_initializer\', \'beta_regularizer\', \'gamma_regularizer\', \'beta_constraint\', \'gamma_constraint\'], varargs=None, keywords=kwargs, defaults=[\'-1\', \'0.99\', \'0.001\', \'True\', \'True\', \'zeros\', \'ones\', 
\'zeros\', \'ones\', \'None\', \'None\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'axis\', \'momentum\', \'epsilon\', \'center\', \'scale\', \'beta_initializer\', \'gamma_initializer\', \'moving_mean_initializer\', \'moving_variance_initializer\', \'beta_regularizer\', \'gamma_regularizer\', \'beta_constraint\', \'gamma_constraint\', \'renorm\', \'renorm_clipping\', \'renorm_momentum\', \'fused\', \'trainable\', \'virtual_batch_size\', \'adjustment\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'-1\', \'0.99\', \'0.001\', \'True\', \'True\', \'zeros\', \'ones\', \'zeros\', \'ones\', \'None\', \'None\', \'None\', \'None\', \'False\', \'None\', \'0.99\', \'None\', \'True\', \'None\', \'None\', \'None\'], " } member_method { name: "add_loss" @@ -104,11 +94,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-bidirectional.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-bidirectional.pbtxt index 9053a37916314198842bc21b0608a9b69a64c264..5e5b04c7c695c6d88e7f42b77290a582be087763 100644 
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-bidirectional.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-bidirectional.pbtxt @@ -3,7 +3,6 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -18,10 +17,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -70,10 +65,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable" mtype: "" @@ -112,11 +103,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-concatenate.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-concatenate.pbtxt index 3d536d2182fc4480a2ee5fba177543ca21fbd5ac..b8eb4079b9eea3b054f9c2ad4298f6a1669de79e 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-concatenate.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-concatenate.pbtxt @@ -3,7 +3,6 @@ tf_class { 
is_instance: "" is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -14,10 +13,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -66,10 +61,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -104,11 +95,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv-l-s-t-m2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv-l-s-t-m2-d.pbtxt index 6a7da1aef8db64ad11bb5a5ba357f33eeb99170b..3fdb101425d0f010f77cd70bc3721af269a36b0a 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv-l-s-t-m2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv-l-s-t-m2-d.pbtxt @@ -1,22 +1,49 @@ path: "tensorflow.keras.layers.ConvLSTM2D" tf_class { is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" - is_instance: "" 
is_instance: "" is_instance: "" + member { + name: "activation" + mtype: "" + } member { name: "activity_regularizer" mtype: "" } + member { + name: "bias_constraint" + mtype: "" + } + member { + name: "bias_initializer" + mtype: "" + } + member { + name: "bias_regularizer" + mtype: "" + } + member { + name: "data_format" + mtype: "" + } + member { + name: "dilation_rate" + mtype: "" + } + member { + name: "dropout" + mtype: "" + } member { name: "dtype" mtype: "" } member { - name: "graph" + name: "filters" mtype: "" } member { @@ -35,6 +62,22 @@ tf_class { name: "input_shape" mtype: "" } + member { + name: "kernel_constraint" + mtype: "" + } + member { + name: "kernel_initializer" + mtype: "" + } + member { + name: "kernel_regularizer" + mtype: "" + } + member { + name: "kernel_size" + mtype: "" + } member { name: "losses" mtype: "" @@ -68,7 +111,35 @@ tf_class { mtype: "" } member { - name: "scope_name" + name: "padding" + mtype: "" + } + member { + name: "recurrent_activation" + mtype: "" + } + member { + name: "recurrent_constraint" + mtype: "" + } + member { + name: "recurrent_dropout" + mtype: "" + } + member { + name: "recurrent_initializer" + mtype: "" + } + member { + name: "recurrent_regularizer" + mtype: "" + } + member { + name: "states" + mtype: "" + } + member { + name: "strides" mtype: "" } member { @@ -79,10 +150,18 @@ tf_class { name: "trainable_weights" mtype: "" } + member { + name: "unit_forget_bias" + mtype: "" + } member { name: "updates" mtype: "" } + member { + name: "use_bias" + mtype: "" + } member { name: "variables" mtype: "" @@ -105,11 +184,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: 
"add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" @@ -143,10 +222,6 @@ tf_class { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } - member_method { - name: "get_constants" - argspec: "args=[\'self\', \'inputs\', \'training\'], varargs=None, keywords=None, defaults=[\'None\'], " - } member_method { name: "get_initial_state" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -187,28 +262,12 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } - member_method { - name: "input_conv" - argspec: "args=[\'self\', \'x\', \'w\', \'b\', \'padding\'], varargs=None, keywords=None, defaults=[\'None\', \'valid\'], " - } - member_method { - name: "preprocess_input" - argspec: "args=[\'self\', \'inputs\', \'training\'], varargs=None, keywords=None, defaults=[\'None\'], " - } - member_method { - name: "recurrent_conv" - argspec: "args=[\'self\', \'x\', \'w\'], varargs=None, keywords=None, defaults=None" - } member_method { name: "reset_states" - argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'self\', \'states\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" } - member_method { - name: "step" - argspec: "args=[\'self\', \'inputs\', \'states\'], varargs=None, keywords=None, 
defaults=None" - } } diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv1-d.pbtxt index 801a0339720919f8b3f6beee0f045d58b2c0a371..0be42471e35eeba224376b24aa846db69e011274 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv1-d.pbtxt @@ -1,10 +1,8 @@ path: "tensorflow.keras.layers.Conv1D" tf_class { is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -15,10 +13,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -67,10 +61,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -93,7 +83,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'filters\', \'kernel_size\', \'strides\', \'padding\', \'dilation_rate\', \'activation\', \'use_bias\', \'kernel_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'bias_constraint\'], varargs=None, keywords=kwargs, defaults=[\'1\', \'valid\', \'1\', \'None\', \'True\', \'glorot_uniform\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'filters\', \'kernel_size\', \'strides\', \'padding\', \'data_format\', \'dilation_rate\', \'activation\', \'use_bias\', \'kernel_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'bias_constraint\'], varargs=None, keywords=kwargs, defaults=[\'1\', \'valid\', \'channels_last\', \'1\', \'None\', \'True\', \'glorot_uniform\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "add_loss" @@ -105,11 
+95,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv2-d-transpose.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv2-d-transpose.pbtxt index 13352e264a5305190717bb973a3f2bce4d7f4fff..39ba31a70942811cbd36caf33c9bda90a5449703 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv2-d-transpose.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv2-d-transpose.pbtxt @@ -1,11 +1,9 @@ path: "tensorflow.keras.layers.Conv2DTranspose" tf_class { is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -16,10 +14,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -68,10 +62,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -106,11 +96,11 @@ tf_class { } member_method { 
name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv2-d.pbtxt index f400e4a15c362037e85ac375cee98bb5f6358669..26d9d8c476f4e429ffe112cd490ad277b478c65b 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv2-d.pbtxt @@ -1,10 +1,8 @@ path: "tensorflow.keras.layers.Conv2D" tf_class { is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -15,10 +13,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -67,10 +61,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -105,11 +95,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', 
\'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv3-d-transpose.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv3-d-transpose.pbtxt index b3a9f573b8ba652d2544b21f36f65fe81a6ebb50..43611017fa37077c7ff05e690dfd50a0e6e5ae1e 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv3-d-transpose.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv3-d-transpose.pbtxt @@ -1,11 +1,9 @@ path: "tensorflow.keras.layers.Conv3DTranspose" tf_class { is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -16,10 +14,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -68,10 +62,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -106,11 +96,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', 
\'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv3-d.pbtxt index a9be09c0abd19aeb4df30116ef2befc3948bfbf4..fa4925ab99d719104c5ca1a0003c25d85f78f3df 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv3-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv3-d.pbtxt @@ -1,10 +1,8 @@ path: "tensorflow.keras.layers.Conv3D" tf_class { is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -15,10 +13,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -67,10 +61,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -105,11 +95,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + 
argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution1-d.pbtxt index be1ef5eb928d16cc6bf78c289aa20d815c728b23..c5c5d5e7c083dca63972cb7033e675aa483d8039 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution1-d.pbtxt @@ -1,10 +1,8 @@ path: "tensorflow.keras.layers.Convolution1D" tf_class { is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -15,10 +13,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -67,10 +61,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -93,7 +83,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'filters\', \'kernel_size\', \'strides\', \'padding\', \'dilation_rate\', \'activation\', \'use_bias\', \'kernel_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'bias_constraint\'], varargs=None, keywords=kwargs, defaults=[\'1\', 
\'valid\', \'1\', \'None\', \'True\', \'glorot_uniform\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'filters\', \'kernel_size\', \'strides\', \'padding\', \'data_format\', \'dilation_rate\', \'activation\', \'use_bias\', \'kernel_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'bias_constraint\'], varargs=None, keywords=kwargs, defaults=[\'1\', \'valid\', \'channels_last\', \'1\', \'None\', \'True\', \'glorot_uniform\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "add_loss" @@ -105,11 +95,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt index 30034f7eaf6d9073695353e5c8d9ead0cc8de7cc..36dc2d2e9a70fe7a1d32352a14dabbade2a2efc2 100644 --- 
a/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt @@ -1,11 +1,9 @@ path: "tensorflow.keras.layers.Convolution2DTranspose" tf_class { is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -16,10 +14,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -68,10 +62,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -106,11 +96,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution2-d.pbtxt index 189b38054c004facfeeff8ad2ae87848b89040f2..23ec74370bab010d2b5ba257502a32c8ca7e4a57 100644 --- 
a/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution2-d.pbtxt @@ -1,10 +1,8 @@ path: "tensorflow.keras.layers.Convolution2D" tf_class { is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -15,10 +13,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -67,10 +61,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -105,11 +95,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt index a76d85c629c1fe620dafd62a0f0e05e9009109e2..0e4089c5785ccdcafbb8b3dc1ca75ffbe49d9434 100644 --- 
a/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt @@ -1,11 +1,9 @@ path: "tensorflow.keras.layers.Convolution3DTranspose" tf_class { is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -16,10 +14,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -68,10 +62,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -106,11 +96,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution3-d.pbtxt index 782195d4ad5883d8c0ea6a657cc10258f2080a55..23ddbe1a925e33432727d13dc875972136083056 100644 --- 
a/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution3-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution3-d.pbtxt @@ -1,10 +1,8 @@ path: "tensorflow.keras.layers.Convolution3D" tf_class { is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -15,10 +13,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -67,10 +61,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -105,11 +95,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-cropping1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-cropping1-d.pbtxt index 2cb7a39ea595e1ff699b96554cb135377d20a488..e04ab6bea85baa1252c9c43f891f9ed5a9dedb87 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-cropping1-d.pbtxt +++ 
b/tensorflow/tools/api/golden/tensorflow.keras.layers.-cropping1-d.pbtxt @@ -2,7 +2,6 @@ path: "tensorflow.keras.layers.Cropping1D" tf_class { is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -13,10 +12,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -65,10 +60,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -103,11 +94,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-cropping2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-cropping2-d.pbtxt index 80803306992bba3b601824a93cb3086ef3947369..655314afffd1e4fb0987c913eb69edc4254c77ef 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-cropping2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-cropping2-d.pbtxt @@ -2,7 +2,6 @@ path: "tensorflow.keras.layers.Cropping2D" tf_class { is_instance: "" 
is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -13,10 +12,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -65,10 +60,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -103,11 +94,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-cropping3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-cropping3-d.pbtxt index 678f40bbc23db15ff7c1138169478fb4412a449d..d5215f1330a2fd0db4696a7b743d931e164a5a9f 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-cropping3-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-cropping3-d.pbtxt @@ -2,7 +2,6 @@ path: "tensorflow.keras.layers.Cropping3D" tf_class { is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -13,10 +12,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - 
mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -65,10 +60,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -103,11 +94,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-dense.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-dense.pbtxt index fac826109b6a32305ece86c4990f08afe2236ce8..310a3c3b918684803c86d8e2ea1731604a041cdb 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-dense.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-dense.pbtxt @@ -1,9 +1,7 @@ path: "tensorflow.keras.layers.Dense" tf_class { is_instance: "" - is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -14,10 +12,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -66,10 +60,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } 
member { name: "trainable_variables" mtype: "" @@ -104,11 +94,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-depthwise-conv2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-depthwise-conv2-d.pbtxt new file mode 100644 index 0000000000000000000000000000000000000000..2d67b5f720209e3648e69659d44c3d8c4e639231 --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-depthwise-conv2-d.pbtxt @@ -0,0 +1,177 @@ +path: "tensorflow.keras.layers.DepthwiseConv2D" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + member { + name: "activity_regularizer" + mtype: "" + } + member { + name: "dtype" + mtype: "" + } + member { + name: "inbound_nodes" + mtype: "" + } + member { + name: "input" + mtype: "" + } + member { + name: "input_mask" + mtype: "" + } + member { + name: "input_shape" + mtype: "" + } + member { + name: "losses" + mtype: "" + } + member { + name: "name" + mtype: "" + } + member { + name: 
"non_trainable_variables" + mtype: "" + } + member { + name: "non_trainable_weights" + mtype: "" + } + member { + name: "outbound_nodes" + mtype: "" + } + member { + name: "output" + mtype: "" + } + member { + name: "output_mask" + mtype: "" + } + member { + name: "output_shape" + mtype: "" + } + member { + name: "trainable_variables" + mtype: "" + } + member { + name: "trainable_weights" + mtype: "" + } + member { + name: "updates" + mtype: "" + } + member { + name: "variables" + mtype: "" + } + member { + name: "weights" + mtype: "" + } + member_method { + name: "__init__" + argspec: "args=[\'self\', \'kernel_size\', \'strides\', \'padding\', \'depth_multiplier\', \'data_format\', \'activation\', \'use_bias\', \'depthwise_initializer\', \'bias_initializer\', \'depthwise_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'depthwise_constraint\', \'bias_constraint\'], varargs=None, keywords=kwargs, defaults=[\'(1, 1)\', \'valid\', \'1\', \'None\', \'None\', \'True\', \'glorot_uniform\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\'], " + } + member_method { + name: "add_loss" + argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "add_update" + argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "add_variable" + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "add_weight" + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " + } + member_method { + name: "apply" + argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "build" + 
argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "call" + argspec: "args=[\'self\', \'inputs\', \'training\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "compute_mask" + argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "compute_output_shape" + argspec: "args=[\'instance\', \'input_shape\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "count_params" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_shape_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_losses_for" + argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_shape_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_updates_for" + argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" + } + 
member_method { + name: "get_weights" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "set_weights" + argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-dot.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-dot.pbtxt index 285d544af2d69d564afdec748598b39b6b95670f..0e493a7f2bffc772e9cc9cd5afdc4c092fb92118 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-dot.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-dot.pbtxt @@ -3,7 +3,6 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -14,10 +13,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -66,10 +61,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -104,11 +95,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } 
member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-dropout.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-dropout.pbtxt index b77976974cccb96fc2373c093d2bdf279560c46f..14726b4b6cecc39da98af4220211a1c0351b2ff1 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-dropout.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-dropout.pbtxt @@ -1,9 +1,7 @@ path: "tensorflow.keras.layers.Dropout" tf_class { is_instance: "" - is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -14,10 +12,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -66,10 +60,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -104,11 +94,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-e-l-u.pbtxt 
b/tensorflow/tools/api/golden/tensorflow.keras.layers.-e-l-u.pbtxt index b07714d3f2d158496e0482f8611e55ea0fb0fd51..32a50455ed8a2a146a81fde0d884ebc867b8d0ed 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-e-l-u.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-e-l-u.pbtxt @@ -2,7 +2,6 @@ path: "tensorflow.keras.layers.ELU" tf_class { is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -13,10 +12,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -65,10 +60,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -103,11 +94,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-embedding.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-embedding.pbtxt index e67d4ddfc47077d62319ab097e5333a373cbfc80..2f615d81124a7ef5e1bd7181a10abfb1b7a8df24 100644 --- 
a/tensorflow/tools/api/golden/tensorflow.keras.layers.-embedding.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-embedding.pbtxt @@ -2,7 +2,6 @@ path: "tensorflow.keras.layers.Embedding" tf_class { is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -13,10 +12,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -65,10 +60,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -103,11 +94,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-flatten.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-flatten.pbtxt index b2a668e5a88d312656f48ddd0e9f7aa9f6306991..82dc878a8c7f7f011df4dd3fa0445217fa250a98 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-flatten.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-flatten.pbtxt @@ -1,9 +1,7 @@ path: 
"tensorflow.keras.layers.Flatten" tf_class { is_instance: "" - is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -14,10 +12,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -66,10 +60,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -104,11 +94,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-g-r-u-cell.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-g-r-u-cell.pbtxt index 1fd3febad26df16576dedca1df7560bf230c08ec..d79d02b95433a7399f27ff1354cba315f8a2c3ae 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-g-r-u-cell.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-g-r-u-cell.pbtxt @@ -2,7 +2,6 @@ path: "tensorflow.keras.layers.GRUCell" tf_class { is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -13,10 
+12,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -65,10 +60,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -91,7 +82,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'units\', \'activation\', \'recurrent_activation\', \'use_bias\', \'kernel_initializer\', \'recurrent_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'recurrent_regularizer\', \'bias_regularizer\', \'kernel_constraint\', \'recurrent_constraint\', \'bias_constraint\', \'dropout\', \'recurrent_dropout\', \'implementation\'], varargs=None, keywords=kwargs, defaults=[\'tanh\', \'hard_sigmoid\', \'True\', \'glorot_uniform\', \'orthogonal\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'0.0\', \'0.0\', \'1\'], " + argspec: "args=[\'self\', \'units\', \'activation\', \'recurrent_activation\', \'use_bias\', \'kernel_initializer\', \'recurrent_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'recurrent_regularizer\', \'bias_regularizer\', \'kernel_constraint\', \'recurrent_constraint\', \'bias_constraint\', \'dropout\', \'recurrent_dropout\', \'implementation\', \'reset_after\'], varargs=None, keywords=kwargs, defaults=[\'tanh\', \'hard_sigmoid\', \'True\', \'glorot_uniform\', \'orthogonal\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'0.0\', \'0.0\', \'1\', \'False\'], " } member_method { name: "add_loss" @@ -103,11 +94,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: 
"add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-g-r-u.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-g-r-u.pbtxt index f5f41d879dcb840551c00a7272bbcfbe51dbee89..1d38ae64bb86d11ecd352371608a11f6736bb0ac 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-g-r-u.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-g-r-u.pbtxt @@ -3,7 +3,6 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -34,10 +33,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "implementation" mtype: "" @@ -123,7 +118,7 @@ tf_class { mtype: "" } member { - name: "scope_name" + name: "reset_after" mtype: "" } member { @@ -160,7 +155,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'units\', \'activation\', \'recurrent_activation\', \'use_bias\', \'kernel_initializer\', \'recurrent_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'recurrent_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'recurrent_constraint\', \'bias_constraint\', \'dropout\', \'recurrent_dropout\', \'implementation\', \'return_sequences\', \'return_state\', \'go_backwards\', \'stateful\', \'unroll\'], varargs=None, keywords=kwargs, defaults=[\'tanh\', \'hard_sigmoid\', \'True\', \'glorot_uniform\', \'orthogonal\', 
\'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'0.0\', \'0.0\', \'1\', \'False\', \'False\', \'False\', \'False\', \'False\'], " + argspec: "args=[\'self\', \'units\', \'activation\', \'recurrent_activation\', \'use_bias\', \'kernel_initializer\', \'recurrent_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'recurrent_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'recurrent_constraint\', \'bias_constraint\', \'dropout\', \'recurrent_dropout\', \'implementation\', \'return_sequences\', \'return_state\', \'go_backwards\', \'stateful\', \'unroll\', \'reset_after\'], varargs=None, keywords=kwargs, defaults=[\'tanh\', \'hard_sigmoid\', \'True\', \'glorot_uniform\', \'orthogonal\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'0.0\', \'0.0\', \'1\', \'False\', \'False\', \'False\', \'False\', \'False\', \'False\'], " } member_method { name: "add_loss" @@ -172,11 +167,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git 
a/tensorflow/tools/api/golden/tensorflow.keras.layers.-gaussian-dropout.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-gaussian-dropout.pbtxt index f4f1a5d51c5d5689918af4facf907f79d9ca71ec..135de9cd95141a93d1a45b80ada66e339c484c89 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-gaussian-dropout.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-gaussian-dropout.pbtxt @@ -2,7 +2,6 @@ path: "tensorflow.keras.layers.GaussianDropout" tf_class { is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -13,10 +12,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -65,10 +60,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -103,11 +94,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-gaussian-noise.pbtxt 
b/tensorflow/tools/api/golden/tensorflow.keras.layers.-gaussian-noise.pbtxt index e502df5e177d422403d0643c18a9588afb9d9713..5db6e433ee02b8050822f76bb762329055c11aa4 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-gaussian-noise.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-gaussian-noise.pbtxt @@ -2,7 +2,6 @@ path: "tensorflow.keras.layers.GaussianNoise" tf_class { is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -13,10 +12,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -65,10 +60,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -103,11 +94,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt index 
9c8d5bfcd8966384230e7d5cdcc1cac53a0eab9a..bf0dba0a925b4fb3a88173f4a6dfcc565c6edd91 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt @@ -1,9 +1,8 @@ path: "tensorflow.keras.layers.GlobalAveragePooling1D" tf_class { is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -14,10 +13,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -66,10 +61,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -104,11 +95,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-average-pooling2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-average-pooling2-d.pbtxt index 
8dd65f1f248daaf120780f19050c45d297b7902e..6da98036094228603f12f65f66d958e2e4b9daeb 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-average-pooling2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-average-pooling2-d.pbtxt @@ -1,9 +1,8 @@ path: "tensorflow.keras.layers.GlobalAveragePooling2D" tf_class { is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -14,10 +13,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -66,10 +61,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -104,11 +95,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-average-pooling3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-average-pooling3-d.pbtxt index 
5e30571cc730ee23767a044036b590460deec00b..345593dec812a240251c0c07da759e131fefaad9 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-average-pooling3-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-average-pooling3-d.pbtxt @@ -1,9 +1,8 @@ path: "tensorflow.keras.layers.GlobalAveragePooling3D" tf_class { is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -14,10 +13,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -66,10 +61,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -104,11 +95,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-avg-pool1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-avg-pool1-d.pbtxt index 
ba90fa454696d1cb4e77d80a2dc77ff65def4714..5d3be9085e51520c48a8d33b1dde6480a0039bad 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-avg-pool1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-avg-pool1-d.pbtxt @@ -1,9 +1,8 @@ path: "tensorflow.keras.layers.GlobalAvgPool1D" tf_class { is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -14,10 +13,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -66,10 +61,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -104,11 +95,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-avg-pool2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-avg-pool2-d.pbtxt index 8823857758307c208527b144c0cc73b566f2f115..0b79a87e0507ecc795f14a63684dd5b5d7dafc1e 100644 
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-avg-pool2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-avg-pool2-d.pbtxt @@ -1,9 +1,8 @@ path: "tensorflow.keras.layers.GlobalAvgPool2D" tf_class { is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -14,10 +13,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -66,10 +61,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -104,11 +95,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-avg-pool3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-avg-pool3-d.pbtxt index 500ced852ba6b19502769ba9052f2e364af7e283..68cdbac652f74b72d1cb769fbefbee750025767d 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-avg-pool3-d.pbtxt +++ 
b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-avg-pool3-d.pbtxt @@ -1,9 +1,8 @@ path: "tensorflow.keras.layers.GlobalAvgPool3D" tf_class { is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -14,10 +13,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -66,10 +61,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -104,11 +95,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pool1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pool1-d.pbtxt index cf2717ed46b56e639fb774c1e922648e1653ec0d..d5872b444fa9ae617e6ad55bc39f43ec4be7d92f 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pool1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pool1-d.pbtxt @@ -1,9 
+1,8 @@ path: "tensorflow.keras.layers.GlobalMaxPool1D" tf_class { is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -14,10 +13,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -66,10 +61,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -104,11 +95,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pool2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pool2-d.pbtxt index a86ff1a46997f19b11e6ef03be432b45687a2df2..4b0cf9a5d38f868d9fdd16a042daea6d0f56fdf8 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pool2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pool2-d.pbtxt @@ -1,9 +1,8 @@ path: "tensorflow.keras.layers.GlobalMaxPool2D" tf_class { is_instance: "" - 
is_instance: "" + is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -14,10 +13,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -66,10 +61,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -104,11 +95,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pool3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pool3-d.pbtxt index e01cc7c1b09ad6a40380613d54b771c6a1c89c1c..4c1adb2131f204121cf74c9a77d346902632fef2 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pool3-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pool3-d.pbtxt @@ -1,9 +1,8 @@ path: "tensorflow.keras.layers.GlobalMaxPool3D" tf_class { is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" - is_instance: "" is_instance: "" 
is_instance: "" member { @@ -14,10 +13,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -66,10 +61,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -104,11 +95,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt index 259c1fb37c787f5318570b7aca6935d2f0ed997f..815f1cf580562e62db99862e51ba02e2b2051b57 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt @@ -1,9 +1,8 @@ path: "tensorflow.keras.layers.GlobalMaxPooling1D" tf_class { is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -14,10 +13,6 @@ tf_class { name: "dtype" mtype: 
"" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -66,10 +61,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -104,11 +95,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pooling2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pooling2-d.pbtxt index 0c41bf97f763f1e40e8fac714709ccac1483a00b..e027dd6cc282b75a6a6c2a78878fa57d6706b547 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pooling2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pooling2-d.pbtxt @@ -1,9 +1,8 @@ path: "tensorflow.keras.layers.GlobalMaxPooling2D" tf_class { is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -14,10 +13,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: 
"inbound_nodes" mtype: "" @@ -66,10 +61,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -104,11 +95,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pooling3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pooling3-d.pbtxt index bec8817aa393ba2d8a6410408938402366cbb01d..c647b24a23258b96a3c1780fd2f3e499658cfe7d 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pooling3-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pooling3-d.pbtxt @@ -1,9 +1,8 @@ path: "tensorflow.keras.layers.GlobalMaxPooling3D" tf_class { is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -14,10 +13,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -66,10 +61,6 @@ tf_class { name: "output_shape" 
mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -104,11 +95,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-input-layer.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-input-layer.pbtxt index 17be86222901c0f5a9a18c0e5f1c5bcac6c06a17..75d70734b4144627f02a8f619991356e38889389 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-input-layer.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-input-layer.pbtxt @@ -2,7 +2,6 @@ path: "tensorflow.keras.layers.InputLayer" tf_class { is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -13,10 +12,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -65,10 +60,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -103,11 +94,11 @@ tf_class { } member_method { 
name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-input-spec.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-input-spec.pbtxt index 3aeef347ae1f96a3ef40493cc6b722a887e81786..29edabe0483a21d7db35eec04d6ae7a855a82da1 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-input-spec.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-input-spec.pbtxt @@ -1,6 +1,6 @@ path: "tensorflow.keras.layers.InputSpec" tf_class { - is_instance: "" + is_instance: "" is_instance: "" member_method { name: "__init__" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-l-s-t-m-cell.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-l-s-t-m-cell.pbtxt index 6d2a8c56196d9b3c80f570c7f1d3ac803253fff6..0ed383a3554f81c3db490cf5d242546a14b64d15 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-l-s-t-m-cell.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-l-s-t-m-cell.pbtxt @@ -2,7 +2,6 @@ path: "tensorflow.keras.layers.LSTMCell" tf_class { is_instance: "" is_instance: "" - 
is_instance: "" is_instance: "" is_instance: "" member { @@ -13,10 +12,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -65,10 +60,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -103,11 +94,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-l-s-t-m.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-l-s-t-m.pbtxt index 490b5b618c65e28f1ae2e01e8d35e7f3973cc180..6d14c9c8f69286552271ca6dab5271a5af48593f 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-l-s-t-m.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-l-s-t-m.pbtxt @@ -3,7 +3,6 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -34,10 +33,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "implementation" mtype: "" @@ 
-122,10 +117,6 @@ tf_class { name: "recurrent_regularizer" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "states" mtype: "" @@ -176,11 +167,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-lambda.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-lambda.pbtxt index 21a65b838af35e2f540eacab823513e7bf54b434..ddf96aba34bf574f4b9046ed932d8f136864f157 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-lambda.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-lambda.pbtxt @@ -2,7 +2,6 @@ path: "tensorflow.keras.layers.Lambda" tf_class { is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -13,10 +12,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -65,10 +60,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -103,11 +94,11 @@ tf_class 
{ } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-layer.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-layer.pbtxt index 127b04738e70c11b2dc1071cf174cf5de23c5133..aca282d62427f5c8186fa3ac86daebd6fba09ce5 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-layer.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-layer.pbtxt @@ -1,7 +1,6 @@ path: "tensorflow.keras.layers.Layer" tf_class { is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -12,10 +11,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -64,10 +59,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -90,7 +81,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\'], varargs=None, keywords=kwargs, defaults=None" + argspec: "args=[\'self\', \'trainable\', \'name\', \'dtype\'], varargs=None, keywords=kwargs, 
defaults=[\'True\', \'None\', \'None\'], " } member_method { name: "add_loss" @@ -102,11 +93,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-leaky-re-l-u.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-leaky-re-l-u.pbtxt index 87e49f2ed5b5d73aee5e9aa2511485b1f3f4bcd9..b9c53b43c87bd8c1047663001159d7286360a008 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-leaky-re-l-u.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-leaky-re-l-u.pbtxt @@ -2,7 +2,6 @@ path: "tensorflow.keras.layers.LeakyReLU" tf_class { is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -13,10 +12,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -65,10 +60,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -103,11 +94,11 @@ tf_class { } member_method { name: "add_variable" - 
argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-locally-connected1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-locally-connected1-d.pbtxt index 1aa3aad3246b83931a47e69a4aa76fdf2b5aee22..2ee566d03b4631ebcff6bcb0b93ec274849c5b67 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-locally-connected1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-locally-connected1-d.pbtxt @@ -2,7 +2,6 @@ path: "tensorflow.keras.layers.LocallyConnected1D" tf_class { is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -13,10 +12,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -65,10 +60,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -103,11 +94,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', 
\'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-locally-connected2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-locally-connected2-d.pbtxt index 5e9dc7d4774c651a186a4e320d0cfd088e87b6b3..db0d0e816a6d863f13c7eb085edf269d71e2f252 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-locally-connected2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-locally-connected2-d.pbtxt @@ -2,7 +2,6 @@ path: "tensorflow.keras.layers.LocallyConnected2D" tf_class { is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -13,10 +12,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -65,10 +60,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -103,11 +94,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', 
\'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-masking.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-masking.pbtxt index 0d101e5b68cdb2cdf24ed472c724cfc885e3d95d..82008b89d038e26155b8ae952c2943557ed8c35e 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-masking.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-masking.pbtxt @@ -2,7 +2,6 @@ path: "tensorflow.keras.layers.Masking" tf_class { is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -13,10 +12,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -65,10 +60,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -103,11 +94,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', 
\'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pool1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pool1-d.pbtxt index c85cd49ac8ce2c1fc0759671865b7174cd1c1480..31a34a17d04129c8dfe8ec6f98b9fdbc110e13f4 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pool1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pool1-d.pbtxt @@ -1,10 +1,8 @@ path: "tensorflow.keras.layers.MaxPool1D" tf_class { is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -15,10 +13,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -67,10 +61,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -93,7 +83,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'pool_size\', \'strides\', \'padding\'], varargs=None, keywords=kwargs, defaults=[\'2\', \'None\', \'valid\'], " + argspec: "args=[\'self\', \'pool_size\', \'strides\', \'padding\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'2\', \'None\', \'valid\', \'None\'], " } member_method { name: "add_loss" @@ -105,11 +95,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', 
\'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pool2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pool2-d.pbtxt index 4f59e330c92f96101c65a9a24f66196e84587ccb..70d24ac75c4f850374fa8cffb881652f97e97d22 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pool2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pool2-d.pbtxt @@ -1,10 +1,8 @@ path: "tensorflow.keras.layers.MaxPool2D" tf_class { is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -15,10 +13,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -67,10 +61,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -105,11 +95,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, 
defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pool3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pool3-d.pbtxt index c0ea0eb0505d20e70d641f2a646a060d7dbfabda..55b16564b30919fb48d97862e2e8cfc0fdda8de5 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pool3-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pool3-d.pbtxt @@ -1,10 +1,8 @@ path: "tensorflow.keras.layers.MaxPool3D" tf_class { is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -15,10 +13,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -67,10 +61,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -105,11 +95,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], 
varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pooling1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pooling1-d.pbtxt index ca37ae51314516ae67c7725eb2ccd3d25154e2ac..a230b74c383eff93443d07e48f6818352302d9e7 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pooling1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pooling1-d.pbtxt @@ -1,10 +1,8 @@ path: "tensorflow.keras.layers.MaxPooling1D" tf_class { is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -15,10 +13,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -67,10 +61,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -93,7 +83,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'pool_size\', \'strides\', \'padding\'], varargs=None, keywords=kwargs, defaults=[\'2\', \'None\', \'valid\'], " + argspec: "args=[\'self\', \'pool_size\', \'strides\', \'padding\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'2\', \'None\', \'valid\', \'None\'], " } member_method { name: "add_loss" @@ -105,11 +95,11 @@ 
tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pooling2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pooling2-d.pbtxt index 3ede2378347f5eddb0e8fae775a0200ea484d3f8..d98f7c39f546048b1483617b24f004d5c9759d14 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pooling2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pooling2-d.pbtxt @@ -1,10 +1,8 @@ path: "tensorflow.keras.layers.MaxPooling2D" tf_class { is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -15,10 +13,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -67,10 +61,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -105,11 +95,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', 
\'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pooling3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pooling3-d.pbtxt index d87e25a7ba8e7cce615431723b53a0106c2b5279..b2e96a4203758270afb8c225a05a481dbc329a84 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pooling3-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pooling3-d.pbtxt @@ -1,10 +1,8 @@ path: "tensorflow.keras.layers.MaxPooling3D" tf_class { is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -15,10 +13,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -67,10 +61,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -105,11 +95,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', 
\'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-maximum.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-maximum.pbtxt index e4df7b48ae6b41400375920a48ef8577bb69376e..0c45bbdf171ad9831e51d2b1ba952fab9eb3d0e5 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-maximum.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-maximum.pbtxt @@ -3,7 +3,6 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -14,10 +13,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -66,10 +61,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -104,11 +95,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } 
member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-multiply.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-multiply.pbtxt index 6bf7c77743c31b6d74df35d827e9d5bc9a25d303..6423d83418aa40b57afb3d5ff22f4ec605183587 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-multiply.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-multiply.pbtxt @@ -3,7 +3,6 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -14,10 +13,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -66,10 +61,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -104,11 +95,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, 
defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-p-re-l-u.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-p-re-l-u.pbtxt index c14be132b7e406c99841576be8d8fa9ab99aa816..6e17081375b7fbdcb000dfcbc0cd48ab072fe6e5 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-p-re-l-u.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-p-re-l-u.pbtxt @@ -2,7 +2,6 @@ path: "tensorflow.keras.layers.PReLU" tf_class { is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -13,10 +12,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -65,10 +60,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -103,11 +94,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', 
\'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-permute.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-permute.pbtxt index 72ffbceae01da900778dba1ec14e646aa17b39e5..d01d371da596256873c3799b99c45db01c674ff9 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-permute.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-permute.pbtxt @@ -2,7 +2,6 @@ path: "tensorflow.keras.layers.Permute" tf_class { is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -13,10 +12,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -65,10 +60,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -103,11 +94,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', 
\'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-r-n-n.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-r-n-n.pbtxt index d3e780c8b22ed580f61ffc3d9b2bad7278391402..d3f5508640e268df484af5adaf66621fa3d92d5b 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-r-n-n.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-r-n-n.pbtxt @@ -2,7 +2,6 @@ path: "tensorflow.keras.layers.RNN" tf_class { is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -13,10 +12,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -65,10 +60,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "states" mtype: "" @@ -107,11 +98,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-repeat-vector.pbtxt 
b/tensorflow/tools/api/golden/tensorflow.keras.layers.-repeat-vector.pbtxt index a27980a9d17397e558a4b732e3dc332a0c1e8432..44e1007f5420bbb8feda891901c138cb776c071e 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-repeat-vector.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-repeat-vector.pbtxt @@ -2,7 +2,6 @@ path: "tensorflow.keras.layers.RepeatVector" tf_class { is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -13,10 +12,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -65,10 +60,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -103,11 +94,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-reshape.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-reshape.pbtxt index 
67f991276c6908ff54fd516e84533542a5f60528..8fc3ec33310f531a6ca6948dc20db67543cde69a 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-reshape.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-reshape.pbtxt @@ -2,7 +2,6 @@ path: "tensorflow.keras.layers.Reshape" tf_class { is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -13,10 +12,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -65,10 +60,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -103,11 +94,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-conv1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-conv1-d.pbtxt index fccea5e8af5ab81e712669ff1b2567d8bde8607e..457d27749504167f2865bc272251bd89b5d3297e 100644 --- 
a/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-conv1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-conv1-d.pbtxt @@ -1,11 +1,9 @@ path: "tensorflow.keras.layers.SeparableConv1D" tf_class { is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -16,10 +14,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -68,10 +62,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -106,11 +96,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-conv2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-conv2-d.pbtxt index d20663bdb0bc2eea323d35b1e3d4d27122f50472..54eda8ee2121d4fadd73a33010202850d743cc65 100644 --- 
a/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-conv2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-conv2-d.pbtxt @@ -1,11 +1,9 @@ path: "tensorflow.keras.layers.SeparableConv2D" tf_class { is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -16,10 +14,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -68,10 +62,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -106,11 +96,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-convolution1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-convolution1-d.pbtxt index 889fa0a1b58bbd3babd293b7b1b45915a9ee3ca4..711196554698b79c64e1c67bc875ce33c70563c6 100644 --- 
a/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-convolution1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-convolution1-d.pbtxt @@ -1,11 +1,9 @@ path: "tensorflow.keras.layers.SeparableConvolution1D" tf_class { is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -16,10 +14,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -68,10 +62,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -106,11 +96,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-convolution2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-convolution2-d.pbtxt index c850f3fedc814b20f0f95cc3cf4fd5c973446b5b..815e34a48de542feb078d0002dbebbbf4d199e63 100644 --- 
a/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-convolution2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-convolution2-d.pbtxt @@ -1,11 +1,9 @@ path: "tensorflow.keras.layers.SeparableConvolution2D" tf_class { is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -16,10 +14,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -68,10 +62,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -106,11 +96,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-simple-r-n-n-cell.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-simple-r-n-n-cell.pbtxt index 526d88ccba60eb25c68432e5baa03fd3a878f718..6614760e5e72556ac61c3788065b3faf2d286800 100644 --- 
a/tensorflow/tools/api/golden/tensorflow.keras.layers.-simple-r-n-n-cell.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-simple-r-n-n-cell.pbtxt @@ -2,7 +2,6 @@ path: "tensorflow.keras.layers.SimpleRNNCell" tf_class { is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -13,10 +12,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -65,10 +60,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -103,11 +94,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-simple-r-n-n.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-simple-r-n-n.pbtxt index 7fddae34472411f49d42b4d65d12034d056ec818..bfcfd71ecddfb618ceb53969e8782f535786009b 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-simple-r-n-n.pbtxt +++ 
b/tensorflow/tools/api/golden/tensorflow.keras.layers.-simple-r-n-n.pbtxt @@ -3,7 +3,6 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -34,10 +33,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -114,10 +109,6 @@ tf_class { name: "recurrent_regularizer" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "states" mtype: "" @@ -164,11 +155,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-softmax.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-softmax.pbtxt index 5b9b62fc970238e49e6d4849285606d0a7908b23..9c4618c4e91ae288284b35c4a0c6bbbfe604d91d 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-softmax.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-softmax.pbtxt @@ -2,7 +2,6 @@ path: "tensorflow.keras.layers.Softmax" tf_class { is_instance: "" is_instance: "" - is_instance: "" is_instance: 
"" is_instance: "" member { @@ -13,10 +12,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -65,10 +60,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -103,11 +94,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-spatial-dropout1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-spatial-dropout1-d.pbtxt index 769da30999993fad05ae0f7c04e256e6cf01a774..9a0a19d2d52f34e61c273bd4d4a27c46940dd5bf 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-spatial-dropout1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-spatial-dropout1-d.pbtxt @@ -2,9 +2,7 @@ path: "tensorflow.keras.layers.SpatialDropout1D" tf_class { is_instance: "" is_instance: "" - is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -15,10 +13,6 @@ tf_class { name: "dtype" mtype: "" } - member 
{ - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -67,10 +61,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -105,11 +95,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-spatial-dropout2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-spatial-dropout2-d.pbtxt index fca2e42a1519fcf3a9f0ec996c50b148b2df05fd..446f7122a6a2dc1d2f7377cef00d7b5b9a89cd3a 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-spatial-dropout2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-spatial-dropout2-d.pbtxt @@ -2,9 +2,7 @@ path: "tensorflow.keras.layers.SpatialDropout2D" tf_class { is_instance: "" is_instance: "" - is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -15,10 +13,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -67,10 +61,6 
@@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -105,11 +95,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-spatial-dropout3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-spatial-dropout3-d.pbtxt index 36e8de09a967c5940bf8078234f5980a78ec8009..52a0485b5cea2be6f2f4d9b0ee31cb2388adcef7 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-spatial-dropout3-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-spatial-dropout3-d.pbtxt @@ -2,9 +2,7 @@ path: "tensorflow.keras.layers.SpatialDropout3D" tf_class { is_instance: "" is_instance: "" - is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -15,10 +13,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -67,10 +61,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - 
} member { name: "trainable_variables" mtype: "" @@ -105,11 +95,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-stacked-r-n-n-cells.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-stacked-r-n-n-cells.pbtxt index a96f16fae99af9c30959d228202055e9aebfaf58..c82e7a192dfca8ca38832a53f9135125b4c34286 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-stacked-r-n-n-cells.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-stacked-r-n-n-cells.pbtxt @@ -2,7 +2,6 @@ path: "tensorflow.keras.layers.StackedRNNCells" tf_class { is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -13,10 +12,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -65,10 +60,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "state_size" mtype: "" @@ -107,11 +98,11 @@ tf_class { } member_method { name: "add_variable" - argspec: 
"args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-thresholded-re-l-u.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-thresholded-re-l-u.pbtxt index e1cbd0e150ed890ae57c1725249d1340fc2cb663..9ccf251a18034371607cfdc6091f2282136feec9 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-thresholded-re-l-u.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-thresholded-re-l-u.pbtxt @@ -2,7 +2,6 @@ path: "tensorflow.keras.layers.ThresholdedReLU" tf_class { is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -13,10 +12,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -65,10 +60,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -103,11 +94,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', 
\'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-time-distributed.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-time-distributed.pbtxt index f0d35728fb1c42d563ff0598dd84da51a766a764..e080a07799fe1b7ec5f73b4e7bf78053a2c9dd3c 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-time-distributed.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-time-distributed.pbtxt @@ -3,7 +3,6 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -14,10 +13,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -66,10 +61,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable" mtype: "" @@ -108,11 +99,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, 
keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-up-sampling1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-up-sampling1-d.pbtxt index 74efaea6ddb22ec2fe9d41558978c183b0e06671..5fadca0b8386951976e8a6330bacf0dcf169e2d1 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-up-sampling1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-up-sampling1-d.pbtxt @@ -2,7 +2,6 @@ path: "tensorflow.keras.layers.UpSampling1D" tf_class { is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -13,10 +12,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -65,10 +60,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -103,11 +94,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', 
\'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-up-sampling2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-up-sampling2-d.pbtxt index dc5bd5fd5319f9bbd601a3c4083ae566b47e1aaa..2d395bf7e87b8835bcf63d792d68dc3ac4083051 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-up-sampling2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-up-sampling2-d.pbtxt @@ -2,7 +2,6 @@ path: "tensorflow.keras.layers.UpSampling2D" tf_class { is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -13,10 +12,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -65,10 +60,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -103,11 +94,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + 
argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-up-sampling3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-up-sampling3-d.pbtxt index e01ccfb74aead591f1018cdcbb1c888767ecdb20..18d58ec3b23f7d4fd13ff45f4c1d4d95e7722ed5 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-up-sampling3-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-up-sampling3-d.pbtxt @@ -2,7 +2,6 @@ path: "tensorflow.keras.layers.UpSampling3D" tf_class { is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -13,10 +12,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -65,10 +60,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -103,11 +94,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', 
\'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-wrapper.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-wrapper.pbtxt index 7e6f90f7623677244865ac285c134dc79f7b9b69..6223cb2f3c1230a60f3cf3dd57a0e803cf4f15d9 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-wrapper.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-wrapper.pbtxt @@ -2,7 +2,6 @@ path: "tensorflow.keras.layers.Wrapper" tf_class { is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -13,10 +12,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -65,10 +60,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable" mtype: "" @@ -107,11 +98,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff 
--git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-zero-padding1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-zero-padding1-d.pbtxt index 4d0d402dad442ccf52267f5ce40b05400afbfbc7..e71bba6a7f1df713cd13b4a0249d001d56bb31b0 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-zero-padding1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-zero-padding1-d.pbtxt @@ -2,7 +2,6 @@ path: "tensorflow.keras.layers.ZeroPadding1D" tf_class { is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -13,10 +12,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -65,10 +60,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -103,11 +94,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-zero-padding2-d.pbtxt 
b/tensorflow/tools/api/golden/tensorflow.keras.layers.-zero-padding2-d.pbtxt index b353a529bcf8e543d334fee57fca26ebc83036a4..aba6d8cb1f43bb070f9b17b5290afc5ce30246b5 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-zero-padding2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-zero-padding2-d.pbtxt @@ -2,7 +2,6 @@ path: "tensorflow.keras.layers.ZeroPadding2D" tf_class { is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -13,10 +12,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -65,10 +60,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -103,11 +94,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-zero-padding3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-zero-padding3-d.pbtxt index 
9fe1256e616dbca4f35101df160dc55bc68bfa8a..ce545ecc954e84e702fbd24047370a3417dc0fb1 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-zero-padding3-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-zero-padding3-d.pbtxt @@ -2,7 +2,6 @@ path: "tensorflow.keras.layers.ZeroPadding3D" tf_class { is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -13,10 +12,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -65,10 +60,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -103,11 +94,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.pbtxt index 088c8e88e26f59f2753733252882f5e0e8287fb6..affc9bd09b1124acbf7ff84b97e53a419c9b6a92 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.pbtxt +++ 
b/tensorflow/tools/api/golden/tensorflow.keras.layers.pbtxt @@ -116,6 +116,10 @@ tf_module { name: "Dense" mtype: "" } + member { + name: "DepthwiseConv2D" + mtype: "" + } member { name: "Dot" mtype: "" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.models.-model.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.models.-model.pbtxt index 8ccf15f9ab0fcfa59907ff05a962a84d3d86ccb4..b3f3f169227c4548131d86165427655524f50d9d 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.models.-model.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.models.-model.pbtxt @@ -3,7 +3,6 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -14,10 +13,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -74,10 +69,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "state_updates" mtype: "" @@ -128,7 +119,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" @@ -260,7 +251,7 @@ tf_class { } member_method { name: "test_on_batch" - argspec: "args=[\'self\', \'x\', \'y\', \'sample_weight\'], varargs=None, keywords=None, defaults=[\'None\'], " + argspec: "args=[\'self\', \'x\', \'y\', \'sample_weight\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " } member_method { name: "to_json" @@ -272,6 +263,6 @@ tf_class { 
} member_method { name: "train_on_batch" - argspec: "args=[\'self\', \'x\', \'y\', \'sample_weight\', \'class_weight\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + argspec: "args=[\'self\', \'x\', \'y\', \'sample_weight\', \'class_weight\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " } } diff --git a/tensorflow/tools/api/golden/tensorflow.keras.models.-sequential.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.models.-sequential.pbtxt index be12b0bd2ec509ff394eaa3f43db0b54badd7fba..4ac6811bace97c25d62a2596523efdb1f789f7b2 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.models.-sequential.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.models.-sequential.pbtxt @@ -4,7 +4,6 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { @@ -15,10 +14,6 @@ tf_class { name: "dtype" mtype: "" } - member { - name: "graph" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -75,10 +70,6 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "scope_name" - mtype: "" - } member { name: "state_updates" mtype: "" @@ -133,7 +124,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'getter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" @@ -277,7 +268,7 @@ tf_class { } member_method { name: "test_on_batch" - argspec: "args=[\'self\', \'x\', \'y\', \'sample_weight\'], varargs=None, keywords=None, defaults=[\'None\'], " + argspec: "args=[\'self\', 
\'x\', \'y\', \'sample_weight\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " } member_method { name: "to_json" @@ -289,6 +280,6 @@ tf_class { } member_method { name: "train_on_batch" - argspec: "args=[\'self\', \'x\', \'y\', \'sample_weight\', \'class_weight\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + argspec: "args=[\'self\', \'x\', \'y\', \'sample_weight\', \'class_weight\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " } } diff --git a/tensorflow/tools/api/golden/tensorflow.layers.-average-pooling1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.layers.-average-pooling1-d.pbtxt index 1c4f550d7f05b8be33326cb39d7a5f3bf663f5e6..38fd78a5a828c7d0da98c97fdc01f504397c6fe6 100644 --- a/tensorflow/tools/api/golden/tensorflow.layers.-average-pooling1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.layers.-average-pooling1-d.pbtxt @@ -1,8 +1,10 @@ path: "tensorflow.layers.AveragePooling1D" tf_class { is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -25,6 +27,10 @@ tf_class { name: "input" mtype: "" } + member { + name: "input_mask" + mtype: "" + } member { name: "input_shape" mtype: "" @@ -53,6 +59,10 @@ tf_class { name: "output" mtype: "" } + member { + name: "output_mask" + mtype: "" + } member { name: "output_shape" mtype: "" @@ -95,7 +105,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "add_weight" + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', 
\'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" @@ -109,6 +123,10 @@ tf_class { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "compute_mask" + argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], " + } member_method { name: "compute_output_shape" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" @@ -117,10 +135,22 @@ tf_class { name: "count_params" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_input_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_input_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_input_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" @@ -133,6 +163,10 @@ tf_class { name: "get_output_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_output_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_output_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" @@ -141,4 +175,12 @@ tf_class { name: "get_updates_for" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_weights" + argspec: "args=[\'self\'], varargs=None, keywords=None, 
defaults=None" + } + member_method { + name: "set_weights" + argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" + } } diff --git a/tensorflow/tools/api/golden/tensorflow.layers.-average-pooling2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.layers.-average-pooling2-d.pbtxt index d2db0952693f2989e6a9e8748a254eb4db483206..86a524cc91e10616cd049cec93843e419ec670c7 100644 --- a/tensorflow/tools/api/golden/tensorflow.layers.-average-pooling2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.layers.-average-pooling2-d.pbtxt @@ -1,8 +1,10 @@ path: "tensorflow.layers.AveragePooling2D" tf_class { is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -25,6 +27,10 @@ tf_class { name: "input" mtype: "" } + member { + name: "input_mask" + mtype: "" + } member { name: "input_shape" mtype: "" @@ -53,6 +59,10 @@ tf_class { name: "output" mtype: "" } + member { + name: "output_mask" + mtype: "" + } member { name: "output_shape" mtype: "" @@ -95,7 +105,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "add_weight" + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" @@ -109,6 +123,10 @@ tf_class { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "compute_mask" + argspec: 
"args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], " + } member_method { name: "compute_output_shape" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" @@ -117,10 +135,22 @@ tf_class { name: "count_params" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_input_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_input_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_input_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" @@ -133,6 +163,10 @@ tf_class { name: "get_output_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_output_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_output_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" @@ -141,4 +175,12 @@ tf_class { name: "get_updates_for" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_weights" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "set_weights" + argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" + } } diff --git a/tensorflow/tools/api/golden/tensorflow.layers.-average-pooling3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.layers.-average-pooling3-d.pbtxt index 
34d9a9df281c09a2e2030daf74a2ceb8066085bb..8a811fe4561ac3790e43a5553ca04fca002e420d 100644 --- a/tensorflow/tools/api/golden/tensorflow.layers.-average-pooling3-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.layers.-average-pooling3-d.pbtxt @@ -1,8 +1,10 @@ path: "tensorflow.layers.AveragePooling3D" tf_class { is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -25,6 +27,10 @@ tf_class { name: "input" mtype: "" } + member { + name: "input_mask" + mtype: "" + } member { name: "input_shape" mtype: "" @@ -53,6 +59,10 @@ tf_class { name: "output" mtype: "" } + member { + name: "output_mask" + mtype: "" + } member { name: "output_shape" mtype: "" @@ -95,7 +105,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "add_weight" + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" @@ -109,6 +123,10 @@ tf_class { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "compute_mask" + argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], " + } member_method { name: "compute_output_shape" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" @@ -117,10 +135,22 @@ tf_class { name: "count_params" argspec: "args=[\'self\'], varargs=None, 
keywords=None, defaults=None" } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_input_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_input_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_input_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" @@ -133,6 +163,10 @@ tf_class { name: "get_output_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_output_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_output_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" @@ -141,4 +175,12 @@ tf_class { name: "get_updates_for" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_weights" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "set_weights" + argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" + } } diff --git a/tensorflow/tools/api/golden/tensorflow.layers.-batch-normalization.pbtxt b/tensorflow/tools/api/golden/tensorflow.layers.-batch-normalization.pbtxt index 21ad0efecf88c42a3a679910ddfe095585a7933a..3923e706be7a352d770bf309aecd1fadb2a05e81 100644 --- a/tensorflow/tools/api/golden/tensorflow.layers.-batch-normalization.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.layers.-batch-normalization.pbtxt @@ -1,7 +1,9 @@ path: "tensorflow.layers.BatchNormalization" tf_class { is_instance: "" + is_instance: "" 
is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -24,6 +26,10 @@ tf_class { name: "input" mtype: "" } + member { + name: "input_mask" + mtype: "" + } member { name: "input_shape" mtype: "" @@ -52,6 +58,10 @@ tf_class { name: "output" mtype: "" } + member { + name: "output_mask" + mtype: "" + } member { name: "output_shape" mtype: "" @@ -94,7 +104,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "add_weight" + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" @@ -108,6 +122,10 @@ tf_class { name: "call" argspec: "args=[\'self\', \'inputs\', \'training\'], varargs=None, keywords=None, defaults=[\'False\'], " } + member_method { + name: "compute_mask" + argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], " + } member_method { name: "compute_output_shape" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" @@ -116,10 +134,22 @@ tf_class { name: "count_params" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_input_at" argspec: "args=[\'self\', \'node_index\'], 
varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_input_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_input_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" @@ -132,6 +162,10 @@ tf_class { name: "get_output_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_output_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_output_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" @@ -140,4 +174,12 @@ tf_class { name: "get_updates_for" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_weights" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "set_weights" + argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" + } } diff --git a/tensorflow/tools/api/golden/tensorflow.layers.-conv1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.layers.-conv1-d.pbtxt index ed38747c7671a267bb640ecb96a4c5fcc46c5edf..7a0a8a2a51295d9009f44e2ea126e8a4694147af 100644 --- a/tensorflow/tools/api/golden/tensorflow.layers.-conv1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.layers.-conv1-d.pbtxt @@ -1,8 +1,10 @@ path: "tensorflow.layers.Conv1D" tf_class { is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -25,6 +27,10 @@ tf_class { name: "input" mtype: "" } + member { + name: "input_mask" + mtype: "" + } member { name: "input_shape" mtype: "" @@ -53,6 +59,10 @@ tf_class { name: "output" mtype: "" } + member { + name: "output_mask" + mtype: "" + } member { name: "output_shape" mtype: "" @@ -95,7 +105,11 
@@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "add_weight" + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" @@ -109,6 +123,10 @@ tf_class { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "compute_mask" + argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], " + } member_method { name: "compute_output_shape" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" @@ -117,10 +135,22 @@ tf_class { name: "count_params" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_input_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_input_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_input_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" @@ -133,6 +163,10 @@ tf_class { name: "get_output_at" argspec: "args=[\'self\', \'node_index\'], 
varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_output_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_output_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" @@ -141,4 +175,12 @@ tf_class { name: "get_updates_for" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_weights" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "set_weights" + argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" + } } diff --git a/tensorflow/tools/api/golden/tensorflow.layers.-conv2-d-transpose.pbtxt b/tensorflow/tools/api/golden/tensorflow.layers.-conv2-d-transpose.pbtxt index ff453c6059477c20528fc768d93c65d208cdfc4a..7ed3a652519a6429ff429925c29f3c296a6d2958 100644 --- a/tensorflow/tools/api/golden/tensorflow.layers.-conv2-d-transpose.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.layers.-conv2-d-transpose.pbtxt @@ -1,9 +1,11 @@ path: "tensorflow.layers.Conv2DTranspose" tf_class { is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -26,6 +28,10 @@ tf_class { name: "input" mtype: "" } + member { + name: "input_mask" + mtype: "" + } member { name: "input_shape" mtype: "" @@ -54,6 +60,10 @@ tf_class { name: "output" mtype: "" } + member { + name: "output_mask" + mtype: "" + } member { name: "output_shape" mtype: "" @@ -96,7 +106,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], 
varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "add_weight" + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" @@ -110,6 +124,10 @@ tf_class { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "compute_mask" + argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], " + } member_method { name: "compute_output_shape" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" @@ -118,10 +136,22 @@ tf_class { name: "count_params" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_input_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_input_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_input_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" @@ -134,6 +164,10 @@ tf_class { name: "get_output_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_output_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_output_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" @@ -142,4 
+176,12 @@ tf_class { name: "get_updates_for" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_weights" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "set_weights" + argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" + } } diff --git a/tensorflow/tools/api/golden/tensorflow.layers.-conv2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.layers.-conv2-d.pbtxt index 5583bd22dce18b0a0593b73bde509818b63b3f29..23831aa74f1c3dea99e6da407e5a63693f94e37a 100644 --- a/tensorflow/tools/api/golden/tensorflow.layers.-conv2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.layers.-conv2-d.pbtxt @@ -1,8 +1,10 @@ path: "tensorflow.layers.Conv2D" tf_class { is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -25,6 +27,10 @@ tf_class { name: "input" mtype: "" } + member { + name: "input_mask" + mtype: "" + } member { name: "input_shape" mtype: "" @@ -53,6 +59,10 @@ tf_class { name: "output" mtype: "" } + member { + name: "output_mask" + mtype: "" + } member { name: "output_shape" mtype: "" @@ -95,7 +105,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "add_weight" + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" @@ -109,6 +123,10 
@@ tf_class { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "compute_mask" + argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], " + } member_method { name: "compute_output_shape" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" @@ -117,10 +135,22 @@ tf_class { name: "count_params" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_input_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_input_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_input_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" @@ -133,6 +163,10 @@ tf_class { name: "get_output_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_output_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_output_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" @@ -141,4 +175,12 @@ tf_class { name: "get_updates_for" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_weights" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "set_weights" + argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" + } } diff --git 
a/tensorflow/tools/api/golden/tensorflow.layers.-conv3-d-transpose.pbtxt b/tensorflow/tools/api/golden/tensorflow.layers.-conv3-d-transpose.pbtxt index 63f0c32a7c8f7e530c76c64fa619102bc12f9ad9..9d41a6b09900d984706accd70a353cc26585d9b5 100644 --- a/tensorflow/tools/api/golden/tensorflow.layers.-conv3-d-transpose.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.layers.-conv3-d-transpose.pbtxt @@ -1,9 +1,11 @@ path: "tensorflow.layers.Conv3DTranspose" tf_class { is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -26,6 +28,10 @@ tf_class { name: "input" mtype: "" } + member { + name: "input_mask" + mtype: "" + } member { name: "input_shape" mtype: "" @@ -54,6 +60,10 @@ tf_class { name: "output" mtype: "" } + member { + name: "output_mask" + mtype: "" + } member { name: "output_shape" mtype: "" @@ -96,7 +106,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "add_weight" + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" @@ -110,6 +124,10 @@ tf_class { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "compute_mask" + argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], " + } member_method { name: "compute_output_shape" 
argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" @@ -118,10 +136,22 @@ tf_class { name: "count_params" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_input_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_input_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_input_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" @@ -134,6 +164,10 @@ tf_class { name: "get_output_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_output_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_output_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" @@ -142,4 +176,12 @@ tf_class { name: "get_updates_for" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_weights" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "set_weights" + argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" + } } diff --git a/tensorflow/tools/api/golden/tensorflow.layers.-conv3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.layers.-conv3-d.pbtxt index b77726252ccca30a7c6555fb569eb65b69e34998..865fe08e63c81222395d125938c8830c02030733 100644 --- a/tensorflow/tools/api/golden/tensorflow.layers.-conv3-d.pbtxt +++ 
b/tensorflow/tools/api/golden/tensorflow.layers.-conv3-d.pbtxt @@ -1,8 +1,10 @@ path: "tensorflow.layers.Conv3D" tf_class { is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -25,6 +27,10 @@ tf_class { name: "input" mtype: "" } + member { + name: "input_mask" + mtype: "" + } member { name: "input_shape" mtype: "" @@ -53,6 +59,10 @@ tf_class { name: "output" mtype: "" } + member { + name: "output_mask" + mtype: "" + } member { name: "output_shape" mtype: "" @@ -95,7 +105,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "add_weight" + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" @@ -109,6 +123,10 @@ tf_class { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "compute_mask" + argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], " + } member_method { name: "compute_output_shape" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" @@ -117,10 +135,22 @@ tf_class { name: "count_params" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: 
"get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_input_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_input_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_input_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" @@ -133,6 +163,10 @@ tf_class { name: "get_output_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_output_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_output_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" @@ -141,4 +175,12 @@ tf_class { name: "get_updates_for" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_weights" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "set_weights" + argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" + } } diff --git a/tensorflow/tools/api/golden/tensorflow.layers.-dense.pbtxt b/tensorflow/tools/api/golden/tensorflow.layers.-dense.pbtxt index 92db9f6dcd2f77c4253eb77df4a26fb632b2a766..ee164aae204d3f6c09af79f7fbac825ce470098d 100644 --- a/tensorflow/tools/api/golden/tensorflow.layers.-dense.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.layers.-dense.pbtxt @@ -1,7 +1,9 @@ path: "tensorflow.layers.Dense" tf_class { is_instance: "" + is_instance: "" is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -24,6 +26,10 @@ tf_class { name: "input" mtype: "" } + member { + name: "input_mask" + mtype: "" + } member { name: "input_shape" mtype: "" @@ -52,6 +58,10 @@ tf_class { name: 
"output" mtype: "" } + member { + name: "output_mask" + mtype: "" + } member { name: "output_shape" mtype: "" @@ -94,7 +104,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "add_weight" + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" @@ -108,6 +122,10 @@ tf_class { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "compute_mask" + argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], " + } member_method { name: "compute_output_shape" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" @@ -116,10 +134,22 @@ tf_class { name: "count_params" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_input_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_input_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_input_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, 
keywords=None, defaults=None" @@ -132,6 +162,10 @@ tf_class { name: "get_output_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_output_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_output_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" @@ -140,4 +174,12 @@ tf_class { name: "get_updates_for" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_weights" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "set_weights" + argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" + } } diff --git a/tensorflow/tools/api/golden/tensorflow.layers.-dropout.pbtxt b/tensorflow/tools/api/golden/tensorflow.layers.-dropout.pbtxt index 80fa846a24c9162d8521bdb4f098b9cd8e34aedb..8167dc79cdf9f83b9b97557638bf0702a1be86da 100644 --- a/tensorflow/tools/api/golden/tensorflow.layers.-dropout.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.layers.-dropout.pbtxt @@ -1,7 +1,9 @@ path: "tensorflow.layers.Dropout" tf_class { is_instance: "" + is_instance: "" is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -24,6 +26,10 @@ tf_class { name: "input" mtype: "" } + member { + name: "input_mask" + mtype: "" + } member { name: "input_shape" mtype: "" @@ -52,6 +58,10 @@ tf_class { name: "output" mtype: "" } + member { + name: "output_mask" + mtype: "" + } member { name: "output_shape" mtype: "" @@ -94,7 +104,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: 
"args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "add_weight" + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" @@ -108,6 +122,10 @@ tf_class { name: "call" argspec: "args=[\'self\', \'inputs\', \'training\'], varargs=None, keywords=None, defaults=[\'False\'], " } + member_method { + name: "compute_mask" + argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], " + } member_method { name: "compute_output_shape" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" @@ -116,10 +134,22 @@ tf_class { name: "count_params" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_input_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_input_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_input_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" @@ -132,6 +162,10 @@ tf_class { name: "get_output_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_output_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_output_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, 
keywords=None, defaults=None" @@ -140,4 +174,12 @@ tf_class { name: "get_updates_for" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_weights" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "set_weights" + argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" + } } diff --git a/tensorflow/tools/api/golden/tensorflow.layers.-flatten.pbtxt b/tensorflow/tools/api/golden/tensorflow.layers.-flatten.pbtxt index f63213b3dde40aa54b165c1c269c26fd2cd9e3b4..efa4419692993aa9975c1af2e647288ae9f38eba 100644 --- a/tensorflow/tools/api/golden/tensorflow.layers.-flatten.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.layers.-flatten.pbtxt @@ -1,7 +1,9 @@ path: "tensorflow.layers.Flatten" tf_class { is_instance: "" + is_instance: "" is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -24,6 +26,10 @@ tf_class { name: "input" mtype: "" } + member { + name: "input_mask" + mtype: "" + } member { name: "input_shape" mtype: "" @@ -52,6 +58,10 @@ tf_class { name: "output" mtype: "" } + member { + name: "output_mask" + mtype: "" + } member { name: "output_shape" mtype: "" @@ -94,7 +104,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "add_weight" + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" @@ -108,6 
+122,10 @@ tf_class { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "compute_mask" + argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], " + } member_method { name: "compute_output_shape" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" @@ -116,10 +134,22 @@ tf_class { name: "count_params" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_input_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_input_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_input_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" @@ -132,6 +162,10 @@ tf_class { name: "get_output_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_output_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_output_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" @@ -140,4 +174,12 @@ tf_class { name: "get_updates_for" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_weights" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "set_weights" + argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" + } } diff --git 
a/tensorflow/tools/api/golden/tensorflow.layers.-input-spec.pbtxt b/tensorflow/tools/api/golden/tensorflow.layers.-input-spec.pbtxt index 7c1d05cd2bfc8be978e82566e7a3086040978b4a..2ff89f0a6faef905bcafdcb36121f506e9a9977a 100644 --- a/tensorflow/tools/api/golden/tensorflow.layers.-input-spec.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.layers.-input-spec.pbtxt @@ -1,6 +1,6 @@ path: "tensorflow.layers.InputSpec" tf_class { - is_instance: "" + is_instance: "" is_instance: "" member_method { name: "__init__" diff --git a/tensorflow/tools/api/golden/tensorflow.layers.-layer.pbtxt b/tensorflow/tools/api/golden/tensorflow.layers.-layer.pbtxt index 4e45b2d513bb72bb47433d72c310d6a34fbc0c01..b3a6dfdffa28d3628e09e6aab823534ba84edf16 100644 --- a/tensorflow/tools/api/golden/tensorflow.layers.-layer.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.layers.-layer.pbtxt @@ -1,6 +1,7 @@ path: "tensorflow.layers.Layer" tf_class { is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -23,6 +24,10 @@ tf_class { name: "input" mtype: "" } + member { + name: "input_mask" + mtype: "" + } member { name: "input_shape" mtype: "" @@ -51,6 +56,10 @@ tf_class { name: "output" mtype: "" } + member { + name: "output_mask" + mtype: "" + } member { name: "output_shape" mtype: "" @@ -81,7 +90,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'trainable\', \'name\', \'dtype\', \'activity_regularizer\'], varargs=None, keywords=kwargs, defaults=[\'True\', \'None\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'trainable\', \'name\', \'dtype\'], varargs=None, keywords=kwargs, defaults=[\'True\', \'None\', \'None\'], " } member_method { name: "add_loss" @@ -93,7 +102,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', 
\'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "add_weight" + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" @@ -107,6 +120,10 @@ tf_class { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=kwargs, defaults=None" } + member_method { + name: "compute_mask" + argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], " + } member_method { name: "compute_output_shape" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" @@ -115,10 +132,22 @@ tf_class { name: "count_params" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_input_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_input_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_input_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" @@ -131,6 +160,10 @@ tf_class { name: "get_output_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_output_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_output_shape_at" argspec: "args=[\'self\', 
\'node_index\'], varargs=None, keywords=None, defaults=None" @@ -139,4 +172,12 @@ tf_class { name: "get_updates_for" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_weights" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "set_weights" + argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" + } } diff --git a/tensorflow/tools/api/golden/tensorflow.layers.-max-pooling1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.layers.-max-pooling1-d.pbtxt index 19ec33fce775caa634e71e2295ac945a6f70ade9..cef396489dde24698cd9a63b6247292958cfec4e 100644 --- a/tensorflow/tools/api/golden/tensorflow.layers.-max-pooling1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.layers.-max-pooling1-d.pbtxt @@ -1,8 +1,10 @@ path: "tensorflow.layers.MaxPooling1D" tf_class { is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -25,6 +27,10 @@ tf_class { name: "input" mtype: "" } + member { + name: "input_mask" + mtype: "" + } member { name: "input_shape" mtype: "" @@ -53,6 +59,10 @@ tf_class { name: "output" mtype: "" } + member { + name: "output_mask" + mtype: "" + } member { name: "output_shape" mtype: "" @@ -95,7 +105,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "add_weight" + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', 
\'None\', \'True\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" @@ -109,6 +123,10 @@ tf_class { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "compute_mask" + argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], " + } member_method { name: "compute_output_shape" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" @@ -117,10 +135,22 @@ tf_class { name: "count_params" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_input_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_input_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_input_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" @@ -133,6 +163,10 @@ tf_class { name: "get_output_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_output_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_output_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" @@ -141,4 +175,12 @@ tf_class { name: "get_updates_for" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_weights" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "set_weights" + argspec: 
"args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" + } } diff --git a/tensorflow/tools/api/golden/tensorflow.layers.-max-pooling2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.layers.-max-pooling2-d.pbtxt index 76180c333a21c592a3b53bb445df9b12d3596552..565f0c7a79661f77d5987d671266ff69268b03b2 100644 --- a/tensorflow/tools/api/golden/tensorflow.layers.-max-pooling2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.layers.-max-pooling2-d.pbtxt @@ -1,8 +1,10 @@ path: "tensorflow.layers.MaxPooling2D" tf_class { is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -25,6 +27,10 @@ tf_class { name: "input" mtype: "" } + member { + name: "input_mask" + mtype: "" + } member { name: "input_shape" mtype: "" @@ -53,6 +59,10 @@ tf_class { name: "output" mtype: "" } + member { + name: "output_mask" + mtype: "" + } member { name: "output_shape" mtype: "" @@ -95,7 +105,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "add_weight" + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" @@ -109,6 +123,10 @@ tf_class { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "compute_mask" + argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], " + } 
member_method { name: "compute_output_shape" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" @@ -117,10 +135,22 @@ tf_class { name: "count_params" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_input_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_input_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_input_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" @@ -133,6 +163,10 @@ tf_class { name: "get_output_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_output_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_output_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" @@ -141,4 +175,12 @@ tf_class { name: "get_updates_for" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_weights" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "set_weights" + argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" + } } diff --git a/tensorflow/tools/api/golden/tensorflow.layers.-max-pooling3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.layers.-max-pooling3-d.pbtxt index ded75c8ff09efc6746ddd2284f53d2c021cc473c..595ce2eeadfc95fe44895ffd976024aee80ee948 100644 --- 
a/tensorflow/tools/api/golden/tensorflow.layers.-max-pooling3-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.layers.-max-pooling3-d.pbtxt @@ -1,8 +1,10 @@ path: "tensorflow.layers.MaxPooling3D" tf_class { is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -25,6 +27,10 @@ tf_class { name: "input" mtype: "" } + member { + name: "input_mask" + mtype: "" + } member { name: "input_shape" mtype: "" @@ -53,6 +59,10 @@ tf_class { name: "output" mtype: "" } + member { + name: "output_mask" + mtype: "" + } member { name: "output_shape" mtype: "" @@ -95,7 +105,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "add_weight" + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" @@ -109,6 +123,10 @@ tf_class { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "compute_mask" + argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], " + } member_method { name: "compute_output_shape" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" @@ -117,10 +135,22 @@ tf_class { name: "count_params" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "from_config" + argspec: "args=[\'cls\', 
\'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_input_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_input_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_input_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" @@ -133,6 +163,10 @@ tf_class { name: "get_output_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_output_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_output_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" @@ -141,4 +175,12 @@ tf_class { name: "get_updates_for" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_weights" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "set_weights" + argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" + } } diff --git a/tensorflow/tools/api/golden/tensorflow.layers.-separable-conv1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.layers.-separable-conv1-d.pbtxt index 3dbfa5453f8e0ebb02429df9c4cbdf98de6b8ced..ccca96f72248e390ca65db061d836ee58c8e3205 100644 --- a/tensorflow/tools/api/golden/tensorflow.layers.-separable-conv1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.layers.-separable-conv1-d.pbtxt @@ -1,9 +1,11 @@ path: "tensorflow.layers.SeparableConv1D" tf_class { is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" + is_instance: "" 
is_instance: "" is_instance: "" member { @@ -26,6 +28,10 @@ tf_class { name: "input" mtype: "" } + member { + name: "input_mask" + mtype: "" + } member { name: "input_shape" mtype: "" @@ -54,6 +60,10 @@ tf_class { name: "output" mtype: "" } + member { + name: "output_mask" + mtype: "" + } member { name: "output_shape" mtype: "" @@ -96,7 +106,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "add_weight" + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" @@ -110,6 +124,10 @@ tf_class { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "compute_mask" + argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], " + } member_method { name: "compute_output_shape" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" @@ -118,10 +136,22 @@ tf_class { name: "count_params" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_input_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } + member_method { + 
name: "get_input_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_input_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" @@ -134,6 +164,10 @@ tf_class { name: "get_output_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_output_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_output_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" @@ -142,4 +176,12 @@ tf_class { name: "get_updates_for" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_weights" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "set_weights" + argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" + } } diff --git a/tensorflow/tools/api/golden/tensorflow.layers.-separable-conv2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.layers.-separable-conv2-d.pbtxt index ab171df1d1650e19836018f3316e6919f6d36def..1c99c9618254cadb1b4e95c7223ca9361e4fa861 100644 --- a/tensorflow/tools/api/golden/tensorflow.layers.-separable-conv2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.layers.-separable-conv2-d.pbtxt @@ -1,9 +1,11 @@ path: "tensorflow.layers.SeparableConv2D" tf_class { is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -26,6 +28,10 @@ tf_class { name: "input" mtype: "" } + member { + name: "input_mask" + mtype: "" + } member { name: "input_shape" mtype: "" @@ -54,6 +60,10 @@ tf_class { name: "output" mtype: "" } + member { + name: "output_mask" + mtype: "" + } member { name: "output_shape" mtype: "" 
@@ -96,7 +106,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "add_weight" + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" @@ -110,6 +124,10 @@ tf_class { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "compute_mask" + argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], " + } member_method { name: "compute_output_shape" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" @@ -118,10 +136,22 @@ tf_class { name: "count_params" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_input_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_input_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_input_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" @@ -134,6 +164,10 @@ tf_class { name: "get_output_at" argspec: "args=[\'self\', 
\'node_index\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_output_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_output_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" @@ -142,4 +176,12 @@ tf_class { name: "get_updates_for" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_weights" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "set_weights" + argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" + } } diff --git a/tensorflow/tools/api/golden/tensorflow.math.pbtxt b/tensorflow/tools/api/golden/tensorflow.math.pbtxt new file mode 100644 index 0000000000000000000000000000000000000000..897718c05e0d10a6f961f33b8c65f5dab1d03f5b --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.math.pbtxt @@ -0,0 +1,7 @@ +path: "tensorflow.math" +tf_module { + member_method { + name: "polyval" + argspec: "args=[\'coeffs\', \'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " + } +} diff --git a/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-basic-l-s-t-m-cell.pbtxt b/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-basic-l-s-t-m-cell.pbtxt index 9c71a24d0500e2091e0ae94cc4dd7ed6b788a54f..e1abd43ab54f9d0c131fd94cfeefcdd7ccdb00f1 100644 --- a/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-basic-l-s-t-m-cell.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-basic-l-s-t-m-cell.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -26,6 +27,10 @@ tf_class { name: "input" mtype: "" } + member { + name: "input_mask" + mtype: "" + } member { name: "input_shape" mtype: "" @@ -54,6 +59,10 @@ tf_class { name: "output" mtype: "" } + member { + name: 
"output_mask" + mtype: "" + } member { name: "output_shape" mtype: "" @@ -92,7 +101,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'num_units\', \'forget_bias\', \'state_is_tuple\', \'activation\', \'reuse\', \'name\'], varargs=None, keywords=None, defaults=[\'1.0\', \'True\', \'None\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'num_units\', \'forget_bias\', \'state_is_tuple\', \'activation\', \'reuse\', \'name\', \'dtype\'], varargs=None, keywords=None, defaults=[\'1.0\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "add_loss" @@ -104,7 +113,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "add_weight" + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" @@ -118,6 +131,10 @@ tf_class { name: "call" argspec: "args=[\'self\', \'inputs\', \'state\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "compute_mask" + argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], " + } member_method { name: "compute_output_shape" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" @@ -126,10 +143,22 @@ tf_class { name: "count_params" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, 
keywords=None, defaults=None" + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_input_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_input_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_input_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" @@ -142,6 +171,10 @@ tf_class { name: "get_output_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_output_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_output_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" @@ -150,6 +183,14 @@ tf_class { name: "get_updates_for" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_weights" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "set_weights" + argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "zero_state" argspec: "args=[\'self\', \'batch_size\', \'dtype\'], varargs=None, keywords=None, defaults=None" diff --git a/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-basic-r-n-n-cell.pbtxt b/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-basic-r-n-n-cell.pbtxt index 9e19f96b7452616956fb7fd3ca62d8f4b25a2122..93e7e40199884fa77ae7da6d15113b8b4ca125ab 100644 --- a/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-basic-r-n-n-cell.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-basic-r-n-n-cell.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" 
is_instance: "" is_instance: "" member { @@ -26,6 +27,10 @@ tf_class { name: "input" mtype: "" } + member { + name: "input_mask" + mtype: "" + } member { name: "input_shape" mtype: "" @@ -54,6 +59,10 @@ tf_class { name: "output" mtype: "" } + member { + name: "output_mask" + mtype: "" + } member { name: "output_shape" mtype: "" @@ -92,7 +101,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'num_units\', \'activation\', \'reuse\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'num_units\', \'activation\', \'reuse\', \'name\', \'dtype\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], " } member_method { name: "add_loss" @@ -104,7 +113,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "add_weight" + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" @@ -118,6 +131,10 @@ tf_class { name: "call" argspec: "args=[\'self\', \'inputs\', \'state\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "compute_mask" + argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], " + } member_method { name: "compute_output_shape" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" @@ -126,10 +143,22 @@ tf_class { name: "count_params" argspec: "args=[\'self\'], 
varargs=None, keywords=None, defaults=None" } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_input_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_input_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_input_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" @@ -142,6 +171,10 @@ tf_class { name: "get_output_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_output_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_output_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" @@ -150,6 +183,14 @@ tf_class { name: "get_updates_for" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_weights" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "set_weights" + argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "zero_state" argspec: "args=[\'self\', \'batch_size\', \'dtype\'], varargs=None, keywords=None, defaults=None" diff --git a/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-device-wrapper.pbtxt b/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-device-wrapper.pbtxt index 7540aa62861895a7c41840476d4edb79785a77a9..3c3e38229738fec3b25f437a73f3a9d216d970af 100644 --- a/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-device-wrapper.pbtxt +++ 
b/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-device-wrapper.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -25,6 +26,10 @@ tf_class { name: "input" mtype: "" } + member { + name: "input_mask" + mtype: "" + } member { name: "input_shape" mtype: "" @@ -53,6 +58,10 @@ tf_class { name: "output" mtype: "" } + member { + name: "output_mask" + mtype: "" + } member { name: "output_shape" mtype: "" @@ -103,7 +112,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "add_weight" + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" @@ -117,6 +130,10 @@ tf_class { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=kwargs, defaults=None" } + member_method { + name: "compute_mask" + argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], " + } member_method { name: "compute_output_shape" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" @@ -125,10 +142,22 @@ tf_class { name: "count_params" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, 
keywords=None, defaults=None" + } member_method { name: "get_input_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_input_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_input_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" @@ -141,6 +170,10 @@ tf_class { name: "get_output_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_output_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_output_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" @@ -149,6 +182,14 @@ tf_class { name: "get_updates_for" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_weights" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "set_weights" + argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "zero_state" argspec: "args=[\'self\', \'batch_size\', \'dtype\'], varargs=None, keywords=None, defaults=None" diff --git a/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-dropout-wrapper.pbtxt b/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-dropout-wrapper.pbtxt index fc1ff386690f9c7acb11d4cc0770e394f78350ad..db16660f1145b55c824c698653094977dd6c718b 100644 --- a/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-dropout-wrapper.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-dropout-wrapper.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -25,6 +26,10 @@ tf_class { name: "input" mtype: "" } + member { + name: 
"input_mask" + mtype: "" + } member { name: "input_shape" mtype: "" @@ -53,6 +58,10 @@ tf_class { name: "output" mtype: "" } + member { + name: "output_mask" + mtype: "" + } member { name: "output_shape" mtype: "" @@ -107,7 +116,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "add_weight" + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" @@ -121,6 +134,10 @@ tf_class { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=kwargs, defaults=None" } + member_method { + name: "compute_mask" + argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], " + } member_method { name: "compute_output_shape" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" @@ -129,10 +146,22 @@ tf_class { name: "count_params" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_input_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_input_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + 
} member_method { name: "get_input_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" @@ -145,6 +174,10 @@ tf_class { name: "get_output_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_output_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_output_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" @@ -153,6 +186,14 @@ tf_class { name: "get_updates_for" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_weights" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "set_weights" + argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "zero_state" argspec: "args=[\'self\', \'batch_size\', \'dtype\'], varargs=None, keywords=None, defaults=None" diff --git a/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-g-r-u-cell.pbtxt b/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-g-r-u-cell.pbtxt index 751122cfff3bf9c55dd9fa264fdf2e1960940724..465fc1cd9c886acaeb8ac49ff54417b9fd101b04 100644 --- a/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-g-r-u-cell.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-g-r-u-cell.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -26,6 +27,10 @@ tf_class { name: "input" mtype: "" } + member { + name: "input_mask" + mtype: "" + } member { name: "input_shape" mtype: "" @@ -54,6 +59,10 @@ tf_class { name: "output" mtype: "" } + member { + name: "output_mask" + mtype: "" + } member { name: "output_shape" mtype: "" @@ -92,7 +101,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'num_units\', 
\'activation\', \'reuse\', \'kernel_initializer\', \'bias_initializer\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'num_units\', \'activation\', \'reuse\', \'kernel_initializer\', \'bias_initializer\', \'name\', \'dtype\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "add_loss" @@ -104,7 +113,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "add_weight" + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" @@ -118,6 +131,10 @@ tf_class { name: "call" argspec: "args=[\'self\', \'inputs\', \'state\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "compute_mask" + argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], " + } member_method { name: "compute_output_shape" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" @@ -126,10 +143,22 @@ tf_class { name: "count_params" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { 
name: "get_input_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_input_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_input_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" @@ -142,6 +171,10 @@ tf_class { name: "get_output_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_output_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_output_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" @@ -150,6 +183,14 @@ tf_class { name: "get_updates_for" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_weights" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "set_weights" + argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "zero_state" argspec: "args=[\'self\', \'batch_size\', \'dtype\'], varargs=None, keywords=None, defaults=None" diff --git a/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-l-s-t-m-cell.pbtxt b/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-l-s-t-m-cell.pbtxt index 4b6313f395fd8fd4ec2af78365117620263e7a55..38a387d55a4d34524cdc8af1f7ce1617eb120373 100644 --- a/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-l-s-t-m-cell.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-l-s-t-m-cell.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -26,6 +27,10 @@ tf_class { name: "input" mtype: "" } + member { + name: "input_mask" + mtype: "" + } member { name: "input_shape" mtype: "" 
@@ -54,6 +59,10 @@ tf_class { name: "output" mtype: "" } + member { + name: "output_mask" + mtype: "" + } member { name: "output_shape" mtype: "" @@ -92,7 +101,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'num_units\', \'use_peepholes\', \'cell_clip\', \'initializer\', \'num_proj\', \'proj_clip\', \'num_unit_shards\', \'num_proj_shards\', \'forget_bias\', \'state_is_tuple\', \'activation\', \'reuse\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'1.0\', \'True\', \'None\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'num_units\', \'use_peepholes\', \'cell_clip\', \'initializer\', \'num_proj\', \'proj_clip\', \'num_unit_shards\', \'num_proj_shards\', \'forget_bias\', \'state_is_tuple\', \'activation\', \'reuse\', \'name\', \'dtype\'], varargs=None, keywords=None, defaults=[\'False\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'1.0\', \'True\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "add_loss" @@ -104,7 +113,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "add_weight" + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" @@ -118,6 +131,10 @@ tf_class { name: "call" argspec: "args=[\'self\', \'inputs\', \'state\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "compute_mask" + 
argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], " + } member_method { name: "compute_output_shape" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" @@ -126,10 +143,22 @@ tf_class { name: "count_params" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_input_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_input_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_input_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" @@ -142,6 +171,10 @@ tf_class { name: "get_output_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_output_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_output_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" @@ -150,6 +183,14 @@ tf_class { name: "get_updates_for" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_weights" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "set_weights" + argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "zero_state" argspec: "args=[\'self\', \'batch_size\', \'dtype\'], varargs=None, keywords=None, defaults=None" diff --git 
a/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-multi-r-n-n-cell.pbtxt b/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-multi-r-n-n-cell.pbtxt index 00e8c71140596ecea237ce05a09feff1fbb49001..b9e3d934759accd885036fa4c5a7013ef64736f3 100644 --- a/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-multi-r-n-n-cell.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-multi-r-n-n-cell.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -25,6 +26,10 @@ tf_class { name: "input" mtype: "" } + member { + name: "input_mask" + mtype: "" + } member { name: "input_shape" mtype: "" @@ -53,6 +58,10 @@ tf_class { name: "output" mtype: "" } + member { + name: "output_mask" + mtype: "" + } member { name: "output_shape" mtype: "" @@ -103,7 +112,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "add_weight" + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" @@ -117,6 +130,10 @@ tf_class { name: "call" argspec: "args=[\'self\', \'inputs\', \'state\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "compute_mask" + argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], " + } member_method { name: "compute_output_shape" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" @@ 
-125,10 +142,22 @@ tf_class { name: "count_params" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_input_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_input_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_input_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" @@ -141,6 +170,10 @@ tf_class { name: "get_output_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_output_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_output_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" @@ -149,6 +182,14 @@ tf_class { name: "get_updates_for" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_weights" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "set_weights" + argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "zero_state" argspec: "args=[\'self\', \'batch_size\', \'dtype\'], varargs=None, keywords=None, defaults=None" diff --git a/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-r-n-n-cell.pbtxt b/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-r-n-n-cell.pbtxt index 3852f90dd6c4a254e20e789bdeb7796d61cef6bc..75b5898c591cbe2b761c0f709159c5489cb8f76a 100644 --- 
a/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-r-n-n-cell.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-r-n-n-cell.pbtxt @@ -2,6 +2,7 @@ path: "tensorflow.nn.rnn_cell.RNNCell" tf_class { is_instance: "" is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -24,6 +25,10 @@ tf_class { name: "input" mtype: "" } + member { + name: "input_mask" + mtype: "" + } member { name: "input_shape" mtype: "" @@ -52,6 +57,10 @@ tf_class { name: "output" mtype: "" } + member { + name: "output_mask" + mtype: "" + } member { name: "output_shape" mtype: "" @@ -90,7 +99,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'trainable\', \'name\', \'dtype\', \'activity_regularizer\'], varargs=None, keywords=kwargs, defaults=[\'True\', \'None\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'trainable\', \'name\', \'dtype\'], varargs=None, keywords=kwargs, defaults=[\'True\', \'None\', \'None\'], " } member_method { name: "add_loss" @@ -102,7 +111,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "add_weight" + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" @@ -116,6 +129,10 @@ tf_class { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=kwargs, defaults=None" } + member_method { + name: "compute_mask" + argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, 
defaults=[\'None\'], " + } member_method { name: "compute_output_shape" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" @@ -124,10 +141,22 @@ tf_class { name: "count_params" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_input_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_input_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_input_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" @@ -140,6 +169,10 @@ tf_class { name: "get_output_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_output_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_output_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" @@ -148,6 +181,14 @@ tf_class { name: "get_updates_for" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_weights" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "set_weights" + argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "zero_state" argspec: "args=[\'self\', \'batch_size\', \'dtype\'], varargs=None, keywords=None, defaults=None" diff --git a/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-residual-wrapper.pbtxt 
b/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-residual-wrapper.pbtxt index 8f3f0f7506ef49014b31cd4bc04f1cb1e0d696fc..fee0dc63b997f328a4e3d44040c4056de4128eb7 100644 --- a/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-residual-wrapper.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-residual-wrapper.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -25,6 +26,10 @@ tf_class { name: "input" mtype: "" } + member { + name: "input_mask" + mtype: "" + } member { name: "input_shape" mtype: "" @@ -53,6 +58,10 @@ tf_class { name: "output" mtype: "" } + member { + name: "output_mask" + mtype: "" + } member { name: "output_shape" mtype: "" @@ -103,7 +112,11 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "add_weight" + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\'], " } member_method { name: "apply" @@ -117,6 +130,10 @@ tf_class { name: "call" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=kwargs, defaults=None" } + member_method { + name: "compute_mask" + argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], " + } member_method { name: "compute_output_shape" argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" @@ -125,10 +142,22 @@ tf_class { name: "count_params" argspec: "args=[\'self\'], 
varargs=None, keywords=None, defaults=None" } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_input_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_input_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_input_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" @@ -141,6 +170,10 @@ tf_class { name: "get_output_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_output_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_output_shape_at" argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" @@ -149,6 +182,14 @@ tf_class { name: "get_updates_for" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_weights" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "set_weights" + argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "zero_state" argspec: "args=[\'self\', \'batch_size\', \'dtype\'], varargs=None, keywords=None, defaults=None" diff --git a/tensorflow/tools/api/golden/tensorflow.pbtxt b/tensorflow/tools/api/golden/tensorflow.pbtxt index 937044aece83e49549bf6aca938bf673203f392b..0b12bc060efc2ecfa4819e77cbb0d26c5a6dc419 100644 --- a/tensorflow/tools/api/golden/tensorflow.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.pbtxt @@ -404,6 +404,10 @@ tf_module { name: "manip" mtype: "" } + member { + 
name: "math" + mtype: "" + } member { name: "metrics" mtype: "" @@ -908,9 +912,13 @@ tf_module { name: "decode_base64" argspec: "args=[\'input\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "decode_compressed" + argspec: "args=[\'bytes\', \'compression_type\', \'name\'], varargs=None, keywords=None, defaults=[\'\', \'None\'], " + } member_method { name: "decode_csv" - argspec: "args=[\'records\', \'record_defaults\', \'field_delim\', \'use_quote_delim\', \'name\', \'na_value\'], varargs=None, keywords=None, defaults=[\',\', \'True\', \'None\', \'\'], " + argspec: "args=[\'records\', \'record_defaults\', \'field_delim\', \'use_quote_delim\', \'name\', \'na_value\', \'select_cols\'], varargs=None, keywords=None, defaults=[\',\', \'True\', \'None\', \'\', \'None\'], " } member_method { name: "decode_json_example" @@ -1972,6 +1980,10 @@ tf_module { name: "string_split" argspec: "args=[\'source\', \'delimiter\', \'skip_empty\'], varargs=None, keywords=None, defaults=[\' \', \'True\'], " } + member_method { + name: "string_strip" + argspec: "args=[\'input\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " + } member_method { name: "string_to_hash_bucket" argspec: "args=[\'string_tensor\', \'num_buckets\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " diff --git a/tensorflow/tools/api/golden/tensorflow.summary.-file-writer.pbtxt b/tensorflow/tools/api/golden/tensorflow.summary.-file-writer.pbtxt index dcf747971b7b8bf243502b2388da635705b8ee3e..6b65b0ace3cf7740ab03390841c941592000d127 100644 --- a/tensorflow/tools/api/golden/tensorflow.summary.-file-writer.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.summary.-file-writer.pbtxt @@ -5,7 +5,7 @@ tf_class { is_instance: "" member_method { name: "__init__" - argspec: "args=[\'self\', \'logdir\', \'graph\', \'max_queue\', \'flush_secs\', \'graph_def\', \'filename_suffix\'], varargs=None, keywords=None, defaults=[\'None\', \'10\', \'120\', 
\'None\', \'None\'], " + argspec: "args=[\'self\', \'logdir\', \'graph\', \'max_queue\', \'flush_secs\', \'graph_def\', \'filename_suffix\', \'session\'], varargs=None, keywords=None, defaults=[\'None\', \'10\', \'120\', \'None\', \'None\', \'None\'], " } member_method { name: "add_event" diff --git a/tensorflow/tools/api/golden/tensorflow.train.-checkpoint-saver-hook.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.-checkpoint-saver-hook.pbtxt index c3037baa8c951ecd9b60267ee7cc8674ead88dbe..327799729c9e7d9ce2931f8951db995195ff35e9 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.-checkpoint-saver-hook.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.train.-checkpoint-saver-hook.pbtxt @@ -5,7 +5,7 @@ tf_class { is_instance: "" member_method { name: "__init__" - argspec: "args=[\'self\', \'checkpoint_dir\', \'save_secs\', \'save_steps\', \'saver\', \'checkpoint_basename\', \'scaffold\', \'listeners\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'model.ckpt\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'checkpoint_dir\', \'save_secs\', \'save_steps\', \'saver\', \'checkpoint_basename\', \'scaffold\', \'listeners\', \'steps_per_run\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'model.ckpt\', \'None\', \'None\', \'1\'], " } member_method { name: "after_create_session" diff --git a/tensorflow/tools/api/golden/tensorflow.train.-checkpoint.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.-checkpoint.pbtxt new file mode 100644 index 0000000000000000000000000000000000000000..17f393d27c4b917a78902a4d6f13630311cb44ef --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.train.-checkpoint.pbtxt @@ -0,0 +1,23 @@ +path: "tensorflow.train.Checkpoint" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + member { + name: "save_counter" + mtype: "" + } + member_method { + name: "__init__" + argspec: "args=[\'self\'], varargs=None, keywords=kwargs, defaults=None" + 
} + member_method { + name: "restore" + argspec: "args=[\'self\', \'save_path\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "save" + argspec: "args=[\'self\', \'file_prefix\', \'session\'], varargs=None, keywords=None, defaults=[\'None\'], " + } +} diff --git a/tensorflow/tools/api/golden/tensorflow.train.-scaffold.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.-scaffold.pbtxt index 62b956c5ef7dc54e92431f25ec948e341c0e1f24..38cc98b48e78aa93f7614a9baff236f7b119f99d 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.-scaffold.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.train.-scaffold.pbtxt @@ -38,6 +38,10 @@ tf_class { name: "__init__" argspec: "args=[\'self\', \'init_op\', \'init_feed_dict\', \'init_fn\', \'ready_op\', \'ready_for_local_init_op\', \'local_init_op\', \'summary_op\', \'saver\', \'copy_from_scaffold\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\'], " } + member_method { + name: "default_local_init_op" + argspec: "args=[], varargs=None, keywords=None, defaults=None" + } member_method { name: "finalize" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" diff --git a/tensorflow/tools/api/golden/tensorflow.train.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.pbtxt index bec72e1e609c3e32ca8366396b9b1cb577feab9d..9fb18e77afd7c9c989ad5e967be291406e7239aa 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.train.pbtxt @@ -20,6 +20,10 @@ tf_module { name: "BytesList" mtype: "" } + member { + name: "Checkpoint" + mtype: "" + } member { name: "CheckpointSaverHook" mtype: "" diff --git a/tensorflow/tools/api/tests/BUILD b/tensorflow/tools/api/tests/BUILD index 0dc154b6d2c6884b3cd91a4f4c7c08825c123124..724b12cd4799eb76fe602c737c850e96e92faa58 100644 --- a/tensorflow/tools/api/tests/BUILD +++ b/tensorflow/tools/api/tests/BUILD @@ -23,7 +23,6 @@ 
py_test( ], srcs_version = "PY2AND3", deps = [ - "//tensorflow:experimental_tensorflow_py", "//tensorflow:tensorflow_py", "//tensorflow/python:client_testlib", "//tensorflow/python:lib", diff --git a/tensorflow/tools/api/tests/api_compatibility_test.py b/tensorflow/tools/api/tests/api_compatibility_test.py index 603b2a4327b94873b9908d5e0e114dcc4f7542dc..1ad6b6d1c0ae5ca1ac1329fd49e972840020e4c3 100644 --- a/tensorflow/tools/api/tests/api_compatibility_test.py +++ b/tensorflow/tools/api/tests/api_compatibility_test.py @@ -34,7 +34,6 @@ import sys import unittest import tensorflow as tf -from tensorflow import experimental_api as api from google.protobuf import text_format @@ -47,8 +46,6 @@ from tensorflow.tools.api.lib import python_object_to_proto_visitor from tensorflow.tools.common import public_api from tensorflow.tools.common import traverse -if hasattr(tf, 'experimental_api'): - del tf.experimental_api # FLAGS defined at the bottom: FLAGS = None @@ -237,49 +234,6 @@ class ApiCompatibilityTest(test.TestCase): verbose=FLAGS.verbose_diffs, update_goldens=FLAGS.update_goldens) - @unittest.skipUnless( - sys.version_info.major == 2, - 'API compabitility test goldens are generated using python2.') - def testNewAPIBackwardsCompatibility(self): - # Extract all API stuff. - visitor = python_object_to_proto_visitor.PythonObjectToProtoVisitor() - - public_api_visitor = public_api.PublicAPIVisitor(visitor) - public_api_visitor.do_not_descend_map['tf'].append('contrib') - public_api_visitor.do_not_descend_map['tf.GPUOptions'] = ['Experimental'] - # TODO(annarev): Make slide_dataset available in API. - public_api_visitor.private_map['tf'] = ['slide_dataset'] - traverse.traverse(api, public_api_visitor) - - proto_dict = visitor.GetProtos() - - # Read all golden files. 
- expression = os.path.join( - resource_loader.get_root_dir_with_all_resources(), - _KeyToFilePath('*')) - golden_file_list = file_io.get_matching_files(expression) - - def _ReadFileToProto(filename): - """Read a filename, create a protobuf from its contents.""" - ret_val = api_objects_pb2.TFAPIObject() - text_format.Merge(file_io.read_file_to_string(filename), ret_val) - return ret_val - - golden_proto_dict = { - _FileNameToKey(filename): _ReadFileToProto(filename) - for filename in golden_file_list - } - - # Diff them. Do not fail if called with update. - # If the test is run to update goldens, only report diffs but do not fail. - self._AssertProtoDictEquals( - golden_proto_dict, - proto_dict, - verbose=FLAGS.verbose_diffs, - update_goldens=False, - additional_missing_object_message= - 'Check if tf_export decorator/call is missing for this symbol.') - if __name__ == '__main__': parser = argparse.ArgumentParser() diff --git a/tensorflow/tools/ci_build/Dockerfile.rbe.cpu b/tensorflow/tools/ci_build/Dockerfile.rbe.cpu index 6f0798b1afc34bc08df6f3f8f467a329fcf0fe9b..3bc52b9ed611a0f0a4a269a2864d5b349ee9232c 100644 --- a/tensorflow/tools/ci_build/Dockerfile.rbe.cpu +++ b/tensorflow/tools/ci_build/Dockerfile.rbe.cpu @@ -1,4 +1,4 @@ -FROM launcher.gcr.io/google/rbe-debian8:r322167 +FROM launcher.gcr.io/google/rbe-debian8:r327695 LABEL maintainer="Yu Yi " # Copy install scripts diff --git a/tensorflow/tools/ci_build/builds/pip.sh b/tensorflow/tools/ci_build/builds/pip.sh index 82042b93c02275b51530b306d8cf4519482e5410..5fa75e1d61cceeebfa77439bb64f1c644c9dba70 100755 --- a/tensorflow/tools/ci_build/builds/pip.sh +++ b/tensorflow/tools/ci_build/builds/pip.sh @@ -123,6 +123,10 @@ done BAZEL_FLAGS=$(str_strip "${BAZEL_FLAGS}") +if [[ -z "$GIT_TAG_OVERRIDE" ]]; then + BAZEL_FLAGS+=" --action_env=GIT_TAG_OVERRIDE" +fi + echo "Using Bazel flags: ${BAZEL_FLAGS}" PIP_BUILD_TARGET="//tensorflow/tools/pip_package:build_pip_package" diff --git 
a/tensorflow/tools/ci_build/builds/test_user_ops.sh b/tensorflow/tools/ci_build/builds/test_user_ops.sh index caa3a40817c80b27271f76de0a95a743cb2916f6..c342367bacea9d2ba8152d928b93bf61cf60d0e7 100755 --- a/tensorflow/tools/ci_build/builds/test_user_ops.sh +++ b/tensorflow/tools/ci_build/builds/test_user_ops.sh @@ -213,27 +213,34 @@ USER_OP=$(echo "${USER_OP_SO}" | sed -e 's/\.so//') echo "Invoking user op ${USER_OP} defined in file ${USER_OP_SO} "\ "via pip installation" -ORIG_OUTPUT=$("${PYTHON_BIN_PATH}" -c "import tensorflow as tf; print(tf.Session('').run(tf.load_op_library('./${USER_OP_SO}').${USER_OP}(${OP_INPUT})))") - -# Format OUTPUT for analysis -if [[ -z $(echo "${ORIG_OUTPUT}" | grep -o ',') ]]; then - if [[ ${IS_MAC} == "1" ]]; then - OUTPUT=$(echo "${ORIG_OUTPUT}" | sed -E -e 's/[ \t]+/,/g') +function run_op() { + local ORIG_OUTPUT=$1 + local ADDITIONAL_LOG=$2 + + # Format OUTPUT for analysis + if [[ -z $(echo "${ORIG_OUTPUT}" | grep -o ',') ]]; then + if [[ ${IS_MAC} == "1" ]]; then + local OUTPUT=$(echo "${ORIG_OUTPUT}" | sed -E -e 's/[ \t]+/,/g') + else + local OUTPUT=$(echo "${ORIG_OUTPUT}" | sed -r -e 's/[ \t]+/,/g') + fi else - OUTPUT=$(echo "${ORIG_OUTPUT}" | sed -r -e 's/[ \t]+/,/g') + local OUTPUT="${ORIG_OUTPUT}" fi -else - OUTPUT="${ORIG_OUTPUT}" -fi -EQUALS_EXPECTED=$("${PYTHON_BIN_PATH}" -c "print(${OUTPUT} == ${EXPECTED_OUTPUT})") + local EQUALS_EXPECTED=$("${PYTHON_BIN_PATH}" -c "print(${OUTPUT} == ${EXPECTED_OUTPUT})") -if [[ "${EQUALS_EXPECTED}" != "True" ]]; then - die "FAILED: Output from user op (${OUTPUT}) does not match expected "\ -"output ${EXPECTED_OUTPUT}" -else - echo "Output from user op (${OUTPUT}) matches expected output" -fi + if [[ "${EQUALS_EXPECTED}" != "True" ]]; then + local ERROR="FAILED: Output from user op (${OUTPUT}) does not match expected "\ + "output ${EXPECTED_OUTPUT}"${ADDITIONAL_LOG} + die ${ERROR} + else + echo "Output from user op (${OUTPUT}) matches expected output" + fi +} + +run_op 
$("${PYTHON_BIN_PATH}" -c "import tensorflow as tf; print(tf.Session('').run(tf.load_op_library('./${USER_OP_SO}').${USER_OP}(${OP_INPUT})))") +run_op $("${PYTHON_BIN_PATH}" -c "import tensorflow as tf; tf.enable_eager_execution(); print(tf.load_op_library('./${USER_OP_SO}').${USER_OP}(${OP_INPUT}))") " in eager mode" popd diff --git a/tensorflow/tools/ci_build/ci_parameterized_build.sh b/tensorflow/tools/ci_build/ci_parameterized_build.sh index 9d23b508aa1c1d20d0f4b5979aa7be2c295fe325..797e0a6db52aa6216486bdc9c6a88ff353c57e15 100755 --- a/tensorflow/tools/ci_build/ci_parameterized_build.sh +++ b/tensorflow/tools/ci_build/ci_parameterized_build.sh @@ -237,7 +237,7 @@ function get_cuda_capability_version() { CTYPE=${TF_BUILD_CONTAINER_TYPE} # Determine if the machine is a Mac -OPT_FLAG="" +OPT_FLAG="--test_output=errors" if [[ "$(uname -s)" == "Darwin" ]]; then DO_DOCKER=0 diff --git a/tensorflow/tools/ci_build/install/install_pip_packages.sh b/tensorflow/tools/ci_build/install/install_pip_packages.sh index d406b83a6246d18c335fb52cea1256d7809fa61a..5aaf544afdcb881fc00553cc78f916752f4527ac 100755 --- a/tensorflow/tools/ci_build/install/install_pip_packages.sh +++ b/tensorflow/tools/ci_build/install/install_pip_packages.sh @@ -16,10 +16,15 @@ set -e -# We don't apt-get install so that we can install a newer version of pip. Not -# needed after we upgrade to Ubuntu 16.04 -easy_install -U pip -easy_install3 -U pip +# We don't apt-get install so that we can install a newer version of pip. +# Only needed for Ubuntu 14.04 ,and not needed for Ubuntu 16.04 / Debian 8,9 +if $(cat /etc/*-release | grep -q 14.04); then + easy_install -U pip==9.0.3 + easy_install3 -U pip==9.0.3 +else + pip2 install --upgrade pip==9.0.3 + pip3 install --upgrade pip==9.0.3 +fi # Install pip packages from whl files to avoid the time-consuming process of # building from source. @@ -60,8 +65,13 @@ rm -rf /usr/lib/python3/dist-packages/six* # numpy needs to be installed from source to fix segfaults. 
See: # https://github.com/tensorflow/tensorflow/issues/6968 # This workaround isn't needed for Ubuntu 16.04 or later. -pip2 install --no-binary=:all: --upgrade numpy==1.12.0 -pip3 install --no-binary=:all: --upgrade numpy==1.12.0 +if $(cat /etc/*-release | grep -q 14.04); then + pip2 install --no-binary=:all: --upgrade numpy==1.12.0 + pip3 install --no-binary=:all: --upgrade numpy==1.12.0 +else + pip2 install --upgrade numpy==1.12.0 + pip3 install --upgrade numpy==1.12.0 +fi pip2 install scipy==0.18.1 pip3 install scipy==0.18.1 diff --git a/tensorflow/tools/ci_build/install/install_pip_packages_remote.sh b/tensorflow/tools/ci_build/install/install_pip_packages_remote.sh index 39a6d557d185d8564a79315fc738a054325aa0bc..0beabcf5ef83000bb0b0c5d3dc077a3d2bbe118a 100755 --- a/tensorflow/tools/ci_build/install/install_pip_packages_remote.sh +++ b/tensorflow/tools/ci_build/install/install_pip_packages_remote.sh @@ -20,10 +20,8 @@ if [ ! -f /usr/bin/x86_64-linux-gnu-gcc ]; then ln -s /usr/local/bin/clang /usr/bin/x86_64-linux-gnu-gcc fi -pip2 install -U pip -pip3 install -U pip -pip2 install -U setuptools -pip3 install -U setuptools +pip2 install --upgrade setuptools +pip3 install --upgrade setuptools # The rest of the pip packages will be installed in # `install_pip_packages.sh` diff --git a/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh b/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh index aefc49f60482148e565a5262eebd5b3ac85987cf..204a82f647eed550a1ad14bd6fed4cd72b0f7dba 100755 --- a/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh +++ b/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh @@ -39,6 +39,9 @@ if [[ -z $pip35_version ]]; then fi set -e +pip3.5 install --upgrade setuptools +pip3.5 install --upgrade pip + pip3.5 install --upgrade virtualenv # Install six. 
diff --git a/tensorflow/tools/ci_build/linux/cpu/run_mkl.sh b/tensorflow/tools/ci_build/linux/cpu/run_mkl.sh index dbf376be6f7467466b5383c57dca539f9a470a38..2a9f29518886d5dcb96f1afc8b12d6be307aa965 100755 --- a/tensorflow/tools/ci_build/linux/cpu/run_mkl.sh +++ b/tensorflow/tools/ci_build/linux/cpu/run_mkl.sh @@ -30,7 +30,10 @@ export PYTHON_BIN_PATH=`which python2` yes "" | $PYTHON_BIN_PATH configure.py # Run bazel test command. Double test timeouts to avoid flakes. +# Setting KMP_BLOCKTIME to 0 lets OpenMP threads to sleep right after parallel execution +# in an MKL primitive. This reduces the effects of an oversubscription of OpenMP threads +# caused by executing multiple tests concurrently. bazel test --test_tag_filters=-no_oss,-oss_serial,-gpu,-benchmark-test --test_lang_filters=py -k \ --jobs=${N_JOBS} --test_timeout 300,450,1200,3600 --build_tests_only \ - --config=mkl --config=opt --test_output=errors -- \ + --config=mkl --test_env=KMP_BLOCKTIME=0 --config=opt --test_output=errors -- \ //tensorflow/... -//tensorflow/compiler/... -//tensorflow/contrib/... 
diff --git a/tensorflow/tools/ci_build/windows/bazel/common_env.sh b/tensorflow/tools/ci_build/windows/bazel/common_env.sh index 7d4cc7ac3005f7ff9a79d18228e86d6b74e1e8b0..0e6c0227b7ffb6b35193e133aa7d3fbcd16ce3c4 100644 --- a/tensorflow/tools/ci_build/windows/bazel/common_env.sh +++ b/tensorflow/tools/ci_build/windows/bazel/common_env.sh @@ -44,6 +44,8 @@ export PYTHON_LIB_PATH="C:/${PYTHON_BASE_PATH}/lib/site-packages" # Add python into PATH, it's needed because gen_git_source.py uses # '/usr/bin/env python' as a shebang export PATH="/c/${PYTHON_BASE_PATH}:$PATH" +# Add git into PATH needed for gen_git_source.py +export PATH="/c/Program Files/Git/cmd:$PATH" # Make sure we have pip in PATH export PATH="/c/${PYTHON_BASE_PATH}/Scripts:$PATH" diff --git a/tensorflow/tools/ci_build/windows/cpu/cmake/run_py.bat b/tensorflow/tools/ci_build/windows/cpu/cmake/run_py.bat index 3c3b223a0044b7136ea4dee20fa72cd2fed3742a..30554a084c5689768665557d593b928fbd98d8cb 100644 --- a/tensorflow/tools/ci_build/windows/cpu/cmake/run_py.bat +++ b/tensorflow/tools/ci_build/windows/cpu/cmake/run_py.bat @@ -28,6 +28,9 @@ IF DEFINED TF_NIGHTLY (ECHO TF_NIGHTLY is set to %TF_NIGHTLY%) ELSE (SET TF_NIGH :: Set pip binary location. Do not override if it is set already. IF DEFINED PIP_EXE (ECHO PIP_EXE is set to %PIP_EXE%) ELSE (SET PIP_EXE="C:\Program Files\Anaconda3\Scripts\pip.exe") +:: Install absl-py. +%PIP_EXE% install --upgrade absl-py + :: Run the CMAKE build to build the pip package. 
CALL %REPO_ROOT%\tensorflow\tools\ci_build\windows\cpu\cmake\run_build.bat if %errorlevel% neq 0 exit /b %errorlevel% @@ -37,9 +40,6 @@ DIR %REPO_ROOT%\%BUILD_DIR%\tf_python\dist\ /S /B > wheel_filename_file set /p WHEEL_FILENAME=&2; exit 1; } +skip_test=0 + +for ARG in "$@"; do + if [[ "$ARG" == --skip_test ]]; then + skip_test=1 + fi +done + run_configure_for_cpu_build # --define=override_eigen_strong_inline=true speeds up the compiling of conv_grad_ops_3d.cc and conv_ops_3d.cc @@ -49,6 +57,10 @@ run_configure_for_cpu_build BUILD_OPTS="--define=override_eigen_strong_inline=true" bazel build -c opt $BUILD_OPTS tensorflow/tools/pip_package:build_pip_package || exit $? +if [[ "$skip_test" == 1 ]]; then + exit 0 +fi + # Create a python test directory to avoid package name conflict PY_TEST_DIR="py_test_dir" create_python_test_dir "${PY_TEST_DIR}" @@ -65,4 +77,6 @@ bazel test -c opt $BUILD_OPTS -k --test_output=errors \ --define=no_tensorflow_py_deps=true --test_lang_filters=py \ --test_tag_filters=-no_pip,-no_windows,-no_oss \ --build_tag_filters=-no_pip,-no_windows,-no_oss --build_tests_only \ - //${PY_TEST_DIR}/tensorflow/python/... + --flaky_test_attempts=3 \ + //${PY_TEST_DIR}/tensorflow/python/... \ + //${PY_TEST_DIR}/tensorflow/contrib/... diff --git a/tensorflow/tools/ci_build/windows/gpu/cmake/run_py.bat b/tensorflow/tools/ci_build/windows/gpu/cmake/run_py.bat index 97829892b10059f9d9663e103534891d1481abec..3b437d3c58c384c389820e57ae6bcc57c6c13efb 100644 --- a/tensorflow/tools/ci_build/windows/gpu/cmake/run_py.bat +++ b/tensorflow/tools/ci_build/windows/gpu/cmake/run_py.bat @@ -31,6 +31,9 @@ IF DEFINED PIP_EXE (ECHO PIP_EXE is set to %PIP_EXE%) ELSE (SET PIP_EXE="C:\Prog :: Set ctest binary location. IF DEFINED CTEST_EXE (ECHO CTEST_EXE is set to %CTEST_EXE%) ELSE (SET CTEST_EXE="C:\Program Files\cmake\bin\ctest.exe") +:: Install absl-py. +%PIP_EXE% install --upgrade absl-py + :: Run the CMAKE build to build the pip package. 
CALL %REPO_ROOT%\tensorflow\tools\ci_build\windows\gpu\cmake\run_build.bat if %errorlevel% neq 0 exit /b %errorlevel% @@ -40,9 +43,6 @@ DIR %REPO_ROOT%\%BUILD_DIR%\tf_python\dist\ /S /B > wheel_filename_file set /p WHEEL_FILENAME=> .tf_configure.bazelrc bazel clean # Run bazel test command. Double test timeouts to avoid flakes. -bazel test --config=cuda --test_tag_filters=-no_gpu,-benchmark-test -k \ +bazel test --config=cuda --test_tag_filters=-no_gpu,-benchmark-test,-no_oss -k \ --jobs=${N_JOBS} --test_timeout 300,450,1200,3600 \ --build_tests_only --test_output=errors --local_test_jobs=8 \ --run_under=//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute \ diff --git a/tensorflow/tools/def_file_filter/BUILD b/tensorflow/tools/def_file_filter/BUILD new file mode 100644 index 0000000000000000000000000000000000000000..e390e0fb05c1d881e5fbafb43ea7576347949439 --- /dev/null +++ b/tensorflow/tools/def_file_filter/BUILD @@ -0,0 +1,9 @@ +# Description: +# Tools for filtering DEF file for TensorFlow on Windows +# +# On Windows, we use a DEF file generated by Bazel to export +# symbols from the tensorflow dynamic library(_pywrap_tensorflow.dll). +# The maximum number of symbols that can be exported per DLL is 64K, +# so we have to filter some useless symbols through this python script. + +package(default_visibility = ["//visibility:public"]) diff --git a/tensorflow/tools/def_file_filter/BUILD.tpl b/tensorflow/tools/def_file_filter/BUILD.tpl new file mode 100644 index 0000000000000000000000000000000000000000..3cb72f49797d8014e4df3f44155c967ab0e2f9b3 --- /dev/null +++ b/tensorflow/tools/def_file_filter/BUILD.tpl @@ -0,0 +1,15 @@ +# Description: +# Tools for filtering DEF file for TensorFlow on Windows +# +# On Windows, we use a DEF file generated by Bazel to export +# symbols from the tensorflow dynamic library(_pywrap_tensorflow.dll). 
+# The maximum number of symbols that can be exported per DLL is 64K, +# so we have to filter some useless symbols through this python script. + +package(default_visibility = ["//visibility:public"]) + +py_binary( + name = "def_file_filter", + srcs = ["def_file_filter.py"], + srcs_version = "PY2AND3", +) diff --git a/tensorflow/tools/def_file_filter/def_file_filter.py.tpl b/tensorflow/tools/def_file_filter/def_file_filter.py.tpl new file mode 100644 index 0000000000000000000000000000000000000000..8bdc03eb0f19fd6daae826727f429bc1255f0eca --- /dev/null +++ b/tensorflow/tools/def_file_filter/def_file_filter.py.tpl @@ -0,0 +1,168 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""def_file_filter.py - tool to filter a windows def file. + +The def file can be used to export symbols from the tensorflow dll to enable +tf.load_library(). + +Because the linker allows only 64K symbols to be exported per dll +we filter the symbols down to the essentials. The regular expressions +we use for this are specific to tensorflow. + +TODO: this works fine but there is an issue with exporting +'const char * const' and importing it from a user_ops. The problem is +on the importing end and using __declspec(dllimport) works around it. 
+""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +import io +import os +import re +import subprocess +import sys +import tempfile + +# External tools we use that come with visual studio sdk +UNDNAME = "%{undname_bin_path}" + +# Exclude if matched +EXCLUDE_RE = re.compile(r"RTTI|deleting destructor|::internal::") + +# Include if matched before exclude +INCLUDEPRE_RE = re.compile(r"google::protobuf::internal::ExplicitlyConstructed|" + r"google::protobuf::internal::ArenaImpl::AllocateAligned|" # for contrib/data/_prefetching_ops + r"google::protobuf::internal::ArenaImpl::AddCleanup|" # for contrib/data/_prefetching_ops + r"google::protobuf::Arena::OnArenaAllocation|" # for contrib/data/_prefetching_ops + r"tensorflow::internal::LogMessage|" + r"tensorflow::internal::LogString|" + r"tensorflow::internal::CheckOpMessageBuilder|" + r"tensorflow::internal::MakeCheckOpValueString|" + r"tensorflow::internal::PickUnusedPortOrDie|" + r"tensorflow::internal::ValidateDevice|" + r"tensorflow::ops::internal::Enter|" + r"tensorflow::strings::internal::AppendPieces|" + r"tensorflow::strings::internal::CatPieces|" + r"tensorflow::io::internal::JoinPathImpl") + +# Include if matched after exclude +INCLUDE_RE = re.compile(r"^(TF_\w*)$|" + r"^(TFE_\w*)$|" + r"nsync::|" + r"tensorflow::|" + r"functor::|" + r"perftools::gputools") + +# We want to identify data members explicitly in the DEF file, so that no one +# can implicitly link against the DLL if they use one of the variables exported +# from the DLL and the header they use does not decorate the symbol with +# __declspec(dllimport). It is easier to detect what a data symbol does +# NOT look like, so doing it with the below regex. 
+DATA_EXCLUDE_RE = re.compile(r"[)(]|" + r"vftable|" + r"vbtable|" + r"vcall|" + r"RTTI|" + r"protobuf::internal::ExplicitlyConstructed") + +def get_args(): + """Parse command line.""" + filename_list = lambda x: x.split(";") + parser = argparse.ArgumentParser() + parser.add_argument("--input", type=filename_list, + help="paths to input def file", + required=True) + parser.add_argument("--output", help="output deffile", required=True) + parser.add_argument("--target", help="name of the target", required=True) + args = parser.parse_args() + return args + + +def main(): + """main.""" + args = get_args() + + # Pipe dumpbin to extract all linkable symbols from libs. + # Good symbols are collected in candidates and also written to + # a temp file. + candidates = [] + tmpfile = tempfile.NamedTemporaryFile(mode="w", delete=False) + for def_file_path in args.input: + def_file = open(def_file_path, 'r') + for line in def_file: + cols = line.split() + sym = cols[0] + tmpfile.file.write(sym + "\n") + candidates.append(sym) + tmpfile.file.close() + + # Run the symbols through undname to get their undecorated name + # so we can filter on something readable. + with open(args.output, "w") as def_fp: + # track dupes + taken = set() + + # Header for the def file. + def_fp.write("LIBRARY " + args.target + "\n") + def_fp.write("EXPORTS\n") + def_fp.write("\t ??1OpDef@tensorflow@@UEAA@XZ\n") + + # Each symbols returned by undname matches the same position in candidates. + # We compare on undname but use the decorated name from candidates. + dupes = 0 + proc = subprocess.Popen([UNDNAME, tmpfile.name], stdout=subprocess.PIPE) + for idx, line in enumerate(io.TextIOWrapper(proc.stdout, encoding="utf-8")): + decorated = candidates[idx] + if decorated in taken: + # Symbol is already in output, done. 
+ dupes += 1 + continue + + if not INCLUDEPRE_RE.search(line): + if EXCLUDE_RE.search(line): + continue + if not INCLUDE_RE.search(line): + continue + + if "deleting destructor" in line: + # Some of the symbols convered by INCLUDEPRE_RE export deleting + # destructor symbols, which is a bad idea. + # So we filter out such symbols here. + continue + + if DATA_EXCLUDE_RE.search(line): + def_fp.write("\t" + decorated + "\n") + else: + def_fp.write("\t" + decorated + " DATA\n") + taken.add(decorated) + def_fp.close() + + exit_code = proc.wait() + if exit_code != 0: + print("{} failed, exit={}".format(UNDNAME, exit_code)) + return exit_code + + os.unlink(tmpfile.name) + + print("symbols={}, taken={}, dupes={}" + .format(len(candidates), len(taken), dupes)) + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/tensorflow/tools/def_file_filter/def_file_filter_configure.bzl b/tensorflow/tools/def_file_filter/def_file_filter_configure.bzl new file mode 100644 index 0000000000000000000000000000000000000000..47539b2423e602bb9771541ae5b01ba76c79f56f --- /dev/null +++ b/tensorflow/tools/def_file_filter/def_file_filter_configure.bzl @@ -0,0 +1,56 @@ +"""Repository rule for def file filter autoconfiguration. + +This repository reuses Bazel's VC detect mechanism to find undname.exe, +which is a tool used in def_file_filter.py. + +def_file_filter.py is for filtering the DEF file for TensorFlow on Windows. +On Windows, we use a DEF file generated by Bazel to export symbols from the +tensorflow dynamic library(_pywrap_tensorflow.dll). The maximum number of +symbols that can be exported per DLL is 64K, so we have to filter some useless +symbols through this python script. 
+ +`def_file_filter_config` depends on the following environment variables: + * `BAZEL_VC` + * `BAZEL_VS` + * `VS90COMNTOOLS` + * `VS100COMNTOOLS` + * `VS110COMNTOOLS` + * `VS120COMNTOOLS` + * `VS140COMNTOOLS` +""" + +load("@bazel_tools//tools/cpp:windows_cc_configure.bzl", "find_vc_path") +load("@bazel_tools//tools/cpp:windows_cc_configure.bzl", "find_msvc_tool") +load("@bazel_tools//tools/cpp:lib_cc_configure.bzl", "auto_configure_fail") + +def _def_file_filter_configure_impl(repository_ctx): + if repository_ctx.os.name.lower().find("windows") == -1: + repository_ctx.symlink(Label("//tensorflow/tools/def_file_filter:BUILD.tpl"), "BUILD") + repository_ctx.file("def_file_filter.py", "") + return + vc_path = find_vc_path(repository_ctx) + if vc_path == "visual-studio-not-found": + auto_configure_fail("Visual C++ build tools not found on your machine") + undname_bin_path = find_msvc_tool(repository_ctx, vc_path, "undname.exe").replace("\\", "\\\\") + + repository_ctx.template( + "def_file_filter.py", + Label("//tensorflow/tools/def_file_filter:def_file_filter.py.tpl"), + { + "%{undname_bin_path}": undname_bin_path, + }) + repository_ctx.symlink(Label("//tensorflow/tools/def_file_filter:BUILD.tpl"), "BUILD") + + +def_file_filter_configure = repository_rule( + implementation = _def_file_filter_configure_impl, + environ = [ + "BAZEL_VC", + "BAZEL_VS", + "VS90COMNTOOLS", + "VS100COMNTOOLS", + "VS110COMNTOOLS", + "VS120COMNTOOLS", + "VS140COMNTOOLS" + ], +) diff --git a/tensorflow/tools/docker/Dockerfile b/tensorflow/tools/docker/Dockerfile index 024cb40eb4b9380fa09bd0e371826783d1ebdc45..78cb4d250e84a4a165dd42db6845170c1751ffbe 100644 --- a/tensorflow/tools/docker/Dockerfile +++ b/tensorflow/tools/docker/Dockerfile @@ -47,7 +47,7 @@ RUN pip --no-cache-dir install \ http://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.0.0-cp27-none-linux_x86_64.whl # --- ~ DO NOT EDIT OR DELETE BETWEEN THE LINES --- # -# RUN ln -s /usr/bin/python3 /usr/bin/python# +# RUN ln -s 
-f /usr/bin/python3 /usr/bin/python# # Set up our notebook config. COPY jupyter_notebook_config.py /root/.jupyter/ diff --git a/tensorflow/tools/docker/Dockerfile.devel b/tensorflow/tools/docker/Dockerfile.devel index 11f476d12c086f70335d9a69d7f3b86b525b5623..390d7442c37b1d10f945d8657f5034403e3e2b96 100644 --- a/tensorflow/tools/docker/Dockerfile.devel +++ b/tensorflow/tools/docker/Dockerfile.devel @@ -38,6 +38,8 @@ RUN pip --no-cache-dir install \ && \ python -m ipykernel.kernelspec +# RUN ln -s -f /usr/bin/python3 /usr/bin/python# + # Set up our notebook config. COPY jupyter_notebook_config.py /root/.jupyter/ @@ -70,7 +72,7 @@ RUN mkdir /bazel && \ # Download and build TensorFlow. WORKDIR /tensorflow -RUN git clone --branch=r1.7 --depth=1 https://github.com/tensorflow/tensorflow.git . +RUN git clone --branch=r1.8 --depth=1 https://github.com/tensorflow/tensorflow.git . # TODO(craigcitro): Don't install the pip package, since it makes it # more difficult to experiment with local changes. Instead, just add diff --git a/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl b/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl index 037d13116efc5ddf76c31eb87d7f81d31c3591f5..c65e0b72bc582d39b75ad042e4c673aa603639be 100644 --- a/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl +++ b/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl @@ -3,7 +3,7 @@ FROM tensorflow/tensorflow:latest-devel LABEL maintainer="Clayne Robison" # These arguments are parameterized. Use --build-args to override. 
-ARG TF_BRANCH=r1.7 +ARG TF_BRANCH=r1.8 ARG WHL_DIR=/whl RUN apt-get update && apt-get install -y --no-install-recommends \ diff --git a/tensorflow/tools/docker/Dockerfile.devel-gpu b/tensorflow/tools/docker/Dockerfile.devel-gpu index 1fcb6428b21b4ca495bef2b3249b6463e9ef0a10..293028d229adba2380d6a91f409d11208c8d875c 100644 --- a/tensorflow/tools/docker/Dockerfile.devel-gpu +++ b/tensorflow/tools/docker/Dockerfile.devel-gpu @@ -47,6 +47,8 @@ RUN pip --no-cache-dir install \ && \ python -m ipykernel.kernelspec +# RUN ln -s -f /usr/bin/python3 /usr/bin/python# + # Set up our notebook config. COPY jupyter_notebook_config.py /root/.jupyter/ @@ -79,7 +81,7 @@ RUN mkdir /bazel && \ # Download and build TensorFlow. WORKDIR /tensorflow -RUN git clone --branch=r1.7 --depth=1 https://github.com/tensorflow/tensorflow.git . +RUN git clone --branch=r1.8 --depth=1 https://github.com/tensorflow/tensorflow.git . # Configure the build for our CUDA configuration. ENV CI_BUILD_PYTHON python diff --git a/tensorflow/tools/docker/Dockerfile.gpu b/tensorflow/tools/docker/Dockerfile.gpu index 625321e1235202f78a2d5e1a5b2d9d05e1e3f9ba..9e1708662e79746e54af4409756774a306990438 100644 --- a/tensorflow/tools/docker/Dockerfile.gpu +++ b/tensorflow/tools/docker/Dockerfile.gpu @@ -54,7 +54,7 @@ RUN pip --no-cache-dir install \ http://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-0.0.0-cp27-none-linux_x86_64.whl # --- ~ DO NOT EDIT OR DELETE BETWEEN THE LINES --- # -# RUN ln -s /usr/bin/python3 /usr/bin/python# +# RUN ln -s -f /usr/bin/python3 /usr/bin/python# # Set up our notebook config. 
COPY jupyter_notebook_config.py /root/.jupyter/ diff --git a/tensorflow/tools/docker/README.md b/tensorflow/tools/docker/README.md index f46c56e11aa72cd0df20f0d8478de2f42dbb3b72..525f2995ceecd48ee7463fc207406c5f9b25f61e 100644 --- a/tensorflow/tools/docker/README.md +++ b/tensorflow/tools/docker/README.md @@ -16,12 +16,12 @@ quick links here: We currently maintain two Docker container images: -* `gcr.io/tensorflow/tensorflow` - TensorFlow with all dependencies - CPU only! +* `tensorflow/tensorflow` - TensorFlow with all dependencies - CPU only! -* `gcr.io/tensorflow/tensorflow:latest-gpu` - TensorFlow with all dependencies +* `tensorflow/tensorflow:latest-gpu` - TensorFlow with all dependencies and support for NVidia CUDA -Note: We also publish the same containers into +Note: We store all our containers on [Docker Hub](https://hub.docker.com/r/tensorflow/tensorflow/tags/). @@ -29,12 +29,12 @@ Note: We also publish the same containers into Run non-GPU container using - $ docker run -it -p 8888:8888 gcr.io/tensorflow/tensorflow + $ docker run -it -p 8888:8888 tensorflow/tensorflow For GPU support install NVidia drivers (ideally latest) and [nvidia-docker](https://github.com/NVIDIA/nvidia-docker). Run using - $ nvidia-docker run -it -p 8888:8888 gcr.io/tensorflow/tensorflow:latest-gpu + $ nvidia-docker run -it -p 8888:8888 tensorflow/tensorflow:latest-gpu Note: If you would have a problem running nvidia-docker you may try the old method @@ -44,7 +44,7 @@ it there and try using nvidia-docker as described above. 
$ # The old, not recommended way to run docker with gpu support: $ export CUDA_SO=$(\ls /usr/lib/x86_64-linux-gnu/libcuda.* | xargs -I{} echo '-v {}:{}') $ export DEVICES=$(\ls /dev/nvidia* | xargs -I{} echo '--device {}:{}') - $ docker run -it -p 8888:8888 $CUDA_SO $DEVICES gcr.io/tensorflow/tensorflow:latest-gpu + $ docker run -it -p 8888:8888 $CUDA_SO $DEVICES tensorflow/tensorflow:latest-gpu ## More containers diff --git a/tensorflow/tools/docker/notebooks/3_mnist_from_scratch.ipynb b/tensorflow/tools/docker/notebooks/3_mnist_from_scratch.ipynb index 5585ebdcd366ec9db0c47004647970cb27c8bb75..824fe14560bb2c3bfb0729f9e5b5cffa63db19ca 100644 --- a/tensorflow/tools/docker/notebooks/3_mnist_from_scratch.ipynb +++ b/tensorflow/tools/docker/notebooks/3_mnist_from_scratch.ipynb @@ -1207,7 +1207,7 @@ "source": [ "# Training computation: logits + cross-entropy loss.\n", "logits = model(train_data_node, True)\n", - "loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(\n", + "loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(\n", " labels=train_labels_node, logits=logits))\n", "\n", "# L2 regularization for the fully connected parameters.\n", @@ -2031,7 +2031,7 @@ "views": {} }, "kernelspec": { - "display_name": "Python [default]", + "display_name": "Python 3", "language": "python", "name": "python3" }, @@ -2049,5 +2049,5 @@ } }, "nbformat": 4, - "nbformat_minor": 0 + "nbformat_minor": 1 } diff --git a/tensorflow/tools/docker/parameterized_docker_build.sh b/tensorflow/tools/docker/parameterized_docker_build.sh index b4fba5b8f5e19c2fbb8c7261d8cf293757df503c..05de25f2cb11d76f223a31bc12329e6ab7368e8a 100755 --- a/tensorflow/tools/docker/parameterized_docker_build.sh +++ b/tensorflow/tools/docker/parameterized_docker_build.sh @@ -284,7 +284,7 @@ if [[ "${TF_DOCKER_BUILD_IS_DEVEL}" == "no" ]]; then if sed -i -e 's/python /python3 /g' "${DOCKERFILE}" && \ sed -i -e 's/python-dev/python3-dev/g' "${DOCKERFILE}" && \ sed -i -e 's/pip /pip3 /g' 
"${DOCKERFILE}" && \ - sed -i -e 's^# RUN ln -s /usr/bin/python3 /usr/bin/python#^RUN ln -s /usr/bin/python3 /usr/bin/python^' "${DOCKERFILE}" + sed -i -e 's^# RUN ln -s -f /usr/bin/python3 /usr/bin/python#^RUN ln -s -f /usr/bin/python3 /usr/bin/python^' "${DOCKERFILE}" then echo "Modified Dockerfile for python version "\ "${TF_DOCKER_BUILD_PYTHON_VERSION} at: ${DOCKERFILE}" @@ -306,7 +306,7 @@ else sed -i -e 's^/tmp/pip^/tmp/pip3^g' "${DOCKERFILE}" && \ sed -i -e 's/pip /pip3 /g' "${DOCKERFILE}" && \ sed -i -e 's/ENV CI_BUILD_PYTHON python/ENV CI_BUILD_PYTHON python3/g' "${DOCKERFILE}" && \ - sed -i -e 's^# RUN ln -s /usr/bin/python3 /usr/bin/python#^RUN ln -s /usr/bin/python3 /usr/bin/python^' "${DOCKERFILE}" + sed -i -e 's^# RUN ln -s -f /usr/bin/python3 /usr/bin/python#^RUN ln -s -f /usr/bin/python3 /usr/bin/python^' "${DOCKERFILE}" then echo "Modified Dockerfile further for python version ${TF_DOCKER_BUILD_PYTHON_VERSION} at: ${DOCKERFILE}" else diff --git a/tensorflow/tools/docs/BUILD b/tensorflow/tools/docs/BUILD index d370fbd24692a2806640279644c91e4fad5a3757..58b5ef8345c9de83e2d50cd01fe11e11f51fe298 100644 --- a/tensorflow/tools/docs/BUILD +++ b/tensorflow/tools/docs/BUILD @@ -37,7 +37,7 @@ py_library( srcs = ["parser.py"], srcs_version = "PY2AND3", visibility = ["//visibility:public"], - deps = ["@com_github_andreif_codegen"], + deps = ["@astor_archive//:astor"], ) py_test( @@ -103,10 +103,11 @@ py_test( data = ["//tensorflow:docs_src"], srcs_version = "PY2AND3", tags = [ - # No reason to run sanitizers for this test. + # No reason to run sanitizers or fastbuild for this test. 
"noasan", "nomsan", "notsan", + "optonly", ], deps = [ ":generate_lib", diff --git a/tensorflow/tools/docs/build_docs_test.py b/tensorflow/tools/docs/build_docs_test.py index ae293f6576456ecdbb8a4b1ee4e8e4f40482ad94..0cbf8b478fa8877c81dd4c06d2713ae2e72f8b58 100644 --- a/tensorflow/tools/docs/build_docs_test.py +++ b/tensorflow/tools/docs/build_docs_test.py @@ -19,7 +19,6 @@ from __future__ import division from __future__ import print_function import os -import sys import textwrap import tensorflow as tf @@ -39,10 +38,6 @@ class Flags(object): class BuildDocsTest(googletest.TestCase): def testBuildDocs(self): - if sys.version_info >= (3, 0): - print('Warning: Doc generation is not supported from python3.') - return - doc_generator = generate_lib.DocGenerator() doc_generator.set_py_modules([('tf', tf), ('tfdbg', tf_debug)]) diff --git a/tensorflow/tools/docs/generate.py b/tensorflow/tools/docs/generate.py index c750539a76a85ec122fb87f44160422158966314..fc93085e3e0316cf274f4d9b325d6af0ea3a2f83 100644 --- a/tensorflow/tools/docs/generate.py +++ b/tensorflow/tools/docs/generate.py @@ -43,10 +43,6 @@ if __name__ == '__main__': flags = doc_generator.parse_known_args() - # Suppress documentation of some symbols that users should never use. 
- del tf.layers.Layer.inbound_nodes - del tf.layers.Layer.outbound_nodes - # tf_debug is not imported with tf, it's a separate module altogether doc_generator.set_py_modules([('tf', tf), ('tfdbg', tf_debug)]) diff --git a/tensorflow/tools/docs/generate_lib.py b/tensorflow/tools/docs/generate_lib.py index 34dd419f15676babfa9a36c2c0960b01248b6f69..111d54d8205f805cc24d21c610acc81610b8d47d 100644 --- a/tensorflow/tools/docs/generate_lib.py +++ b/tensorflow/tools/docs/generate_lib.py @@ -21,7 +21,6 @@ from __future__ import print_function import argparse import fnmatch import os -import sys import six @@ -134,8 +133,12 @@ def write_docs(output_dir, parser_config, yaml_toc, root_title='TensorFlow'): try: if not os.path.exists(directory): os.makedirs(directory) - with open(path, 'w') as f: - f.write(pretty_docs.build_md_page(page_info)) + # This function returns raw bytes in PY2 or unicode in PY3. + text = pretty_docs.build_md_page(page_info) + if six.PY3: + text = text.encode('utf-8') + with open(path, 'wb') as f: + f.write(text) except OSError as e: print('Cannot write documentation for %s to %s: %s' % (full_name, directory, e)) @@ -308,6 +311,10 @@ def build_doc_index(src_dir): continue title_parser = _GetMarkdownTitle() title_parser.process(os.path.join(dirpath, base_name)) + if title_parser.title is None: + msg = ('`{}` has no markdown title (# title)'.format( + os.path.join(dirpath, base_name))) + raise ValueError(msg) key_parts = os.path.join(suffix, base_name[:-3]).split('/') if key_parts[-1] == 'index': key_parts = key_parts[:-1] @@ -433,19 +440,19 @@ def _other_docs(src_dir, output_dir, reference_resolver, file_pattern='*.md'): full_out_path = os.path.join(output_dir, suffix) if not fnmatch.fnmatch(base_name, file_pattern): print('Copying un-matched file %s...' 
% suffix) - open(full_out_path, 'w').write(open(full_in_path).read()) + open(full_out_path, 'wb').write(open(full_in_path, 'rb').read()) continue if dirpath.endswith('/api_guides/python'): print('Processing Python guide %s...' % base_name) content = tag_updater.process(full_in_path) else: print('Processing doc %s...' % suffix) - content = open(full_in_path).read() + content = open(full_in_path, 'rb').read().decode('utf-8') content = reference_resolver.replace_references(content, relative_path_to_root) - with open(full_out_path, 'w') as f: - f.write(content) + with open(full_out_path, 'wb') as f: + f.write(content.encode('utf-8')) print('Done.') @@ -454,8 +461,6 @@ class DocGenerator(object): """Main entry point for generating docs.""" def __init__(self): - if sys.version_info >= (3, 0): - sys.exit('Doc generation is not supported from python3.') self.argument_parser = argparse.ArgumentParser() self._py_modules = None self._private_map = _get_default_private_map() diff --git a/tensorflow/tools/docs/generate_lib_test.py b/tensorflow/tools/docs/generate_lib_test.py index 1ceaf31f1c3b83e2c2cb3c0d2022ce98781aed4b..ea6d28a02b1f3c07fe8783fd59e345dade1fc804 100644 --- a/tensorflow/tools/docs/generate_lib_test.py +++ b/tensorflow/tools/docs/generate_lib_test.py @@ -52,9 +52,6 @@ class DummyVisitor(object): class GenerateTest(googletest.TestCase): def test_write(self): - if sys.version_info >= (3, 0): - self.skipTest('Warning: Doc generation is not supported from python3.') - module = sys.modules[__name__] index = { diff --git a/tensorflow/tools/docs/parser.py b/tensorflow/tools/docs/parser.py index d2a63ecc4960117eb64fcc4f94bf882d4a3f91dd..fb0bd2c2ff438aa9b3fa04719c447a2f3a91a95e 100644 --- a/tensorflow/tools/docs/parser.py +++ b/tensorflow/tools/docs/parser.py @@ -26,7 +26,7 @@ import os import re import sys -import codegen +import astor import six from google.protobuf.message import Message as ProtoMessage @@ -621,20 +621,20 @@ def _parse_md_docstring(py_object, 
relative_path_to_root, reference_resolver): def _get_arg_spec(func): """Extracts signature information from a function or functools.partial object. - For functions, uses `tf_inspect.getargspec`. For `functools.partial` objects, - corrects the signature of the underlying function to take into account the - removed arguments. + For functions, uses `tf_inspect.getfullargspec`. For `functools.partial` + objects, corrects the signature of the underlying function to take into + account the removed arguments. Args: func: A function whose signature to extract. Returns: - An `ArgSpec` namedtuple `(args, varargs, keywords, defaults)`, as returned - by `tf_inspect.getargspec`. + An `FullArgSpec` namedtuple `(args, varargs, varkw, defaults, etc.)`, + as returned by `tf_inspect.getfullargspec`. """ - # getargspec does not work for functools.partial objects directly. + # getfullargspec does not work for functools.partial objects directly. if isinstance(func, functools.partial): - argspec = tf_inspect.getargspec(func.func) + argspec = tf_inspect.getfullargspec(func.func) # Remove the args from the original function that have been used up. first_default_arg = ( len(argspec.args or []) - len(argspec.defaults or [])) @@ -657,12 +657,16 @@ def _get_arg_spec(func): argspec_defaults.pop(i-first_default_arg) else: first_default_arg -= 1 - return tf_inspect.ArgSpec(args=argspec_args, - varargs=argspec.varargs, - keywords=argspec.keywords, - defaults=tuple(argspec_defaults)) + return tf_inspect.FullArgSpec( + args=argspec_args, + varargs=argspec.varargs, + varkw=argspec.varkw, + defaults=tuple(argspec_defaults), + kwonlyargs=[], + kwonlydefaults=None, + annotations={}) else: # Regular function or method, getargspec will work fine. 
- return tf_inspect.getargspec(func) + return tf_inspect.getfullargspec(func) def _remove_first_line_indent(string): @@ -670,11 +674,14 @@ def _remove_first_line_indent(string): return '\n'.join([line[indent:] for line in string.split('\n')]) +PAREN_NUMBER_RE = re.compile(r'^\(([0-9.e-]+)\)') + + def _generate_signature(func, reverse_index): """Given a function, returns a list of strings representing its args. This function produces a list of strings representing the arguments to a - python function. It uses tf_inspect.getargspec, which + python function. It uses tf_inspect.getfullargspec, which does not generalize well to Python 3.x, which is more flexible in how *args and **kwargs are handled. This is not a problem in TF, since we have to remain compatible to Python 2.7 anyway. @@ -725,7 +732,11 @@ def _generate_signature(func, reverse_index): if id(default) in reverse_index: default_text = reverse_index[id(default)] elif ast_default is not None: - default_text = codegen.to_source(ast_default) + default_text = ( + astor.to_source(ast_default).rstrip('\n').replace('\t', '\\t') + .replace('\n', '\\n').replace('"""', "'")) + default_text = PAREN_NUMBER_RE.sub('\\1', default_text) + if default_text != repr(default): # This may be an internal name. If so, handle the ones we know about. # TODO(wicke): This should be replaced with a lookup in the index. @@ -758,8 +769,8 @@ def _generate_signature(func, reverse_index): # Add *args and *kwargs. if argspec.varargs: args_list.append('*' + argspec.varargs) - if argspec.keywords: - args_list.append('**' + argspec.keywords) + if argspec.varkw: + args_list.append('**' + argspec.varkw) return args_list @@ -1136,9 +1147,11 @@ class _ClassPageInfo(object): for short_name in parser_config.tree[self.full_name]: # Remove builtin members that we never want to document. 
- if short_name in ['__class__', '__base__', '__weakref__', '__doc__', - '__module__', '__dict__', '__abstractmethods__', - '__slots__', '__getnewargs__']: + if short_name in [ + '__class__', '__base__', '__weakref__', '__doc__', '__module__', + '__dict__', '__abstractmethods__', '__slots__', '__getnewargs__', + '__str__', '__repr__', '__hash__' + ]: continue child_name = '.'.join([self.full_name, short_name]) @@ -1183,7 +1196,8 @@ class _ClassPageInfo(object): # obvious what they do, don't include them in the docs if there's no # docstring. if not child_doc.brief.strip() and short_name in [ - '__str__', '__repr__', '__hash__', '__del__', '__copy__']: + '__del__', '__copy__' + ]: print('Skipping %s, defined in %s, no docstring.' % (child_name, defining_class)) continue diff --git a/tensorflow/tools/docs/parser_test.py b/tensorflow/tools/docs/parser_test.py index fca5436ca5fadd1fb5da07d7523bb51c871164b5..274d48ef66071a4e6a5ebea65087f18382fea6a2 100644 --- a/tensorflow/tools/docs/parser_test.py +++ b/tensorflow/tools/docs/parser_test.py @@ -398,7 +398,6 @@ class ParserTest(googletest.TestCase): self.assertIn('test_function', docs) def test_argspec_for_functools_partial(self): - # pylint: disable=unused-argument def test_function_for_partial1(arg1, arg2, kwarg1=1, kwarg2=2): pass @@ -409,42 +408,95 @@ class ParserTest(googletest.TestCase): # pylint: disable=protected-access # Make sure everything works for regular functions. - expected = tf_inspect.ArgSpec(['arg1', 'arg2', 'kwarg1', 'kwarg2'], None, - None, (1, 2)) + expected = tf_inspect.FullArgSpec( + args=['arg1', 'arg2', 'kwarg1', 'kwarg2'], + varargs=None, + varkw=None, + defaults=(1, 2), + kwonlyargs=[], + kwonlydefaults=None, + annotations={}) self.assertEqual(expected, parser._get_arg_spec(test_function_for_partial1)) # Make sure doing nothing works. 
- expected = tf_inspect.ArgSpec(['arg1', 'arg2', 'kwarg1', 'kwarg2'], None, - None, (1, 2)) + expected = tf_inspect.FullArgSpec( + args=['arg1', 'arg2', 'kwarg1', 'kwarg2'], + varargs=None, + varkw=None, + defaults=(1, 2), + kwonlyargs=[], + kwonlydefaults=None, + annotations={}) partial = functools.partial(test_function_for_partial1) self.assertEqual(expected, parser._get_arg_spec(partial)) # Make sure setting args from the front works. - expected = tf_inspect.ArgSpec(['arg2', 'kwarg1', 'kwarg2'], None, None, - (1, 2)) + expected = tf_inspect.FullArgSpec( + args=['arg2', 'kwarg1', 'kwarg2'], + varargs=None, + varkw=None, + defaults=(1, 2), + kwonlyargs=[], + kwonlydefaults=None, + annotations={}) partial = functools.partial(test_function_for_partial1, 1) self.assertEqual(expected, parser._get_arg_spec(partial)) - expected = tf_inspect.ArgSpec(['kwarg2',], None, None, (2,)) + expected = tf_inspect.FullArgSpec( + args=['kwarg2'], + varargs=None, + varkw=None, + defaults=(2,), + kwonlyargs=[], + kwonlydefaults=None, + annotations={}) partial = functools.partial(test_function_for_partial1, 1, 2, 3) self.assertEqual(expected, parser._get_arg_spec(partial)) # Make sure setting kwargs works. 
- expected = tf_inspect.ArgSpec(['arg1', 'arg2', 'kwarg2'], None, None, (2,)) + expected = tf_inspect.FullArgSpec( + args=['arg1', 'arg2', 'kwarg2'], + varargs=None, + varkw=None, + defaults=(2,), + kwonlyargs=[], + kwonlydefaults=None, + annotations={}) partial = functools.partial(test_function_for_partial1, kwarg1=0) self.assertEqual(expected, parser._get_arg_spec(partial)) - expected = tf_inspect.ArgSpec(['arg1', 'arg2', 'kwarg1'], None, None, (1,)) + expected = tf_inspect.FullArgSpec( + args=['arg1', 'arg2', 'kwarg1'], + varargs=None, + varkw=None, + defaults=(1,), + kwonlyargs=[], + kwonlydefaults=None, + annotations={}) partial = functools.partial(test_function_for_partial1, kwarg2=0) self.assertEqual(expected, parser._get_arg_spec(partial)) - expected = tf_inspect.ArgSpec(['arg1'], None, None, ()) + expected = tf_inspect.FullArgSpec( + args=['arg1'], + varargs=None, + varkw=None, + defaults=(), + kwonlyargs=[], + kwonlydefaults=None, + annotations={}) partial = functools.partial(test_function_for_partial1, arg2=0, kwarg1=0, kwarg2=0) self.assertEqual(expected, parser._get_arg_spec(partial)) # Make sure *args, *kwargs is accounted for. 
- expected = tf_inspect.ArgSpec([], 'my_args', 'my_kwargs', ()) + expected = tf_inspect.FullArgSpec( + args=[], + varargs='my_args', + varkw='my_kwargs', + defaults=(), + kwonlyargs=[], + kwonlydefaults=None, + annotations={}) partial = functools.partial(test_function_for_partial2, 0, 1) self.assertEqual(expected, parser._get_arg_spec(partial)) @@ -524,10 +576,6 @@ class TestParseFunctionDetails(googletest.TestCase): class TestGenerateSignature(googletest.TestCase): def test_known_object(self): - if sys.version_info >= (3, 0): - print('Warning: Doc generation is not supported from python3.') - return - known_object = object() reverse_index = {id(known_object): 'location.of.object.in.api'} diff --git a/tensorflow/tools/docs/pretty_docs.py b/tensorflow/tools/docs/pretty_docs.py index 543b5fa6fefcd8e8dca99ad7eac7cca76781ccd3..55ab5bdd49a427e680221f4864b3f31a65b12e8d 100644 --- a/tensorflow/tools/docs/pretty_docs.py +++ b/tensorflow/tools/docs/pretty_docs.py @@ -101,7 +101,7 @@ def _build_class_page(page_info): link_template = '[`{short_name}`]({url})' parts.append(', '.join( - link_template.format(**base.__dict__) for base in page_info.bases)) + link_template.format(**base._asdict()) for base in page_info.bases)) parts.append('\n\n') @@ -159,7 +159,7 @@ def _build_class_page(page_info): h3 = ('

' '{short_name}' '

\n\n') - parts.append(h3.format(**method_info.__dict__)) + parts.append(h3.format(**method_info._asdict())) if method_info.signature is not None: parts.append(_build_signature(method_info, use_full_name=False)) @@ -217,7 +217,7 @@ def _build_module_page(page_info): template = '[`{short_name}`]({url}) module' for item in page_info.modules: - parts.append(template.format(**item.__dict__)) + parts.append(template.format(**item._asdict())) if item.doc.brief: parts.append(': ' + item.doc.brief) @@ -229,7 +229,7 @@ def _build_module_page(page_info): template = '[`class {short_name}`]({url})' for item in page_info.classes: - parts.append(template.format(**item.__dict__)) + parts.append(template.format(**item._asdict())) if item.doc.brief: parts.append(': ' + item.doc.brief) @@ -241,7 +241,7 @@ def _build_module_page(page_info): template = '[`{short_name}(...)`]({url})' for item in page_info.functions: - parts.append(template.format(**item.__dict__)) + parts.append(template.format(**item._asdict())) if item.doc.brief: parts.append(': ' + item.doc.brief) @@ -254,7 +254,7 @@ def _build_module_page(page_info): parts.append('## Other Members\n\n') for item in page_info.other_members: - parts.append('`{short_name}`\n\n'.format(**item.__dict__)) + parts.append('`{short_name}`\n\n'.format(**item._asdict())) return ''.join(parts) diff --git a/tensorflow/tools/docs/py_guide_parser.py b/tensorflow/tools/docs/py_guide_parser.py index 216353ecee377260efd5a19c8536ac41c17592a9..328f42d18f1efb0fd82725a4683abad2df0d5a19 100644 --- a/tensorflow/tools/docs/py_guide_parser.py +++ b/tensorflow/tools/docs/py_guide_parser.py @@ -44,7 +44,7 @@ class PyGuideParser(object): def process(self, full_path): """Read and process the file at `full_path`.""" - md_string = open(full_path).read() + md_string = open(full_path, 'rb').read().decode('utf-8') self._lines = md_string.split('\n') seen = set() diff --git a/tensorflow/tools/git/gen_git_source.py b/tensorflow/tools/git/gen_git_source.py index 
cbcdbf5b807a585865e2e3f19291e55388d55cb1..a8d878cf16cb9aae1c8639d97cb2802f685c2a8a 100755 --- a/tensorflow/tools/git/gen_git_source.py +++ b/tensorflow/tools/git/gen_git_source.py @@ -125,7 +125,7 @@ def configure(src_base_path, gen_path, debug=False): try: # In python 3.5, symlink function exists even on Windows. But requires # Windows Admin privileges, otherwise an OSError will be thrown. - if hasattr(os, 'symlink'): + if hasattr(os, "symlink"): os.symlink(src, os.path.join(gen_path, target)) else: shutil.copy2(src, os.path.join(gen_path, target)) @@ -139,7 +139,7 @@ def configure(src_base_path, gen_path, debug=False): print("gen_git_source.py: spec is %r" % spec) -def get_git_version(git_base_path): +def get_git_version(git_base_path, git_tag_override): """Get the git version from the repository. This function runs `git describe ...` in the path given as `git_base_path`. @@ -152,6 +152,9 @@ def get_git_version(git_base_path): Args: git_base_path: where the .git directory is located + git_tag_override: Override the value for the git tag. This is useful for + releases where we want to build the release before the git tag is + created. Returns: A bytestring representing the git version """ @@ -161,8 +164,19 @@ def get_git_version(git_base_path): "git", str("--git-dir=%s/.git" % git_base_path), str("--work-tree=" + git_base_path), "describe", "--long", "--tags" ]).strip()) + if git_tag_override and val: + split_val = val.split("-") + if len(split_val) < 3: + raise Exception( + ("Expected git version in format 'TAG-COMMITS AFTER TAG-HASH' " + "but got '%s'") % val) + # There might be "-" in the tag name. But we can be sure that the final + # two "-" are those inserted by the git describe command. 
+ abbrev_commit = split_val[-1] + val = bytes( + "-".join([git_tag_override, "0", abbrev_commit])) return val if val else unknown_label - except subprocess.CalledProcessError: + except (subprocess.CalledProcessError, OSError): return unknown_label @@ -178,7 +192,15 @@ def write_version_info(filename, git_version): contents = """/* Generated by gen_git_source.py */ #include const char* tf_git_version() {return "%s";} -const char* tf_compiler_version() {return __VERSION__;} +const char* tf_compiler_version() { +#ifdef _MSC_VER +#define STRINGIFY(x) #x +#define TOSTRING(x) STRINGIFY(x) + return "MSVC " TOSTRING(_MSC_FULL_VER); +#else + return __VERSION__; +#endif +} const int tf_cxx11_abi_flag() { #ifdef _GLIBCXX_USE_CXX11_ABI return _GLIBCXX_USE_CXX11_ABI; @@ -197,7 +219,7 @@ const int tf_monolithic_build() { open(filename, "w").write(contents) -def generate(arglist): +def generate(arglist, git_tag_override=None): """Generate version_info.cc as given `destination_file`. Args: @@ -217,6 +239,10 @@ def generate(arglist): `ref_symlink` is unused in this script but passed, because the build system uses that file to detect when commits happen. + git_tag_override: Override the value for the git tag. This is useful for + releases where we want to build the release before the git tag is + created. + Raises: RuntimeError: If ./configure needs to be run, RuntimeError will be raised. """ @@ -234,11 +260,11 @@ def generate(arglist): raise RuntimeError( "Run ./configure again, branch was '%s' but is now '%s'" % (old_branch, new_branch)) - git_version = get_git_version(data["path"]) + git_version = get_git_version(data["path"], git_tag_override) write_version_info(dest_file, git_version) -def raw_generate(output_file): +def raw_generate(output_file, source_dir, git_tag_override=None): """Simple generator used for cmake/make build systems. This does not create any symlinks. 
It requires the build system @@ -246,9 +272,13 @@ def raw_generate(output_file): Args: output_file: Output filename for the version info cc + source_dir: Base path of the source code + git_tag_override: Override the value for the git tag. This is useful for + releases where we want to build the release before the git tag is + created. """ - git_version = get_git_version(".") + git_version = get_git_version(source_dir, git_tag_override) write_version_info(output_file, git_version) @@ -270,6 +300,11 @@ parser.add_argument( "--gen_root_path", type=str, help="Root path to place generated git files (created by --configure).") +parser.add_argument( + "--git_tag_override", type=str, + help="Override git tag value in the __git_version__ string. Useful when " + "creating release builds before the release tag is created.") + parser.add_argument( "--generate", type=str, @@ -281,6 +316,11 @@ parser.add_argument( type=str, help="Generate version_info.cc (simpler version used for cmake/make)") +parser.add_argument( + "--source_dir", + type=str, + help="Base path of the source code (used for cmake/make)") + args = parser.parse_args() if args.configure is not None: @@ -288,9 +328,12 @@ if args.configure is not None: raise RuntimeError("Must pass --gen_root_path arg when running --configure") configure(args.configure, args.gen_root_path, debug=args.debug) elif args.generate is not None: - generate(args.generate) + generate(args.generate, args.git_tag_override) elif args.raw_generate is not None: - raw_generate(args.raw_generate) + source_path = "." 
+ if args.source_dir is not None: + source_path = args.source_dir + raw_generate(args.raw_generate, source_path, args.git_tag_override) else: raise RuntimeError("--configure or --generate or --raw_generate " - "must be used") + "must be used") \ No newline at end of file diff --git a/tensorflow/tools/git/gen_git_source.sh b/tensorflow/tools/git/gen_git_source.sh index db20bb00e84b47bd15244e70b925f59e62731deb..cd128af6b36f2f99e5cf91961476d30384227e9b 100755 --- a/tensorflow/tools/git/gen_git_source.sh +++ b/tensorflow/tools/git/gen_git_source.sh @@ -28,7 +28,15 @@ fi cat < ${OUTPUT_FILENAME} #include const char* tf_git_version() {return "${GIT_VERSION}";} -const char* tf_compiler_version() {return __VERSION__;} +const char* tf_compiler_version() { +#ifdef _MSC_VER +#define STRINGIFY(x) #x +#define TOSTRING(x) STRINGIFY(x) + return "MSVC " TOSTRING(_MSC_FULL_VER); +#else + return __VERSION__; +#endif +} const int tf_cxx11_abi_flag() { #ifdef _GLIBCXX_USE_CXX11_ABI return _GLIBCXX_USE_CXX11_ABI; diff --git a/tensorflow/tools/graph_transforms/backports_test.cc b/tensorflow/tools/graph_transforms/backports_test.cc index ab9a61afa7eb1680580c7e0c41f8ff1b47ef6742..80a954e062b06924c6048ac8b011dc1034706e8e 100644 --- a/tensorflow/tools/graph_transforms/backports_test.cc +++ b/tensorflow/tools/graph_transforms/backports_test.cc @@ -20,6 +20,7 @@ limitations under the License. 
#include "tensorflow/cc/ops/standard_ops.h" #include "tensorflow/core/framework/tensor_testutil.h" #include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/platform/test.h" #include "tensorflow/core/platform/test_benchmark.h" #include "tensorflow/core/public/session.h" @@ -191,7 +192,7 @@ TEST(BackportTensorArrayV3Test, TestBackportTensorArrayV3Subtypes) { std::map node_lookup; MapNamesToNodes(result, &node_lookup); ASSERT_EQ(1, node_lookup.count("v3_node")); - EXPECT_TRUE(StringPiece(node_lookup.at("v3_node")->op()).ends_with("V2")); + EXPECT_TRUE(str_util::EndsWith(node_lookup.at("v3_node")->op(), "V2")); } } diff --git a/tensorflow/tools/graph_transforms/fold_constants_lib.cc b/tensorflow/tools/graph_transforms/fold_constants_lib.cc index 250f54e20fba6e24fe95741b1437ac3718ace6fb..85660f94a85dce29360525f7bb7474494b3f010f 100644 --- a/tensorflow/tools/graph_transforms/fold_constants_lib.cc +++ b/tensorflow/tools/graph_transforms/fold_constants_lib.cc @@ -283,6 +283,10 @@ Status FoldConstants(const GraphDef& input_graph_def, }; } + TF_RETURN_IF_ERROR(context.GetOneInt64Parameter( + "max_constant_size_in_bytes", cf_opts.max_constant_size_in_bytes, + &cf_opts.max_constant_size_in_bytes)); + // Constant folding. bool was_mutated; TF_RETURN_IF_ERROR(ConstantFold(cf_opts, nullptr, Env::Default(), nullptr, diff --git a/tensorflow/tools/graph_transforms/fold_constants_test.cc b/tensorflow/tools/graph_transforms/fold_constants_test.cc index 41106de008d832a022290e6da38cca8ad6d23ffd..a082399a87dbaad913be421fe273ba89b6f7340e 100644 --- a/tensorflow/tools/graph_transforms/fold_constants_test.cc +++ b/tensorflow/tools/graph_transforms/fold_constants_test.cc @@ -24,6 +24,7 @@ limitations under the License. 
#include "tensorflow/core/framework/tensor_shape.pb.h" #include "tensorflow/core/framework/tensor_testutil.h" #include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/platform/test.h" #include "tensorflow/core/platform/test_benchmark.h" #include "tensorflow/core/public/session.h" @@ -209,10 +210,10 @@ class ConstantFoldingTest : public ::testing::Test { for (const NodeDef& node : graph_def.node()) { const StringPiece name(node.name()); const int occurrence_count = folded_node_map.count(node.name()); - if (name.ends_with("expect_removed")) { + if (str_util::EndsWith(name, "expect_removed")) { EXPECT_EQ(0, occurrence_count) << "node.name()=" << node.name(); } - if (name.ends_with("expect_remains")) { + if (str_util::EndsWith(name, "expect_remains")) { EXPECT_EQ(1, occurrence_count) << "node.name()=" << node.name(); } } @@ -370,6 +371,46 @@ class ConstantFoldingTest : public ::testing::Test { EXPECT_EQ(0, node_map.count("b")); EXPECT_EQ(1, node_map.count("c")); } + + void TestMaxConstantSizeInBytes() { + auto root = tensorflow::Scope::NewRootScope(); + + const int width = 100; + + Tensor a_data(DT_FLOAT, TensorShape({width})); + test::FillIota(&a_data, 1.0f); + Output a_const = ::tensorflow::ops::Const( + root.WithOpName("a_expect_remains"), Input::Initializer(a_data)); + + Tensor b_data(DT_FLOAT, TensorShape({width})); + test::FillIota(&b_data, 1.0f); + Output b_const = ::tensorflow::ops::Const( + root.WithOpName("b_expect_remains"), Input::Initializer(b_data)); + + Output add = ::tensorflow::ops::Add(root.WithOpName("add_expect_remains"), + a_const, b_const); + + Output placeholder = ::tensorflow::ops::Placeholder( + root.WithOpName("placeholder_expect_remains"), DT_FLOAT); + + Output mul = ::tensorflow::ops::Mul( + root.WithOpName("output_expect_remains"), add, placeholder); + + GraphDef graph_def; + TF_ASSERT_OK(root.ToGraphDef(&graph_def)); + + Tensor placeholder_tensor(DT_FLOAT, 
TensorShape({width})); + test::FillIota(&placeholder_tensor, 1.0f); + + // Setting the maximum constant size to 10 bytes should stop the constant + // folding at add(a, b) that would have yielded a constant of + // 100*sizeof(float) bytes. + graph_transforms::TransformFuncContext context; + context.params["max_constant_size_in_bytes"] = {"10"}; + TestConstantFolding(graph_def, + {{"placeholder_expect_remains", placeholder_tensor}}, + {}, {"output_expect_remains"}, context); + } }; TEST_F(ConstantFoldingTest, TestSimpleAdd) { TestSimpleAdd(); } @@ -394,5 +435,9 @@ TEST_F(ConstantFoldingTest, TestRemoveUnusedNodesMultipleOutputs) { TestRemoveUnusedNodesMultipleOutputs(); } +TEST_F(ConstantFoldingTest, TestMaxConstantSizeInBytes) { + TestMaxConstantSizeInBytes(); +} + } // namespace graph_transforms } // namespace tensorflow diff --git a/tensorflow/tools/graph_transforms/freeze_requantization_ranges.cc b/tensorflow/tools/graph_transforms/freeze_requantization_ranges.cc index 2436c7e4a2dc5c8172de2a35abbfc551d6e410fd..f401723808c086bd69743b75b8b4d972e8ab0b83 100644 --- a/tensorflow/tools/graph_transforms/freeze_requantization_ranges.cc +++ b/tensorflow/tools/graph_transforms/freeze_requantization_ranges.cc @@ -40,8 +40,8 @@ Status ExtractMinMaxRecords(const string& log_file_name, for (const string& file_line : file_lines) { // We expect to find a line with components separated by semicolons, so to // start make sure that the basic structure is in place/ - StringPiece line(file_line); - if (!line.contains(print_suffix + ";" + requant_prefix)) { + if (!str_util::StrContains(file_line, + print_suffix + ";" + requant_prefix)) { continue; } std::vector line_parts = str_util::Split(file_line, ';'); @@ -53,8 +53,7 @@ Status ExtractMinMaxRecords(const string& log_file_name, bool min_max_found = false; int min_max_index; for (int i = 1; i < line_parts.size(); ++i) { - StringPiece line_part(line_parts[i]); - if (line_part.starts_with(requant_prefix)) { + if 
(str_util::StartsWith(line_parts[i], requant_prefix)) { min_max_found = true; min_max_index = i; } @@ -90,7 +89,7 @@ Status ExtractMinMaxRecords(const string& log_file_name, continue; } StringPiece name_string = line_parts[min_max_index - 1]; - if (!name_string.ends_with(print_suffix)) { + if (!str_util::EndsWith(name_string, print_suffix)) { continue; } string name = diff --git a/tensorflow/tools/graph_transforms/insert_logging.cc b/tensorflow/tools/graph_transforms/insert_logging.cc index e1ee2b420b062937b5e50c10a05406df3cbd7977..377665448c244aeace78f231ba0c263613afd9a0 100644 --- a/tensorflow/tools/graph_transforms/insert_logging.cc +++ b/tensorflow/tools/graph_transforms/insert_logging.cc @@ -19,6 +19,7 @@ limitations under the License. #include "tensorflow/core/graph/graph_constructor.h" #include "tensorflow/core/graph/node_builder.h" #include "tensorflow/core/graph/subgraph.h" +#include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/platform/init_main.h" #include "tensorflow/core/public/session.h" #include "tensorflow/core/util/command_line_flags.h" @@ -101,7 +102,7 @@ Status InsertLogging(const GraphDef& input_graph_def, const bool op_matches = (ops.count(node.op()) > 0); bool prefix_matches = false; for (const string& prefix : prefixes) { - if (StringPiece(node.name()).starts_with(prefix)) { + if (str_util::StartsWith(node.name(), prefix)) { prefix_matches = true; } } diff --git a/tensorflow/tools/graph_transforms/sparsify_gather.cc b/tensorflow/tools/graph_transforms/sparsify_gather.cc index 701e350fc39d083665f5420e6b73510c182e12ce..cc82100148117c7846ba5781e1a97e172ad7f03c 100644 --- a/tensorflow/tools/graph_transforms/sparsify_gather.cc +++ b/tensorflow/tools/graph_transforms/sparsify_gather.cc @@ -22,6 +22,7 @@ limitations under the License. 
#include "tensorflow/core/graph/graph_constructor.h" #include "tensorflow/core/graph/node_builder.h" #include "tensorflow/core/graph/subgraph.h" +#include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/platform/init_main.h" #include "tensorflow/core/public/session.h" #include "tensorflow/core/util/command_line_flags.h" @@ -88,7 +89,7 @@ void CreateConstNode(const Tensor& tensor, const string& name, string GetMonolithicTensorKey(const string& tensor_slice_name) { std::vector names = Split(tensor_slice_name, "/"); - if (StringPiece(names[names.size() - 1]).starts_with("part_")) { + if (str_util::StartsWith(names[names.size() - 1], "part_")) { CHECK_GE(names.size(), 2); names.pop_back(); } @@ -102,8 +103,8 @@ Status ObtainTensorSlice(const GraphDef& input_graph_def, for (const auto& node : input_graph_def.node()) { std::vector node_name_parts = Split(node.name(), "/"); if (node_name_parts.size() == 2 && - StringPiece(node_name_parts[0]).starts_with("save") && - StringPiece(node_name_parts[1]).starts_with("Assign") && + str_util::StartsWith(node_name_parts[0], "save") && + str_util::StartsWith(node_name_parts[1], "Assign") && node.input(0) == target_name) { restore_node_name = node.input(1); break; diff --git a/tensorflow/tools/graph_transforms/transform_graph.cc b/tensorflow/tools/graph_transforms/transform_graph.cc index 28387c2b48c06ecffd2afa0705a8dea5bc368460..8ce8f5e24b9f002e50d456c8ccab8a6414fca724 100644 --- a/tensorflow/tools/graph_transforms/transform_graph.cc +++ b/tensorflow/tools/graph_transforms/transform_graph.cc @@ -24,6 +24,9 @@ limitations under the License. 
#include "tensorflow/core/util/command_line_flags.h" #include "tensorflow/tools/graph_transforms/file_utils.h" #include "tensorflow/tools/graph_transforms/transform_utils.h" +#if !defined(PLATFORM_WINDOWS) +#include +#endif namespace tensorflow { namespace graph_transforms { @@ -130,16 +133,64 @@ Status ParseTransformParameters(const string& transforms_string, return Status::OK(); } +std::string ExpandPath(const std::string& path_string) { +#if defined(PLATFORM_WINDOWS) + return path_string; +#else + if (path_string.empty() || path_string[0] != '~') { + return path_string; + } + + const char* home = NULL; + std::string::size_type prefix = path_string.find_first_of('/'); + if (path_string.length() == 1 || prefix == 1) { + // The value of $HOME, e.g., ~/foo + home = getenv("HOME"); + if (!home) { + // If HOME is not available, get uid + struct passwd* pw = getpwuid(getuid()); + if (pw) { + home = pw->pw_dir; + } + } + } else { + // The value of ~user, e.g., ~user/foo + std::string user(path_string, 1, (prefix == std::string::npos) + ? 
std::string::npos + : prefix - 1); + struct passwd* pw = getpwnam(user.c_str()); + if (pw) { + home = pw->pw_dir; + } + } + + if (!home) { + return path_string; + } + + string path(home); + if (prefix == std::string::npos) { + return path; + } + + if (path.length() == 0 || path[path.length() - 1] != '/') { + path += '/'; + } + path += path_string.substr(prefix + 1); + return path; +#endif +} + int ParseFlagsAndTransformGraph(int argc, char* argv[], bool init_main) { - string in_graph = ""; - string out_graph = ""; + string in_graph_string = ""; + string out_graph_string = ""; string inputs_string = ""; string outputs_string = ""; string transforms_string = ""; bool output_as_text = false; std::vector flag_list = { - Flag("in_graph", &in_graph, "input graph file name"), - Flag("out_graph", &out_graph, "output graph file name"), + Flag("in_graph", &in_graph_string, "input graph file name"), + Flag("out_graph", &out_graph_string, "output graph file name"), Flag("inputs", &inputs_string, "inputs"), Flag("outputs", &outputs_string, "outputs"), Flag("transforms", &transforms_string, "list of transforms"), @@ -166,11 +217,11 @@ int ParseFlagsAndTransformGraph(int argc, char* argv[], bool init_main) { LOG(ERROR) << "Unknown argument " << argv[1] << ".\n" << usage; return -1; } - if (in_graph.empty()) { + if (in_graph_string.empty()) { LOG(ERROR) << "in_graph graph can't be empty.\n" << usage; return -1; } - if (out_graph.empty()) { + if (out_graph_string.empty()) { LOG(ERROR) << "out_graph graph can't be empty.\n" << usage; return -1; } @@ -179,6 +230,9 @@ int ParseFlagsAndTransformGraph(int argc, char* argv[], bool init_main) { return -1; } + string in_graph = ExpandPath(in_graph_string); + string out_graph = ExpandPath(out_graph_string); + std::vector inputs = str_util::Split(inputs_string, ','); std::vector outputs = str_util::Split(outputs_string, ','); TransformParameters transform_params; @@ -197,7 +251,7 @@ int ParseFlagsAndTransformGraph(int argc, char* argv[], 
bool init_main) { GraphDef graph_def; Status load_status = LoadTextOrBinaryGraphFile(in_graph, &graph_def); if (!load_status.ok()) { - LOG(ERROR) << "Loading graph '" << in_graph << "' failed with " + LOG(ERROR) << "Loading graph '" << in_graph_string << "' failed with " << load_status.error_message(); LOG(ERROR) << usage; return -1; @@ -219,7 +273,7 @@ int ParseFlagsAndTransformGraph(int argc, char* argv[], bool init_main) { save_status = WriteBinaryProto(Env::Default(), out_graph, graph_def); } if (!save_status.ok()) { - LOG(ERROR) << "Saving graph '" << out_graph << "' failed with " + LOG(ERROR) << "Saving graph '" << out_graph_string << "' failed with " << save_status.error_message(); return -1; } diff --git a/tensorflow/tools/graph_transforms/transform_graph_test.cc b/tensorflow/tools/graph_transforms/transform_graph_test.cc index bc2412fcbdba90731318eea1a2239aa914b35ffc..b276229aa44f747ee81ebcdfe204468929c9eb53 100644 --- a/tensorflow/tools/graph_transforms/transform_graph_test.cc +++ b/tensorflow/tools/graph_transforms/transform_graph_test.cc @@ -22,6 +22,7 @@ limitations under the License. 
#include "tensorflow/core/framework/tensor_testutil.h" #include "tensorflow/core/lib/core/status_test_util.h" #include "tensorflow/core/lib/io/path.h" +#include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/platform/test.h" #include "tensorflow/core/platform/test_benchmark.h" #include "tensorflow/core/public/session.h" @@ -112,12 +113,11 @@ class TransformGraphTest : public ::testing::Test { graph_transforms::MapNamesToNodes(out_graph_def, &out_node_map); for (const NodeDef& node : out_graph_def.node()) { - const StringPiece name(node.name()); const int occurrence_count = out_node_map.count(node.name()); - if (name.ends_with("expect_removed")) { + if (str_util::EndsWith(node.name(), "expect_removed")) { EXPECT_EQ(0, occurrence_count) << "node.name()=" << node.name(); } - if (name.ends_with("expect_remains")) { + if (str_util::EndsWith(node.name(), "expect_remains")) { EXPECT_EQ(1, occurrence_count) << "node.name()=" << node.name(); } } @@ -139,7 +139,7 @@ class TransformGraphTest : public ::testing::Test { Status no_such_status = TransformGraph({}, {}, {{"test_no_such_transform", {}}}, &graph_def); EXPECT_TRUE( - StringPiece(no_such_status.ToString()).contains("not recognized")); + str_util::StrContains(no_such_status.ToString(), "not recognized")); } void TestParseTransformParameters() { diff --git a/tensorflow/tools/graph_transforms/transform_utils.cc b/tensorflow/tools/graph_transforms/transform_utils.cc index 55f28a9e1d8d639a316c9bd121204d603217dea3..367048965d146d782267f23330a435ae72f7f49a 100644 --- a/tensorflow/tools/graph_transforms/transform_utils.cc +++ b/tensorflow/tools/graph_transforms/transform_utils.cc @@ -88,7 +88,7 @@ void NodeNamePartsFromInput(const string& input_name, string* prefix, *suffix = ":" + input_parts[1]; } StringPiece node_name_piece(input_parts[0]); - if (node_name_piece.Consume("^")) { + if (str_util::ConsumePrefix(&node_name_piece, "^")) { *prefix = "^"; } else { *prefix = ""; @@ -200,8 +200,7 @@ Status 
SortByExecutionOrder(const GraphDef& input_graph_def, // for merge only wait for one non-control input. int32 num_control_edges = 0; for (int i = 0; i < node_def.input_size(); ++i) { - StringPiece input_name(node_def.input(i)); - if (input_name.starts_with("^")) { + if (str_util::StartsWith(node_def.input(i), "^")) { num_control_edges++; } } @@ -504,7 +503,7 @@ Status RenameNodeInputs(const GraphDef& input_graph_def, const string& dest_name = input_to_rename.second; bool is_match; string match_name; - if (StringPiece(source_name).ends_with(":*")) { + if (str_util::EndsWith(source_name, ":*")) { is_match = true; string prefix; string unused_node_name; diff --git a/tensorflow/tools/lib_package/BUILD b/tensorflow/tools/lib_package/BUILD index 0ede8c63704ac4a474eb0d19e17cf5f365abca77..569b6678cabddd17eaf21921d668221f1a865625 100644 --- a/tensorflow/tools/lib_package/BUILD +++ b/tensorflow/tools/lib_package/BUILD @@ -118,6 +118,7 @@ genrule( "@com_googlesource_code_re2//:LICENSE", "@cub_archive//:LICENSE.TXT", "@curl//:COPYING", + "@double_conversion//:LICENSE", "@eigen_archive//:COPYING.MPL2", "@farmhash_archive//:COPYING", "@fft2d//:fft/readme.txt", @@ -155,6 +156,7 @@ genrule( "@com_googlesource_code_re2//:LICENSE", "@cub_archive//:LICENSE.TXT", "@curl//:COPYING", + "@double_conversion//:LICENSE", "@eigen_archive//:COPYING.MPL2", "@farmhash_archive//:COPYING", "@fft2d//:fft/readme.txt", diff --git a/tensorflow/tools/lib_package/README.md b/tensorflow/tools/lib_package/README.md index 700814826035a3274739f1d3784140ea0a59b36f..cb6aef262456a202b93a2787626688b7712e2352 100644 --- a/tensorflow/tools/lib_package/README.md +++ b/tensorflow/tools/lib_package/README.md @@ -35,8 +35,8 @@ The following commands: bazel test --config opt //tensorflow/tools/lib_package:libtensorflow_test bazel build --config opt \ //tensorflow/tools/lib_package:libtensorflow_jni.tar.gz \ - //tensorflow/tools/lib_package:libtensorflow.jar \ - //tensorflow/tools/lib_package:libtensorflow-src.jar + 
//tensorflow/java:libtensorflow.jar \ + //tensorflow/java:libtensorflow-src.jar ``` test and produce the following: @@ -44,9 +44,9 @@ test and produce the following: - The native library (`libtensorflow_jni.so`) packaged in an archive at: `bazel-bin/tensorflow/tools/lib_package/libtensorflow_jni.tar.gz` - The Java archive at: - `bazel-bin/tensorflow/tools/lib_package/libtensorflow.jar` + `bazel-bin/tensorflow/java/libtensorflow.jar` - The Java archive for Java sources at: - `bazel-bin/tensorflow/tools/lib_package/libtensorflow-src.jar` + `bazel-bin/tensorflow/java/libtensorflow-src.jar` ## Release diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD index 62fec2c402b64a38e1ea21e11dd9021f2bd6cce1..7b508f87ab75e27cfdb3c41002873a8d38117f13 100644 --- a/tensorflow/tools/pip_package/BUILD +++ b/tensorflow/tools/pip_package/BUILD @@ -48,36 +48,67 @@ py_binary( deps = ["//tensorflow:tensorflow_py"], ) +COMMON_PIP_DEPS = [ + ":licenses", + "MANIFEST.in", + "README", + "setup.py", + ":included_headers", + "//tensorflow:tensorflow_py", + "//tensorflow/contrib/autograph:autograph", + "//tensorflow/contrib/autograph/converters:converters", + "//tensorflow/contrib/autograph/converters:test_lib", + "//tensorflow/contrib/autograph/impl:impl", + "//tensorflow/contrib/autograph/pyct:pyct", + "//tensorflow/contrib/autograph/pyct/static_analysis:static_analysis", + "//tensorflow/contrib/boosted_trees:boosted_trees_pip", + "//tensorflow/contrib/cluster_resolver:cluster_resolver_pip", + "//tensorflow/contrib/data/python/kernel_tests:dataset_serialization_test", + "//tensorflow/contrib/data/python/ops:contrib_op_loader", + "//tensorflow/contrib/eager/python/examples:examples_pip", + "//tensorflow/contrib/eager/python:evaluator", + "//tensorflow/contrib/gan:gan", + "//tensorflow/contrib/graph_editor:graph_editor_pip", + "//tensorflow/contrib/keras:keras", + "//tensorflow/contrib/labeled_tensor:labeled_tensor_pip", + "//tensorflow/contrib/nn:nn_py", + 
"//tensorflow/contrib/predictor:predictor_pip", + "//tensorflow/contrib/proto:proto_pip", + "//tensorflow/contrib/receptive_field:receptive_field_pip", + "//tensorflow/contrib/rpc:rpc_pip", + "//tensorflow/contrib/session_bundle:session_bundle_pip", + "//tensorflow/contrib/signal:signal_py", + "//tensorflow/contrib/signal:test_util", + "//tensorflow/contrib/slim:slim", + "//tensorflow/contrib/slim/python/slim/data:data_pip", + "//tensorflow/contrib/slim/python/slim/nets:nets_pip", + "//tensorflow/contrib/specs:specs", + "//tensorflow/contrib/summary:summary_test_util", + "//tensorflow/contrib/tensor_forest:init_py", + "//tensorflow/contrib/tensor_forest/hybrid:hybrid_pip", + "//tensorflow/contrib/timeseries:timeseries_pip", + "//tensorflow/contrib/tpu", + "//tensorflow/examples/tutorials/mnist:package", + "//tensorflow/python:distributed_framework_test_lib", + "//tensorflow/python:meta_graph_testdata", + "//tensorflow/python:spectral_ops_test_util", + "//tensorflow/python:util_example_parser_configuration", + "//tensorflow/python/debug:debug_pip", + "//tensorflow/python/eager:eager_pip", + "//tensorflow/python/kernel_tests/testdata:self_adjoint_eig_op_test_files", + "//tensorflow/python/saved_model:saved_model", + "//tensorflow/python/tools:tools_pip", + "//tensorflow/python:test_ops", + "//tensorflow/tools/dist_test/server:grpc_tensorflow_server", +] + # On Windows, python binary is a zip file of runfiles tree. # Add everything to its data dependency for generating a runfiles tree # for building the pip package on Windows. 
py_binary( name = "simple_console_for_windows", srcs = ["simple_console_for_windows.py"], - data = [ - "MANIFEST.in", - "README", - "setup.py", - ":included_headers", - "//tensorflow/contrib/nn:nn_py", - "//tensorflow/contrib/session_bundle:session_bundle_pip", - "//tensorflow/contrib/signal:signal_py", - "//tensorflow/contrib/slim/python/slim/data:data_pip", - "//tensorflow/python:util_example_parser_configuration", - "//tensorflow/python/debug:debug_pip", - "//tensorflow/python/saved_model", - "//tensorflow/python:spectral_ops_test_util", - "//tensorflow/python/tools:tools_pip", - "//tensorflow/python/eager:eager_pip", - "//tensorflow/contrib/summary:summary_test_util", - # These targets don't build on Windows yet. Exclude them for now. - # "//tensorflow/contrib/slim", - # "//tensorflow/contrib/slim/python/slim/nets:nets_pip", - # "//tensorflow/contrib/specs", - # "//tensorflow/contrib/tensor_forest:init_py", - # "//tensorflow/contrib/tensor_forest/hybrid:hybrid_pip", - # "//tensorflow/examples/tutorials/mnist:package", - ], + data = COMMON_PIP_DEPS, srcs_version = "PY2AND3", deps = ["//tensorflow:tensorflow_py"], ) @@ -97,6 +128,7 @@ filegroup( "@com_googlesource_code_re2//:LICENSE", "@cub_archive//:LICENSE.TXT", "@curl//:COPYING", + "@double_conversion//:LICENSE", "@eigen_archive//:COPYING.MPL2", "@farmhash_archive//:COPYING", "@fft2d//:fft/readme.txt", @@ -111,6 +143,7 @@ filegroup( "@kafka//:LICENSE", "@libxsmm_archive//:LICENSE", "@lmdb//:LICENSE", + "@local_config_nccl//:LICENSE", "@local_config_sycl//sycl:LICENSE.text", "@grpc//third_party/nanopb:LICENSE.txt", "@grpc//third_party/address_sorting:LICENSE", @@ -127,8 +160,6 @@ filegroup( "@org_python_pypi_backports_weakref//:LICENSE", ] + if_mkl([ "//third_party/mkl:LICENSE", - ]) + if_not_windows([ - "@nccl_archive//:LICENSE.txt", ]) + tf_additional_license_deps(), ) @@ -138,63 +169,13 @@ sh_binary( data = select({ "//tensorflow:windows": [":simple_console_for_windows"], "//tensorflow:windows_msvc": 
[":simple_console_for_windows"], - "//conditions:default": [ - ":licenses", - "MANIFEST.in", - "README", - "setup.py", - ":included_headers", + "//conditions:default": COMMON_PIP_DEPS + [ ":simple_console", - "//tensorflow:tensorflow_py", - "//tensorflow/contrib/boosted_trees:boosted_trees_pip", - "//tensorflow/contrib/cluster_resolver:cluster_resolver_pip", - "//tensorflow/contrib/data/python/kernel_tests:dataset_serialization_test", - "//tensorflow/contrib/data/python/ops:contrib_op_loader", - "//tensorflow/contrib/eager/python/examples:examples_pip", - "//tensorflow/contrib/eager/python:checkpointable_utils", - "//tensorflow/contrib/eager/python:evaluator", - "//tensorflow/contrib/gan:gan", - "//tensorflow/contrib/graph_editor:graph_editor_pip", - "//tensorflow/contrib/keras:keras", - "//tensorflow/contrib/labeled_tensor:labeled_tensor_pip", "//tensorflow/contrib/lite/python:interpreter_test_data", "//tensorflow/contrib/lite/python:tf_lite_py_pip", "//tensorflow/contrib/lite/toco:toco", "//tensorflow/contrib/lite/toco/python:toco_wrapper", "//tensorflow/contrib/lite/toco/python:toco_from_protos", - "//tensorflow/contrib/nn:nn_py", - "//tensorflow/contrib/predictor:predictor_pip", - "//tensorflow/contrib/autograph:autograph", - "//tensorflow/contrib/autograph/converters:converters", - "//tensorflow/contrib/autograph/converters:test_lib", - "//tensorflow/contrib/autograph/impl:impl", - "//tensorflow/contrib/autograph/pyct:pyct", - "//tensorflow/contrib/autograph/pyct/static_analysis:static_analysis", - "//tensorflow/contrib/receptive_field:receptive_field_pip", - "//tensorflow/contrib/session_bundle:session_bundle_pip", - "//tensorflow/contrib/signal:signal_py", - "//tensorflow/contrib/signal:test_util", - "//tensorflow/contrib/slim:slim", - "//tensorflow/contrib/slim/python/slim/data:data_pip", - "//tensorflow/contrib/slim/python/slim/nets:nets_pip", - "//tensorflow/contrib/specs:specs", - "//tensorflow/contrib/summary:summary_test_util", - 
"//tensorflow/contrib/tensor_forest:init_py", - "//tensorflow/contrib/tensor_forest/hybrid:hybrid_pip", - "//tensorflow/contrib/timeseries:timeseries_pip", - "//tensorflow/contrib/tpu", - "//tensorflow/examples/tutorials/mnist:package", - "//tensorflow/python:distributed_framework_test_lib", - "//tensorflow/python:meta_graph_testdata", - "//tensorflow/python:spectral_ops_test_util", - "//tensorflow/python:util_example_parser_configuration", - "//tensorflow/python/debug:debug_pip", - "//tensorflow/python/eager:eager_pip", - "//tensorflow/python/kernel_tests/testdata:self_adjoint_eig_op_test_files", - "//tensorflow/python/saved_model:saved_model", - "//tensorflow/python/tools:tools_pip", - "//tensorflow/python:test_ops", - "//tensorflow/tools/dist_test/server:grpc_tensorflow_server", ], }) + if_mkl(["//third_party/mkl:intel_binary_blob"]) + if_tensorrt([ "//tensorflow/contrib/tensorrt:init_py", diff --git a/tensorflow/tools/pip_package/build_pip_package.sh b/tensorflow/tools/pip_package/build_pip_package.sh index feb3114bde8040991c435bc05247352e6c618106..8f0cf8c3d194807b6c82f50b5ac8c7fe7527fea5 100755 --- a/tensorflow/tools/pip_package/build_pip_package.sh +++ b/tensorflow/tools/pip_package/build_pip_package.sh @@ -162,7 +162,9 @@ function main() { # Before we leave the top-level directory, make sure we know how to # call python. 
- source tools/python_bin_path.sh + if [[ -e tools/python_bin_path.sh ]]; then + source tools/python_bin_path.sh + fi pushd ${TMPDIR} rm -f MANIFEST diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index 365e8d6b08d654138debd7acad5cf4aac5d07d55..b88d023cbcaa9d6444ae14824227ef13145db4f1 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -22,20 +22,21 @@ import os import re import sys -from setuptools import find_packages, setup, Command +from setuptools import Command +from setuptools import find_packages +from setuptools import setup from setuptools.command.install import install as InstallCommandBase from setuptools.dist import Distribution # This version string is semver compatible, but incompatible with pip. # For pip, we will remove all '-' characters from this string, and use the # result for pip. -_VERSION = '1.7.0-rc1' +_VERSION = '1.8.0-rc1' REQUIRED_PACKAGES = [ 'absl-py >= 0.1.6', 'astor >= 0.6.0', 'gast >= 0.2.0', - 'grpcio >= 1.8.6', 'numpy >= 1.13.3', 'six >= 1.10.0', 'protobuf >= 3.4.0', @@ -43,6 +44,12 @@ REQUIRED_PACKAGES = [ 'termcolor >= 1.1.0', ] +if sys.byteorder == 'little': + # grpcio does not build correctly on big-endian machines due to lack of + # BoringSSL support. + # See https://github.com/tensorflow/tensorflow/issues/17882. 
+ REQUIRED_PACKAGES.append('grpcio >= 1.8.6') + project_name = 'tensorflow' if '--project_name' in sys.argv: project_name_idx = sys.argv.index('--project_name') @@ -92,7 +99,9 @@ TEST_PACKAGES = [ 'scipy >= 0.15.1', ] + class BinaryDistribution(Distribution): + def has_ext_modules(self): return True @@ -174,9 +183,9 @@ class InstallHeaders(Command): def find_files(pattern, root): """Return all the files matching pattern below root dir.""" - for path, _, files in os.walk(root): + for dirpath, _, files in os.walk(root): for filename in fnmatch.filter(files, pattern): - yield os.path.join(path, filename) + yield os.path.join(dirpath, filename) matches = ['../' + x for x in find_files('*', 'external') if '.py' not in x] diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index fe6b9407d6dc9c9d7a7fa51c2ce47a72d311e608..884dcc5d0c0b36207b7a76ec4225b39b9533d262 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -2,6 +2,7 @@ load("//third_party/gpus:cuda_configure.bzl", "cuda_configure") load("//third_party/tensorrt:tensorrt_configure.bzl", "tensorrt_configure") +load("//third_party:nccl/nccl_configure.bzl", "nccl_configure") load("//third_party/mkl:build_defs.bzl", "mkl_repository") load("//third_party/git:git_configure.bzl", "git_configure") load("//third_party/py:python_configure.bzl", "python_configure") @@ -13,6 +14,8 @@ load("//third_party:repo.bzl", "tf_http_archive") load("//third_party/clang_toolchain:cc_configure_clang.bzl", "cc_download_clang_toolchain") load("@io_bazel_rules_closure//closure/private:java_import_external.bzl", "java_import_external") load("@io_bazel_rules_closure//closure:defs.bzl", "filegroup_external") +load("//tensorflow/tools/def_file_filter:def_file_filter_configure.bzl", + "def_file_filter_configure") # Sanitize a dependency so that it works correctly from code that includes @@ -29,10 +32,15 @@ def tf_workspace(path_prefix="", tf_repo_name=""): 
cc_download_clang_toolchain(name="local_config_download_clang") cuda_configure(name="local_config_cuda") tensorrt_configure(name="local_config_tensorrt") + nccl_configure(name="local_config_nccl") git_configure(name="local_config_git") sycl_configure(name="local_config_sycl") python_configure(name="local_config_python") + # For windows bazel build + # TODO: Remove def file filter when TensorFlow can export symbols properly on Windows. + def_file_filter_configure(name = "local_config_def_file_filter") + # Point //external/local_config_arm_compiler to //external/arm_compiler arm_compiler_configure( name="local_config_arm_compiler", @@ -42,31 +50,31 @@ def tf_workspace(path_prefix="", tf_repo_name=""): mkl_repository( name = "mkl_linux", urls = [ - "https://mirror.bazel.build/intel/mkl-dnn/releases/download/v0.12/mklml_lnx_2018.0.1.20171227.tgz", - "https://github.com/intel/mkl-dnn/releases/download/v0.12/mklml_lnx_2018.0.1.20171227.tgz", + "https://mirror.bazel.build/github.com/intel/mkl-dnn/releases/download/v0.13/mklml_lnx_2018.0.2.20180127.tgz", + "https://github.com/intel/mkl-dnn/releases/download/v0.13/mklml_lnx_2018.0.2.20180127.tgz", ], - sha256 = "feacc3d82565c1231470359b42c696236fae873704e0b013436afba5fd4fd30f", - strip_prefix = "mklml_lnx_2018.0.1.20171227", + sha256 = "74844bd77294742bf2396ff040369d1aa4cdd9e826fcd38cf8398ae83564d146", + strip_prefix = "mklml_lnx_2018.0.2.20180127", build_file = clean_dep("//third_party/mkl:mkl.BUILD") ) mkl_repository( name = "mkl_windows", urls = [ - "https://mirror.bazel.build/intel/mkl-dnn/releases/download/v0.12/mklml_win_2018.0.1.20171227.zip", - "https://github.com/intel/mkl-dnn/releases/download/v0.12/mklml_win_2018.0.1.20171227.zip" + "https://mirror.bazel.build/github.com/intel/mkl-dnn/releases/download/v0.13/mklml_win_2018.0.2.20180127.zip", + "https://github.com/intel/mkl-dnn/releases/download/v0.13/mklml_win_2018.0.2.20180127.zip" ], - sha256 = "24bae8d7b22b431a654acadea43f2243c46ae6b1e5a73a4a936825f31d284ee4", 
- strip_prefix = "mklml_win_2018.0.1.20171227", + sha256 = "d8fbf0faa0684bffa3548005d05fe5cfe56ff9dbc0e15e7612d7ac01055a6ded", + strip_prefix = "mklml_win_2018.0.2.20180127", build_file = clean_dep("//third_party/mkl:mkl.BUILD") ) mkl_repository( name = "mkl_darwin", urls = [ - "https://mirror.bazel.build/intel/mkl-dnn/releases/download/v0.12/mklml_mac_2018.0.1.20171227.tgz", - "https://github.com/intel/mkl-dnn/releases/download/v0.12/mklml_mac_2018.0.1.20171227.tgz" + "https://mirror.bazel.build/github.com/intel/mkl-dnn/releases/download/v0.13/mklml_mac_2018.0.2.20180127.tgz", + "https://github.com/intel/mkl-dnn/releases/download/v0.13/mklml_mac_2018.0.2.20180127.tgz" ], - sha256 = "0e954ec6fd3dc5e37f64c4043f6b5613dd687558da3df1028b3b7c29ff5cf77f", - strip_prefix = "mklml_mac_2018.0.1.20171227", + sha256 = "aa740d71e14562bfea56e6829e6dc186e7487cbcf6748a88dec73826b7ec1943", + strip_prefix = "mklml_mac_2018.0.2.20180127", build_file = clean_dep("//third_party/mkl:mkl.BUILD") ) @@ -77,11 +85,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""): tf_http_archive( name = "mkl_dnn", urls = [ - "https://mirror.bazel.build/github.com/intel/mkl-dnn/archive/v0.12.tar.gz", - "https://github.com/intel/mkl-dnn/archive/v0.12.tar.gz", + "https://mirror.bazel.build/github.com/intel/mkl-dnn/archive/v0.13.tar.gz", + "https://github.com/intel/mkl-dnn/archive/v0.13.tar.gz", ], - sha256 = "86fa2a8c12a56e3b725945acedeaa82492746be02545aba6d710f097e013e19e", - strip_prefix = "mkl-dnn-0.12", + sha256 = "d2cfd93a70cfe86ebe054477c530c9b5c1218b70f75856eb6d1956c68ee89e8f", + strip_prefix = "mkl-dnn-0.13", build_file = clean_dep("//third_party/mkl_dnn:mkldnn.BUILD"), ) @@ -159,11 +167,12 @@ def tf_workspace(path_prefix="", tf_repo_name=""): tf_http_archive( name = "gemmlowp", urls = [ - "https://mirror.bazel.build/github.com/google/gemmlowp/archive/7c7c744640ddc3d0af18fb245b4d23228813a71b.zip", - "https://github.com/google/gemmlowp/archive/7c7c744640ddc3d0af18fb245b4d23228813a71b.zip", + # 
TODO (yongtang): uncomment once mirror.bazel.build is propagated. + # "https://mirror.bazel.build/github.com/google/gemmlowp/archive/38ebac7b059e84692f53e5938f97a9943c120d98.zip", + "https://github.com/google/gemmlowp/archive/38ebac7b059e84692f53e5938f97a9943c120d98.zip", ], - sha256 = "b852cc90259a7357c8a323f108f2cec6e85979fc3b18b5590b99e0130044b2cf", - strip_prefix = "gemmlowp-7c7c744640ddc3d0af18fb245b4d23228813a71b", + sha256 = "b87faa7294dfcc5d678f22a59d2c01ca94ea1e2a3b488c38a95a67889ed0a658", + strip_prefix = "gemmlowp-38ebac7b059e84692f53e5938f97a9943c120d98", ) tf_http_archive( @@ -202,33 +211,33 @@ def tf_workspace(path_prefix="", tf_repo_name=""): tf_http_archive( name = "jpeg", urls = [ - "https://mirror.bazel.build/github.com/libjpeg-turbo/libjpeg-turbo/archive/1.5.1.tar.gz", - "https://github.com/libjpeg-turbo/libjpeg-turbo/archive/1.5.1.tar.gz", + "https://mirror.bazel.build/github.com/libjpeg-turbo/libjpeg-turbo/archive/1.5.3.tar.gz", + "https://github.com/libjpeg-turbo/libjpeg-turbo/archive/1.5.3.tar.gz", ], - sha256 = "c15a9607892113946379ccea3ca8b85018301b200754f209453ab21674268e77", - strip_prefix = "libjpeg-turbo-1.5.1", + sha256 = "1a17020f859cb12711175a67eab5c71fc1904e04b587046218e36106e07eabde", + strip_prefix = "libjpeg-turbo-1.5.3", build_file = clean_dep("//third_party/jpeg:jpeg.BUILD"), ) tf_http_archive( name = "png_archive", urls = [ - "https://mirror.bazel.build/github.com/glennrp/libpng/archive/v1.2.53.tar.gz", - "https://github.com/glennrp/libpng/archive/v1.2.53.tar.gz", + "https://mirror.bazel.build/github.com/glennrp/libpng/archive/v1.6.34.tar.gz", + "https://github.com/glennrp/libpng/archive/v1.6.34.tar.gz", ], - sha256 = "716c59c7dfc808a4c368f8ada526932be72b2fcea11dd85dc9d88b1df1dfe9c2", - strip_prefix = "libpng-1.2.53", + sha256 = "e45ce5f68b1d80e2cb9a2b601605b374bdf51e1798ef1c2c2bd62131dfcf9eef", + strip_prefix = "libpng-1.6.34", build_file = clean_dep("//third_party:png.BUILD"), ) tf_http_archive( name = "org_sqlite", urls = [ 
- "https://mirror.bazel.build/www.sqlite.org/2017/sqlite-amalgamation-3200000.zip", - "http://www.sqlite.org/2017/sqlite-amalgamation-3200000.zip", + "https://mirror.bazel.build/www.sqlite.org/2018/sqlite-amalgamation-3230100.zip", + "https://www.sqlite.org/2018/sqlite-amalgamation-3230100.zip", ], - sha256 = "208780b3616f9de0aeb50822b7a8f5482f6515193859e91ed61637be6ad74fd4", - strip_prefix = "sqlite-amalgamation-3200000", + sha256 = "4239a1f69e5721d07d9a374eb84d594225229e54be4ee628da2995f4315d8dfc", + strip_prefix = "sqlite-amalgamation-3230100", build_file = clean_dep("//third_party:sqlite.BUILD"), ) @@ -307,18 +316,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): strip_prefix = "backports.weakref-1.0rc1/src", build_file = clean_dep("//third_party:backports_weakref.BUILD"), ) - - tf_http_archive( - name = "com_github_andreif_codegen", - urls = [ - "https://mirror.bazel.build/github.com/andreif/codegen/archive/1.0.tar.gz", - "https://github.com/andreif/codegen/archive/1.0.tar.gz", - ], - sha256 = "2dadd04a2802de27e0fe5a19b76538f6da9d39ff244036afa00c1bba754de5ee", - strip_prefix = "codegen-1.0", - build_file = clean_dep("//third_party:codegen.BUILD"), - ) - + filegroup_external( name = "org_python_license", licenses = ["notice"], # Python 2.0 @@ -430,11 +428,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""): tf_http_archive( name = "grpc", urls = [ - "https://mirror.bazel.build/github.com/grpc/grpc/archive/bd6bdf93279a39a8cd92978fd7c9d14eccd98fc2.tar.gz", - "https://github.com/grpc/grpc/archive/bd6bdf93279a39a8cd92978fd7c9d14eccd98fc2.tar.gz", + "https://mirror.bazel.build/github.com/grpc/grpc/archive/d184fa229d75d336aedea0041bd59cb93e7e267f.tar.gz", + "https://github.com/grpc/grpc/archive/d184fa229d75d336aedea0041bd59cb93e7e267f.tar.gz", ], - sha256 = "0a05bd355e4571b01d813dddffa38e57e689ac41b264dc9b1bd6ec66463ef5d6", - strip_prefix = "grpc-bd6bdf93279a39a8cd92978fd7c9d14eccd98fc2", + sha256 = 
"895b31310e718a61f7335759a778c068a6edde1c089883598a0830cbb7075673", + strip_prefix = "grpc-d184fa229d75d336aedea0041bd59cb93e7e267f", ) @@ -454,11 +452,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""): tf_http_archive( name = "llvm", urls = [ - "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/1c3cdea2f181d8e14ee184466c5fb237f1b4cda8.tar.gz", - "https://github.com/llvm-mirror/llvm/archive/1c3cdea2f181d8e14ee184466c5fb237f1b4cda8.tar.gz", + "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/c1e9b6f826c86c87a7e7173f1baf7e7df9f43e32.tar.gz", + "https://github.com/llvm-mirror/llvm/archive/c1e9b6f826c86c87a7e7173f1baf7e7df9f43e32.tar.gz", ], - sha256 = "1efbb9b05af88368be984d2f6526061d4a857181ef10f8841889a3a46869bb01", - strip_prefix = "llvm-1c3cdea2f181d8e14ee184466c5fb237f1b4cda8", + sha256 = "92b7c01074f694a77b4d664951d1ec071e30ef19c61e673158e95fbb6e447b54", + strip_prefix = "llvm-c1e9b6f826c86c87a7e7173f1baf7e7df9f43e32", build_file = clean_dep("//third_party/llvm:llvm.BUILD"), ) @@ -497,11 +495,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""): tf_http_archive( name = "zlib_archive", urls = [ - "https://mirror.bazel.build/zlib.net/zlib-1.2.8.tar.gz", - "http://zlib.net/fossils/zlib-1.2.8.tar.gz", + "https://mirror.bazel.build/zlib.net/zlib-1.2.11.tar.gz", + "https://zlib.net/zlib-1.2.11.tar.gz", ], - sha256 = "36658cb768a54c1d4dec43c3116c27ed893e88b02ecfcb44f2166f9c0b7f2a0d", - strip_prefix = "zlib-1.2.8", + sha256 = "c3e5e9fdd5004dcb542feda5ee4f0ff0744628baf8ed2dd5d66f8ca1197cb1a1", + strip_prefix = "zlib-1.2.11", build_file = clean_dep("//third_party:zlib.BUILD"), ) @@ -518,11 +516,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""): tf_http_archive( name = "snappy", urls = [ - "https://mirror.bazel.build/github.com/google/snappy/archive/1.1.4.tar.gz", - "https://github.com/google/snappy/archive/1.1.4.tar.gz", + "https://mirror.bazel.build/github.com/google/snappy/archive/1.1.7.tar.gz", + 
"https://github.com/google/snappy/archive/1.1.7.tar.gz", ], - sha256 = "2f7504c73d85bac842e893340333be8cb8561710642fc9562fccdd9d2c3fcc94", - strip_prefix = "snappy-1.1.4", + sha256 = "3dfa02e873ff51a11ee02b9ca391807f0c8ea0529a4924afa645fbf97163f9d4", + strip_prefix = "snappy-1.1.7", build_file = clean_dep("//third_party:snappy.BUILD"), ) @@ -534,7 +532,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "2ca86fb6179ecbff789cc67c836139c1bbc0324ed8c04643405a30bf26325176", strip_prefix = "nccl-03d856977ecbaac87e598c0c4bafca96761b9ac7", - build_file = clean_dep("//third_party:nccl.BUILD"), + build_file = clean_dep("//third_party:nccl/nccl_archive.BUILD"), ) tf_http_archive( @@ -696,6 +694,16 @@ def tf_workspace(path_prefix="", tf_repo_name=""): build_file = clean_dep("//third_party/flatbuffers:flatbuffers.BUILD"), ) + native.new_http_archive( + name = "double_conversion", + urls = [ + "https://github.com/google/double-conversion/archive/3992066a95b823efc8ccc1baf82a1cfc73f6e9b8.zip", + ], + sha256 = "2f7fbffac0d98d201ad0586f686034371a6d152ca67508ab611adc2386ad30de", + strip_prefix = "double-conversion-3992066a95b823efc8ccc1baf82a1cfc73f6e9b8", + build_file = clean_dep("//third_party:double_conversion.BUILD") + ) + tf_http_archive( name = "tflite_mobilenet", sha256 = "23f814d1c076bdf03715dfb6cab3713aa4fbdf040fd5448c43196bd2e97a4c1b", @@ -755,6 +763,10 @@ def tf_workspace(path_prefix="", tf_repo_name=""): name = "grpc_cpp_plugin", actual = "@grpc//:grpc_cpp_plugin", ) + native.bind( + name = "grpc_python_plugin", + actual = "@grpc//:grpc_python_plugin", + ) # gRPC has three empty C++ functions which it wants the user to define # at build time. 
https://github.com/grpc/grpc/issues/13590 diff --git a/third_party/codegen.BUILD b/third_party/codegen.BUILD deleted file mode 100644 index df436c81635a71421a67fa8d8c84eb8dfcc97d7b..0000000000000000000000000000000000000000 --- a/third_party/codegen.BUILD +++ /dev/null @@ -1,16 +0,0 @@ -# -*- mode: python; -*- -# -# Description: -# Extension to ast that allow ast -> python code generation. - -package(default_visibility = ["//visibility:public"]) - -licenses(["notice"]) # New BSD - -exports_files(["LICENSE"]) - -py_library( - name = "com_github_andreif_codegen", - srcs = glob(["codegen.py"]), - srcs_version = "PY2AND3", -) diff --git a/third_party/double_conversion.BUILD b/third_party/double_conversion.BUILD new file mode 100644 index 0000000000000000000000000000000000000000..9f905216c036bf5e48e1a1b94cd3dd61f3e53c41 --- /dev/null +++ b/third_party/double_conversion.BUILD @@ -0,0 +1,38 @@ +# Bazel(http://bazel.io) BUILD file + +licenses(["notice"]) + +exports_files(["LICENSE"]) + +cc_library( + name = "double-conversion", + srcs = [ + "double-conversion/bignum.cc", + "double-conversion/bignum-dtoa.cc", + "double-conversion/cached-powers.cc", + "double-conversion/diy-fp.cc", + "double-conversion/double-conversion.cc", + "double-conversion/fast-dtoa.cc", + "double-conversion/fixed-dtoa.cc", + "double-conversion/strtod.cc", + "double-conversion/utils.h", + ], + hdrs = [ + "double-conversion/bignum.h", + "double-conversion/bignum-dtoa.h", + "double-conversion/cached-powers.h", + "double-conversion/diy-fp.h", + "double-conversion/double-conversion.h", + "double-conversion/fast-dtoa.h", + "double-conversion/fixed-dtoa.h", + "double-conversion/ieee.h", + "double-conversion/strtod.h", + ], + includes = [ + ".", + ], + linkopts = [ + "-lm", + ], + visibility = ["//visibility:public"], +) diff --git a/third_party/llvm/llvm.BUILD b/third_party/llvm/llvm.BUILD index 28293a36593d8fa67a2d85631a0769e03d508354..cbb1b2fe429e90088510661fe333f208f26ff061 100644 --- 
a/third_party/llvm/llvm.BUILD +++ b/third_party/llvm/llvm.BUILD @@ -162,13 +162,6 @@ all_cmake_vars = select({ }) # Performs CMake variable substitutions on configuration header files. -expand_cmake_vars( - name = "datatypes_gen", - src = "include/llvm/Support/DataTypes.h.cmake", - cmake_vars = all_cmake_vars, - dst = "include/llvm/Support/DataTypes.h", -) - expand_cmake_vars( name = "config_gen", src = "include/llvm/Config/config.h.cmake", @@ -305,9 +298,7 @@ cc_binary( srcs = glob([ "utils/TableGen/*.cpp", "utils/TableGen/*.h", - ]) + [ - "lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h", - ], + ]), linkopts = [ "-lm", "-ldl", @@ -2014,9 +2005,7 @@ cc_library( "include/llvm/Support/WasmRelocs/*.def", ]) + [ "include/llvm/BinaryFormat/MachO.def", - "include/llvm/Support/DataTypes.h", "include/llvm/Support/VCSRevision.h", - "include/llvm/ExecutionEngine/ObjectMemoryBuffer.h", ], deps = [ ":config", @@ -2063,6 +2052,7 @@ cc_library( "include/llvm/Target/*.def", "include/llvm/Target/*.inc", "include/llvm/CodeGen/*.def", + "include/llvm/CodeGen/*.inc", ]), deps = [ ":analysis", diff --git a/third_party/nccl/LICENSE b/third_party/nccl/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..146d9b765c5db44c2f5bea8fa5010eef5ec0c68f --- /dev/null +++ b/third_party/nccl/LICENSE @@ -0,0 +1,203 @@ +Copyright 2018 The TensorFlow Authors. All rights reserved. + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. 
+ + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2018, The TensorFlow Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/third_party/nccl.BUILD b/third_party/nccl/nccl_archive.BUILD similarity index 95% rename from third_party/nccl.BUILD rename to third_party/nccl/nccl_archive.BUILD index b2b8e188248f90805bc2904dca9111550a7dfed8..a05899e38d531c066c774302e4ffd75ce7e482e7 100644 --- a/third_party/nccl.BUILD +++ b/third_party/nccl/nccl_archive.BUILD @@ -43,6 +43,7 @@ cc_library( "-Iexternal/nccl_archive/src", "-O3", ] + cuda_default_copts(), + include_prefix = "third_party/nccl", linkopts = select({ "@org_tensorflow//tensorflow:android": [ "-pie", @@ -61,6 +62,7 @@ cc_library( "-lrt", ], }), + strip_include_prefix = "src", visibility = ["//visibility:public"], deps = ["@local_config_cuda//cuda:cuda_headers"], ) diff --git a/third_party/nccl/nccl_configure.bzl b/third_party/nccl/nccl_configure.bzl new file mode 100644 index 0000000000000000000000000000000000000000..9dfcb1836989d6c092739100e00e7000e6556c10 --- /dev/null +++ b/third_party/nccl/nccl_configure.bzl @@ -0,0 +1,172 @@ +# -*- Python -*- +"""Repository rule for NCCL configuration. + +`nccl_configure` depends on the following environment variables: + + * `TF_NCCL_VERSION`: The NCCL version. + * `NCCL_INSTALL_PATH`: The installation path of the NCCL library. 
+""" + +load( + "//third_party/gpus:cuda_configure.bzl", + "auto_configure_fail", + "find_cuda_define", + "matches_version", +) + +_NCCL_INSTALL_PATH = "NCCL_INSTALL_PATH" +_TF_NCCL_VERSION = "TF_NCCL_VERSION" + +_DEFINE_NCCL_MAJOR = "#define NCCL_MAJOR" +_DEFINE_NCCL_MINOR = "#define NCCL_MINOR" +_DEFINE_NCCL_PATCH = "#define NCCL_PATCH" + +_NCCL_DUMMY_BUILD_CONTENT = """ +filegroup( + name = "LICENSE", + visibility = ["//visibility:public"], +) + +cc_library( + name = "nccl", + visibility = ["//visibility:public"], +) +""" + +_NCCL_ARCHIVE_BUILD_CONTENT = """ +filegroup( + name = "LICENSE", + data = ["@nccl_archive//:LICENSE.txt"], + visibility = ["//visibility:public"], +) + +alias( + name = "nccl", + actual = "@nccl_archive//:nccl", + visibility = ["//visibility:public"], +) +""" + +_NCCL_LOCAL_BUILD_TEMPLATE = """ +filegroup( + name = "LICENSE", + data = ["nccl/NCCL-SLA.txt"], + visibility = ["//visibility:public"], +) + +cc_library( + name = "nccl", + srcs = ["nccl/lib/libnccl.so.%s"], + hdrs = ["nccl/include/nccl.h"], + include_prefix = "third_party/nccl", + strip_include_prefix = "nccl/include", + deps = [ + "@local_config_cuda//cuda:cuda_headers", + ], + visibility = ["//visibility:public"], +) +""" + + +def _find_nccl_header(repository_ctx, nccl_install_path): + """Finds the NCCL header on the system. + + Args: + repository_ctx: The repository context. + nccl_install_path: The NCCL library install directory. + + Returns: + The path to the NCCL header. + """ + header_path = repository_ctx.path("%s/include/nccl.h" % nccl_install_path) + if not header_path.exists: + auto_configure_fail("Cannot find %s" % str(header_path)) + return header_path + + +def _check_nccl_version(repository_ctx, nccl_install_path, nccl_version): + """Checks whether the header file matches the specified version of NCCL. + + Args: + repository_ctx: The repository context. + nccl_install_path: The NCCL library install directory. + nccl_version: The expected NCCL version. 
+ + Returns: + A string containing the library version of NCCL. + """ + header_path = _find_nccl_header(repository_ctx, nccl_install_path) + header_dir = str(header_path.realpath.dirname) + major_version = find_cuda_define(repository_ctx, header_dir, "nccl.h", + _DEFINE_NCCL_MAJOR) + minor_version = find_cuda_define(repository_ctx, header_dir, "nccl.h", + _DEFINE_NCCL_MINOR) + patch_version = find_cuda_define(repository_ctx, header_dir, "nccl.h", + _DEFINE_NCCL_PATCH) + header_version = "%s.%s.%s" % (major_version, minor_version, patch_version) + if not matches_version(nccl_version, header_version): + auto_configure_fail( + ("NCCL library version detected from %s/nccl.h (%s) does not match " + + "TF_NCCL_VERSION (%s). To fix this rerun configure again.") % + (header_dir, header_version, nccl_version)) + + +def _find_nccl_lib(repository_ctx, nccl_install_path, nccl_version): + """Finds the given NCCL library on the system. + + Args: + repository_ctx: The repository context. + nccl_install_path: The NCCL library installation directory. + nccl_version: The version of NCCL library files as returned + by _nccl_version. + + Returns: + The path to the NCCL library. + """ + lib_path = repository_ctx.path("%s/lib/libnccl.so.%s" % (nccl_install_path, + nccl_version)) + if not lib_path.exists: + auto_configure_fail("Cannot find NCCL library %s" % str(lib_path)) + return lib_path + + +def _nccl_configure_impl(repository_ctx): + """Implementation of the nccl_configure repository rule.""" + if _TF_NCCL_VERSION not in repository_ctx.os.environ: + # Add a dummy build file to make bazel query happy. + repository_ctx.file("BUILD", _NCCL_DUMMY_BUILD_CONTENT) + return + + nccl_version = repository_ctx.os.environ[_TF_NCCL_VERSION].strip() + if matches_version("1", nccl_version): + # Alias to GitHub target from @nccl_archive. 
+ if not matches_version(nccl_version, "1.3"): + auto_configure_fail( + "NCCL from GitHub must use version 1.3 (got %s)" % nccl_version) + repository_ctx.file("BUILD", _NCCL_ARCHIVE_BUILD_CONTENT) + else: + # Create target for locally installed NCCL. + nccl_install_path = repository_ctx.os.environ[_NCCL_INSTALL_PATH].strip() + _check_nccl_version(repository_ctx, nccl_install_path, nccl_version) + repository_ctx.symlink(nccl_install_path, "nccl") + repository_ctx.file("BUILD", _NCCL_LOCAL_BUILD_TEMPLATE % nccl_version) + + +nccl_configure = repository_rule( + implementation=_nccl_configure_impl, + environ=[ + _NCCL_INSTALL_PATH, + _TF_NCCL_VERSION, + ], +) +"""Detects and configures the NCCL configuration. + +Add the following to your WORKSPACE FILE: + +```python +nccl_configure(name = "local_config_nccl") +``` + +Args: + name: A unique name for this workspace rule. +""" diff --git a/third_party/png.BUILD b/third_party/png.BUILD index 6a7ad719aa7d02855e0b319553ea8e86209553c1..76ab32d69c35055b3796b8f612133394758db330 100644 --- a/third_party/png.BUILD +++ b/third_party/png.BUILD @@ -9,15 +9,20 @@ cc_library( name = "png", srcs = [ "png.c", + "pngdebug.h", "pngerror.c", "pngget.c", + "pnginfo.h", + "pnglibconf.h", "pngmem.c", "pngpread.c", + "pngpriv.h", "pngread.c", "pngrio.c", "pngrtran.c", "pngrutil.c", "pngset.c", + "pngstruct.h", "pngtrans.c", "pngwio.c", "pngwrite.c", @@ -33,3 +38,10 @@ cc_library( visibility = ["//visibility:public"], deps = ["@zlib_archive//:zlib"], ) + +genrule( + name = "snappy_stubs_public_h", + srcs = ["scripts/pnglibconf.h.prebuilt"], + outs = ["pnglibconf.h"], + cmd = "sed -e 's/PNG_ZLIB_VERNUM 0/PNG_ZLIB_VERNUM 0x12b0/' $< >$@", +) diff --git a/third_party/repo.bzl b/third_party/repo.bzl index aa178fa8cab92d9d299e5ed09927d8572816a0af..36f5aa5bdee43a511abf5634af85643ac7e11cfc 100644 --- a/third_party/repo.bzl +++ b/third_party/repo.bzl @@ -17,6 +17,7 @@ _SINGLE_URL_WHITELIST = depset([ "arm_compiler", "ortools_archive", + "gemmlowp", ]) 
def _is_windows(ctx): @@ -68,7 +69,7 @@ def _apply_delete(ctx, paths): _execute_and_check_ret_code(ctx, cmd) def _tf_http_archive(ctx): - if ("mirror.bazel.build" not in ctx.attr.urls[0] or + if ("mirror.bazel.build" not in ctx.attr.urls[0] and (len(ctx.attr.urls) < 2 and ctx.attr.name not in _SINGLE_URL_WHITELIST)): fail("tf_http_archive(urls) must have redundant URLs. The " + diff --git a/third_party/snappy.BUILD b/third_party/snappy.BUILD index fd48ed8941e159a8d6176ef3f4e1982d6600e1c2..cc11f52d0eb3e04ad1fde6b2c8ba41e4baad5417 100644 --- a/third_party/snappy.BUILD +++ b/third_party/snappy.BUILD @@ -4,25 +4,12 @@ licenses(["notice"]) # BSD 3-Clause exports_files(["COPYING"]) -config_setting( - name = "windows", - values = {"cpu": "x64_windows"}, - visibility = ["//visibility:public"], -) - -config_setting( - name = "windows_msvc", - values = {"cpu": "x64_windows_msvc"}, - visibility = ["//visibility:public"], -) - cc_library( name = "snappy", srcs = [ + "config.h", "snappy.cc", "snappy.h", - "snappy-c.cc", - "snappy-c.h", "snappy-internal.h", "snappy-sinksource.cc", "snappy-sinksource.h", @@ -32,30 +19,85 @@ cc_library( ], hdrs = ["snappy.h"], copts = select({ - ":windows": [], - ":windows_msvc": [], + "@org_tensorflow//tensorflow:windows": [ + "/DHAVE_CONFIG_H", + "/EHsc", + ], + "@org_tensorflow//tensorflow:windows_msvc": [ + "/DHAVE_CONFIG_H", + "/EHsc", + ], "//conditions:default": [ + "-DHAVE_CONFIG_H", + "-fno-exceptions", + "-Wno-sign-compare", "-Wno-shift-negative-value", "-Wno-implicit-function-declaration", ], }), ) +genrule( + name = "config_h", + outs = ["config.h"], + cmd = "\n".join([ + "cat <<'EOF' >$@", + "#define HAVE_STDDEF_H 1", + "#define HAVE_STDINT_H 1", + "", + "#ifdef __has_builtin", + "# if !defined(HAVE_BUILTIN_EXPECT) && __has_builtin(__builtin_expect)", + "# define HAVE_BUILTIN_EXPECT 1", + "# endif", + "# if !defined(HAVE_BUILTIN_CTZ) && __has_builtin(__builtin_ctzll)", + "# define HAVE_BUILTIN_CTZ 1", + "# endif", + "#elif 
defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ >= 4)", + "# ifndef HAVE_BUILTIN_EXPECT", + "# define HAVE_BUILTIN_EXPECT 1", + "# endif", + "# ifndef HAVE_BUILTIN_CTZ", + "# define HAVE_BUILTIN_CTZ 1", + "# endif", + "#endif", + "", + "#ifdef __has_include", + "# if !defined(HAVE_BYTESWAP_H) && __has_include()", + "# define HAVE_BYTESWAP_H 1", + "# endif", + "# if !defined(HAVE_UNISTD_H) && __has_include()", + "# define HAVE_UNISTD_H 1", + "# endif", + "# if !defined(HAVE_SYS_ENDIAN_H) && __has_include()", + "# define HAVE_SYS_ENDIAN_H 1", + "# endif", + "# if !defined(HAVE_SYS_MMAN_H) && __has_include()", + "# define HAVE_SYS_MMAN_H 1", + "# endif", + "# if !defined(HAVE_SYS_UIO_H) && __has_include()", + "# define HAVE_SYS_UIO_H 1", + "# endif", + "#endif", + "", + "#ifndef SNAPPY_IS_BIG_ENDIAN", + "# ifdef __s390x__", + "# define SNAPPY_IS_BIG_ENDIAN 1", + "# elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__", + "# define SNAPPY_IS_BIG_ENDIAN 1", + "# endif", + "#endif", + "EOF", + ]), +) + genrule( name = "snappy_stubs_public_h", srcs = ["snappy-stubs-public.h.in"], outs = ["snappy-stubs-public.h"], cmd = ("sed " + - "-e 's/@ac_cv_have_stdint_h@/1/g' " + - "-e 's/@ac_cv_have_stddef_h@/1/g' " + - "-e 's/@ac_cv_have_stdint_h@/1/g' " + - select({ - "@org_tensorflow//tensorflow:windows": "-e 's/@ac_cv_have_sys_uio_h@/0/g' ", - "@org_tensorflow//tensorflow:windows_msvc": "-e 's/@ac_cv_have_sys_uio_h@/0/g' ", - "//conditions:default": "-e 's/@ac_cv_have_sys_uio_h@/1/g' ", - }) + - "-e 's/@SNAPPY_MAJOR@/1/g' " + - "-e 's/@SNAPPY_MINOR@/1/g' " + - "-e 's/@SNAPPY_PATCHLEVEL@/4/g' " + + "-e 's/$${\\(.*\\)_01}/\\1/g' " + + "-e 's/$${SNAPPY_MAJOR}/1/g' " + + "-e 's/$${SNAPPY_MINOR}/1/g' " + + "-e 's/$${SNAPPY_PATCHLEVEL}/4/g' " + "$< >$@"), ) diff --git a/third_party/zlib.BUILD b/third_party/zlib.BUILD index d164ee719c1fa4a304b82f223a432b9d087db827..e8048dd98adcca2ad6fa07fd582d2090901660e3 100644 
--- a/third_party/zlib.BUILD +++ b/third_party/zlib.BUILD @@ -2,18 +2,6 @@ package(default_visibility = ["//visibility:public"]) licenses(["notice"]) # BSD/MIT-like license (for zlib) -config_setting( - name = "windows", - values = {"cpu": "x64_windows"}, - visibility = ["//visibility:public"], -) - -config_setting( - name = "windows_msvc", - values = {"cpu": "x64_windows_msvc"}, - visibility = ["//visibility:public"], -) - cc_library( name = "zlib", srcs = [ @@ -45,8 +33,8 @@ cc_library( ], hdrs = ["zlib.h"], copts = select({ - ":windows": [], - ":windows_msvc": [], + "@org_tensorflow//tensorflow:windows": [], + "@org_tensorflow//tensorflow:windows_msvc": [], "//conditions:default": [ "-Wno-shift-negative-value", "-DZ_HAVE_UNISTD_H",